def calculate_change(data, time_delta):
    '''
    Compute the change of a revised (vintage) series over `time_delta` calendar days,
    in a point-in-time fashion.

    Parameters
    ----------
    data : DataFrame or None
        Release history with columns 'time_index' (observation period),
        'realtime_start' (publication date) and 'value'.
    time_delta : int
        Horizon in calendar days over which the change is measured.

    Returns
    -------
    Series or None
        Outlier-trimmed change series, or None if `data` is None or no
        changes could be computed.
    '''
    if data is not None:
        # Order vintages by observation period, then publication date.
        data = data.sort_values(['time_index', 'realtime_start'])
        # For each publication date keep the latest observation period available then.
        release = data.groupby('realtime_start').agg(lambda x:x.sort_values('time_index').iloc[-1]).reset_index()
        history = pd.Series()
        for idx in release.time_index:
            subset = data[data.time_index == idx]
            # Date at which the comparison value becomes effective.
            effective_date = subset.realtime_start.min() + timedelta(time_delta)
            subset2 = subset[subset.realtime_start >= effective_date]
            # Use the first vintage published on/after the effective date,
            # falling back to the last known vintage if none exists yet.
            history.loc[effective_date] = subset.value.iloc[-1] if subset2.empty else subset2.value.iloc[0]
        release.index = release.realtime_start
        history = tu.resample(history, release)
        # Change = latest released value minus the value known `time_delta` days earlier.
        ans = release.value - history
        # Average publication lag in days (used to place the historic backfill).
        td = (release['realtime_start'] - release['time_index']).mean().days
        # First-published value per observation period, restricted to periods
        # that predate the release window computed above.
        historic = data.groupby('time_index').agg(lambda x:x.sort_values('realtime_start').iloc[0])
        historic = historic.loc[historic.index < release.time_index.min()]
        observation = historic.value
        pasttime = observation.copy()
        # Shift the index back to align each observation with its past comparison point.
        pasttime.index -= timedelta(time_delta)
        ans2 = pasttime - tu.resample(observation, pasttime)
        # Re-date the backfilled changes using the mean publication lag.
        ans2.index = observation.index + timedelta(td)
        # Prepend the approximate historic changes to the exact point-in-time ones.
        ans = pd.concat([ans2, ans], axis=0)
        return None if ans.empty else tu.remove_outliers(ans)
    else:
        return None
def get_asset_returns(self, lookback, *args, **kwargs):
    """Return `lookback`-period asset returns on the strategy timeline.

    Prices are first aligned onto ``self.timeline``; returns are simple
    differences when ``self.simple_returns`` is set, otherwise relative
    changes over the lookback window. Extra ``*args``/``**kwargs`` are
    accepted for interface compatibility and ignored.
    """
    prices = tu.resample(self.asset_prices, self.timeline)
    delta = prices.diff(lookback)
    if self.simple_returns:
        return delta
    return delta / prices.shift(lookback)
def load_asset_prices(self):
    """Load asset prices via the configured loader and cache their daily
    differences, aligned to the strategy timeline, on ``self._asset_returns``."""
    logger.info('Loading asset prices')
    prices = self.asset_data_loader(self.assets,
                                    start_date=self._load_start,
                                    end_date=self._load_end)
    self.asset_prices = prices
    aligned = tu.resample(prices, self.timeline)
    self._asset_returns = aligned.diff()
def cache_series_release_data(series_name):
    """Derive release-based series (first release, extended release, 5-day and
    annual changes, revision) for `series_name` and store them in the cache table.

    Each derived series is suffixed (e.g. '|release', '|change') and all
    non-None results are concatenated column-wise before storage. Nothing is
    stored when the raw release data is missing or no derivation succeeds.
    """
    logger.info('Caching derived series release data - %s' % series_name)
    data = get_series_all_release(series_name)
    if data is not None:
        ans = []
        release = calculate_first_release(data)
        if release is not None:
            release.name = series_name + '|release'
            ans.append(release)
        release = calculate_extended_first_release(data)
        if release is not None:
            release.name = series_name + '|extendedrelease'
            ans.append(release)
        change = calculate_change(data, 5)  # 5 calendar days ~ weekly change
        if change is not None:
            change.name = series_name + '|change'
            ans.append(change)
        change = calculate_change(data, 365)  # year-over-year change
        if change is not None:
            change.name = series_name + '|annualchange'
            ans.append(change)
        revision = calculate_revision(data)
        if revision is not None:
            # NOTE(review): `release` here is the *extended* first release (it was
            # reassigned above) and may be None if that calculation failed —
            # tu.resample would then receive None. Confirm this is intended.
            revision = tu.resample(revision, release, carry_forward=False).fillna(0.)
            revision.name = series_name + '|revision'
            ans.append(revision)
        if len(ans) > 0:
            ans = pd.concat(ans, axis=1)
            logger.info('Storing derived series release data - %s' % series_name)
            tu.store_timeseries(ans, DATABASE_NAME, CACHE_TABLE_NAME)
def plot_signal_forecasting_power(data, lookback, bins):
    """Bin-plot lagged signal values against forward asset returns over the
    last `lookback` observations, marking the latest signal level."""
    signal = data['signal']
    # Forward returns: cumulative asset returns aligned to signal dates, then differenced.
    forward = tu.resample(data['returns']['Asset Returns'].cumsum(), signal).diff()
    vu.bin_plot(signal.iloc[-lookback:].shift(), forward.iloc[-lookback:], bins)
    # Dashed vertical line at the most recent signal value.
    plt.axvline(signal.iloc[-1, 0], ls='--')
    plt.xlabel('Signal')
    plt.ylabel('Forward asset returns')
    plt.title('Signal Forecasting Relationship', weight='bold')
def run_simulation(self):
    """Build two position series from the signal sign: the original sign
    position and a 'Reversal' variant that doubles exposure wherever the
    signed position lost money."""
    aligned = tu.resample(self.signal, self.timeline).iloc[:, 0]
    original = np.sign(aligned)
    reversal = original.copy()
    signed_pnl = self._asset_returns.iloc[:, 0] * original
    # Double the position on dates where the original sign position lost.
    reversal[signed_pnl < 0.] *= 2.
    original.name = 'Original'
    reversal.name = 'Reversal'
    self.positions = pd.concat([original, reversal], axis=1)
def plot_performance_autocorrelation(data, bins):
    """Bin-plot past weekly signed performance against the following week's
    signed performance to visualise performance autocorrelation."""
    weekly = data['returns']['Asset Returns'].resample('W').sum()
    sig = tu.resample(data['signal'].iloc[:, 0], weekly)
    past = np.sign(sig) * weekly
    past.name = 'Past'
    future = np.sign(sig) * weekly.shift(-1)
    future.name = 'Future'
    vu.bin_plot(past, future, bins)
    plt.title('Past vs Future performance')
def load_dataset(self):
    """Load returns, derive a 13-period rolling drawdown, and attach a
    fundamentals-based score aligned to the return panel."""
    logger.info('Loading returns')
    self.rtn, self.rm, self.vol, self.volume = get_dataset(
        self.universe, max_spread=self.max_spread)
    cum = self.rtn.cumsum()
    # Drawdown: distance from the 13-period running peak of cumulative returns.
    self.dd = cum.rolling(13, min_periods=1).max() - cum
    logger.info('Loading fundamental data')
    self.financials = load_financials(self.universe)
    overall = get_financials_overall_score(self.financials)
    self.score = tu.resample(overall, self.rtn).reindex(self.rtn.columns, axis=1)
def load_dataset(self):
    """Load returns, fundamentals score and channel signals, all reindexed
    to the return panel's columns."""
    logger.info('Loading returns')
    self.rtn, self.rm, self.vol, self.volume = cross.get_dataset(
        self.universe, max_spread=self.max_spread)
    logger.info('Loading fundamental data')
    self.financials = cross.load_financials(self.universe)
    overall = cross.get_financials_overall_score(self.financials)
    self.score = tu.resample(overall, self.rtn).reindex(self.rtn.columns, axis=1)
    signal, position = load_channel_signal(self.universe)
    # Missing tickers get a neutral (zero) signal / position.
    self.sig = signal.reindex(self.rtn.columns, axis=1).fillna(0.)
    self.z = position.reindex(self.rtn.columns, axis=1).fillna(0.)
def calculate_returns(self):
    """Simulate portfolio PnL and alpha from the loaded signals.

    Positions are built from signals (masked where returns are missing),
    carried forward up to the holding period, vol-scaled, and aligned to the
    stock-return index. PnL and per-factor alpha are then accumulated from
    ``self.start_date`` onward and summarised via ``pu.get_returns_analytics``.
    No-op when ``self.signals`` is None.
    """
    if self.signals is not None:
        logger.info('Simulating portfolio')
        # Mask signals where the (weekly) return series has no data.
        self._pos = calculate_signal_positions(self.signals[~self._r.isnull()], self.top, self.long_only)
        # Carry positions for at most `holding_period` periods, scale by vol,
        # then align onto the stock-return index.
        self.positions = tu.resample(self._pos.ffill(limit=self.holding_period).divide(self.stock_vol), self.stock_returns)
        self.stock_pnl = self.stock_returns.mul(self.positions)[self.start_date:]
        self.pnl = self.stock_pnl.sum(axis=1)
        self.pnl.name = 'PnL'
        # Alpha columns are multi-level: multiply per-stock (level 1), then
        # aggregate per alpha factor (level 0).
        self.alpha = self.stock_alpha.mul(self.positions, level=1, axis=1).groupby(axis=1, level=0).sum()[self.start_date:]
        tmp = pd.concat([self.pnl, self.alpha], axis=1)
        self.analytics = pu.get_returns_analytics(tmp)
def calculate_returns(self):
    """Simulate per-strategy returns: each positions column is aligned to the
    business-day asset returns, lagged one period, and multiplied by the
    first asset's return series."""
    logger.info('Simulating strategy returns')
    self.asset_returns = self.asset_prices.resample(
        'B').last().ffill().diff()
    first_valid = self.positions.first_valid_index()
    # Start at whichever is later: the configured start or the first position.
    start_date = self.start_date if self.start_date > first_valid else first_valid
    legs = []
    for name in self.positions.columns:
        # Lag positions by one period so returns are realised after positioning.
        leg = tu.resample(self.positions[name],
                          self.asset_returns).shift() * self.asset_returns.iloc[:, 0]
        leg.name = name
        legs.append(leg)
    self.strategy_returns = pd.concat(legs, axis=1)[start_date:]
    self.asset_returns = self.asset_returns[start_date:]
def create_estimation_data(self, param=None):
    """Build the estimation matrix: one resampled column per input ticker,
    plus (when ``self.signal_model`` is set) signed historical returns
    ``sign(series) * asset_return`` per asset column.

    Parameters
    ----------
    param : dict, optional
        Keyword arguments forwarded to ``self.data_transform_func`` and
        ``self.get_asset_returns``. Defaults to an empty dict.

    Returns
    -------
    DataFrame
        Column-wise concatenation of all generated series.
    """
    ans = []
    if param is None:
        param = {}
    # Hoisted out of the loop: asset returns do not depend on the ticker,
    # so recomputing them per ticker was redundant work.
    asset_returns = self.get_asset_returns(**param) if self.signal_model else None
    for ticker in self.inputs:
        data = self.dataset[ticker]
        if data is not None:
            if self.data_transform_func is not None:
                data = self.data_transform_func(data, **param)
            # Renamed from `input` — avoid shadowing the builtin.
            series = tu.resample(data, self.timeline)
            ans.append(series)
            if self.signal_model:
                for c in asset_returns.columns:
                    hisrtn = np.sign(series) * asset_returns[c]
                    hisrtn.name = c + ticker
                    ans.append(hisrtn)
    return pd.concat(ans, axis=1)
def calculate_returns(self):
    """Simulate strategy returns from the configured position component.

    Builds business-day asset returns (simple differences or relative
    changes depending on ``self.simple_returns``), derives positions from
    the signal, lags them one period to avoid look-ahead, and stores
    in-sample and out-of-sample strategy/asset return slices.
    """
    logger.info('Simulating strategy returns')
    p = self.asset_prices.resample('B').last().ffill()
    self.asset_returns = p.diff(
    ) if self.simple_returns else p.diff() / p.shift()
    positions = self.position_component(
        **{
            'signal': self.signal,
            'normalized_signal': self.normalized_signal
        })
    # BUG FIX: positions were previously shifted here AND again when computing
    # strategy returns, lagging them two periods instead of one. The sibling
    # calculate_returns implementation shifts exactly once; do the same —
    # store unlagged positions and apply the single shift below.
    self.positions = tu.resample(positions, self.asset_returns).fillna(0.)
    start_date = self.start_date if self.start_date > self.positions.first_valid_index(
    ) else self.positions.first_valid_index()
    self.strategy_returns = self.positions.shift() * self.asset_returns
    self.strategy_returns = self.strategy_returns[start_date:]
    self.oos_strategy_returns = self.strategy_returns[self.sample_date:]
    self.asset_returns = self.asset_returns[start_date:]
    self.oos_asset_returns = self.asset_returns[self.sample_date:]
def run_ftse250_check(capital=200):
    """Run the FTSE250 stock package and email a PnL summary with positions.

    Parameters
    ----------
    capital : int, optional
        Capital figure passed to ``sm.run_package`` (default 200).

    Side effects: generates a PnL plot file and sends an HTML email with the
    recent PnL table, the plot, and one positions table per strategy.
    """
    r, rm, posvol, volume = sm.get_ftse250_data()
    f = sm.get_fundamentals('FTSE250')
    # Align the fundamentals score onto the return index.
    score = tu.resample(f, r)
    sig_date, pos, pnls = sm.run_package(r, rm, posvol, volume, score, capital)
    # Last 6 rows of PnL, expressed in percent with 2 decimals.
    table = np.round(100. * pd.concat(pnls, axis=1).iloc[-6:], 2)
    table.columns = [x + ' (%)' for x in table.columns]
    table.index = [x.strftime('%Y-%m-%d') for x in table.index]
    filename = plot_pnl(pnls)
    mail = Email('*****@*****.**', ['*****@*****.**'], 'FTSE250 Stocks')
    mail.add_date(dt.today())
    mail.add_image(filename, 600, 400)
    mail.add_text('PnL Summary', bold=True)
    mail.add_table(table, width=700)
    # One positions table per strategy, titled with the matching PnL name.
    for i, x in enumerate(pos):
        mail.add_text('%s Positions' % pnls[i].name)
        table2 = np.round(x.fillna(0.))
        mail.add_table(table2, width=400)
    mail.send_email()
def load_stock_data(self):
    """Load UK stock returns, volumes and alpha, then derive weekly
    (vol-normalised) return panels used by the strategy.

    Populates: market_returns, stock_returns, asset_names, stock_alpha,
    stock_volume, _r/_rs (weekly raw/alpha returns), stock_vol (floored
    weekly vol), r/rs (vol-normalised) and rm (cross-sectionally demeaned rs).
    """
    logger.info('Loading stock returns')
    # All sources are loaded with a one-year warm-up before start_date.
    r = stocks.load_google_returns(self.start_date - relativedelta(years=1), self.end_date, data_table=stocks.UK_STOCKS)
    rx = stocks.load_google_returns(self.start_date - relativedelta(years=1), self.end_date, data_table=stocks.GLOBAL_ASSETS)
    vm = stocks.load_google_returns(self.start_date - relativedelta(years=1), self.end_date, 'Volume', data_table=stocks.UK_STOCKS)
    a = alpha.load_alpha(self.start_date - relativedelta(years=1), self.end_date, data_table=alpha.UK_ALPHA)
    # 'MCX' column is used as the market proxy — presumably FTSE250; verify.
    self.market_returns = rx.loc[:, 'MCX']
    # Restrict to tickers present in the universe `self.u`.
    self.stock_returns = r.loc[:, r.columns.isin(self.u.index)]
    self.asset_names = self.stock_returns.columns
    self.stock_alpha = a.loc[:, a.columns.get_level_values(1).isin(self.u.index)]
    alpha_returns = a.loc[:, a.columns.get_level_values(0) == 'Alpha']
    # Sum alpha contributions per ticker (level 1 of the column MultiIndex).
    alpha_returns = alpha_returns.groupby(level=1, axis=1).sum().loc[:, self.asset_names]
    self.stock_volume = vm.loc[:, self.stock_returns.columns]
    # Weekly returns via cumulative sum -> weekly last -> diff (ffill caps gaps at 5 days).
    self._r = self.stock_returns.cumsum().ffill(limit=5).resample('W').last().diff()
    self._rs = alpha_returns.cumsum().ffill(limit=5).resample('W').last().diff()
    # Weekly absolute-move proxy for volatility; rolling 52-week median.
    w = self.stock_returns.resample('W').sum().abs()
    v = w[w > 0].rolling(52, min_periods=13).median().ffill().bfill()
    self.stock_vol = tu.resample(v, self._r)
    self.stock_vol[self.stock_vol < STOCK_VOL_FLOOR] = STOCK_VOL_FLOOR
    # NOTE(review): r/rs divide by the *unfloored* v, while stock_vol is
    # floored above — confirm the floor is intentionally not applied here.
    self.r = self._r.divide(v)
    self.rs = self._rs.divide(v)
    # Cross-sectionally demeaned normalised alpha returns.
    self.rm = self.rs.subtract(self.rs.mean(axis=1), axis=0)
def _shape(self, data):
    """Align `data` onto the strategy timeline and keep only tracked assets."""
    aligned = tu.resample(data, self.timeline)
    return aligned.loc[:, self.asset_names]
def load_signal(self):
    """Load the raw signal over the load window and align it onto the
    strategy timeline."""
    logger.info('Loading signal')
    raw = self.signal_loader(start_date=self._load_start,
                             end_date=self._load_end)
    self.signal = tu.resample(raw, self.timeline)
def testResampleWithForefilling(self):
    # Resampling `a` onto `b` with carry-forward enabled must equal `c`.
    result = tu.resample(a, b, True)
    self.assertTrue(c.equals(result))
def testResampleWithoutForefilling(self):
    # Resampling `a` onto `b` with carry-forward disabled must equal `d`.
    result = tu.resample(a, b, False)
    self.assertTrue(d.equals(result))