def test_securities_reindexed_like_intraday(self):
    """
    Tests get_securities_reindexed_like with Date and Time in the index.
    """
    closes = pd.DataFrame(
        np.random.rand(4, 2),
        columns=[12345, 23456],
        index=pd.MultiIndex.from_product(
            [
                pd.date_range(start="2018-05-01", periods=2, freq="D",
                              tz="America/New_York", name="Date"),
                ["09:30:00", "09:31:00"],
            ],
            names=["Date", "Time"]))

    def mock_download_master_file(f, *args, **kwargs):
        securities = pd.DataFrame(
            dict(ConId=[12345, 23456],
                 Symbol=["ABC", "DEF"]))
        securities.to_csv(f, index=False)
        f.seek(0)

    with patch('quantrocket.master.download_master_file', new=mock_download_master_file):

        securities = get_securities_reindexed_like(
            closes, domain="main", fields="Symbol")

    securities = securities.reset_index()
    securities.loc[:, "Date"] = securities.Date.dt.strftime("%Y-%m-%dT%H:%M:%S%z")
    self.assertListEqual(
        securities.to_dict(orient="records"),
        [{12345: 'ABC',
          23456: 'DEF',
          'Date': '2018-05-01T00:00:00-0400',
          'Field': 'Symbol',
          'Time': '09:30:00'},
         {12345: 'ABC',
          23456: 'DEF',
          'Date': '2018-05-01T00:00:00-0400',
          'Field': 'Symbol',
          'Time': '09:31:00'},
         {12345: 'ABC',
          23456: 'DEF',
          'Date': '2018-05-02T00:00:00-0400',
          'Field': 'Symbol',
          'Time': '09:30:00'},
         {12345: 'ABC',
          23456: 'DEF',
          'Date': '2018-05-02T00:00:00-0400',
          'Field': 'Symbol',
          'Time': '09:31:00'}])
def load_adjusted_array(self, domain, columns, dates, sids, mask):

    fields = [c.name for c in columns]

    real_sids = [
        self.zipline_sids_to_real_sids[zipline_sid]
        for zipline_sid in sids]

    reindex_like = pd.DataFrame(None, index=dates, columns=real_sids)
    reindex_like.index.name = "Date"

    securities = get_securities_reindexed_like(reindex_like, fields=fields)

    out = {}

    for column in columns:
        missing_value = MISSING_VALUES_BY_DTYPE[column.dtype]
        if column.dtype == datetime64ns_dtype:
            # pd.to_datetime handles NaNs in pandas 0.22
            # while .astype(column.dtype) doesn't
            values = securities.loc[column.name].apply(
                pd.to_datetime).fillna(missing_value).values
        else:
            values = securities.loc[column.name].astype(
                column.dtype).fillna(missing_value).values

        out[column] = AdjustedArray(
            values,
            adjustments={},
            missing_value=missing_value)

    return out
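MISSING_VALUES_BY_DTYPE is referenced but not defined in the loader above. A minimal sketch of what such a mapping might look like, assuming zipline's numpy dtype helpers; the sentinels used by the real module may differ.

# Assumed sketch only: maps Pipeline column dtypes to the sentinel written
# where the securities master has no value for a sid.
import numpy as np
from zipline.utils.numpy_utils import (
    bool_dtype,
    datetime64ns_dtype,
    float64_dtype,
    object_dtype,
)

MISSING_VALUES_BY_DTYPE = {
    bool_dtype: False,
    float64_dtype: np.nan,
    datetime64ns_dtype: np.datetime64("NaT"),
    object_dtype: None,
}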
def add_securities_master_features(self, prices, features):
    """
    Features from the securities master:

    - ADR?
    - sector
    """
    closes = prices.loc["Close"]
    securities = get_securities_reindexed_like(
        closes,
        fields=["sharadar_Category", "sharadar_Sector"])

    # Is it an ADR?
    categories = securities.loc["sharadar_Category"]
    unique_categories = categories.iloc[0].unique()
    # this dataset includes several ADR classifications, all of which
    # start with "ADR "
    features["are_adrs"] = categories.isin(
        [cat for cat in unique_categories if cat.startswith("ADR ")]).astype(int)

    # Which sector? (sectors must be one-hot encoded - see usage guide for more)
    sectors = securities.loc["sharadar_Sector"]
    for sector in sectors.stack().unique():
        features["sector_{}".format(sector)] = (sectors == sector).astype(int)
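A hypothetical calling sketch: `strategy` is assumed to be an instance of the (not shown) class that defines add_securities_master_features, and `prices` is assumed to be a multi-level prices DataFrame with a "Close" field level.

# Hypothetical usage: populate a feature dict from the securities master.
# `strategy` and `prices` are assumed inputs, not part of the snippet above.
features = {}
strategy.add_securities_master_features(prices, features)
# features now holds "are_adrs" plus one one-hot "sector_<name>" DataFrame
# per sector, each aligned with prices.loc["Close"]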
def test_pass_conids_and_domain_based_on_reindex_like(
        self, mock_download_master_file):
    """
    Tests that conids and domain are correctly passed to the
    download_master_file function based on reindex_like.
    """
    closes = pd.DataFrame(
        np.random.rand(3, 2),
        columns=[12345, 23456],
        index=pd.date_range(start="2018-05-01", periods=3, freq="D", name="Date"))

    def _mock_download_master_file(f, *args, **kwargs):
        securities = pd.DataFrame(
            dict(ConId=[12345, 23456],
                 Symbol=["ABC", "DEF"],
                 Etf=[1, 0],
                 Delisted=[0, 1],
                 Currency=["USD", "USD"]))
        securities.to_csv(f, index=False)
        f.seek(0)

    mock_download_master_file.side_effect = _mock_download_master_file

    get_securities_reindexed_like(
        closes, domain="main",
        fields=["Symbol", "Etf", "Delisted", "Currency"])

    download_master_file_call = mock_download_master_file.mock_calls[0]
    _, args, kwargs = download_master_file_call
    self.assertListEqual(kwargs["conids"], [12345, 23456])
    self.assertEqual(kwargs["domain"], "main")
    self.assertListEqual(kwargs["fields"], ["Symbol", "Etf", "Delisted", "Currency"])
def prices_to_signals(self, prices):
    closes = prices.loc["Close"]

    # Compute dollar volume mask
    dollar_volumes = prices.loc["Volume"] * closes
    avg_dollar_volumes = dollar_volumes.rolling(window=22).mean()
    are_eligible = avg_dollar_volumes >= self.MIN_DOLLAR_VOLUME

    # Limit to equity shares (EDI SecTypeCode "EQS")
    sectypes = get_securities_reindexed_like(
        closes, "edi_SecTypeCode").loc["edi_SecTypeCode"]
    are_eligible &= sectypes == "EQS"

    # Optionally limit to a single currency
    if self.LIMIT_TO_CURRENCY:
        currencies = get_securities_reindexed_like(
            closes, "Currency").loc["Currency"]
        are_eligible &= currencies == self.LIMIT_TO_CURRENCY

    # Compute big losers mask
    prior_returns = (closes - closes.shift()) / closes.shift()
    big_losers = prior_returns <= -0.10

    short_signals = big_losers & are_eligible

    return -short_signals.astype(int)
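As a companion to the signal logic above, a minimal sketch of how the next Moonshot step might look, assuming the standard signals_to_target_weights hook and the allocate_equal_weights helper; this is not part of the snippet above and the actual strategy may weight positions differently.

def signals_to_target_weights(self, signals, prices):
    # Sketch only: equal-weight the short signals produced by
    # prices_to_signals above.
    weights = self.allocate_equal_weights(signals)
    return weights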
def load_adjusted_array(self, domain, columns, dates, sids, mask):

    fields = [c.name for c in columns]

    real_sids = [
        self.zipline_sids_to_real_sids[zipline_sid]
        for zipline_sid in sids]

    reindex_like = pd.DataFrame(None, index=dates, columns=real_sids)
    reindex_like.index.name = "Date"

    securities = get_securities_reindexed_like(reindex_like, fields=fields)

    out = {}

    for column in columns:
        missing_value = MISSING_VALUES_BY_DTYPE[column.dtype]
        out[column] = AdjustedArray(
            securities.loc[column.name].astype(
                column.dtype).fillna(missing_value).values,
            adjustments={},
            missing_value=missing_value)

    return out
def test_securities_reindexed_like(self):
    """
    Tests get_securities_reindexed_like.
    """
    closes = pd.DataFrame(
        np.random.rand(3, 2),
        columns=[12345, 23456],
        index=pd.date_range(start="2018-05-01", periods=3, freq="D",
                            tz="America/New_York", name="Date"))

    def mock_download_master_file(f, *args, **kwargs):
        securities = pd.DataFrame(
            dict(ConId=[12345, 23456],
                 Symbol=["ABC", "DEF"],
                 Etf=[1, 0],
                 Delisted=[0, 1],
                 Currency=["USD", "EUR"]))
        securities.to_csv(f, index=False)
        f.seek(0)

    with patch('quantrocket.master.download_master_file', new=mock_download_master_file):

        securities = get_securities_reindexed_like(
            closes, domain="main",
            fields=["Symbol", "Etf", "Delisted", "Currency"])

    securities = securities.reset_index()
    securities.loc[:, "Date"] = securities.Date.dt.strftime("%Y-%m-%dT%H:%M:%S%z")
    self.assertListEqual(
        securities.to_dict(orient="records"),
        [{'Field': 'Currency', 'Date': '2018-05-01T00:00:00-0400', 12345: 'USD', 23456: 'EUR'},
         {'Field': 'Currency', 'Date': '2018-05-02T00:00:00-0400', 12345: 'USD', 23456: 'EUR'},
         {'Field': 'Currency', 'Date': '2018-05-03T00:00:00-0400', 12345: 'USD', 23456: 'EUR'},
         {'Field': 'Delisted', 'Date': '2018-05-01T00:00:00-0400', 12345: False, 23456: True},
         {'Field': 'Delisted', 'Date': '2018-05-02T00:00:00-0400', 12345: False, 23456: True},
         {'Field': 'Delisted', 'Date': '2018-05-03T00:00:00-0400', 12345: False, 23456: True},
         {'Field': 'Etf', 'Date': '2018-05-01T00:00:00-0400', 12345: True, 23456: False},
         {'Field': 'Etf', 'Date': '2018-05-02T00:00:00-0400', 12345: True, 23456: False},
         {'Field': 'Etf', 'Date': '2018-05-03T00:00:00-0400', 12345: True, 23456: False},
         {'Field': 'Symbol', 'Date': '2018-05-01T00:00:00-0400', 12345: 'ABC', 23456: 'DEF'},
         {'Field': 'Symbol', 'Date': '2018-05-02T00:00:00-0400', 12345: 'ABC', 23456: 'DEF'},
         {'Field': 'Symbol', 'Date': '2018-05-03T00:00:00-0400', 12345: 'ABC', 23456: 'DEF'}])