def test_reversability(self):
    """A boolean Filter's output survives postprocess / to_workspace_value."""
    class F(Filter):
        inputs = ()
        window_length = 0
        missing_value = False

    term = F()
    expected = array(
        [[True, term.missing_value],
         [True, term.missing_value],
         [True, True]],
        dtype=bool,
    )
    flat = expected.ravel()
    assert_equal(term.postprocess(flat), flat)

    # Build a sparse pipeline output holding only the non-missing cells.
    dates = [
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        pd.Timestamp('2014-01-03'),
    ]
    sparse_output = pd.Series(
        data=True,
        index=pd.MultiIndex.from_arrays([dates, [0, 0, 0, 1]]),
    )
    assert_equal(
        term.to_workspace_value(sparse_output, pd.Index([0, 1])),
        expected,
    )
def test_session_closes_in_range(self):
    """session_closes_in_range over the full answer range matches the key."""
    first, last = self.answers.index[0], self.answers.index[-1]
    computed = self.calendar.session_closes_in_range(first, last)
    assert_equal(computed, self.answers['market_close'])
def test_reversability_int64(self):
    """An int64 Classifier's output survives postprocess / to_workspace_value."""
    class F(Classifier):
        inputs = ()
        window_length = 0
        dtype = int64_dtype
        missing_value = -1

    term = F()
    expected = np.array(
        [[0, term.missing_value],
         [1, term.missing_value],
         [2, 3]],
    )
    flat = expected.ravel()
    assert_equal(term.postprocess(flat), flat)

    # Sparse pipeline output holding only the non-missing entries.
    dates = [
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        pd.Timestamp('2014-01-03'),
    ]
    sparse_output = pd.Series(
        data=[0, 1, 2, 3],
        index=pd.MultiIndex.from_arrays([dates, [0, 0, 0, 1]]),
        dtype=int64_dtype,
    )
    assert_equal(
        term.to_workspace_value(sparse_output, pd.Index([0, 1])),
        expected,
    )
def test_bundle(self):
    # Ingest the sample CSVs from the test resources directory, then load
    # the resulting bundle back and verify its contents end-to-end.
    environ = {'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')}

    ingest('csvdir', environ=environ)
    bundle = load('csvdir', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    # Every equity in the bundle should span the full asset date range.
    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    sessions = self.calendar.all_sessions
    # Snap the asset start/end onto actual calendar sessions (bfill/ffill)
    # before reading the raw daily pricing arrays.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    # Prices are only compared to 2 decimal places.
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    # Only the adjustment keys (sorted per column) are compared here.
    assert_equal([sorted(adj.keys()) for adj in adjustments_for_cols],
                 expected_adjustments)
def _empty_ingest(self, _wrote_to=[]):
    """Run the nth empty ingest.

    Returns
    -------
    wrote_to : str
        The timestr of the bundle written.

    Notes
    -----
    ``_wrote_to`` is deliberately a mutable default argument: the ingest
    function registered on the first call closes over this exact list, so
    subsequent calls must reuse the same object to observe the writes.
    """
    if not self.bundles:
        # Register the bundle only once; later calls reuse the same
        # registration (and the same ``_wrote_to`` list).
        @self.register('bundle',
                       calendar_name='NYSE',
                       start_session=pd.Timestamp('2014', tz='UTC'),
                       end_session=pd.Timestamp('2014', tz='UTC'))
        def _(environ,
              asset_db_writer,
              minute_bar_writer,
              daily_bar_writer,
              adjustment_writer,
              calendar,
              start_session,
              end_session,
              cache,
              show_progress,
              output_dir):
            _wrote_to.append(output_dir)

    # Clear in place (the closure holds a reference to this list) so we
    # can assert exactly one write happened during this ingest.
    _wrote_to[:] = []
    self.ingest('bundle', environ=self.environ)
    assert_equal(len(_wrote_to), 1, msg='ingest was called more than once')
    ingestions = self._list_bundle()
    assert_in(
        _wrote_to[0],
        ingestions,
        msg='output_dir was not in the bundle directory',
    )
    return _wrote_to[0]
def test_reversability(self, dtype_):
    """A Factor of ``dtype_`` survives postprocess / to_workspace_value."""
    class F(Factor):
        inputs = ()
        dtype = dtype_
        window_length = 0

    term = F()
    expected = array(
        [[0, term.missing_value],
         [1, term.missing_value],
         [2, 3]],
        dtype=dtype_,
    )
    flat = expected.ravel()
    assert_equal(term.postprocess(flat), flat)

    # Sparse pipeline output containing only the non-missing cells.
    dates = [
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        pd.Timestamp('2014-01-03'),
    ]
    sparse_output = pd.Series(
        data=array([0, 1, 2, 3], dtype=dtype_),
        index=pd.MultiIndex.from_arrays([dates, [0, 0, 0, 1]]),
    )
    assert_equal(
        term.to_workspace_value(sparse_output, pd.Index([0, 1])),
        expected,
    )
def _check_bundles(self, names):
    """Assert exactly ``names`` are registered, then unregister them all."""
    assert_equal(set(self.bundles.keys()), names)
    for bundle_name in names:
        self.unregister(bundle_name)
    assert_false(self.bundles)
def test_ingest_assets_versions(self):
    # Requesting extra asset-db versions must fail for bundles registered
    # with ``create_writers=False``, and succeed (writing each requested
    # version) for bundles registered with ``create_writers=True``.
    versions = (1, 2)

    # Shared flag flipped by the ingest closures below.
    called = [False]

    @self.register('bundle', create_writers=False)
    def bundle_ingest_no_create_writers(*args, **kwargs):
        called[0] = True

    now = pd.Timestamp.utcnow()
    with self.assertRaisesRegexp(ValueError,
                                 "ingest .* creates writers .* downgrade"):
        self.ingest('bundle',
                    self.environ,
                    assets_versions=versions,
                    timestamp=now - pd.Timedelta(seconds=1))
    # The ingest function must not have run, and no new ingestion should
    # have been recorded.
    assert_false(called[0])
    assert_equal(len(ingestions_for_bundle('bundle', self.environ)), 1)

    @self.register('bundle', create_writers=True)
    def bundle_ingest_create_writers(environ,
                                     asset_db_writer,
                                     minute_bar_writer,
                                     daily_bar_writer,
                                     adjustment_writer,
                                     calendar,
                                     start_session,
                                     end_session,
                                     cache,
                                     show_progress,
                                     output_dir):
        # With create_writers=True all writers must be provided.
        self.assertIsNotNone(asset_db_writer)
        self.assertIsNotNone(minute_bar_writer)
        self.assertIsNotNone(daily_bar_writer)
        self.assertIsNotNone(adjustment_writer)

        equities = make_simple_equity_info(
            tuple(range(3)),
            self.START_DATE,
            self.END_DATE,
        )
        asset_db_writer.write(equities=equities)
        called[0] = True

    # Explicitly use different timestamp; otherwise, test could run so fast
    # that first ingestion is re-used.
    self.ingest('bundle',
                self.environ,
                assets_versions=versions,
                timestamp=now)
    assert_true(called[0])

    ingestions = ingestions_for_bundle('bundle', self.environ)
    assert_equal(len(ingestions), 2)

    # Each requested version (plus the current ASSET_DB_VERSION) should
    # have a correctly versioned asset db in the most recent ingestion.
    for version in sorted(set(versions) | {ASSET_DB_VERSION}):
        eng = sa.create_engine('sqlite:///' + asset_db_path(
            'bundle',
            to_bundle_ingest_dirname(ingestions[0]),  # most recent
            self.environ,
            version,
        ))
        metadata = sa.MetaData()
        metadata.reflect(eng)
        version_table = metadata.tables['version_info']
        check_version_info(eng, version_table, version)
def check_roundtrip(arr):
    """Converting to strings and rebuilding a LabelArray is lossless."""
    as_strings = arr.as_string_array()
    rebuilt = LabelArray(as_strings, arr.missing_value)
    assert_equal(as_strings, rebuilt.as_string_array())
def test_price_rounding(self, frequency, field):
    # Verifies that history windows round equity prices to three decimal
    # places and futures prices to four, for both minute and daily bars.
    equity = self.asset_finder.retrieve_asset(2)
    future = self.asset_finder.retrieve_asset(10001)
    cf = self.data_portal.asset_finder.create_continuous_future(
        'BUZ', 0, 'calendar', None,
    )
    minutes = self.nyse_calendar.minutes_for_session(self.trading_days[0])

    if frequency == '1m':
        minute = minutes[0]
        expected_equity_volume = 100
        expected_future_volume = 100
        data_frequency = 'minute'
    else:
        # Daily bars aggregate volume over the whole session.
        minute = minutes[0].normalize()
        expected_equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
        expected_future_volume = 100 * FUTURES_MINUTES_PER_DAY
        data_frequency = 'daily'

    # Equity prices should be floored to three decimal places.
    expected_equity_values = {
        'open': 1.005,
        'high': 1.005,
        'low': 1.005,
        'close': 1.005,
        'volume': expected_equity_volume,
    }
    # Futures prices should be rounded to four decimal places.
    expected_future_values = {
        'open': 1.0055,
        'high': 1.0059,
        'low': 1.0051,
        'close': 1.0055,
        'volume': expected_future_volume,
    }

    result = self.data_portal.get_history_window(
        assets=[equity, future, cf],
        end_dt=minute,
        bar_count=1,
        frequency=frequency,
        field=field,
        data_frequency=data_frequency,
    )
    # The continuous future column should mirror the underlying future.
    expected_result = pd.DataFrame(
        {
            equity: expected_equity_values[field],
            future: expected_future_values[field],
            cf: expected_future_values[field],
        },
        index=[minute],
        dtype=float64_dtype,
    )
    assert_equal(result, expected_result)
def check_equivalent_terms(self, terms):
    """Run a pipeline of ``terms`` and assert every column is identical."""
    self.assertTrue(len(terms) > 1, "Need at least two terms to compare")
    start, end = self.trading_days[[-10, -1]]
    frame = self.pipeline_engine.run_pipeline(Pipeline(terms), start, end)
    # Every term's column must match the first column exactly.
    reference = frame.iloc[:, 0]
    for term_name in terms:
        assert_equal(frame.loc[:, term_name], reference, check_names=False)
def manual_narrow_condense_back_to_valid_size_slow(self):
    """This test is really slow so we don't want it run by default.
    """
    # tests that we don't try to create an 'int24' (which is meaningless)
    cats = self.create_categories(24, plus_one=False)
    cats.append(cats[0])
    labels = LabelArray(cats, missing_value=cats[0])
    assert_equal(labels.itemsize, 4)
    self.check_roundtrip(labels)
def test_input_validation(self, arg):
    """Any sub-window longer than window_length must raise ValueError."""
    window_length = 52
    too_long = window_length + 1
    with self.assertRaises(ValueError) as ctx:
        IchimokuKinkoHyo(**{arg: too_long})
    assert_equal(
        str(ctx.exception),
        '%s must be <= the window_length: 53 > 52' % arg,
    )
def test_map_ignores_missing_value(self, missing):
    """LabelArray.map must leave the missing value untouched."""
    labels = LabelArray(
        np.array([missing, 'B', 'C'], dtype=object),
        missing_value=missing,
    )

    def next_char(c):
        return chr(ord(c) + 1)

    mapped = labels.map(next_char)
    # Non-missing entries are shifted; the missing entry is unchanged.
    expected = LabelArray([missing, 'C', 'D'], missing_value=missing)
    assert_equal(mapped.as_string_array(), expected.as_string_array())
def test_load_empty(self):
    """
    For the case where raw data is empty, make sure we have a result for
    all sids, that the dimensions are correct, and that we have the
    correct missing value.
    """
    # An events frame with the expected columns but zero rows.
    raw_events = pd.DataFrame(
        columns=["sid",
                 "timestamp",
                 "event_date",
                 "float",
                 "int",
                 "datetime",
                 "string"]
    )
    next_value_columns = {
        EventDataSet.next_datetime: 'datetime',
        EventDataSet.next_event_date: 'event_date',
        EventDataSet.next_float: 'float',
        EventDataSet.next_int: 'int',
        EventDataSet.next_string: 'string',
        EventDataSet.next_string_custom_missing: 'string'
    }
    previous_value_columns = {
        EventDataSet.previous_datetime: 'datetime',
        EventDataSet.previous_event_date: 'event_date',
        EventDataSet.previous_float: 'float',
        EventDataSet.previous_int: 'int',
        EventDataSet.previous_string: 'string',
        EventDataSet.previous_string_custom_missing: 'string'
    }
    loader = EventsLoader(
        raw_events, next_value_columns, previous_value_columns
    )
    # Route every column of EventDataSet through the loader under test.
    engine = SimplePipelineEngine(
        lambda x: loader,
        self.trading_days,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )
    assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
    dates = self.trading_days
    # With no raw events, every cell of the (dates x assets) grid should
    # hold the corresponding column's missing value.
    expected = self.frame_containing_all_missing_values(
        index=pd.MultiIndex.from_product([dates, assets]),
        columns=EventDataSet.columns,
    )
    assert_equal(results, expected)
def compare_with_empyrical(self, dependents, independent):
    """Cross-check vectorized_beta against empyrical's per-column beta."""
    INFINITY = 1000000  # close enough
    actual = vectorized_beta(
        dependents,
        independent,
        allowed_missing=INFINITY,
    )
    flat_independent = independent.ravel()
    per_column = [
        empyrical_beta(dependents[:, col].ravel(), flat_independent)
        for col in range(dependents.shape[1])
    ]
    assert_equal(actual, np.array(per_column), array_decimal=7)
    return actual
def test_narrow_condense_back_to_valid_size(self):
    """LabelArray deduplicates categories when choosing its code width."""
    # More than 2 ** 8 elements, but only one unique category.
    repeated = ['a'] * (2 ** 8 + 1)
    labels = LabelArray(repeated, missing_value=repeated[0])
    assert_equal(labels.itemsize, 1)
    self.check_roundtrip(labels)

    # longer than int16 but still fits when deduped
    cats = self.create_categories(16, plus_one=False)
    cats.append(cats[0])
    labels = LabelArray(cats, missing_value=cats[0])
    assert_equal(labels.itemsize, 2)
    self.check_roundtrip(labels)
def test_wma1(self):
    """A constant input series has a linear-weighted MA equal to the constant."""
    factor = LinearWeightedMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=10,
    )
    closes = np.ones((10, 5))
    out = np.zeros(closes.shape[1])
    factor.compute(
        pd.Timestamp('2015'),
        np.arange(5, dtype=np.int64),
        out,
        closes,
    )
    assert_equal(out, np.ones(5))
def test_aroon_basic(self, lows, highs, expected_out):
    """Aroon over the parameterized highs/lows matches the expected record."""
    aroon = Aroon(window_length=self.window_length)
    shape = (self.nassets,)
    # Output is a structured recarray (down/up fields).
    out = np.recarray(
        shape=shape,
        dtype=self.dtype,
        buf=np.empty(shape=shape, dtype=self.dtype),
    )
    aroon.compute(
        pd.Timestamp('2014', tz='utc'),
        pd.Index(np.arange(self.nassets, dtype=np.int64)),
        out,
        lows,
        highs,
    )
    assert_equal(out, expected_out)
def test_wma2(self):
    """Linearly increasing input: compare WMA to precomputed values."""
    factor = LinearWeightedMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=10,
    )
    closes = np.arange(50, dtype=np.float64).reshape((10, 5))
    out = np.zeros(closes.shape[1])
    factor.compute(
        pd.Timestamp('2015'),
        np.arange(5, dtype=np.int64),
        out,
        closes,
    )
    assert_equal(out, np.array([30., 31., 32., 33., 34.]))
def test_tr_basic(self):
    """Constant highs/lows with a prior close below the low give TR = 2."""
    true_range = TrueRange()
    nassets = 3
    out = np.empty(nassets, dtype=np.float64)
    true_range.compute(
        pd.Timestamp('2014'),
        np.arange(nassets, dtype=np.int64),
        out,
        np.full((2, nassets), 3.),  # highs
        np.full((2, nassets), 2.),  # lows
        np.full((2, nassets), 1.),  # closes
    )
    assert_equal(out, np.full((nassets,), 2.))
def test_copy_categories_list(self):
    """regression test for #1927
    """
    original_categories = ['a', 'b', 'c']
    LabelArray(
        [None, 'a', 'b', 'c'],
        missing_value=None,
        categories=original_categories,
    )
    # before #1927 we didn't take a copy and would insert the missing value
    # (None) into the list
    assert_equal(original_categories, ['a', 'b', 'c'])
def test_map(self, f):
    """LabelArray.map must agree with numpy's elementwise application of ``f``."""
    raw = np.array(
        [['E', 'GHIJ', 'HIJKLMNOP', 'DEFGHIJ'],
         ['CDE', 'ABCDEFGHIJKLMNOPQ', 'DEFGHIJKLMNOPQRS', 'ABCDEFGHIJK'],
         ['DEFGHIJKLMNOPQR', 'DEFGHI', 'DEFGHIJ', 'FGHIJK'],
         ['EFGHIJKLM', 'EFGHIJKLMNOPQRS', 'ABCDEFGHI', 'DEFGHIJ']],
        dtype=object,
    )
    labels = LabelArray(raw, missing_value=None)
    via_numpy = np.vectorize(f)(raw)
    via_labelarray = labels.map(f).as_string_array()
    assert_equal(via_numpy, via_labelarray)
def test_simple_beta_matches_regression(self):
    """SimpleBeta agrees with RollingLinearRegressionOfReturns' beta output."""
    simple = SimpleBeta(target=self.my_asset, regression_length=10)
    regression_beta = RollingLinearRegressionOfReturns(
        target=self.my_asset,
        returns_length=2,
        regression_length=10,
    ).beta
    frame = self.run_pipeline(
        Pipeline({'simple': simple, 'complex': regression_beta}),
        self.pipeline_start_date,
        self.pipeline_end_date,
    )
    assert_equal(frame['simple'], frame['complex'], check_names=False)
def test_rate_of_change_percentage(self, test_name, data, expected):
    """ROCP over the parameterized series matches the expected value."""
    factor = RateOfChangePercentage(
        inputs=(USEquityPricing.close,),
        window_length=len(data),
    )
    assets = np.arange(5, dtype=np.int64)
    # broadcast data across assets
    broadcast = np.array(data)[:, np.newaxis] * np.ones(len(assets))
    out = np.zeros(len(assets))
    factor.compute(pd.Timestamp('2014'), assets, out, broadcast)
    assert_equal(out, np.full((len(assets),), expected))
def test_MACD_window_length_generation(self, seed):
    """window_length is slow_period + signal_period - 1 for random periods."""
    rng = RandomState(seed)
    # Draw periods in strictly increasing order: signal < fast < slow.
    signal_period = rng.randint(1, 90)
    fast_period = rng.randint(signal_period + 1, signal_period + 100)
    slow_period = rng.randint(fast_period + 1, fast_period + 100)
    macd = MovingAverageConvergenceDivergenceSignal(
        fast_period=fast_period,
        slow_period=slow_period,
        signal_period=signal_period,
    )
    assert_equal(macd.window_length, slow_period + signal_period - 1)
def test_register_call(self):
    # Register bundles by calling ``register`` directly (not as a
    # decorator) for several names, then verify the registry contents.
    def ingest(*args):
        pass

    # ``subtest`` runs the decorated function once per name; ``apply``
    # invokes the resulting callable immediately.
    @apply
    @subtest(((c, ) for c in 'abcde'), 'name')
    def _(name):
        self.register(name, ingest)
        assert_in(name, self.bundles)
        assert_is(self.bundles[name].ingest, ingest)

    # All five names should map to the same ingest function.
    assert_equal(
        valmap(op.attrgetter('ingest'), self.bundles),
        {k: ingest for k in 'abcde'},
    )
    self._check_bundles(set('abcde'))
def test_example(self, example_name):
    """Running the named example reproduces its recorded performance."""
    actual_perf = examples.run_example(
        example_name,
        # This should match the invocation in
        # gateway/tests/resources/rebuild_example_data
        environ={'GATEWAY_ROOT': self.tmpdir.getpath('example_data/root')},
    )
    expected_perf = self.expected_perf[example_name]
    # There is a difference in the datetime columns in pandas
    # 0.16 and 0.17 because in 16 they are object and in 17 they are
    # datetime[ns, UTC]. We will just ignore the dtypes for now.
    assert_equal(
        actual_perf[examples._cols_to_check],
        expected_perf[examples._cols_to_check],
        check_dtype=False,
    )
def test_fso_expected_basic(self):
    """
    Simple test of expected output from fast stochastic oscillator
    """
    fso = FastStochasticOscillator()
    nassets = 3
    out = np.empty(shape=(nassets,), dtype=np.float64)
    highs = np.full((50, nassets), 3, dtype=np.float64)
    lows = np.full((50, nassets), 2, dtype=np.float64)
    closes = np.full((50, nassets), 4, dtype=np.float64)
    fso.compute(
        pd.Timestamp('2015'),
        np.arange(nassets, dtype=np.float64),
        out,
        closes,
        lows,
        highs,
    )
    # Expected %K
    assert_equal(out, np.full((nassets,), 200, dtype=np.float64))
def check_previous_value_results(self, column, results, dates): """ Check previous value results for a single column. """ # Verify that we got a result for every sid. self.assert_result_contains_all_sids(results) events = self.raw_events_no_nulls # Remove timezone info from trading days, since the outputs # from pandas won't be tz_localized. dates = dates.tz_localize(None) for asset, asset_result in results.iteritems(): relevant_events = events[events.sid == asset.sid] self.assertEqual(len(relevant_events), 2) v1, v2 = relevant_events[self.previous_value_columns[column]] event1_first_eligible = max( # .ix doesn't work here because the frame index contains # integers, so 0 is still interpreted as a key. relevant_events.iloc[0].loc[['event_date', 'timestamp']], ) event2_first_eligible = max( relevant_events.iloc[1].loc[['event_date', 'timestamp']] ) for date, computed_value in zip(dates, asset_result): if date >= event2_first_eligible: # If we've seen event 2, it should win even if we've seen # event 1, because events are sorted by event_date. self.assertEqual(computed_value, v2) elif date >= event1_first_eligible: # If we've seen event 1 but not event 2, event 1 should # win. self.assertEqual(computed_value, v1) else: # If we haven't seen either event, then we should have # column.missing_value. assert_equal( computed_value, column.missing_value, # Coerce from Timestamp to datetime64. allow_datetime_coercions=True, )