def _check_bundles(self, names):
    assert_equal(set(self.bundles.keys()), names)

    for name in names:
        self.unregister(name)

    assert_false(self.bundles)
def test_reversability(self, dtype_):
    class F(Factor):
        inputs = ()
        dtype = dtype_
        window_length = 0

    f = F()
    column_data = array(
        [[0, f.missing_value],
         [1, f.missing_value],
         [2, 3]],
        dtype=dtype_,
    )

    assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

    # only include the non-missing data
    pipeline_output = pd.Series(
        data=array([0, 1, 2, 3], dtype=dtype_),
        index=pd.MultiIndex.from_arrays([
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-02'),
             pd.Timestamp('2014-01-03'),
             pd.Timestamp('2014-01-03')],
            [0, 0, 0, 1],
        ]),
    )

    assert_equal(
        f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
        column_data,
    )
def test_reversability(self):
    class F(Filter):
        inputs = ()
        window_length = 0
        missing_value = False

    f = F()
    column_data = array(
        [[True, f.missing_value],
         [True, f.missing_value],
         [True, True]],
        dtype=bool,
    )

    assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

    # only include the non-missing data
    pipeline_output = pd.Series(
        data=True,
        index=pd.MultiIndex.from_arrays([
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-02'),
             pd.Timestamp('2014-01-03'),
             pd.Timestamp('2014-01-03')],
            [0, 0, 0, 1],
        ]),
    )

    assert_equal(
        f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
        column_data,
    )
def test_session_closes_in_range(self):
    found_closes = self.calendar.session_closes_in_range(
        self.answers.index[0],
        self.answers.index[-1],
    )

    assert_equal(found_closes, self.answers['market_close'])
def test_repr(self):
    assert_equal(
        repr(self.Term().alias('ayy lmao')),
        "Aliased%s(Term(...), name='ayy lmao')" % (
            self.Term.__base__.__name__,
        ),
    )
def test_reversability_int64(self):
    class F(Classifier):
        inputs = ()
        window_length = 0
        dtype = int64_dtype
        missing_value = -1

    f = F()
    column_data = np.array(
        [[0, f.missing_value],
         [1, f.missing_value],
         [2, 3]],
    )

    assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

    # only include the non-missing data
    pipeline_output = pd.Series(
        data=[0, 1, 2, 3],
        index=pd.MultiIndex.from_arrays([
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-02'),
             pd.Timestamp('2014-01-03'),
             pd.Timestamp('2014-01-03')],
            [0, 0, 0, 1],
        ]),
        dtype=int64_dtype,
    )

    assert_equal(
        f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
        column_data,
    )
def _empty_ingest(self, _wrote_to=[]):
    """Run the nth empty ingest.

    Returns
    -------
    wrote_to : str
        The timestr of the bundle written.
    """
    if not self.bundles:
        @self.register('bundle',
                       calendar_name='NYSE',
                       start_session=pd.Timestamp('2014', tz='UTC'),
                       end_session=pd.Timestamp('2014', tz='UTC'))
        def _(environ,
              asset_db_writer,
              minute_bar_writer,
              daily_bar_writer,
              adjustment_writer,
              calendar,
              start_session,
              end_session,
              cache,
              show_progress,
              output_dir):
            _wrote_to.append(output_dir)

    _wrote_to[:] = []
    self.ingest('bundle', environ=self.environ)
    assert_equal(len(_wrote_to), 1, msg='ingest was called more than once')
    ingestions = self._list_bundle()
    assert_in(
        _wrote_to[0],
        ingestions,
        msg='output_dir was not in the bundle directory',
    )
    return _wrote_to[0]
def test_ingest_assets_versions(self):
    versions = (1, 2)

    called = [False]

    @self.register('bundle', create_writers=False)
    def bundle_ingest_no_create_writers(*args, **kwargs):
        called[0] = True

    now = pd.Timestamp.utcnow()
    with self.assertRaisesRegexp(
            ValueError, "ingest .* creates writers .* downgrade"):
        self.ingest('bundle', self.environ, assets_versions=versions,
                    timestamp=now - pd.Timedelta(seconds=1))
    assert_false(called[0])
    assert_equal(len(ingestions_for_bundle('bundle', self.environ)), 1)

    @self.register('bundle', create_writers=True)
    def bundle_ingest_create_writers(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            output_dir):
        self.assertIsNotNone(asset_db_writer)
        self.assertIsNotNone(minute_bar_writer)
        self.assertIsNotNone(daily_bar_writer)
        self.assertIsNotNone(adjustment_writer)

        equities = make_simple_equity_info(
            tuple(range(3)),
            self.START_DATE,
            self.END_DATE,
        )
        asset_db_writer.write(equities=equities)
        called[0] = True

    # Explicitly use different timestamp; otherwise, test could run so fast
    # that first ingestion is re-used.
    self.ingest('bundle', self.environ, assets_versions=versions,
                timestamp=now)
    assert_true(called[0])

    ingestions = ingestions_for_bundle('bundle', self.environ)
    assert_equal(len(ingestions), 2)
    for version in sorted(set(versions) | {ASSET_DB_VERSION}):
        eng = sa.create_engine(
            'sqlite:///' +
            asset_db_path(
                'bundle',
                to_bundle_ingest_dirname(ingestions[0]),  # most recent
                self.environ,
                version,
            )
        )
        metadata = sa.MetaData()
        metadata.reflect(eng)
        version_table = metadata.tables['version_info']
        check_version_info(eng, version_table, version)
def check_roundtrip(arr):
    assert_equal(
        arr.as_string_array(),
        LabelArray(
            arr.as_string_array(),
            arr.missing_value,
        ).as_string_array(),
    )
def manual_narrow_condense_back_to_valid_size_slow(self):
    """This test is really slow so we don't want it run by default.
    """
    # tests that we don't try to create an 'int24' (which is meaningless)
    categories = self.create_categories(24, plus_one=False)
    categories.append(categories[0])
    arr = LabelArray(categories, missing_value=categories[0])
    assert_equal(arr.itemsize, 4)
    self.check_roundtrip(arr)
def test_map_ignores_missing_value(self, missing):
    data = np.array([missing, 'B', 'C'], dtype=object)
    la = LabelArray(data, missing_value=missing)

    def increment_char(c):
        return chr(ord(c) + 1)

    result = la.map(increment_char)
    expected = LabelArray([missing, 'C', 'D'], missing_value=missing)
    assert_equal(result.as_string_array(), expected.as_string_array())
def test_input_validation(self, arg):
    window_length = 52

    with self.assertRaises(ValueError) as e:
        IchimokuKinkoHyo(**{arg: window_length + 1})

    assert_equal(
        str(e.exception),
        '%s must be <= the window_length: 53 > 52' % arg,
    )
def test_price_rounding(self, frequency, field):
    equity = self.asset_finder.retrieve_asset(2)
    future = self.asset_finder.retrieve_asset(10001)
    cf = self.data_portal.asset_finder.create_continuous_future(
        'BUZ', 0, 'calendar', None,
    )
    minutes = self.nyse_calendar.minutes_for_session(self.trading_days[0])

    if frequency == '1m':
        minute = minutes[0]
        expected_equity_volume = 100
        expected_future_volume = 100
        data_frequency = 'minute'
    else:
        minute = minutes[0].normalize()
        expected_equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
        expected_future_volume = 100 * FUTURES_MINUTES_PER_DAY
        data_frequency = 'daily'

    # Equity prices should be floored to three decimal places.
    expected_equity_values = {
        'open': 1.005,
        'high': 1.005,
        'low': 1.005,
        'close': 1.005,
        'volume': expected_equity_volume,
    }
    # Futures prices should be rounded to four decimal places.
    expected_future_values = {
        'open': 1.0055,
        'high': 1.0059,
        'low': 1.0051,
        'close': 1.0055,
        'volume': expected_future_volume,
    }

    result = self.data_portal.get_history_window(
        assets=[equity, future, cf],
        end_dt=minute,
        bar_count=1,
        frequency=frequency,
        field=field,
        data_frequency=data_frequency,
    )
    expected_result = pd.DataFrame(
        {
            equity: expected_equity_values[field],
            future: expected_future_values[field],
            cf: expected_future_values[field],
        },
        index=[minute],
        dtype=float64_dtype,
    )

    assert_equal(result, expected_result)
def test_load_empty(self):
    """
    For the case where raw data is empty, make sure we have a result for
    all sids, that the dimensions are correct, and that we have the
    correct missing value.
    """
    raw_events = pd.DataFrame(
        columns=["sid",
                 "timestamp",
                 "event_date",
                 "float",
                 "int",
                 "datetime",
                 "string"]
    )
    next_value_columns = {
        EventDataSet.next_datetime: 'datetime',
        EventDataSet.next_event_date: 'event_date',
        EventDataSet.next_float: 'float',
        EventDataSet.next_int: 'int',
        EventDataSet.next_string: 'string',
        EventDataSet.next_string_custom_missing: 'string'
    }
    previous_value_columns = {
        EventDataSet.previous_datetime: 'datetime',
        EventDataSet.previous_event_date: 'event_date',
        EventDataSet.previous_float: 'float',
        EventDataSet.previous_int: 'int',
        EventDataSet.previous_string: 'string',
        EventDataSet.previous_string_custom_missing: 'string'
    }
    loader = EventsLoader(
        raw_events, next_value_columns, previous_value_columns
    )
    engine = SimplePipelineEngine(
        lambda x: loader,
        self.trading_days,
        self.asset_finder,
    )

    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )

    assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
    dates = self.trading_days

    expected = self.frame_containing_all_missing_values(
        index=pd.MultiIndex.from_product([dates, assets]),
        columns=EventDataSet.columns,
    )

    assert_equal(results, expected)
def test_aroon_basic(self, lows, highs, expected_out):
    aroon = Aroon(window_length=self.window_length)
    today = pd.Timestamp('2014', tz='utc')
    assets = pd.Index(np.arange(self.nassets, dtype=np.int64))
    shape = (self.nassets,)
    out = np.recarray(shape=shape, dtype=self.dtype,
                      buf=np.empty(shape=shape, dtype=self.dtype))

    aroon.compute(today, assets, out, lows, highs)

    assert_equal(out, expected_out)
def test_wma1(self):
    wma1 = LinearWeightedMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=10,
    )

    today = pd.Timestamp('2015')
    assets = np.arange(5, dtype=np.int64)

    data = np.ones((10, 5))
    out = np.zeros(data.shape[1])

    wma1.compute(today, assets, out, data)
    assert_equal(out, np.ones(5))
def test_wma2(self):
    wma2 = LinearWeightedMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=10,
    )

    today = pd.Timestamp('2015')
    assets = np.arange(5, dtype=np.int64)

    data = np.arange(50, dtype=np.float64).reshape((10, 5))
    out = np.zeros(data.shape[1])

    wma2.compute(today, assets, out, data)
    assert_equal(out, np.array([30., 31., 32., 33., 34.]))
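# Not part of the original suite: a minimal reference sketch of where the
# expected values [30., 31., 32., 33., 34.] in test_wma2 come from, assuming a
# linear weighting scheme in which the oldest bar gets weight 1 and the newest
# gets weight `window_length`. The helper name is hypothetical and it relies on
# the same module-level `np` import used throughout this file.
def _wma_reference_sketch():
    data = np.arange(50, dtype=np.float64).reshape((10, 5))
    weights = np.arange(1, 11, dtype=np.float64)  # 1 (oldest) .. 10 (newest)
    # Weighted average of each column: sum(w_i * x_i) / sum(w_i)
    return (data * weights[:, np.newaxis]).sum(axis=0) / weights.sum()
    # -> array([30., 31., 32., 33., 34.])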
def test_narrow_condense_back_to_valid_size(self):
    categories = ['a'] * (2 ** 8 + 1)
    arr = LabelArray(categories, missing_value=categories[0])
    assert_equal(arr.itemsize, 1)
    self.check_roundtrip(arr)

    # longer than int16 but still fits when deduped
    categories = self.create_categories(16, plus_one=False)
    categories.append(categories[0])
    arr = LabelArray(categories, missing_value=categories[0])
    assert_equal(arr.itemsize, 2)
    self.check_roundtrip(arr)
def test_tr_basic(self):
    tr = TrueRange()

    today = pd.Timestamp('2014')
    assets = np.arange(3, dtype=np.int64)
    out = np.empty(3, dtype=np.float64)

    highs = np.full((2, 3), 3.)
    lows = np.full((2, 3), 2.)
    closes = np.full((2, 3), 1.)

    tr.compute(today, assets, out, highs, lows, closes)

    assert_equal(out, np.full((3,), 2.))
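# Not part of the original suite: a small sketch of why test_tr_basic expects
# 2.0 for every asset, assuming the standard true-range definition
# max(high - low, |high - prev_close|, |low - prev_close|). With high=3, low=2
# and a previous close of 1 this is max(1, 2, 1) = 2. The helper name is
# hypothetical.
def _true_range_reference_sketch(high=3.0, low=2.0, prev_close=1.0):
    return max(high - low, abs(high - prev_close), abs(low - prev_close))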
def test_map(self, f):
    data = np.array(
        [['E', 'GHIJ', 'HIJKLMNOP', 'DEFGHIJ'],
         ['CDE', 'ABCDEFGHIJKLMNOPQ', 'DEFGHIJKLMNOPQRS', 'ABCDEFGHIJK'],
         ['DEFGHIJKLMNOPQR', 'DEFGHI', 'DEFGHIJ', 'FGHIJK'],
         ['EFGHIJKLM', 'EFGHIJKLMNOPQRS', 'ABCDEFGHI', 'DEFGHIJ']],
        dtype=object,
    )
    la = LabelArray(data, missing_value=None)

    numpy_transformed = np.vectorize(f)(data)
    la_transformed = la.map(f).as_string_array()

    assert_equal(numpy_transformed, la_transformed)
def test_MACD_window_length_generation(self, seed):
    rng = RandomState(seed)

    signal_period = rng.randint(1, 90)
    fast_period = rng.randint(signal_period + 1, signal_period + 100)
    slow_period = rng.randint(fast_period + 1, fast_period + 100)
    ewma = MovingAverageConvergenceDivergenceSignal(
        fast_period=fast_period,
        slow_period=slow_period,
        signal_period=signal_period,
    )
    assert_equal(
        ewma.window_length,
        slow_period + signal_period - 1,
    )
def test_rate_of_change_percentage(self, test_name, data, expected):
    window_length = len(data)
    rocp = RateOfChangePercentage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    today = pd.Timestamp('2014')
    assets = np.arange(5, dtype=np.int64)
    # broadcast data across assets
    data = np.array(data)[:, np.newaxis] * np.ones(len(assets))

    out = np.zeros(len(assets))
    rocp.compute(today, assets, out, data)
    assert_equal(out, np.full((len(assets),), expected))
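# Not part of the original suite: a hypothetical helper sketching the expected
# value used above, assuming rate-of-change percentage is computed as
# (last - first) / first * 100 over the lookback window. Relies on the same
# module-level `np` import used throughout this file.
def _rocp_reference_sketch(window):
    window = np.asarray(window, dtype=np.float64)
    return (window[-1] - window[0]) / window[0] * 100.0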
def test_register_call(self):
    def ingest(*args):
        pass

    @apply
    @subtest(((c,) for c in 'abcde'), 'name')
    def _(name):
        self.register(name, ingest)
        assert_in(name, self.bundles)
        assert_is(self.bundles[name].ingest, ingest)

    assert_equal(
        valmap(op.attrgetter('ingest'), self.bundles),
        {k: ingest for k in 'abcde'},
    )
    self._check_bundles(set('abcde'))
def test_example(self, example_name):
    actual_perf = examples.run_example(
        example_name,
        # This should match the invocation in
        # catalyst/tests/resources/rebuild_example_data
        environ={
            'ZIPLINE_ROOT': self.tmpdir.getpath('example_data/root'),
        },
    )
    assert_equal(
        actual_perf[examples._cols_to_check],
        self.expected_perf[example_name][examples._cols_to_check],
        # There is a difference in the datetime columns in pandas
        # 0.16 and 0.17 because in 16 they are object and in 17 they are
        # datetime[ns, UTC]. We will just ignore the dtypes for now.
        check_dtype=False,
    )
def test_fso_expected_basic(self):
    """
    Simple test of expected output from fast stochastic oscillator
    """
    fso = FastStochasticOscillator()
    today = pd.Timestamp('2015')
    assets = np.arange(3, dtype=np.float64)
    out = np.empty(shape=(3,), dtype=np.float64)

    highs = np.full((50, 3), 3, dtype=np.float64)
    lows = np.full((50, 3), 2, dtype=np.float64)
    closes = np.full((50, 3), 4, dtype=np.float64)

    fso.compute(today, assets, out, closes, lows, highs)

    # Expected %K
    assert_equal(out, np.full((3,), 200, dtype=np.float64))
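# Not part of the original suite: a sketch of the fast %K formula that the
# expectation above assumes, %K = (close - lowest_low) /
# (highest_high - lowest_low) * 100. The closes (4) are deliberately above the
# highs (3), so with lows of 2 the value is 200 rather than the usual 0-100
# range. The helper name is hypothetical.
def _fast_k_reference_sketch(close=4.0, lowest_low=2.0, highest_high=3.0):
    return (close - lowest_low) / (highest_high - lowest_low) * 100.0  # 200.0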
def check_previous_value_results(self, column, results, dates):
    """
    Check previous value results for a single column.
    """
    # Verify that we got a result for every sid.
    self.assert_result_contains_all_sids(results)

    events = self.raw_events_no_nulls
    # Remove timezone info from trading days, since the outputs
    # from pandas won't be tz_localized.
    dates = dates.tz_localize(None)

    for asset, asset_result in results.iteritems():
        relevant_events = events[events.sid == asset.sid]
        self.assertEqual(len(relevant_events), 2)

        v1, v2 = relevant_events[self.previous_value_columns[column]]

        event1_first_eligible = max(
            # .ix doesn't work here because the frame index contains
            # integers, so 0 is still interpreted as a key.
            relevant_events.iloc[0].loc[['event_date', 'timestamp']],
        )
        event2_first_eligible = max(
            relevant_events.iloc[1].loc[['event_date', 'timestamp']]
        )

        for date, computed_value in zip(dates, asset_result):
            if date >= event2_first_eligible:
                # If we've seen event 2, it should win even if we've seen
                # event 1, because events are sorted by event_date.
                self.assertEqual(computed_value, v2)
            elif date >= event1_first_eligible:
                # If we've seen event 1 but not event 2, event 1 should
                # win.
                self.assertEqual(computed_value, v1)
            else:
                # If we haven't seen either event, then we should have
                # column.missing_value.
                assert_equal(
                    computed_value,
                    column.missing_value,
                    # Coerce from Timestamp to datetime64.
                    allow_datetime_coercions=True,
                )
def test_fso_expected_with_talib(self, seed):
    """
    Test that the output returned from the fast stochastic oscillator
    is the same as that from the ta-lib STOCHF function.
    """
    window_length = 14
    nassets = 6
    rng = np.random.RandomState(seed=seed)

    input_size = (window_length, nassets)

    # Values from 9 to 12.
    closes = 9.0 + (rng.random_sample(input_size) * 3.0)

    # Values from 13 to 15.
    highs = 13.0 + (rng.random_sample(input_size) * 2.0)

    # Values from 6 to 8.
    lows = 6.0 + (rng.random_sample(input_size) * 2.0)

    expected_out_k = []
    for i in range(nassets):
        fastk, fastd = talib.STOCHF(
            high=highs[:, i],
            low=lows[:, i],
            close=closes[:, i],
            fastk_period=window_length,
            fastd_period=1,
        )

        expected_out_k.append(fastk[-1])
    expected_out_k = np.array(expected_out_k)

    today = pd.Timestamp('2015')
    # Use np.float64 explicitly; the bare np.float alias is deprecated.
    out = np.empty(shape=(nassets,), dtype=np.float64)
    assets = np.arange(nassets, dtype=np.float64)

    fso = FastStochasticOscillator()
    fso.compute(
        today, assets, out, closes, lows, highs
    )

    assert_equal(out, expected_out_k, array_decimal=6)
def test_parameterized_term_default_value_with_not_specified(self):
    defaults = {'a': 'default for a', 'b': NotSpecified}

    class F(Factor):
        params = defaults
        inputs = (SomeDataSet.foo,)
        dtype = 'f8'
        window_length = 5

    pattern = r"F expected a keyword parameter 'b'\."
    with assert_raises_regex(TypeError, pattern):
        F()
    with assert_raises_regex(TypeError, pattern):
        F(a='new a')

    assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
    assert_equal(
        F(a='new a', b='new b').params,
        {'a': 'new a', 'b': 'new b'},
    )
def test_compose_mro(self):
    composed = compose_types(C, D)

    assert_equal(composed.f(), C.f())
    assert_equal(composed.g(), D.g())
    assert_equal(composed().delegate(), ('C.delegate', 'D.delegate'))
def test_reversability_categorical(self):
    class F(Classifier):
        inputs = ()
        window_length = 0
        dtype = categorical_dtype
        missing_value = '<missing>'

    f = F()
    column_data = LabelArray(
        np.array(
            [['a', f.missing_value],
             ['b', f.missing_value],
             ['c', 'd']],
        ),
        missing_value=f.missing_value,
    )

    assert_equal(
        f.postprocess(column_data.ravel()),
        pd.Categorical(
            ['a', f.missing_value, 'b', f.missing_value, 'c', 'd'],
        ),
    )

    # only include the non-missing data
    pipeline_output = pd.Series(
        data=['a', 'b', 'c', 'd'],
        index=pd.MultiIndex.from_arrays([
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-02'),
             pd.Timestamp('2014-01-03'),
             pd.Timestamp('2014-01-03')],
            [0, 0, 0, 1],
        ]),
        dtype='category',
    )

    assert_equal(
        f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
        column_data,
    )
def test_map_shrinks_code_storage_if_possible(self):
    arr = LabelArray(
        # Drop the last value so we fit in a uint16 with None as a missing
        # value.
        self.create_categories(16, plus_one=False)[:-1],
        missing_value=None,
    )

    self.assertEqual(arr.itemsize, 2)

    def either_A_or_B(s):
        return ('A', 'B')[sum(ord(c) for c in s) % 2]

    result = arr.map(either_A_or_B)

    self.assertEqual(set(result.categories), {'A', 'B', None})
    self.assertEqual(result.itemsize, 1)

    assert_equal(
        np.vectorize(either_A_or_B)(arr.as_string_array()),
        result.as_string_array(),
    )
def check_next_value_results(self, column, results, dates):
    """
    Check next value results for a single column.
    """
    self.assert_result_contains_all_sids(results)

    events = self.raw_events_no_nulls
    # Remove timezone info from trading days, since the outputs
    # from pandas won't be tz_localized.
    dates = dates.tz_localize(None)

    for asset, asset_result in results.iteritems():
        relevant_events = events[events.sid == asset.sid]
        self.assertEqual(len(relevant_events), 2)

        v1, v2 = relevant_events[self.next_value_columns[column]]
        e1, e2 = relevant_events['event_date']
        t1, t2 = relevant_events['timestamp']

        for date, computed_value in zip(dates, asset_result):
            if t1 <= date <= e1:
                # The date falls between event 1's timestamp and its
                # event date, so event 1 is the next known event.
                self.assertEqual(computed_value, v1)
            elif t2 <= date <= e2:
                # The date falls between event 2's timestamp and its
                # event date, so event 2 is the next known event.
                self.assertEqual(computed_value, v2)
            else:
                # Neither event is known and upcoming, so we should have
                # column.missing_value.
                assert_equal(
                    computed_value,
                    column.missing_value,
                    # Coerce from Timestamp to datetime64.
                    allow_datetime_coercions=True,
                )
def test_parameterized_term_non_hashable_arg(self):
    with assert_raises(TypeError) as e:
        self.SomeFactorParameterized(a=[], b=1)
    assert_equal(
        str(e.exception),
        "SomeFactorParameterized expected a hashable value for parameter"
        " 'a', but got [] instead.",
    )

    with assert_raises(TypeError) as e:
        self.SomeFactorParameterized(a=1, b=[])
    assert_equal(
        str(e.exception),
        "SomeFactorParameterized expected a hashable value for parameter"
        " 'b', but got [] instead.",
    )

    with assert_raises(TypeError) as e:
        self.SomeFactorParameterized(a=[], b=[])
    assert_regex(
        str(e.exception),
        r"SomeFactorParameterized expected a hashable value for parameter"
        r" '(a|b)', but got \[\] instead\.",
    )
def test_parameterized_term_default_value(self):
    defaults = {'a': 'default for a', 'b': 'default for b'}

    class F(Factor):
        params = defaults
        inputs = (SomeDataSet.foo,)
        dtype = 'f8'
        window_length = 5

    assert_equal(F().params, defaults)
    assert_equal(F(a='new a').params, assoc(defaults, 'a', 'new a'))
    assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
    assert_equal(
        F(a='new a', b='new b').params,
        {'a': 'new a', 'b': 'new b'},
    )