Ejemplo n.º 1
0
    def _check_bundles(self, names):
        """Verify the registered bundles are exactly ``names`` and remove them.

        Each name is unregistered in turn, after which the registry is
        asserted to be empty.
        """
        registered = set(self.bundles.keys())
        assert_equal(registered, names)

        for bundle_name in names:
            self.unregister(bundle_name)

        assert_false(self.bundles)
Ejemplo n.º 2
0
    def test_reversability(self, dtype_):
        """Round-trip a ``dtype_`` factor between pipeline and workspace form.

        ``postprocess`` is expected to leave the flattened data unchanged,
        and ``to_workspace_value`` is expected to rebuild the 2D array from a
        pipeline output that omits the missing entries.
        """
        class F(Factor):
            inputs = ()
            dtype = dtype_
            window_length = 0

        factor = F()
        expected_2d = array(
            [
                [0, factor.missing_value],
                [1, factor.missing_value],
                [2, 3],
            ],
            dtype=dtype_,
        )

        assert_equal(
            factor.postprocess(expected_2d.ravel()),
            expected_2d.ravel(),
        )

        # only include the non-missing data
        day_strings = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        first_level = [pd.Timestamp(day) for day in day_strings]
        second_level = [0, 0, 0, 1]
        pipeline_output = pd.Series(
            data=array([0, 1, 2, 3], dtype=dtype_),
            index=pd.MultiIndex.from_arrays([first_level, second_level]),
        )

        rebuilt = factor.to_workspace_value(pipeline_output, pd.Index([0, 1]))
        assert_equal(rebuilt, expected_2d)
Ejemplo n.º 3
0
    def test_reversability(self, dtype_):
        """Check that a factor's pipeline output converts back to a 2D array.

        The flattened data must pass through ``postprocess`` unchanged, and
        ``to_workspace_value`` must reproduce the original array even though
        the pipeline output leaves out the missing entries.
        """
        class F(Factor):
            inputs = ()
            dtype = dtype_
            window_length = 0

        term = F()
        rows = [
            [0, term.missing_value],
            [1, term.missing_value],
            [2, 3],
        ]
        workspace_value = array(rows, dtype=dtype_)

        flattened = workspace_value.ravel()
        assert_equal(term.postprocess(flattened), workspace_value.ravel())

        # only include the non-missing data
        jan_first = pd.Timestamp('2014-01-01')
        jan_second = pd.Timestamp('2014-01-02')
        index = pd.MultiIndex.from_arrays([
            [
                jan_first,
                jan_second,
                pd.Timestamp('2014-01-03'),
                pd.Timestamp('2014-01-03'),
            ],
            [0, 0, 0, 1],
        ])
        values = array([0, 1, 2, 3], dtype=dtype_)
        pipeline_output = pd.Series(data=values, index=index)

        assert_equal(
            term.to_workspace_value(pipeline_output, pd.Index([0, 1])),
            workspace_value,
        )
Ejemplo n.º 4
0
    def test_reversability(self):
        """Round-trip a boolean filter between pipeline and workspace form.

        ``postprocess`` is expected to return the flattened data unchanged,
        and ``to_workspace_value`` is expected to rebuild the 2D boolean
        array from an all-True pipeline output.
        """
        class F(Filter):
            inputs = ()
            window_length = 0
            missing_value = False

        flt = F()
        expected_2d = array(
            [
                [True, flt.missing_value],
                [True, flt.missing_value],
                [True, True],
            ],
            dtype=bool,
        )

        assert_equal(
            flt.postprocess(expected_2d.ravel()),
            expected_2d.ravel(),
        )

        # only include the non-missing data
        day_strings = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        first_level = [pd.Timestamp(day) for day in day_strings]
        second_level = [0, 0, 0, 1]
        pipeline_output = pd.Series(
            data=True,
            index=pd.MultiIndex.from_arrays([first_level, second_level]),
        )

        rebuilt = flt.to_workspace_value(pipeline_output, pd.Index([0, 1]))
        assert_equal(rebuilt, expected_2d)
Ejemplo n.º 5
0
    def test_session_closes_in_range(self):
        """Closes found across the full answer range match the answer key."""
        sessions = self.answers.index
        first_session, last_session = sessions[0], sessions[-1]

        found_closes = self.calendar.session_closes_in_range(
            first_session,
            last_session,
        )
        expected_closes = self.answers['market_close']

        assert_equal(found_closes, expected_closes)
Ejemplo n.º 6
0
 def test_repr(self):
     """The repr of an aliased term names the base type and the alias."""
     aliased = self.Term().alias('ayy lmao')
     actual = repr(aliased)

     base_name = self.Term.__base__.__name__
     expected = "Aliased%s(Term(...), name='ayy lmao')" % (base_name,)

     assert_equal(actual, expected)
Ejemplo n.º 7
0
    def test_reversability_int64(self):
        """Round-trip an int64 classifier between pipeline and workspace form.

        Uses -1 as the missing value. ``postprocess`` is expected to return
        the flattened data unchanged, and ``to_workspace_value`` is expected
        to rebuild the 2D array from the non-missing pipeline output.
        """
        class F(Classifier):
            inputs = ()
            window_length = 0
            dtype = int64_dtype
            missing_value = -1

        classifier = F()
        expected_2d = np.array([
            [0, classifier.missing_value],
            [1, classifier.missing_value],
            [2, 3],
        ])

        assert_equal(
            classifier.postprocess(expected_2d.ravel()),
            expected_2d.ravel(),
        )

        # only include the non-missing data
        day_strings = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        index = pd.MultiIndex.from_arrays([
            [pd.Timestamp(day) for day in day_strings],
            [0, 0, 0, 1],
        ])
        pipeline_output = pd.Series(
            data=[0, 1, 2, 3],
            index=index,
            dtype=int64_dtype,
        )

        rebuilt = classifier.to_workspace_value(
            pipeline_output,
            pd.Index([0, 1]),
        )
        assert_equal(rebuilt, expected_2d)
Ejemplo n.º 8
0
    def _check_bundles(self, names):
        """Check that ``self.bundles`` has exactly the keys in ``names``.

        Every bundle in ``names`` is then unregistered, and the registry is
        asserted to be falsy afterwards.
        """
        current_keys = set(self.bundles.keys())
        assert_equal(current_keys, names)

        for registered_name in names:
            self.unregister(registered_name)

        remaining = self.bundles
        assert_false(remaining)
Ejemplo n.º 9
0
    def test_reversability(self):
        """Check that a filter's pipeline output converts back to a 2D array.

        The flattened data must pass through ``postprocess`` unchanged, and
        ``to_workspace_value`` must reproduce the original boolean array from
        a pipeline output holding only True values.
        """
        class F(Filter):
            inputs = ()
            window_length = 0
            missing_value = False

        term = F()
        rows = [
            [True, term.missing_value],
            [True, term.missing_value],
            [True, True],
        ]
        workspace_value = array(rows, dtype=bool)

        flattened = workspace_value.ravel()
        assert_equal(term.postprocess(flattened), workspace_value.ravel())

        # only include the non-missing data
        index = pd.MultiIndex.from_arrays([
            [
                pd.Timestamp('2014-01-01'),
                pd.Timestamp('2014-01-02'),
                pd.Timestamp('2014-01-03'),
                pd.Timestamp('2014-01-03'),
            ],
            [0, 0, 0, 1],
        ])
        pipeline_output = pd.Series(data=True, index=index)

        assert_equal(
            term.to_workspace_value(pipeline_output, pd.Index([0, 1])),
            workspace_value,
        )
Ejemplo n.º 10
0
    def _empty_ingest(self, _wrote_to=[]):
        """Run the nth empty ingest.

        When no bundle is registered yet, this first registers a bundle named
        ``'bundle'`` whose ingest function only records the ``output_dir`` it
        receives. It then runs one ingestion and checks that the recorded
        directory is among the listed ingestions.

        Parameters
        ----------
        _wrote_to : list, optional
            Scratch list that captures ``output_dir``. The mutable default
            appears deliberate: the ingest function registered on an earlier
            call closes over this list, so later calls must reuse the very
            same object (hence it is cleared in place below rather than
            rebound). Presumably callers are not meant to pass it.

        Returns
        -------
        wrote_to : str
            The timestr of the bundle written.
        """
        if not self.bundles:

            @self.register('bundle',
                           calendar_name='NYSE',
                           start_session=pd.Timestamp('2014', tz='UTC'),
                           end_session=pd.Timestamp('2014', tz='UTC'))
            def _(environ, asset_db_writer, minute_bar_writer,
                  daily_bar_writer, adjustment_writer, calendar, start_session,
                  end_session, cache, show_progress, output_dir):
                # Writes no data; just remembers where it was told to write.
                _wrote_to.append(output_dir)

        # Clear in place so the closure registered above still sees this list.
        _wrote_to[:] = []
        self.ingest('bundle', environ=self.environ)
        # NOTE(review): this also fails when the ingest function was never
        # called (length 0), although the message only mentions "more than
        # once".
        assert_equal(len(_wrote_to), 1, msg='ingest was called more than once')
        ingestions = self._list_bundle()
        assert_in(
            _wrote_to[0],
            ingestions,
            msg='output_dir was not in the bundle directory',
        )
        return _wrote_to[0]
Ejemplo n.º 11
0
    def test_session_closes_in_range(self):
        """Compare ``session_closes_in_range`` with the 'market_close' answers.

        The range runs from the first to the last entry of the answers index.
        """
        range_start = self.answers.index[0]
        range_end = self.answers.index[-1]

        actual = self.calendar.session_closes_in_range(range_start, range_end)

        assert_equal(actual, self.answers['market_close'])
Ejemplo n.º 12
0
    def test_ingest_assets_versions(self):
        """Exercise ``assets_versions`` handling when ingesting a bundle.

        First registers 'bundle' with ``create_writers=False`` and expects an
        ingestion that requests ``assets_versions`` to raise ``ValueError``
        without the ingest function running. Then re-registers it with
        ``create_writers=True``, ingests again with a later timestamp, and
        checks the version info stored in the asset db for each requested
        version plus ``ASSET_DB_VERSION``.
        """
        versions = (1, 2)

        # One-element list so the nested ingest functions can flip the flag by
        # mutating it in place.
        called = [False]

        @self.register('bundle', create_writers=False)
        def bundle_ingest_no_create_writers(*args, **kwargs):
            called[0] = True

        now = pd.Timestamp.utcnow()
        # NOTE(review): assertRaisesRegexp is the legacy spelling of
        # assertRaisesRegex; presumably kept for Python 2 compatibility.
        with self.assertRaisesRegexp(ValueError,
                                     "ingest .* creates writers .* downgrade"):
            self.ingest('bundle',
                        self.environ,
                        assets_versions=versions,
                        timestamp=now - pd.Timedelta(seconds=1))
        # The ingest function must not have run, yet one ingestion is still
        # expected to be listed for the bundle.
        assert_false(called[0])
        assert_equal(len(ingestions_for_bundle('bundle', self.environ)), 1)

        @self.register('bundle', create_writers=True)
        def bundle_ingest_create_writers(environ, asset_db_writer,
                                         minute_bar_writer, daily_bar_writer,
                                         adjustment_writer, calendar,
                                         start_session, end_session, cache,
                                         show_progress, output_dir):
            # With create_writers=True every writer should be supplied.
            self.assertIsNotNone(asset_db_writer)
            self.assertIsNotNone(minute_bar_writer)
            self.assertIsNotNone(daily_bar_writer)
            self.assertIsNotNone(adjustment_writer)

            equities = make_simple_equity_info(
                tuple(range(3)),
                self.START_DATE,
                self.END_DATE,
            )
            asset_db_writer.write(equities=equities)
            called[0] = True

        # Explicitly use different timestamp; otherwise, test could run so fast
        # that first ingestion is re-used.
        self.ingest('bundle',
                    self.environ,
                    assets_versions=versions,
                    timestamp=now)
        assert_true(called[0])

        ingestions = ingestions_for_bundle('bundle', self.environ)
        assert_equal(len(ingestions), 2)
        # Check the requested versions as well as the current db version.
        for version in sorted(set(versions) | {ASSET_DB_VERSION}):
            eng = sa.create_engine('sqlite:///' + asset_db_path(
                'bundle',
                to_bundle_ingest_dirname(ingestions[0]),  # most recent
                self.environ,
                version,
            ))
            metadata = sa.MetaData()
            metadata.reflect(eng)
            version_table = metadata.tables['version_info']
            check_version_info(eng, version_table, version)
Ejemplo n.º 13
0
 def check_roundtrip(arr):
     """Assert that rebuilding ``arr`` from its strings yields the same strings.

     A new LabelArray is built from ``arr``'s string form and missing value,
     and its string form is compared with the original's.
     """
     original_strings = arr.as_string_array()
     rebuilt = LabelArray(arr.as_string_array(), arr.missing_value)

     assert_equal(original_strings, rebuilt.as_string_array())
Ejemplo n.º 14
0
 def check_roundtrip(arr):
     """Check that ``arr`` survives a trip through its string representation.

     The string array of ``arr`` is fed back into ``LabelArray`` together
     with the same missing value; both string arrays must compare equal.
     """
     expected = arr.as_string_array()
     roundtripped = LabelArray(
         arr.as_string_array(),
         arr.missing_value,
     )
     actual = roundtripped.as_string_array()

     assert_equal(expected, actual)
Ejemplo n.º 15
0
    def _test_price_rounding(self, frequency, field):
        """Check price rounding in a one-bar history window.

        Requests ``field`` for an equity, a future and a continuous future at
        the given ``frequency`` ('1m' selects minute data, anything else
        daily data) and compares the result with the expected values below.
        """
        equity = self.asset_finder.retrieve_asset(2)
        future = self.asset_finder.retrieve_asset(10001)
        cf = self.data_portal.asset_finder.create_continuous_future(
            'BUZ', 0, 'calendar', None,
        )
        session_minutes = self.nyse_calendar.minutes_for_session(
            self.trading_days[0],
        )

        if frequency == '1m':
            end_dt = session_minutes[0]
            data_frequency = 'minute'
            equity_volume = 100
            future_volume = 100
        else:
            end_dt = session_minutes[0].normalize()
            data_frequency = 'daily'
            equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
            future_volume = 100 * FUTURES_MINUTES_PER_DAY

        # Equity prices should be floored to three decimal places.
        equity_expectations = {
            'open': 1.005,
            'high': 1.005,
            'low': 1.005,
            'close': 1.005,
            'volume': equity_volume,
        }
        # Futures prices should be rounded to four decimal places.
        future_expectations = {
            'open': 1.0055,
            'high': 1.0059,
            'low': 1.0051,
            'close': 1.0055,
            'volume': future_volume,
        }

        actual = self.data_portal.get_history_window(
            assets=[equity, future, cf],
            end_dt=end_dt,
            bar_count=1,
            frequency=frequency,
            field=field,
            data_frequency=data_frequency,
        )

        expected_columns = {
            equity: equity_expectations[field],
            future: future_expectations[field],
            cf: future_expectations[field],
        }
        expected = pd.DataFrame(
            expected_columns,
            index=[end_dt],
            dtype=float64_dtype,
        )

        assert_equal(actual, expected)
Ejemplo n.º 16
0
 def manual_narrow_condense_back_to_valid_size_slow(self):
     """This test is really slow so we don't want it run by default.

     Builds categories via ``create_categories(24, plus_one=False)``, repeats
     the first one, and expects the resulting LabelArray to have an itemsize
     of 4 and to pass the round-trip check.
     """
     # tests that we don't try to create an 'int24' (which is meaningless)
     labels = self.create_categories(24, plus_one=False)
     first_label = labels[0]
     labels.append(first_label)

     label_array = LabelArray(labels, missing_value=first_label)

     assert_equal(label_array.itemsize, 4)
     self.check_roundtrip(label_array)
Ejemplo n.º 17
0
 def manual_narrow_condense_back_to_valid_size_slow(self):
     """This test is really slow so we don't want it run by default.

     The categories come from ``create_categories(24, plus_one=False)`` with
     the first entry appended again; the LabelArray built from them must
     report an itemsize of 4 and pass the round-trip check.
     """
     # tests that we don't try to create an 'int24' (which is meaningless)
     cats = self.create_categories(24, plus_one=False)
     duplicate = cats[0]
     cats.append(duplicate)

     narrowed = LabelArray(cats, missing_value=duplicate)
     itemsize = narrowed.itemsize

     assert_equal(itemsize, 4)
     self.check_roundtrip(narrowed)
Ejemplo n.º 18
0
    def test_map_ignores_missing_value(self, missing):
        """``LabelArray.map`` must leave the missing value untouched.

        Maps a function that shifts each character by one code point over
        ``[missing, 'B', 'C']`` and expects ``[missing, 'C', 'D']``.
        """
        def increment_char(c):
            return chr(ord(c) + 1)

        raw = np.array([missing, 'B', 'C'], dtype=object)
        labels = LabelArray(raw, missing_value=missing)

        mapped = labels.map(increment_char)
        wanted = LabelArray([missing, 'C', 'D'], missing_value=missing)

        assert_equal(mapped.as_string_array(), wanted.as_string_array())
Ejemplo n.º 19
0
    def test_input_validation(self, arg):
        """Passing ``arg`` larger than the window length raises ValueError.

        The error message must name ``arg`` and report ``53 > 52``.
        """
        window_length = 52
        too_large = window_length + 1
        bad_kwargs = {arg: too_large}

        with self.assertRaises(ValueError) as caught:
            IchimokuKinkoHyo(**bad_kwargs)

        expected_message = '%s must be <= the window_length: 53 > 52' % arg
        assert_equal(str(caught.exception), expected_message)
Ejemplo n.º 20
0
    def test_input_validation(self, arg):
        """Check the ValueError raised when ``arg`` exceeds the window length.

        ``IchimokuKinkoHyo`` is constructed with ``arg`` set to 53 and the
        resulting error text is compared with the expected message.
        """
        window_length = 52
        oversized = {arg: window_length + 1}

        with self.assertRaises(ValueError) as ctx:
            IchimokuKinkoHyo(**oversized)

        actual_message = str(ctx.exception)
        assert_equal(
            actual_message,
            '%s must be <= the window_length: 53 > 52' % arg,
        )
Ejemplo n.º 21
0
    def test_map_ignores_missing_value(self, missing):
        """Mapping over a LabelArray should not transform the missing value.

        Every label is shifted to the next character; the entry equal to
        ``missing`` is expected to come through unchanged.
        """
        source = np.array([missing, 'B', 'C'], dtype=object)
        label_array = LabelArray(source, missing_value=missing)

        def increment_char(c):
            return chr(ord(c) + 1)

        actual_strings = label_array.map(increment_char).as_string_array()
        expected_strings = LabelArray(
            [missing, 'C', 'D'],
            missing_value=missing,
        ).as_string_array()

        assert_equal(actual_strings, expected_strings)
Ejemplo n.º 22
0
    def test_price_rounding(self, frequency, field):
        """Verify the rounding applied to prices returned by history windows.

        A single bar of ``field`` is requested for an equity, a future and a
        continuous future. ``frequency == '1m'`` uses minute data; any other
        value uses daily data ending at the normalized first minute.
        """
        equity = self.asset_finder.retrieve_asset(2)
        future = self.asset_finder.retrieve_asset(10001)
        cf = self.data_portal.asset_finder.create_continuous_future(
            'BUZ',
            0,
            'calendar',
            None,
        )
        first_session = self.trading_days[0]
        minutes = self.nyse_calendar.minutes_for_session(first_session)

        if frequency == '1m':
            minute, data_frequency = minutes[0], 'minute'
            expected_equity_volume = expected_future_volume = 100
        else:
            minute, data_frequency = minutes[0].normalize(), 'daily'
            expected_equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
            expected_future_volume = 100 * FUTURES_MINUTES_PER_DAY

        # Equity prices should be floored to three decimal places.
        equity_by_field = {
            'open': 1.005,
            'high': 1.005,
            'low': 1.005,
            'close': 1.005,
            'volume': expected_equity_volume,
        }
        # Futures prices should be rounded to four decimal places.
        future_by_field = {
            'open': 1.0055,
            'high': 1.0059,
            'low': 1.0051,
            'close': 1.0055,
            'volume': expected_future_volume,
        }

        window = self.data_portal.get_history_window(
            assets=[equity, future, cf],
            end_dt=minute,
            bar_count=1,
            frequency=frequency,
            field=field,
            data_frequency=data_frequency,
        )
        expected_window = pd.DataFrame(
            {
                equity: equity_by_field[field],
                future: future_by_field[field],
                cf: future_by_field[field],
            },
            index=[minute],
            dtype=float64_dtype,
        )

        assert_equal(window, expected_window)
Ejemplo n.º 23
0
    def test_load_empty(self):
        """
        With an empty raw events frame, the pipeline output should still have
        a row for every (date, asset) pair and hold the correct missing value
        in every column.
        """
        event_fields = [
            "sid",
            "timestamp",
            "event_date",
            "float",
            "int",
            "datetime",
            "string",
        ]
        empty_events = pd.DataFrame(columns=event_fields)

        next_columns = {
            EventDataSet.next_datetime: 'datetime',
            EventDataSet.next_event_date: 'event_date',
            EventDataSet.next_float: 'float',
            EventDataSet.next_int: 'int',
            EventDataSet.next_string: 'string',
            EventDataSet.next_string_custom_missing: 'string',
        }
        previous_columns = {
            EventDataSet.previous_datetime: 'datetime',
            EventDataSet.previous_event_date: 'event_date',
            EventDataSet.previous_float: 'float',
            EventDataSet.previous_int: 'int',
            EventDataSet.previous_string: 'string',
            EventDataSet.previous_string_custom_missing: 'string',
        }
        loader = EventsLoader(empty_events, next_columns, previous_columns)

        # Every column is served by the same loader.
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.trading_days,
            self.asset_finder,
        )

        latest_by_name = {c.name: c.latest for c in EventDataSet.columns}
        results = engine.run_pipeline(
            Pipeline(latest_by_name),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        expected_index = pd.MultiIndex.from_product(
            [self.trading_days, assets],
        )
        expected = self.frame_containing_all_missing_values(
            index=expected_index,
            columns=EventDataSet.columns,
        )

        assert_equal(results, expected)
Ejemplo n.º 24
0
    def test_load_empty(self):
        """
        Run the events pipeline over an empty raw events frame and check that
        the output covers all dates and sids and contains only the expected
        missing values.
        """
        raw_events = pd.DataFrame(
            columns=[
                "sid", "timestamp", "event_date",
                "float", "int", "datetime", "string",
            ]
        )

        next_value_columns = {
            EventDataSet.next_datetime: 'datetime',
            EventDataSet.next_event_date: 'event_date',
            EventDataSet.next_float: 'float',
            EventDataSet.next_int: 'int',
            EventDataSet.next_string: 'string',
            EventDataSet.next_string_custom_missing: 'string',
        }
        previous_value_columns = {
            EventDataSet.previous_datetime: 'datetime',
            EventDataSet.previous_event_date: 'event_date',
            EventDataSet.previous_float: 'float',
            EventDataSet.previous_int: 'int',
            EventDataSet.previous_string: 'string',
            EventDataSet.previous_string_custom_missing: 'string',
        }
        events_loader = EventsLoader(
            raw_events,
            next_value_columns,
            previous_value_columns,
        )

        trading_days = self.trading_days
        engine = SimplePipelineEngine(
            lambda _: events_loader,
            trading_days,
            self.asset_finder,
        )

        pipeline = Pipeline({c.name: c.latest for c in EventDataSet.columns})
        results = engine.run_pipeline(
            pipeline,
            start_date=trading_days[0],
            end_date=trading_days[-1],
        )

        all_assets = self.asset_finder.retrieve_all(
            self.ASSET_FINDER_EQUITY_SIDS,
        )
        expected = self.frame_containing_all_missing_values(
            index=pd.MultiIndex.from_product([trading_days, all_assets]),
            columns=EventDataSet.columns,
        )

        assert_equal(results, expected)
Ejemplo n.º 25
0
    def test_aroon_basic(self, lows, highs, expected_out):
        """Compute Aroon for ``lows``/``highs`` and compare to ``expected_out``.

        The output is a record array of ``self.dtype`` with one entry per
        asset, filled in place by ``Aroon.compute``.
        """
        nassets = self.nassets
        out_shape = (nassets,)

        factor = Aroon(window_length=self.window_length)
        today = pd.Timestamp('2014', tz='utc')
        assets = pd.Index(np.arange(nassets, dtype=np.int64))

        backing = np.empty(shape=out_shape, dtype=self.dtype)
        out = np.recarray(shape=out_shape, dtype=self.dtype, buf=backing)

        factor.compute(today, assets, out, lows, highs)

        assert_equal(out, expected_out)
Ejemplo n.º 26
0
    def test_aroon_basic(self, lows, highs, expected_out):
        """Check ``Aroon.compute`` against a precomputed expected output.

        ``compute`` writes into a record array shaped ``(self.nassets,)``,
        which is then compared with ``expected_out``.
        """
        aroon = Aroon(window_length=self.window_length)
        compute_date = pd.Timestamp('2014', tz='utc')
        sids = np.arange(self.nassets, dtype=np.int64)
        assets = pd.Index(sids)

        shape = (self.nassets,)
        buffer = np.empty(shape=shape, dtype=self.dtype)
        result = np.recarray(shape=shape, dtype=self.dtype, buf=buffer)

        aroon.compute(compute_date, assets, result, lows, highs)

        assert_equal(result, expected_out)
Ejemplo n.º 27
0
    def test_wma1(self):
        """A linear weighted moving average of all-ones data is all ones."""
        window = 10
        nassets = 5

        wma = LinearWeightedMovingAverage(
            inputs=(USEquityPricing.close,),
            window_length=10,
        )

        today = pd.Timestamp('2015')
        assets = np.arange(5, dtype=np.int64)

        closes = np.ones((window, nassets))
        out = np.zeros(closes.shape[1])

        wma.compute(today, assets, out, closes)

        assert_equal(out, np.ones(5))
Ejemplo n.º 28
0
    def test_wma2(self):
        """Check the weighted average of a 10x5 ramp of the values 0..49.

        The expected output for the five assets is 30 through 34.
        """
        wma = LinearWeightedMovingAverage(
            inputs=(USEquityPricing.close,),
            window_length=10,
        )

        today = pd.Timestamp('2015')
        assets = np.arange(5, dtype=np.int64)

        ramp = np.arange(50, dtype=np.float64).reshape((10, 5))
        out = np.zeros(ramp.shape[1])

        wma.compute(today, assets, out, ramp)

        expected = np.array([30., 31., 32., 33., 34.])
        assert_equal(out, expected)
Ejemplo n.º 29
0
    def test_narrow_condense_back_to_valid_size(self):
        """LabelArray itemsize should reflect the deduplicated categories.

        Covers 257 copies of a single label (itemsize 1) and the output of
        ``create_categories(16, plus_one=False)`` with its first entry
        repeated (itemsize 2).
        """
        repeated = ['a'] * (2 ** 8 + 1)
        one_byte = LabelArray(repeated, missing_value=repeated[0])
        assert_equal(one_byte.itemsize, 1)
        self.check_roundtrip(one_byte)

        # longer than int16 but still fits when deduped
        many = self.create_categories(16, plus_one=False)
        first = many[0]
        many.append(first)
        two_byte = LabelArray(many, missing_value=first)
        assert_equal(two_byte.itemsize, 2)
        self.check_roundtrip(two_byte)
Ejemplo n.º 30
0
    def test_narrow_condense_back_to_valid_size(self):
        """Check the itemsize chosen when many labels collapse to few.

        First 257 identical labels are expected to need an itemsize of 1;
        then the categories from ``create_categories(16, plus_one=False)``
        plus one duplicate are expected to need an itemsize of 2.
        """
        label_count = 2**8 + 1
        same_labels = ['a'] * label_count
        small = LabelArray(same_labels, missing_value=same_labels[0])
        assert_equal(small.itemsize, 1)
        self.check_roundtrip(small)

        # longer than int16 but still fits when deduped
        wide_labels = self.create_categories(16, plus_one=False)
        wide_labels.append(wide_labels[0])
        medium = LabelArray(wide_labels, missing_value=wide_labels[0])
        assert_equal(medium.itemsize, 2)
        self.check_roundtrip(medium)
Ejemplo n.º 31
0
    def test_tr_basic(self):
        """TrueRange for constant highs=3, lows=2, closes=1 should be 2."""
        window_shape = (2, 3)
        highs = np.full(window_shape, 3.)
        lows = np.full(window_shape, 2.)
        closes = np.full(window_shape, 1.)

        today = pd.Timestamp('2014')
        assets = np.arange(3, dtype=np.int64)
        out = np.empty(3, dtype=np.float64)

        true_range = TrueRange()
        true_range.compute(today, assets, out, highs, lows, closes)

        expected = np.full((3,), 2.)
        assert_equal(out, expected)
Ejemplo n.º 32
0
    def test_tr_basic(self):
        """Check ``TrueRange.compute`` on flat two-row inputs.

        With highs of 3, lows of 2 and closes of 1 for three assets, every
        output value is expected to be 2.
        """
        factor = TrueRange()

        compute_date = pd.Timestamp('2014')
        sids = np.arange(3, dtype=np.int64)
        result = np.empty(3, dtype=np.float64)

        flat_high = np.full((2, 3), 3.)
        flat_low = np.full((2, 3), 2.)
        flat_close = np.full((2, 3), 1.)

        factor.compute(
            compute_date, sids, result, flat_high, flat_low, flat_close,
        )

        assert_equal(result, np.full((3,), 2.))
Ejemplo n.º 33
0
    def test_map(self, f):
        """``LabelArray.map(f)`` should agree with ``np.vectorize(f)``.

        Both are applied to the same 4x4 object array of strings and the
        results are compared element-wise.
        """
        rows = [
            ['E', 'GHIJ', 'HIJKLMNOP', 'DEFGHIJ'],
            ['CDE', 'ABCDEFGHIJKLMNOPQ', 'DEFGHIJKLMNOPQRS', 'ABCDEFGHIJK'],
            ['DEFGHIJKLMNOPQR', 'DEFGHI', 'DEFGHIJ', 'FGHIJK'],
            ['EFGHIJKLM', 'EFGHIJKLMNOPQRS', 'ABCDEFGHI', 'DEFGHIJ'],
        ]
        data = np.array(rows, dtype=object)
        labels = LabelArray(data, missing_value=None)

        vectorized = np.vectorize(f)
        via_numpy = vectorized(data)
        via_label_array = labels.map(f).as_string_array()

        assert_equal(via_numpy, via_label_array)
Ejemplo n.º 34
0
    def test_map(self, f):
        """Compare mapping ``f`` over a LabelArray with plain numpy mapping.

        The reference result comes from ``np.vectorize(f)`` on the raw object
        array; the LabelArray result is taken in string-array form.
        """
        strings = np.array(
            [
                ['E', 'GHIJ', 'HIJKLMNOP', 'DEFGHIJ'],
                ['CDE', 'ABCDEFGHIJKLMNOPQ', 'DEFGHIJKLMNOPQRS',
                 'ABCDEFGHIJK'],
                ['DEFGHIJKLMNOPQR', 'DEFGHI', 'DEFGHIJ', 'FGHIJK'],
                ['EFGHIJKLM', 'EFGHIJKLMNOPQRS', 'ABCDEFGHI', 'DEFGHIJ'],
            ],
            dtype=object,
        )
        label_array = LabelArray(strings, missing_value=None)

        reference = np.vectorize(f)(strings)
        mapped = label_array.map(f)

        assert_equal(reference, mapped.as_string_array())
Ejemplo n.º 35
0
    def test_wma2(self):
        """Weighted moving average over a 10x5 ramp of the values 0..49.

        Each of the five assets is expected to produce 30 plus its column
        index.
        """
        factor = LinearWeightedMovingAverage(inputs=(USEquityPricing.close,),
                                             window_length=10)

        compute_date = pd.Timestamp('2015')
        sids = np.arange(5, dtype=np.int64)

        values = np.arange(50, dtype=np.float64)
        data = values.reshape((10, 5))
        result = np.zeros(data.shape[1])

        factor.compute(compute_date, sids, result, data)

        assert_equal(result, np.array([30., 31., 32., 33., 34.]))
Ejemplo n.º 36
0
    def test_wma1(self):
        """Weighted moving average of constant ones should stay at one.

        Uses a 10-row window over five assets.
        """
        factor = LinearWeightedMovingAverage(inputs=(USEquityPricing.close,),
                                             window_length=10)

        compute_date = pd.Timestamp('2015')
        sids = np.arange(5, dtype=np.int64)

        ones = np.ones((10, 5))
        result = np.zeros(ones.shape[1])

        factor.compute(compute_date, sids, result, ones)

        assert_equal(result, np.ones(5))
Ejemplo n.º 37
0
    def test_MACD_window_length_generation(self, seed):
        """MACD signal window length equals ``slow + signal - 1``.

        Periods are drawn from a ``RandomState`` seeded with ``seed`` so that
        signal < fast < slow.
        """
        rng = RandomState(seed)

        # Draw order matters: each bound depends on the previous draw.
        signal = rng.randint(1, 90)
        fast = rng.randint(signal + 1, signal + 100)
        slow = rng.randint(fast + 1, fast + 100)

        macd_signal = MovingAverageConvergenceDivergenceSignal(
            fast_period=fast,
            slow_period=slow,
            signal_period=signal,
        )

        expected_window = slow + signal - 1
        assert_equal(macd_signal.window_length, expected_window)
Ejemplo n.º 38
0
    def test_rate_of_change_percentage(self, test_name, data, expected):
        """Check RateOfChangePercentage on one series copied to five assets.

        ``data`` supplies the window (its length is the window length) and
        ``expected`` is the value every asset should end up with.
        """
        rocp = RateOfChangePercentage(
            inputs=(USEquityPricing.close,),
            window_length=len(data),
        )

        today = pd.Timestamp('2014')
        assets = np.arange(5, dtype=np.int64)
        nassets = len(assets)

        # broadcast data across assets
        as_column = np.array(data)[:, np.newaxis]
        per_asset = as_column * np.ones(nassets)

        out = np.zeros(nassets)
        rocp.compute(today, assets, out, per_asset)

        assert_equal(out, np.full((nassets,), expected))
Ejemplo n.º 39
0
    def test_rate_of_change_percentage(self, test_name, data, expected):
        """Compute the rate of change percentage for a broadcast series.

        The same ``data`` column is used for each of five assets, so each
        output entry is expected to equal ``expected``.
        """
        window_length = len(data)
        factor = RateOfChangePercentage(inputs=(USEquityPricing.close,),
                                        window_length=window_length)

        compute_date = pd.Timestamp('2014')
        sids = np.arange(5, dtype=np.int64)

        # broadcast data across assets
        window = np.array(data)[:, np.newaxis] * np.ones(len(sids))

        result = np.zeros(len(sids))
        factor.compute(compute_date, sids, result, window)

        wanted = np.full((len(sids),), expected)
        assert_equal(result, wanted)
Ejemplo n.º 40
0
    def test_MACD_window_length_generation(self, seed):
        """Check the window length derived from randomly drawn MACD periods.

        The expected window length is the slow period plus the signal period
        minus one.
        """
        random_state = RandomState(seed)

        # Keep this draw order; later bounds are built from earlier draws.
        signal_period = random_state.randint(1, 90)
        fast_period = random_state.randint(
            signal_period + 1,
            signal_period + 100,
        )
        slow_period = random_state.randint(fast_period + 1, fast_period + 100)

        periods = {
            'fast_period': fast_period,
            'slow_period': slow_period,
            'signal_period': signal_period,
        }
        factor = MovingAverageConvergenceDivergenceSignal(**periods)

        assert_equal(factor.window_length, slow_period + signal_period - 1)
Ejemplo n.º 41
0
    def test_register_call(self):
        """Register bundles by calling ``register(name, ingest)`` directly.

        Each of the names 'a' through 'e' is registered with the same ingest
        function; the registry is then checked as a whole and cleaned up via
        ``_check_bundles``.
        """
        bundle_names = 'abcde'

        def ingest(*args):
            pass

        # ``apply`` runs the subtest-wrapped function once, right here.
        @apply
        @subtest(((c,) for c in bundle_names), 'name')
        def _(name):
            self.register(name, ingest)
            assert_in(name, self.bundles)
            assert_is(self.bundles[name].ingest, ingest)

        registered_ingests = valmap(op.attrgetter('ingest'), self.bundles)
        expected_ingests = {k: ingest for k in bundle_names}
        assert_equal(registered_ingests, expected_ingests)

        self._check_bundles(set(bundle_names))
Ejemplo n.º 42
0
    def test_register_call(self):
        """Check the plain-call form of ``register`` for several bundle names.

        For each single-letter name in 'abcde' the bundle is registered and
        looked up; afterwards the full name-to-ingest mapping is verified and
        ``_check_bundles`` confirms and clears the registry.
        """
        def ingest(*args):
            pass

        @apply
        @subtest(((c,) for c in 'abcde'), 'name')
        def _(name):
            self.register(name, ingest)
            assert_in(name, self.bundles)
            registered = self.bundles[name]
            assert_is(registered.ingest, ingest)

        get_ingest = op.attrgetter('ingest')
        assert_equal(
            valmap(get_ingest, self.bundles),
            dict.fromkeys('abcde', ingest),
        )
        self._check_bundles(set('abcde'))
Ejemplo n.º 43
0
 def test_example(self, example_name):
     """Run the named example and compare its perf with the stored results.

     Only the columns listed in ``examples._cols_to_check`` are compared.
     """
     # This should match the invocation in
     # catalyst/tests/resources/rebuild_example_data
     environ = {
         'ZIPLINE_ROOT': self.tmpdir.getpath('example_data/root'),
     }
     actual_perf = examples.run_example(example_name, environ=environ)

     actual = actual_perf[examples._cols_to_check]
     expected = self.expected_perf[example_name][examples._cols_to_check]

     # There is a difference in the datetime columns in pandas
     # 0.16 and 0.17 because in 16 they are object and in 17 they are
     # datetime[ns, UTC]. We will just ignore the dtypes for now.
     assert_equal(actual, expected, check_dtype=False)
Ejemplo n.º 44
0
 def test_example(self, example_name):
     """Check that an example reproduces its previously recorded performance.

     The example is run with ``ZIPLINE_ROOT`` pointing into the test's temp
     directory, and the checked columns are compared ignoring dtypes.
     """
     root_dir = self.tmpdir.getpath('example_data/root')
     perf = examples.run_example(
         example_name,
         # This should match the invocation in
         # catalyst/tests/resources/rebuild_example_data
         environ={'ZIPLINE_ROOT': root_dir},
     )

     checked_perf = perf[examples._cols_to_check]
     recorded_perf = self.expected_perf[example_name]

     assert_equal(
         checked_perf,
         recorded_perf[examples._cols_to_check],
         # There is a difference in the datetime columns in pandas
         # 0.16 and 0.17 because in 16 they are object and in 17 they are
         # datetime[ns, UTC]. We will just ignore the dtypes for now.
         check_dtype=False,
     )
Ejemplo n.º 45
0
    def test_fso_expected_basic(self):
        """
        Fast stochastic oscillator on constant inputs (highs 3, lows 2,
        closes 4) should give 200 for every asset.
        """
        window_shape = (50, 3)
        highs = np.full(window_shape, 3, dtype=np.float64)
        lows = np.full(window_shape, 2, dtype=np.float64)
        closes = np.full(window_shape, 4, dtype=np.float64)

        today = pd.Timestamp('2015')
        assets = np.arange(3, dtype=np.float64)
        out = np.empty(shape=(3,), dtype=np.float64)

        oscillator = FastStochasticOscillator()
        oscillator.compute(today, assets, out, closes, lows, highs)

        # Expected %K
        expected_k = np.full((3,), 200, dtype=np.float64)
        assert_equal(out, expected_k)
Ejemplo n.º 46
0
    def test_fso_expected_basic(self):
        """
        Basic expected-output check for the fast stochastic oscillator using
        flat 50-row windows for three assets.
        """
        factor = FastStochasticOscillator()

        compute_date = pd.Timestamp('2015')
        sids = np.arange(3, dtype=np.float64)
        result = np.empty(shape=(3,), dtype=np.float64)

        flat_high = np.full((50, 3), 3, dtype=np.float64)
        flat_low = np.full((50, 3), 2, dtype=np.float64)
        flat_close = np.full((50, 3), 4, dtype=np.float64)

        factor.compute(
            compute_date, sids, result, flat_close, flat_low, flat_high,
        )

        # Expected %K
        assert_equal(result, np.full((3,), 200, dtype=np.float64))
Ejemplo n.º 47
0
    def check_previous_value_results(self, column, results, dates):
        """
        Validate the "previous value" output computed for one column.

        Every asset in ``results`` must have exactly two raw events.  An
        event becomes eligible on the later of its ``event_date`` and
        ``timestamp``.  Once event 2 is eligible its value is expected;
        otherwise event 1's value is expected once it is eligible; before
        that the result must be ``column.missing_value``.
        """
        # Every sid must be present in the results.
        self.assert_result_contains_all_sids(results)

        all_events = self.raw_events_no_nulls
        # Outputs from pandas won't be tz_localized, so drop the timezone
        # info from the trading days before comparing.
        naive_dates = dates.tz_localize(None)
        eligibility_fields = ['event_date', 'timestamp']

        for asset, computed_values in results.iteritems():
            asset_events = all_events[all_events.sid == asset.sid]
            self.assertEqual(len(asset_events), 2)

            first_value, second_value = asset_events[
                self.previous_value_columns[column]
            ]
            # Rows are selected positionally with .iloc because the frame
            # index contains integers, so .ix would still interpret 0 as
            # a key.
            first_row = asset_events.iloc[0]
            second_row = asset_events.iloc[1]
            first_eligible = max(first_row.loc[eligibility_fields])
            second_eligible = max(second_row.loc[eligibility_fields])

            for date, computed in zip(naive_dates, computed_values):
                if date >= second_eligible:
                    # Event 2 wins even if event 1 has also been seen,
                    # because events are sorted by event_date.
                    self.assertEqual(computed, second_value)
                    continue

                if date >= first_eligible:
                    # Only event 1 has been seen so far.
                    self.assertEqual(computed, first_value)
                    continue

                # Neither event has been seen yet.
                assert_equal(
                    computed,
                    column.missing_value,
                    # Coerce from Timestamp to datetime64.
                    allow_datetime_coercions=True,
                )
Ejemplo n.º 48
0
    def check_previous_value_results(self, column, results, dates):
        """
        Validate the "previous value" output computed for one column.

        Every asset in ``results`` must have exactly two raw events.  An
        event becomes eligible on the later of its ``event_date`` and
        ``timestamp``.  Once event 2 is eligible its value is expected;
        otherwise event 1's value is expected once it is eligible; before
        that the result must be ``column.missing_value``.
        """
        # Every sid must be present in the results.
        self.assert_result_contains_all_sids(results)

        all_events = self.raw_events_no_nulls
        # Outputs from pandas won't be tz_localized, so drop the timezone
        # info from the trading days before comparing.
        naive_dates = dates.tz_localize(None)
        eligibility_fields = ['event_date', 'timestamp']

        for asset, computed_values in results.iteritems():
            asset_events = all_events[all_events.sid == asset.sid]
            self.assertEqual(len(asset_events), 2)

            first_value, second_value = asset_events[
                self.previous_value_columns[column]
            ]
            # Rows are selected positionally with .iloc because the frame
            # index contains integers, so .ix would still interpret 0 as
            # a key.
            first_row = asset_events.iloc[0]
            second_row = asset_events.iloc[1]
            first_eligible = max(first_row.loc[eligibility_fields])
            second_eligible = max(second_row.loc[eligibility_fields])

            for date, computed in zip(naive_dates, computed_values):
                if date >= second_eligible:
                    # Event 2 wins even if event 1 has also been seen,
                    # because events are sorted by event_date.
                    self.assertEqual(computed, second_value)
                    continue

                if date >= first_eligible:
                    # Only event 1 has been seen so far.
                    self.assertEqual(computed, first_value)
                    continue

                # Neither event has been seen yet.
                assert_equal(
                    computed,
                    column.missing_value,
                    # Coerce from Timestamp to datetime64.
                    allow_datetime_coercions=True,
                )
Ejemplo n.º 49
0
    def test_fso_expected_with_talib(self, seed):
        """
        Test the output that is returned from the fast stochastic oscillator
        is the same as that from the ta-lib STOCHF function.

        ``seed`` is passed to ``np.random.RandomState`` to generate the
        random close/high/low windows used as input.
        """
        window_length = 14
        nassets = 6
        rng = np.random.RandomState(seed=seed)

        input_size = (window_length, nassets)

        # The draw order (closes, highs, lows) determines the generated
        # data for a given seed, so it must not be reordered.

        # Values from 9 to 12.
        closes = 9.0 + (rng.random_sample(input_size) * 3.0)

        # Values from 13 to 15.
        highs = 13.0 + (rng.random_sample(input_size) * 2.0)

        # Values from 6 to 8.
        lows = 6.0 + (rng.random_sample(input_size) * 2.0)

        # Reference values: the last fastk entry that STOCHF returns for
        # each asset column.  The fastd output is not compared.
        expected_out_k = np.array([
            talib.STOCHF(
                high=highs[:, i],
                low=lows[:, i],
                close=closes[:, i],
                fastk_period=window_length,
                fastd_period=1,
            )[0][-1]
            for i in range(nassets)
        ])

        today = pd.Timestamp('2015')
        # np.float64 is used instead of the ``np.float`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        out = np.empty(shape=(nassets,), dtype=np.float64)
        assets = np.arange(nassets, dtype=np.float64)

        fso = FastStochasticOscillator()
        fso.compute(
            today, assets, out, closes, lows, highs
        )

        assert_equal(out, expected_out_k, array_decimal=6)
Ejemplo n.º 50
0
    def test_fso_expected_with_talib(self, seed):
        """
        Test the output that is returned from the fast stochastic oscillator
        is the same as that from the ta-lib STOCHF function.

        ``seed`` is passed to ``np.random.RandomState`` to generate the
        random close/high/low windows used as input.
        """
        window_length = 14
        nassets = 6
        rng = np.random.RandomState(seed=seed)

        input_size = (window_length, nassets)

        # The draw order (closes, highs, lows) determines the generated
        # data for a given seed, so it must not be reordered.

        # Values from 9 to 12.
        closes = 9.0 + (rng.random_sample(input_size) * 3.0)

        # Values from 13 to 15.
        highs = 13.0 + (rng.random_sample(input_size) * 2.0)

        # Values from 6 to 8.
        lows = 6.0 + (rng.random_sample(input_size) * 2.0)

        # Reference values: the last fastk entry that STOCHF returns for
        # each asset column.  The fastd output is not compared.
        expected_out_k = np.array([
            talib.STOCHF(
                high=highs[:, i],
                low=lows[:, i],
                close=closes[:, i],
                fastk_period=window_length,
                fastd_period=1,
            )[0][-1]
            for i in range(nassets)
        ])

        today = pd.Timestamp('2015')
        # np.float64 is used instead of the ``np.float`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        out = np.empty(shape=(nassets,), dtype=np.float64)
        assets = np.arange(nassets, dtype=np.float64)

        fso = FastStochasticOscillator()
        fso.compute(
            today, assets, out, closes, lows, highs
        )

        assert_equal(out, expected_out_k, array_decimal=6)
Ejemplo n.º 51
0
    def test_parameterized_term_default_value_with_not_specified(self):
        """
        A parameter whose default is ``NotSpecified`` must be passed
        explicitly: omitting it raises ``TypeError``, while supplying it
        yields the expected ``params`` mapping.
        """
        defaults = {'a': 'default for a', 'b': NotSpecified}

        class F(Factor):
            params = defaults

            inputs = (SomeDataSet.foo,)
            dtype = 'f8'
            window_length = 5

        pattern = r"F expected a keyword parameter 'b'\."

        # Both constructions leave out 'b' and therefore must fail.
        for kwargs_without_b in ({}, {'a': 'new a'}):
            with assert_raises_regex(TypeError, pattern):
                F(**kwargs_without_b)

        only_b = F(b='new b')
        assert_equal(only_b.params, assoc(defaults, 'b', 'new b'))

        both = F(a='new a', b='new b')
        assert_equal(both.params, {'a': 'new a', 'b': 'new b'})
Ejemplo n.º 52
0
    def test_parameterized_term_default_value_with_not_specified(self):
        """
        A parameter whose default is ``NotSpecified`` must be passed
        explicitly: omitting it raises ``TypeError``, while supplying it
        yields the expected ``params`` mapping.
        """
        defaults = {'a': 'default for a', 'b': NotSpecified}

        class F(Factor):
            params = defaults

            inputs = (SomeDataSet.foo,)
            dtype = 'f8'
            window_length = 5

        pattern = r"F expected a keyword parameter 'b'\."

        # Both constructions leave out 'b' and therefore must fail.
        for kwargs_without_b in ({}, {'a': 'new a'}):
            with assert_raises_regex(TypeError, pattern):
                F(**kwargs_without_b)

        only_b = F(b='new b')
        assert_equal(only_b.params, assoc(defaults, 'b', 'new b'))

        both = F(a='new a', b='new b')
        assert_equal(both.params, {'a': 'new a', 'b': 'new b'})
Ejemplo n.º 53
0
    def test_compose_mro(self):
        """
        The type built by ``compose_types(C, D)`` must return the same
        ``f()`` as ``C`` and the same ``g()`` as ``D``, and an instance's
        ``delegate()`` must return ``('C.delegate', 'D.delegate')``.
        """
        composed_type = compose_types(C, D)

        assert_equal(composed_type.f(), C.f())
        assert_equal(composed_type.g(), D.g())

        instance = composed_type()
        assert_equal(instance.delegate(), ('C.delegate', 'D.delegate'))
Ejemplo n.º 54
0
    def test_compose_mro(self):
        """
        The type built by ``compose_types(C, D)`` must return the same
        ``f()`` as ``C`` and the same ``g()`` as ``D``, and an instance's
        ``delegate()`` must return ``('C.delegate', 'D.delegate')``.
        """
        composed_type = compose_types(C, D)

        assert_equal(composed_type.f(), C.f())
        assert_equal(composed_type.g(), D.g())

        instance = composed_type()
        assert_equal(instance.delegate(), ('C.delegate', 'D.delegate'))
Ejemplo n.º 55
0
    def test_reversability_categorical(self):
        """
        Check that a categorical classifier's ``postprocess`` output and
        ``to_workspace_value`` result match the same 3x2 label data.
        """
        class F(Classifier):
            inputs = ()
            window_length = 0
            dtype = categorical_dtype
            missing_value = '<missing>'

        f = F()
        missing = f.missing_value

        rows = [
            ['a', missing],
            ['b', missing],
            ['c', 'd'],
        ]
        column_data = LabelArray(np.array(rows), missing_value=missing)

        flat_labels = ['a', missing, 'b', missing, 'c', 'd']
        assert_equal(
            f.postprocess(column_data.ravel()),
            pd.Categorical(flat_labels),
        )

        # only include the non-missing data
        output_dates = [
            pd.Timestamp(day)
            for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        ]
        output_index = pd.MultiIndex.from_arrays([
            output_dates,
            [0, 0, 0, 1],
        ])
        pipeline_output = pd.Series(
            data=['a', 'b', 'c', 'd'],
            index=output_index,
            dtype='category',
        )

        workspace_value = f.to_workspace_value(
            pipeline_output,
            pd.Index([0, 1]),
        )
        assert_equal(workspace_value, column_data)
Ejemplo n.º 56
0
    def test_map_shrinks_code_storage_if_possible(self):
        """
        Mapping a ``LabelArray`` with itemsize 2 through a function that
        only returns 'A' or 'B' should give a result with itemsize 1 whose
        strings match applying the function element-wise.
        """
        # Drop the last value so we fit in a uint16 with None as a missing
        # value.
        categories = self.create_categories(16, plus_one=False)[:-1]
        arr = LabelArray(categories, missing_value=None)

        self.assertEqual(arr.itemsize, 2)

        def either_A_or_B(s):
            # 'B' when the character codes sum to an odd number, else 'A'.
            return 'B' if sum(map(ord, s)) % 2 else 'A'

        result = arr.map(either_A_or_B)

        self.assertEqual(set(result.categories), {'A', 'B', None})
        self.assertEqual(result.itemsize, 1)

        mapped_elementwise = np.vectorize(either_A_or_B)(
            arr.as_string_array()
        )
        assert_equal(mapped_elementwise, result.as_string_array())
Ejemplo n.º 57
0
    def test_map_shrinks_code_storage_if_possible(self):
        """
        Mapping a ``LabelArray`` with itemsize 2 through a function that
        only returns 'A' or 'B' should give a result with itemsize 1 whose
        strings match applying the function element-wise.
        """
        # Drop the last value so we fit in a uint16 with None as a missing
        # value.
        categories = self.create_categories(16, plus_one=False)[:-1]
        arr = LabelArray(categories, missing_value=None)

        self.assertEqual(arr.itemsize, 2)

        def either_A_or_B(s):
            # 'B' when the character codes sum to an odd number, else 'A'.
            return 'B' if sum(map(ord, s)) % 2 else 'A'

        result = arr.map(either_A_or_B)

        self.assertEqual(set(result.categories), {'A', 'B', None})
        self.assertEqual(result.itemsize, 1)

        mapped_elementwise = np.vectorize(either_A_or_B)(
            arr.as_string_array()
        )
        assert_equal(mapped_elementwise, result.as_string_array())
Ejemplo n.º 58
0
    def check_next_value_results(self, column, results, dates):
        """
        Validate the "next value" output computed for one column.

        Every asset in ``results`` must have exactly two raw events.  On
        dates between an event's ``timestamp`` and its ``event_date``
        (both inclusive) that event's value is expected, with event 1
        checked before event 2.  On every other date the result must be
        ``column.missing_value``.
        """
        self.assert_result_contains_all_sids(results)

        all_events = self.raw_events_no_nulls
        # Outputs from pandas won't be tz_localized, so drop the timezone
        # info from the trading days before comparing.
        naive_dates = dates.tz_localize(None)

        for asset, computed_values in results.iteritems():
            asset_events = all_events[all_events.sid == asset.sid]
            self.assertEqual(len(asset_events), 2)

            first_value, second_value = asset_events[
                self.next_value_columns[column]
            ]
            first_event_date, second_event_date = asset_events['event_date']
            first_timestamp, second_timestamp = asset_events['timestamp']

            for date, computed in zip(naive_dates, computed_values):
                if first_timestamp <= date <= first_event_date:
                    # Inside event 1's [timestamp, event_date] window.
                    self.assertEqual(computed, first_value)
                elif second_timestamp <= date <= second_event_date:
                    # Outside event 1's window but inside event 2's.
                    self.assertEqual(computed, second_value)
                else:
                    # Inside neither window.
                    assert_equal(
                        computed,
                        column.missing_value,
                        # Coerce from Timestamp to datetime64.
                        allow_datetime_coercions=True,
                    )
Ejemplo n.º 59
0
    def test_parameterized_term_non_hashable_arg(self):
        """
        Passing an unhashable value (a list) for a parameter must raise
        ``TypeError`` with a message naming the offending parameter.
        """
        # Exactly one unhashable argument: the message is fully determined.
        exact_cases = (
            (
                {'a': [], 'b': 1},
                "SomeFactorParameterized expected a hashable value for parameter"
                " 'a', but got [] instead.",
            ),
            (
                {'a': 1, 'b': []},
                "SomeFactorParameterized expected a hashable value for parameter"
                " 'b', but got [] instead.",
            ),
        )
        for kwargs, expected_message in exact_cases:
            with assert_raises(TypeError) as e:
                self.SomeFactorParameterized(**kwargs)
            assert_equal(str(e.exception), expected_message)

        # Both arguments unhashable: either parameter may be reported.
        with assert_raises(TypeError) as e:
            self.SomeFactorParameterized(a=[], b=[])
        assert_regex(
            str(e.exception),
            r"SomeFactorParameterized expected a hashable value for parameter"
            r" '(a|b)', but got \[\] instead\.",
        )
Ejemplo n.º 60
0
    def test_parameterized_term_default_value(self):
        """
        Parameters left out at construction take the values from the
        class-level ``params`` mapping; parameters passed in override them.
        """
        defaults = {'a': 'default for a', 'b': 'default for b'}

        class F(Factor):
            params = defaults

            inputs = (SomeDataSet.foo,)
            dtype = 'f8'
            window_length = 5

        no_overrides = F()
        assert_equal(no_overrides.params, defaults)

        override_a = F(a='new a')
        assert_equal(override_a.params, assoc(defaults, 'a', 'new a'))

        override_b = F(b='new b')
        assert_equal(override_b.params, assoc(defaults, 'b', 'new b'))

        override_both = F(a='new a', b='new b')
        assert_equal(override_both.params, {'a': 'new a', 'b': 'new b'})