Beispiel #1
0
    def test_reversability(self):
        """Round-trip a Filter's data through its output conversions.

        ``postprocess`` must hand back the flattened data unchanged, and
        ``to_workspace_value`` must rebuild the dense 2D array from a
        pipeline output that leaves out the missing entries.
        """
        class F(Filter):
            missing_value = False
            window_length = 0
            inputs = ()

        term = F()
        absent = term.missing_value
        dense = array(
            [
                [True, absent],
                [True, absent],
                [True, True],
            ],
            dtype=bool,
        )

        roundtripped = term.postprocess(dense.ravel())
        assert_equal(roundtripped, dense.ravel())

        # The pipeline output carries only the non-missing (date, sid) pairs.
        dates = [
            pd.Timestamp(day)
            for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        ]
        sids = [0, 0, 0, 1]
        sparse_output = pd.Series(
            data=True,
            index=pd.MultiIndex.from_arrays([dates, sids]),
        )

        rebuilt = term.to_workspace_value(sparse_output, pd.Index([0, 1]))
        assert_equal(rebuilt, dense)
Beispiel #2
0
    def test_session_closes_in_range(self):
        """Closes over the full answers range must match 'market_close'."""
        range_start = self.answers.index[0]
        range_end = self.answers.index[-1]

        actual_closes = self.calendar.session_closes_in_range(
            range_start,
            range_end,
        )
        expected_closes = self.answers['market_close']

        assert_equal(actual_closes, expected_closes)
Beispiel #3
0
    def test_reversability_int64(self):
        """Round-trip an int64 Classifier's data through output conversions.

        ``postprocess`` must return the flattened data unchanged, and
        ``to_workspace_value`` must rebuild the dense 2D array (with -1 in
        the missing slots) from a pipeline output that omits those slots.
        """
        class F(Classifier):
            missing_value = -1
            dtype = int64_dtype
            window_length = 0
            inputs = ()

        term = F()
        absent = term.missing_value
        dense = np.array(
            [
                [0, absent],
                [1, absent],
                [2, 3],
            ],
        )

        roundtripped = term.postprocess(dense.ravel())
        assert_equal(roundtripped, dense.ravel())

        # The pipeline output carries only the non-missing (date, sid) pairs.
        dates = [
            pd.Timestamp(day)
            for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        ]
        sids = [0, 0, 0, 1]
        sparse_output = pd.Series(
            data=[0, 1, 2, 3],
            index=pd.MultiIndex.from_arrays([dates, sids]),
            dtype=int64_dtype,
        )

        rebuilt = term.to_workspace_value(sparse_output, pd.Index([0, 1]))
        assert_equal(rebuilt, dense)
Beispiel #4
0
    def test_bundle(self):
        """Ingest the sample 'csvdir' bundle and verify what was written.

        Checks the asset ids and their start/end dates, the daily pricing
        arrays, and the adjustment keys for every column.
        """
        environ = {'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')}

        ingest('csvdir', environ=environ)
        bundle = load('csvdir', environ=environ)
        finder = bundle.asset_finder

        sids = 0, 1, 2, 3
        assert_equal(set(finder.sids), set(sids))

        for equity in finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        # Snap the asset date range onto calendar sessions before reading.
        sessions = self.calendar.all_sessions
        first_session = sessions[sessions.get_loc(self.asset_start, 'bfill')]
        last_session = sessions[sessions.get_loc(self.asset_end, 'ffill')]
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            first_session,
            last_session,
            sids,
        )

        expected_pricing, expected_adjustments = self._expected_data(finder)
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        adjustment_keys = [
            sorted(column_adjustments.keys())
            for column_adjustments in adjustments_for_cols
        ]
        assert_equal(adjustment_keys, expected_adjustments)
Beispiel #5
0
    def _empty_ingest(self, _wrote_to=[]):
        """Run the nth empty ingest.

        On the first call (when no bundle is registered yet) this registers
        a bundle named 'bundle' whose ingest function only records the
        ``output_dir`` it was given. Every call then runs one ingest and
        checks that exactly one output directory was recorded and that it is
        listed by ``self._list_bundle()``.

        Parameters
        ----------
        _wrote_to : list, optional
            Accumulator for recorded output directories. The mutable default
            is intentional: the ingest function registered on the first call
            closes over this list, and later calls skip registration, so they
            must clear and reuse the very same list object.
            NOTE(review): callers are presumably not meant to pass this
            argument -- confirm.

        Returns
        -------
        wrote_to : str
            The timestr of the bundle written.
        """
        # Register the recording bundle only once; later calls reuse it.
        if not self.bundles:

            @self.register('bundle',
                           calendar_name='NYSE',
                           start_session=pd.Timestamp('2014', tz='UTC'),
                           end_session=pd.Timestamp('2014', tz='UTC'))
            def _(environ, asset_db_writer, minute_bar_writer,
                  daily_bar_writer, adjustment_writer, calendar, start_session,
                  end_session, cache, show_progress, output_dir):
                # Writes no data; just remember where this ingest would write.
                _wrote_to.append(output_dir)

        # Clear in place (not rebind) so the registered closure keeps seeing
        # the same list object.
        _wrote_to[:] = []
        self.ingest('bundle', environ=self.environ)
        assert_equal(len(_wrote_to), 1, msg='ingest was called more than once')
        ingestions = self._list_bundle()
        assert_in(
            _wrote_to[0],
            ingestions,
            msg='output_dir was not in the bundle directory',
        )
        return _wrote_to[0]
Beispiel #6
0
    def test_reversability(self, dtype_):
        """Round-trip a Factor of ``dtype_`` through its output conversions.

        ``postprocess`` must return the flattened data unchanged, and
        ``to_workspace_value`` must rebuild the dense 2D array from a
        pipeline output that omits the missing entries.
        """
        class F(Factor):
            window_length = 0
            dtype = dtype_
            inputs = ()

        term = F()
        absent = term.missing_value
        dense = array(
            [
                [0, absent],
                [1, absent],
                [2, 3],
            ],
            dtype=dtype_,
        )

        roundtripped = term.postprocess(dense.ravel())
        assert_equal(roundtripped, dense.ravel())

        # The pipeline output carries only the non-missing (date, sid) pairs.
        dates = [
            pd.Timestamp(day)
            for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
        ]
        sids = [0, 0, 0, 1]
        sparse_output = pd.Series(
            data=array([0, 1, 2, 3], dtype=dtype_),
            index=pd.MultiIndex.from_arrays([dates, sids]),
        )

        rebuilt = term.to_workspace_value(sparse_output, pd.Index([0, 1]))
        assert_equal(rebuilt, dense)
Beispiel #7
0
    def _check_bundles(self, names):
        """Assert exactly ``names`` are registered, then unregister them all.

        After unregistering every name the bundle mapping must be empty.
        """
        registered = set(self.bundles.keys())
        assert_equal(registered, names)

        for bundle_name in names:
            self.unregister(bundle_name)

        assert_false(self.bundles)
Beispiel #8
0
    def test_ingest_assets_versions(self):
        """Exercise ``assets_versions`` with and without writer creation.

        First, a bundle registered with ``create_writers=False`` must raise
        ``ValueError`` when asset db versions are requested, without calling
        the ingest function. Second, a bundle registered with
        ``create_writers=True`` must ingest successfully and leave a
        correctly versioned asset db for each requested version plus the
        current one.
        """
        versions = (1, 2)

        # One-element list used as a flag the nested functions can set.
        ingest_ran = [False]

        @self.register('bundle', create_writers=False)
        def bundle_ingest_no_create_writers(*args, **kwargs):
            ingest_ran[0] = True

        now = pd.Timestamp.utcnow()
        expected_error = "ingest .* creates writers .* downgrade"
        with self.assertRaisesRegexp(ValueError, expected_error):
            self.ingest(
                'bundle',
                self.environ,
                assets_versions=versions,
                timestamp=now - pd.Timedelta(seconds=1),
            )
        assert_false(ingest_ran[0])
        # The failed attempt is still listed as an ingestion.
        assert_equal(len(ingestions_for_bundle('bundle', self.environ)), 1)

        @self.register('bundle', create_writers=True)
        def bundle_ingest_create_writers(environ, asset_db_writer,
                                         minute_bar_writer, daily_bar_writer,
                                         adjustment_writer, calendar,
                                         start_session, end_session, cache,
                                         show_progress, output_dir):
            for writer in (asset_db_writer, minute_bar_writer,
                           daily_bar_writer, adjustment_writer):
                self.assertIsNotNone(writer)

            equities = make_simple_equity_info(
                tuple(range(3)),
                self.START_DATE,
                self.END_DATE,
            )
            asset_db_writer.write(equities=equities)
            ingest_ran[0] = True

        # Use a distinct timestamp on purpose: if the test ran fast enough
        # the first ingestion could otherwise be re-used.
        self.ingest(
            'bundle',
            self.environ,
            assets_versions=versions,
            timestamp=now,
        )
        assert_true(ingest_ran[0])

        ingestions = ingestions_for_bundle('bundle', self.environ)
        assert_equal(len(ingestions), 2)
        most_recent = ingestions[0]
        for version in sorted(set(versions) | {ASSET_DB_VERSION}):
            db_path = asset_db_path(
                'bundle',
                to_bundle_ingest_dirname(most_recent),
                self.environ,
                version,
            )
            eng = sa.create_engine('sqlite:///' + db_path)
            metadata = sa.MetaData()
            metadata.reflect(eng)
            check_version_info(
                eng,
                metadata.tables['version_info'],
                version,
            )
Beispiel #9
0
 def check_roundtrip(arr):
     """Assert that rebuilding ``arr`` from its strings gives equal strings."""
     original_strings = arr.as_string_array()
     rebuilt = LabelArray(arr.as_string_array(), arr.missing_value)
     assert_equal(original_strings, rebuilt.as_string_array())
Beispiel #10
0
    def test_price_rounding(self, frequency, field):
        """Check the one-bar history value of ``field`` per asset kind.

        An equity, a future and a continuous future are queried together;
        the equity is compared against the three-decimal expectations and
        both futures against the four-decimal expectations.
        """
        equity = self.asset_finder.retrieve_asset(2)
        future = self.asset_finder.retrieve_asset(10001)
        cf = self.data_portal.asset_finder.create_continuous_future(
            'BUZ',
            0,
            'calendar',
            None,
        )
        session_minutes = self.nyse_calendar.minutes_for_session(
            self.trading_days[0],
        )

        if frequency == '1m':
            end_dt = session_minutes[0]
            data_frequency = 'minute'
            equity_volume = 100
            future_volume = 100
        else:
            end_dt = session_minutes[0].normalize()
            data_frequency = 'daily'
            equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
            future_volume = 100 * FUTURES_MINUTES_PER_DAY

        # Equity prices should be floored to three decimal places.
        equity_expectations = dict(
            open=1.005,
            high=1.005,
            low=1.005,
            close=1.005,
            volume=equity_volume,
        )
        # Futures prices should be rounded to four decimal places.
        future_expectations = dict(
            open=1.0055,
            high=1.0059,
            low=1.0051,
            close=1.0055,
            volume=future_volume,
        )

        result = self.data_portal.get_history_window(
            assets=[equity, future, cf],
            end_dt=end_dt,
            bar_count=1,
            frequency=frequency,
            field=field,
            data_frequency=data_frequency,
        )

        expected_by_asset = {
            equity: equity_expectations[field],
            future: future_expectations[field],
            cf: future_expectations[field],
        }
        expected_result = pd.DataFrame(
            expected_by_asset,
            index=[end_dt],
            dtype=float64_dtype,
        )

        assert_equal(result, expected_result)
Beispiel #11
0
    def check_equivalent_terms(self, terms):
        """Run ``terms`` in one pipeline and assert all columns are equal.

        Every named column is compared (ignoring names) against the first
        column of the result.
        """
        self.assertTrue(len(terms) > 1, "Need at least two terms to compare")
        pipeline = Pipeline(terms)

        start, end = self.trading_days[[-10, -1]]
        results = self.pipeline_engine.run_pipeline(pipeline, start, end)

        reference = results.iloc[:, 0]
        for column_name in terms:
            candidate = results.loc[:, column_name]
            assert_equal(candidate, reference, check_names=False)
Beispiel #12
0
 def manual_narrow_condense_back_to_valid_size_slow(self):
     """Slow check, deliberately not run by default.

     Verifies that we never try to build a meaningless 'int24' code type:
     the resulting array must use 4-byte items and still round-trip.
     """
     labels = self.create_categories(24, plus_one=False)
     first_label = labels[0]
     labels.append(first_label)

     arr = LabelArray(labels, missing_value=labels[0])

     assert_equal(arr.itemsize, 4)
     self.check_roundtrip(arr)
Beispiel #13
0
    def test_input_validation(self, arg):
        """``arg`` larger than the window length must raise ValueError."""
        window_length = 52
        too_long = window_length + 1

        with self.assertRaises(ValueError) as caught:
            IchimokuKinkoHyo(**{arg: too_long})

        expected_message = '%s must be <= the window_length: 53 > 52' % arg
        assert_equal(str(caught.exception), expected_message)
Beispiel #14
0
    def test_map_ignores_missing_value(self, missing):
        """``map`` must transform labels but leave the missing value alone."""
        def next_letter(ch):
            return chr(ord(ch) + 1)

        source = LabelArray(
            np.array([missing, 'B', 'C'], dtype=object),
            missing_value=missing,
        )

        mapped = source.map(next_letter)

        wanted = LabelArray([missing, 'C', 'D'], missing_value=missing)
        assert_equal(mapped.as_string_array(), wanted.as_string_array())
Beispiel #15
0
    def test_load_empty(self):
        """
        With no raw events at all, the pipeline must still produce a row for
        every (date, asset) pair, with the right dimensions, filled with each
        column's missing value.
        """
        raw_events = pd.DataFrame(
            columns=[
                "sid",
                "timestamp",
                "event_date",
                "float",
                "int",
                "datetime",
                "string",
            ],
        )

        # (dataset column suffix, raw frame column) pairs shared by the
        # ``next_*`` and ``previous_*`` mappings.
        suffix_to_raw = (
            ('datetime', 'datetime'),
            ('event_date', 'event_date'),
            ('float', 'float'),
            ('int', 'int'),
            ('string', 'string'),
            ('string_custom_missing', 'string'),
        )
        next_value_columns = {
            getattr(EventDataSet, 'next_' + suffix): raw_name
            for suffix, raw_name in suffix_to_raw
        }
        previous_value_columns = {
            getattr(EventDataSet, 'previous_' + suffix): raw_name
            for suffix, raw_name in suffix_to_raw
        }

        loader = EventsLoader(
            raw_events,
            next_value_columns,
            previous_value_columns,
        )
        # Every column is served by the same loader.
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.trading_days,
            self.asset_finder,
        )

        latest_terms = {col.name: col.latest for col in EventDataSet.columns}
        results = engine.run_pipeline(
            Pipeline(latest_terms),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        dates = self.trading_days
        expected = self.frame_containing_all_missing_values(
            index=pd.MultiIndex.from_product([dates, assets]),
            columns=EventDataSet.columns,
        )

        assert_equal(results, expected)
Beispiel #16
0
 def compare_with_empyrical(self, dependents, independent):
     """Check ``vectorized_beta`` against per-column ``empyrical_beta``.

     Returns the ``vectorized_beta`` result so callers can inspect it
     further.
     """
     # Large enough to act as "no limit" on missing values.
     INFINITY = 1000000
     result = vectorized_beta(
         dependents,
         independent,
         allowed_missing=INFINITY,
     )

     per_column = []
     for col in range(dependents.shape[1]):
         per_column.append(
             empyrical_beta(dependents[:, col].ravel(), independent.ravel())
         )
     expected = np.array(per_column)

     assert_equal(result, expected, array_decimal=7)
     return result
Beispiel #17
0
    def test_narrow_condense_back_to_valid_size(self):
        """Item size must reflect the deduplicated categories, not length."""
        # 2**8 + 1 entries, but only one distinct label.
        repeated = ['a'] * (2**8 + 1)
        small = LabelArray(repeated, missing_value=repeated[0])
        assert_equal(small.itemsize, 1)
        self.check_roundtrip(small)

        # longer than int16 but still fits when deduped
        labels = self.create_categories(16, plus_one=False)
        labels.append(labels[0])
        medium = LabelArray(labels, missing_value=labels[0])
        assert_equal(medium.itemsize, 2)
        self.check_roundtrip(medium)
Beispiel #18
0
    def test_wma1(self):
        """A window of all ones must average to one for every asset."""
        factor = LinearWeightedMovingAverage(
            inputs=(USEquityPricing.close, ),
            window_length=10,
        )

        window = np.ones((10, 5))
        result = np.zeros(window.shape[1])
        sids = np.arange(5, dtype=np.int64)
        as_of = pd.Timestamp('2015')

        factor.compute(as_of, sids, result, window)

        assert_equal(result, np.ones(5))
Beispiel #19
0
    def test_aroon_basic(self, lows, highs, expected_out):
        """``Aroon.compute`` must fill the record array with expected_out."""
        indicator = Aroon(window_length=self.window_length)
        as_of = pd.Timestamp('2014', tz='utc')
        sids = pd.Index(np.arange(self.nassets, dtype=np.int64))

        # Record-array output backed by an uninitialised buffer.
        out_shape = (self.nassets, )
        backing = np.empty(shape=out_shape, dtype=self.dtype)
        out = np.recarray(shape=out_shape, dtype=self.dtype, buf=backing)

        indicator.compute(as_of, sids, out, lows, highs)

        assert_equal(out, expected_out)
Beispiel #20
0
    def test_wma2(self):
        """Check the weighted average of a 10x5 window holding 0..49."""
        factor = LinearWeightedMovingAverage(
            inputs=(USEquityPricing.close, ),
            window_length=10,
        )

        window = np.arange(50, dtype=np.float64).reshape((10, 5))
        result = np.zeros(window.shape[1])
        sids = np.arange(5, dtype=np.int64)
        as_of = pd.Timestamp('2015')

        factor.compute(as_of, sids, result, window)

        assert_equal(result, np.array([30., 31., 32., 33., 34.]))
Beispiel #21
0
    def test_tr_basic(self):
        """Constant high=3, low=2, close=1 must give a true range of 2."""
        true_range = TrueRange()

        as_of = pd.Timestamp('2014')
        sids = np.arange(3, dtype=np.int64)
        result = np.empty(3, dtype=np.float64)

        window_shape = (2, 3)
        highs = np.full(window_shape, 3.)
        lows = np.full(window_shape, 2.)
        closes = np.full(window_shape, 1.)

        true_range.compute(as_of, sids, result, highs, lows, closes)

        assert_equal(result, np.full((3, ), 2.))
Beispiel #22
0
    def test_copy_categories_list(self):
        """regression test for #1927

        Constructing a LabelArray must not mutate the caller's categories
        list.
        """
        categories = ['a', 'b', 'c']
        values = [None, 'a', 'b', 'c']

        LabelArray(values, missing_value=None, categories=categories)

        # Before #1927 no copy was taken, so the missing value (None) ended
        # up inserted into the caller's list.
        assert_equal(categories, ['a', 'b', 'c'])
Beispiel #23
0
    def test_map(self, f):
        """``LabelArray.map(f)`` must agree with ``np.vectorize(f)``."""
        rows = [
            ['E', 'GHIJ', 'HIJKLMNOP', 'DEFGHIJ'],
            ['CDE', 'ABCDEFGHIJKLMNOPQ', 'DEFGHIJKLMNOPQRS', 'ABCDEFGHIJK'],
            ['DEFGHIJKLMNOPQR', 'DEFGHI', 'DEFGHIJ', 'FGHIJK'],
            ['EFGHIJKLM', 'EFGHIJKLMNOPQRS', 'ABCDEFGHI', 'DEFGHIJ'],
        ]
        data = np.array(rows, dtype=object)
        labels = LabelArray(data, missing_value=None)

        via_numpy = np.vectorize(f)(data)
        via_label_array = labels.map(f).as_string_array()

        assert_equal(via_numpy, via_label_array)
Beispiel #24
0
 def test_simple_beta_matches_regression(self):
     """SimpleBeta must equal the beta output of the rolling regression."""
     window = 10
     simple = SimpleBeta(target=self.my_asset, regression_length=window)
     regression = RollingLinearRegressionOfReturns(
         target=self.my_asset,
         returns_length=2,
         regression_length=window,
     )

     columns = {'simple': simple, 'complex': regression.beta}
     results = self.run_pipeline(
         Pipeline(columns),
         self.pipeline_start_date,
         self.pipeline_end_date,
     )

     assert_equal(results['simple'], results['complex'], check_names=False)
Beispiel #25
0
    def test_rate_of_change_percentage(self, test_name, data, expected):
        """Every asset fed the same ``data`` column must yield ``expected``."""
        factor = RateOfChangePercentage(
            inputs=(USEquityPricing.close, ),
            window_length=len(data),
        )
        as_of = pd.Timestamp('2014')
        sids = np.arange(5, dtype=np.int64)
        n_assets = len(sids)

        # Turn the 1D series into a column and repeat it for each asset.
        window = np.array(data)[:, np.newaxis] * np.ones(n_assets)
        result = np.zeros(n_assets)

        factor.compute(as_of, sids, result, window)

        assert_equal(result, np.full((n_assets, ), expected))
Beispiel #26
0
    def test_MACD_window_length_generation(self, seed):
        """window_length must equal ``slow_period + signal_period - 1``.

        Periods are drawn from a seeded generator so that
        signal < fast < slow always holds.
        """
        rng = RandomState(seed)

        signal = rng.randint(1, 90)
        fast = rng.randint(signal + 1, signal + 100)
        slow = rng.randint(fast + 1, fast + 100)

        macd = MovingAverageConvergenceDivergenceSignal(
            fast_period=fast,
            slow_period=slow,
            signal_period=signal,
        )

        expected_window_length = slow + signal - 1
        assert_equal(macd.window_length, expected_window_length)
Beispiel #27
0
    def test_register_call(self):
        """Register bundles by calling ``register`` directly.

        Each name in 'abcde' is registered with the same do-nothing ingest
        function, and the resulting bundle mapping is then verified.
        """
        def fake_ingest(*args):
            pass

        bundle_names = 'abcde'

        @apply
        @subtest(((letter, ) for letter in bundle_names), 'name')
        def _(name):
            self.register(name, fake_ingest)
            assert_in(name, self.bundles)
            assert_is(self.bundles[name].ingest, fake_ingest)

        registered_ingests = valmap(op.attrgetter('ingest'), self.bundles)
        assert_equal(
            registered_ingests,
            dict.fromkeys(bundle_names, fake_ingest),
        )
        self._check_bundles(set(bundle_names))
Beispiel #28
0
 def test_example(self, example_name):
     """Run one example and compare its checked columns to stored results."""
     # This should match the invocation in
     # gateway/tests/resources/rebuild_example_data
     environ = {
         'GATEWAY_ROOT': self.tmpdir.getpath('example_data/root'),
     }
     actual_perf = examples.run_example(example_name, environ=environ)

     checked_columns = examples._cols_to_check
     expected_perf = self.expected_perf[example_name]

     # The datetime columns differ between pandas 0.16 (object) and 0.17
     # (datetime[ns, UTC]), so dtypes are simply ignored for now.
     assert_equal(
         actual_perf[checked_columns],
         expected_perf[checked_columns],
         check_dtype=False,
     )
Beispiel #29
0
    def test_fso_expected_basic(self):
        """
        Simple test of expected output from fast stochastic oscillator

        Every row has high=3, low=2 and close=4, and the expected output is
        200 for each asset.
        """
        fso = FastStochasticOscillator()

        today = pd.Timestamp('2015')
        # Asset identifiers are integral, so build them as int64 rather than
        # float64.
        assets = np.arange(3, dtype=np.int64)
        out = np.empty(shape=(3, ), dtype=np.float64)

        highs = np.full((50, 3), 3, dtype=np.float64)
        lows = np.full((50, 3), 2, dtype=np.float64)
        closes = np.full((50, 3), 4, dtype=np.float64)

        # Note the input order expected by compute: closes, lows, highs.
        fso.compute(today, assets, out, closes, lows, highs)

        # Expected %K
        assert_equal(out, np.full((3, ), 200, dtype=np.float64))
Beispiel #30
0
    def check_previous_value_results(self, column, results, dates):
        """
        Verify the previous-value output of one column, asset by asset.

        Each asset must have exactly two raw events. For every date, the
        computed value must be the second event's value once that event is
        eligible, else the first event's value once it is eligible, else
        ``column.missing_value``.
        """
        # Verify that we got a result for every sid.
        self.assert_result_contains_all_sids(results)

        all_events = self.raw_events_no_nulls
        # Strip the timezone from the trading days: the values coming out of
        # pandas are not tz-localized.
        naive_dates = dates.tz_localize(None)

        for asset, asset_result in results.iteritems():
            asset_events = all_events[all_events.sid == asset.sid]
            self.assertEqual(len(asset_events), 2)

            first_value, second_value = asset_events[
                self.previous_value_columns[column]
            ]
            # Positional row access is required: the frame index holds
            # integers, so label-based lookup would treat 0 as a key.
            first_row = asset_events.iloc[0]
            second_row = asset_events.iloc[1]
            first_eligible = max(first_row.loc[['event_date', 'timestamp']])
            second_eligible = max(second_row.loc[['event_date', 'timestamp']])

            for date, computed_value in zip(naive_dates, asset_result):
                if date >= second_eligible:
                    # Event 2 wins even when event 1 has also been seen,
                    # because events are sorted by event_date.
                    self.assertEqual(computed_value, second_value)
                    continue

                if date >= first_eligible:
                    # Only event 1 has been seen so far, so it wins.
                    self.assertEqual(computed_value, first_value)
                    continue

                # Neither event has been seen yet: expect the column's
                # missing value.
                assert_equal(
                    computed_value,
                    column.missing_value,
                    # Coerce from Timestamp to datetime64.
                    allow_datetime_coercions=True,
                )