def make_equity_info(cls):
     out = pd.concat(
         [
             # 20 assets on each exchange. Each asset lives for 5 days.
             # A new asset starts each day.
             make_rotating_equity_info(
                 num_assets=20,
                 first_start=cls.START_DATE,
                 frequency=get_calendar(exchange).day,
                 periods_between_starts=1,
                 # NOTE: The asset_lifetime parameter name is a bit
                 #       misleading. It determines the number of trading
                 #       days between each asset's start_date and end_date,
                 #       so assets created with this method actually "live"
                 #       for (asset_lifetime + 1) days. But, since pipeline
                 #       doesn't show you an asset the day it IPOs, this
                 #       number matches the number of days that each asset
                 #       should appear in a pipeline output.
                 asset_lifetime=5,
                 exchange=exchange,
             )
             for exchange in cls.EXCHANGE_INFO.exchange
         ],
         ignore_index=True,
     )
     assert_equal(out.end_date.max(), cls.END_DATE)
     return out
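
The off-by-one described in the NOTE above is easier to see with concrete dates. Below is a minimal sketch of what make_rotating_equity_info appears to produce; rotating_equity_info_sketch is a hypothetical re-implementation for illustration (not zipline's actual helper), assuming a plain business-day frequency:

import pandas as pd

def rotating_equity_info_sketch(num_assets, first_start, asset_lifetime,
                                periods_between_starts=1):
    # Asset i starts i * periods_between_starts business days after
    # first_start and ends asset_lifetime business days after its own
    # start, so each asset spans (asset_lifetime + 1) sessions in total.
    starts = pd.bdate_range(
        first_start,
        periods=num_assets,
        freq=periods_between_starts * pd.offsets.BDay(),
    )
    return pd.DataFrame({
        'start_date': starts,
        'end_date': starts + asset_lifetime * pd.offsets.BDay(),
    })

info = rotating_equity_info_sketch(3, pd.Timestamp('2014-01-06'), 5)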
Example #2
    def _empty_ingest(self, _wrote_to=[]):
        """Run the nth empty ingest.

        Returns
        -------
        wrote_to : str
            The timestr of the bundle written.
        """
        if not self.bundles:
            @self.register('bundle',
                           calendar=pd.DatetimeIndex([pd.Timestamp('2014')]))
            def _(environ,
                  asset_db_writer,
                  minute_bar_writer,
                  daily_bar_writer,
                  adjustment_writer,
                  calendar,
                  cache,
                  show_progress,
                  output_dir):
                _wrote_to.append(output_dir)

        _wrote_to.clear()
        self.ingest('bundle', environ=self.environ)
        assert_equal(len(_wrote_to), 1, msg='ingest was called more than once')
        ingestions = self._list_bundle()
        assert_in(
            _wrote_to[0],
            ingestions,
            msg='output_dir was not in the bundle directory',
        )
        return _wrote_to[0]
    def _check_bundles(self, names):
        assert_equal(set(self.bundles.keys()), names)

        for name in names:
            self.unregister(name)

        assert_false(self.bundles)
    def test_inheritance(self):
        class Parent(DataSetFamily):
            extra_dims = [
                ('dim_0', {'a', 'b', 'c'}),
                ('dim_1', {'d', 'e', 'f'}),
            ]

            column_0 = Column('f8')
            column_1 = Column('?')

        class Child(Parent):
            column_2 = Column('O')
            column_3 = Column('i8', -1)

        assert_is_subclass(Child, Parent)
        assert_equal(Child.extra_dims, Parent.extra_dims)

        ChildSlice = Child.slice(dim_0='a', dim_1='d')

        expected_child_slice_columns = frozenset({
            ChildSlice.column_0,
            ChildSlice.column_1,
            ChildSlice.column_2,
            ChildSlice.column_3,
        })
        assert_equal(ChildSlice.columns, expected_child_slice_columns)
Example #5
    def test_reversability_int64(self):
        class F(Classifier):
            inputs = ()
            window_length = 0
            dtype = int64_dtype
            missing_value = -1

        f = F()
        column_data = np.array(
            [[0, f.missing_value],
             [1, f.missing_value],
             [2, 3]],
        )

        assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

        # only include the non-missing data
        pipeline_output = pd.Series(
            data=[0, 1, 2, 3],
            index=pd.MultiIndex.from_arrays([
                [pd.Timestamp('2014-01-01'),
                 pd.Timestamp('2014-01-02'),
                 pd.Timestamp('2014-01-03'),
                 pd.Timestamp('2014-01-03')],
                [0, 0, 0, 1],
            ]),
            dtype=int64_dtype,
        )

        assert_equal(
            f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
            column_data,
        )
Example #6
    def test_bundle(self):
        environ = {
            'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')
        }

        ingest('csvdir', environ=environ)
        bundle = load('csvdir', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )

        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        assert_equal([sorted(adj.keys()) for adj in adjs_for_cols],
                     expected_adjustments)
    def test_reversability(self, dtype_):
        class F(Factor):
            inputs = ()
            dtype = dtype_
            window_length = 0

        f = F()
        column_data = array(
            [[0, f.missing_value],
             [1, f.missing_value],
             [2, 3]],
            dtype=dtype_,
        )

        assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

        # only include the non-missing data
        pipeline_output = pd.Series(
            data=array([0, 1, 2, 3], dtype=dtype_),
            index=pd.MultiIndex.from_arrays([
                [pd.Timestamp('2014-01-01'),
                 pd.Timestamp('2014-01-02'),
                 pd.Timestamp('2014-01-03'),
                 pd.Timestamp('2014-01-03')],
                [0, 0, 0, 1],
            ]),
        )

        assert_equal(
            f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
            column_data,
        )
    def test_multiple_qtrs_requested(self):
        dataset1 = QuartersEstimates(1)
        dataset2 = QuartersEstimates(2)
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline(
                merge([{c.name + '1': c.latest for c in dataset1.columns},
                       {c.name + '2': c.latest for c in dataset2.columns}])
            ),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )
        q1_columns = [col.name + '1' for col in self.columns]
        q2_columns = [col.name + '2' for col in self.columns]

        # We now expect a column for 1 quarter out and a column for 2
        # quarters out for each of the dataset columns.
        assert_equal(sorted(np.array(q1_columns + q2_columns)),
                     sorted(results.columns.values))
        assert_equal(self.expected_out.sort_index(axis=1),
                     results.xs(0, level=1).sort_index(axis=1))
Example #9
    def test_session_closes_in_range(self):
        found_closes = self.calendar.session_closes_in_range(
            self.answers.index[0],
            self.answers.index[-1],
        )

        assert_equal(found_closes, self.answers['market_close'])
Example #10
    def test_reversability(self):
        class F(Filter):
            inputs = ()
            window_length = 0
            missing_value = False

        f = F()
        column_data = array(
            [[True, f.missing_value],
             [True, f.missing_value],
             [True, True]],
            dtype=bool,
        )

        assert_equal(f.postprocess(column_data.ravel()), column_data.ravel())

        # only include the non-missing data
        pipeline_output = pd.Series(
            data=True,
            index=pd.MultiIndex.from_arrays([
                [pd.Timestamp('2014-01-01'),
                 pd.Timestamp('2014-01-02'),
                 pd.Timestamp('2014-01-03'),
                 pd.Timestamp('2014-01-03')],
                [0, 0, 0, 1],
            ]),
        )

        assert_equal(
            f.to_workspace_value(pipeline_output, pd.Index([0, 1])),
            column_data,
        )
Example #11
 def test_repr(self):
     assert_equal(
         repr(self.Term().alias('ayy lmao')),
         "Aliased%s(Term(...), name='ayy lmao')" % (
             self.Term.__base__.__name__,
         ),
     )
Example #12
    def test_v5_to_v4_selects_most_recent_ticker(self):
        T = pd.Timestamp
        AssetDBWriter(self.engine).write(
            equities=pd.DataFrame(
                [['A', 'A', T('2014-01-01'), T('2014-01-02')],
                 ['B', 'B', T('2014-01-01'), T('2014-01-02')],
                 # these two rows are both for sid 2
                 ['B', 'C', T('2014-01-03'), T('2014-01-04')],
                 ['C', 'C', T('2014-01-01'), T('2014-01-02')]],
                index=[0, 1, 2, 2],
                columns=['symbol', 'asset_name', 'start_date', 'end_date'],
            ),
        )

        downgrade(self.engine, 4)
        metadata = sa.MetaData(self.engine)
        metadata.reflect()

        def select_fields(r):
            return r.sid, r.symbol, r.asset_name, r.start_date, r.end_date

        expected_data = {
            (0, 'A', 'A', T('2014-01-01').value, T('2014-01-02').value),
            (1, 'B', 'B', T('2014-01-01').value, T('2014-01-02').value),
            (2, 'B', 'C', T('2014-01-01').value, T('2014-01-04').value),
        }
        actual_data = set(map(
            select_fields,
            sa.select(metadata.tables['equities'].c).execute(),
        ))

        assert_equal(expected_data, actual_data)
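
The collapse rule the expected data encodes can be stated compactly: for each sid, the downgraded table keeps the symbol and asset_name from the row with the latest start_date, while the date range widens to cover every row for that sid. A hedged sketch of that rule (collapse_equities_sketch below is illustrative, not the migration's actual code):

import pandas as pd

def collapse_equities_sketch(equities):
    ordered = equities.sort_values('start_date')
    grouped = ordered.groupby(level=0)
    # The row with the latest start_date wins symbol/asset_name.
    out = grouped[['symbol', 'asset_name']].last()
    # The date range widens to cover every row for the sid.
    out['start_date'] = grouped['start_date'].min()
    out['end_date'] = grouped['end_date'].max()
    return out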
Example #13
        def do_checks(cls, colnames):

            specialized = cls.specialize(domain)

            # Specializations should be memoized.
            self.assertIs(specialized, cls.specialize(domain))

            # Specializations should have the same name.
            assert_equal(specialized.__name__, cls.__name__)
            self.assertIs(specialized.domain, domain)

            for attr in colnames:
                original = getattr(cls, attr)
                new = getattr(specialized, attr)

                # We should get a new column from the specialization, which
                # should be the same object that we would get from specializing
                # the original column.
                self.assertIsNot(original, new)
                self.assertIs(new, original.specialize(domain))

                # Columns should be bound to their respective datasets.
                self.assertIs(original.dataset, cls)
                self.assertIs(new.dataset, specialized)

                # The new column should have the domain of the specialization.
                assert_equal(new.domain, domain)

                # Names, dtypes, and missing_values should match.
                assert_equal(original.name, new.name)
                assert_equal(original.dtype, new.dtype)
                assert_equal(original.missing_value, new.missing_value)
Example #14
    def test_stock_dividends(self):
        sids = np.arange(5)
        dates = self.trading_calendar.all_sessions.tz_convert(None)

        def T(n):
            return dates[n]

        sort_key = ['sid', 'ex_date', 'payment_sid', 'ratio']
        input_ = pd.DataFrame(
            [[0, T(0), 1.5, 1],
             [0, T(1), 0.5, 2],

             # the same asset has two stock dividends for different assets on
             # the same day
             [1, T(0), 1, 2],
             [1, T(0), 1.2, 3]],
            columns=['sid', 'ex_date', 'ratio', 'payment_sid'],
        ).sort_values(sort_key)

        # give every extra date field a unique date so that we can make sure
        # they appear unchanged in the dividends payouts
        ix = 0
        for col in 'declared_date', 'record_date', 'pay_date':
            extra_dates = dates[ix:ix + len(input_)]
            ix += len(input_)
            input_[col] = extra_dates

        self.writer_without_pricing(dates, sids).write(stock_dividends=input_)
        dfs = self.component_dataframes()

        output = dfs.pop('stock_dividend_payouts').sort_values(sort_key)
        self.assert_all_empty(dfs)

        assert_equal(output, input_)
Example #15
    def test_parse_namespaces(self):
        n = Namespace()

        create_args(
            [
                "first.second.a=blah1",
                "first.second.b=blah2",
                "first.third=blah3",
                "second.a=blah4",
                "second.b=blah5",
            ],
            n
        )

        assert_equal(n.first.second.a, 'blah1')
        assert_equal(n.first.second.b, 'blah2')
        assert_equal(n.first.third, 'blah3')
        assert_equal(n.second.a, 'blah4')
        assert_equal(n.second.b, 'blah5')

        n = Namespace()

        msg = "Conflicting assignments at namespace level 'second'"
        with assert_raises_str(ValueError, msg):
            create_args(
                [
                    "first.second.a=blah1",
                    "first.second.b=blah2",
                    "first.second=blah3",
                ],
                n
            )
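
A hedged sketch of the parsing behavior this test pins down: dotted keys build nested namespaces, and assigning a value where a sub-namespace already exists (or vice versa) is a conflict. The names NS and create_args_sketch are hypothetical stand-ins inferred from the test, not zipline's implementation:

class NS(object):
    pass

def create_args_sketch(args, root):
    for arg in args:
        path, value = arg.split('=', 1)
        parts = path.split('.')
        node = root
        for level in parts[:-1]:
            child = getattr(node, level, None)
            if child is None:
                child = NS()
                setattr(node, level, child)
            elif not isinstance(child, NS):
                # A plain value already lives at this level.
                raise ValueError(
                    "Conflicting assignments at namespace level %r" % level
                )
            node = child
        if isinstance(getattr(node, parts[-1], None), NS):
            # A sub-namespace already lives where a value would go.
            raise ValueError(
                "Conflicting assignments at namespace level %r" % parts[-1]
            )
        setattr(node, parts[-1], value)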
Example #16
    def test_fso_expected_with_talib(self):
        """
        Test the output that is returned from the fast stochastic oscillator
        is the same as that from the ta-lib STOCHF function.
        """
        window_length = 14
        nassets = 6
        closes = np.random.randint(1, 7, size=(50, nassets)) * 1.0
        highs = np.random.randint(4, 7, size=(50, nassets)) * 1.0
        lows = np.random.randint(1, 4, size=(50, nassets)) * 1.0

        expected_out_k = []
        for i in range(nassets):
            e = talib.STOCHF(
                high=highs[:, i],
                low=lows[:, i],
                close=closes[:, i],
                fastk_period=window_length,
            )

            expected_out_k.append(e[0][-1])
        expected_out_k = np.array(expected_out_k)

        today = pd.Timestamp('2015')
        out = np.empty(shape=(nassets,), dtype=np.float64)
        assets = np.arange(nassets, dtype=np.float64)

        fso = FastStochasticOscillator()
        fso.compute(
            today, assets, out, closes, lows, highs
        )

        assert_equal(out, expected_out_k)
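
For reference, the fast %K line that both implementations compute can be written out directly. This is a sketch under the assumption that the factor treats whatever window it is handed as the fastk period; fast_k_sketch is a hypothetical helper:

import numpy as np

def fast_k_sketch(closes, lows, highs):
    # Position of the latest close within the window's high-low range,
    # scaled to [0, 100], computed independently per column (asset).
    lowest_low = lows.min(axis=0)
    highest_high = highs.max(axis=0)
    return 100.0 * (closes[-1] - lowest_low) / (highest_high - lowest_low)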
Example #17
    def test_mixed_generics(self):
        """
        Test that we can run pipelines with mixed generic/non-generic terms.

        This test is a regression test for failures encountered during
        development where having a mix of generic and non-generic columns in
        the term graph caused bugs in our extra row accounting.
        """
        USTestingDataSet = TestingDataSet.specialize(US_EQUITIES)
        base_terms = {
            'sum3_generic': Sum.create(TestingDataSet.float_col, 3),
            'sum3_special': Sum.create(USTestingDataSet.float_col, 3),
            'sum10_generic': Sum.create(TestingDataSet.float_col, 10),
            'sum10_special': Sum.create(USTestingDataSet.float_col, 10),
        }

        def run(ts):
            pipe = Pipeline(ts, domain=US_EQUITIES)
            start = self.trading_days[-5]
            end = self.trading_days[-1]
            return self.run_pipeline(pipe, start, end)

        base_result = run(base_terms)

        for subset in powerset(base_terms):
            subset_terms = {t: base_terms[t] for t in subset}
            result = run(subset_terms).sort_index(axis=1)
            expected = base_result[list(subset)].sort_index(axis=1)
            assert_equal(result, expected)
Example #18
 def check_roundtrip(arr):
     assert_equal(
         arr.as_string_array(),
         LabelArray(
             arr.as_string_array(),
             arr.missing_value,
         ).as_string_array(),
     )
    def test_generic_pipeline_with_explicit_domain(self, domain):
        calendar = domain.calendar
        pipe = Pipeline({
            'open': EquityPricing.open.latest,
            'high': EquityPricing.high.latest,
            'low': EquityPricing.low.latest,
            'close': EquityPricing.close.latest,
            'volume': EquityPricing.volume.latest,
        }, domain=domain)

        sessions = self.daily_bar_sessions[calendar.name]

        # Run the pipeline for a 7 day chunk in the middle of our data.
        #
        # Using this region ensures that there are assets that never appear in
        # the pipeline both because they end too soon, and because they start
        # too late.
        start, end = sessions[[-17, -10]]
        result = self.run_pipeline(pipe, start, end)

        all_assets = self.assets_by_calendar[calendar]

        # We expect the index of the result to contain all assets that were
        # alive during the interval between our start and end (not including
        # the asset's IPO date).
        expected_assets = [
            a for a in all_assets
            if alive_in_range(a, start, end, include_asset_start_date=False)
        ]
        # off by 1 from above to be inclusive of the end date
        expected_dates = sessions[-17:-9]

        for col in pipe.columns:
            # result_date should look like this:
            #
            #     E     F     G     H     I     J     K     L     M     N     O     P # noqa
            # 24.17 25.17 26.17 27.17 28.17   NaN   NaN   NaN   NaN   NaN   NaN   NaN # noqa
            #   NaN 25.18 26.18 27.18 28.18 29.18   NaN   NaN   NaN   NaN   NaN   NaN # noqa
            #   NaN   NaN 26.23 27.23 28.23 29.23 30.23   NaN   NaN   NaN   NaN   NaN # noqa
            #   NaN   NaN   NaN 27.28 28.28 29.28 30.28 31.28   NaN   NaN   NaN   NaN # noqa
            #   NaN   NaN   NaN   NaN 28.30 29.30 30.30 31.30 32.30   NaN   NaN   NaN # noqa
            #   NaN   NaN   NaN   NaN   NaN 29.29 30.29 31.29 32.29 33.29   NaN   NaN # noqa
            #   NaN   NaN   NaN   NaN   NaN   NaN 30.27 31.27 32.27 33.27 34.27   NaN # noqa
            #   NaN   NaN   NaN   NaN   NaN   NaN   NaN 31.29 32.29 33.29 34.29 35.29 # noqa
            result_data = result[col].unstack()

            # Check indices.
            assert_equal(pd.Index(expected_assets), result_data.columns)
            assert_equal(expected_dates, result_data.index)

            # Check values.
            for asset in expected_assets:
                for date in expected_dates:
                    value = result_data.at[date, asset]
                    self.check_expected_latest_value(
                        calendar, col, date, asset, value,
                    )
Example #20
    def init_instance_fixtures(self):
        super(MetricsSetCoreTestCase, self).init_instance_fixtures()

        self.metrics_sets, self.register, self.unregister, self.load = (
            _make_metrics_set_core()
        )

        # make sure this starts empty
        assert_equal(self.metrics_sets, mappingproxy({}))
Example #21
 def manual_narrow_condense_back_to_valid_size_slow(self):
     """This test is really slow so we don't want it run by default.
     """
     # tests that we don't try to create an 'int24' (which is meaningless)
     categories = self.create_categories(24, plus_one=False)
     categories.append(categories[0])
     arr = LabelArray(categories, missing_value=categories[0])
     assert_equal(arr.itemsize, 4)
     self.check_roundtrip(arr)
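
The 'int24' remark refers to code-width selection: a LabelArray stores integer codes into its categories, and the smallest unsigned width that fits jumps straight from 2 bytes to 4. A sketch of that selection, as an assumption about the mechanism rather than LabelArray's actual code:

import numpy as np

def smallest_uint_dtype_sketch(num_categories):
    # Widths go 1 -> 2 -> 4 -> 8 bytes; there is no 3-byte ('int24') step,
    # so 2**24 categories force an itemsize of 4.
    for dtype in (np.uint8, np.uint16, np.uint32, np.uint64):
        if num_categories <= np.iinfo(dtype).max:
            return np.dtype(dtype)
    raise ValueError('too many categories')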
Example #22
    def test_coerce_to_uint32_price(self, field, expected):
        # NOTE: 130.23 is not perfectly representable as a double, but we
        # shouldn't truncate and be off by an entire cent
        coerced = coerce_to_uint32(
            array([0.001, 1, 100, 100.5, 1000.005, 130.23], dtype=float64),
            DEFAULT_SCALING_FACTORS[field],
        )

        assert_equal(coerced, expected)
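
The representability concern in the comment above comes down to rounding versus truncating after scaling. A sketch of the coercion under that assumption (coerce_to_uint32_sketch is illustrative, not the library function):

import numpy as np

def coerce_to_uint32_sketch(values, scaling_factor):
    # 130.23 * 1000 is 130229.999... as a double; rounding (not
    # truncating) recovers 130230 and avoids being off by a cent.
    return np.round(values * scaling_factor).astype(np.uint32)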
    def check_equivalent_terms(self, terms):
        self.assertTrue(len(terms) > 1, "Need at least two terms to compare")
        pipe = Pipeline(terms)

        start, end = self.trading_days[[-10, -1]]
        results = self.pipeline_engine.run_pipeline(pipe, start, end)
        first_column = results.iloc[:, 0]
        for name in terms:
            assert_equal(results.loc[:, name], first_column, check_names=False)
Example #24
    def test_load_empty(self):
        """
        For the case where raw data is empty, make sure we have a result for
        all sids, that the dimensions are correct, and that we have the
        correct missing value.
        """
        raw_events = pd.DataFrame(
            columns=["sid",
                     "timestamp",
                     "event_date",
                     "float",
                     "int",
                     "datetime",
                     "string"]
        )
        next_value_columns = {
            EventDataSet_US.next_datetime: 'datetime',
            EventDataSet_US.next_event_date: 'event_date',
            EventDataSet_US.next_float: 'float',
            EventDataSet_US.next_int: 'int',
            EventDataSet_US.next_string: 'string',
            EventDataSet_US.next_string_custom_missing: 'string'
        }
        previous_value_columns = {
            EventDataSet_US.previous_datetime: 'datetime',
            EventDataSet_US.previous_event_date: 'event_date',
            EventDataSet_US.previous_float: 'float',
            EventDataSet_US.previous_int: 'int',
            EventDataSet_US.previous_string: 'string',
            EventDataSet_US.previous_string_custom_missing: 'string'
        }
        loader = EventsLoader(
            raw_events, next_value_columns, previous_value_columns
        )
        engine = SimplePipelineEngine(
            lambda x: loader,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline({
                c.name: c.latest for c in EventDataSet_US.columns
            }, domain=US_EQUITIES),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        dates = self.trading_days

        expected = self.frame_containing_all_missing_values(
            index=pd.MultiIndex.from_product([dates, assets]),
            columns=EventDataSet_US.columns,
        )

        assert_equal(results, expected)
 def compute(self, today, assets, out, estimate):
     today_idx = trading_days.get_loc(today)
     today_timeline = timelines[
         num_announcements_out
     ].loc[today].reindex(
         trading_days[:today_idx + 1]
     ).values
     timeline_start_idx = (len(today_timeline) - window_len)
     assert_equal(estimate,
                  today_timeline[timeline_start_idx:])
Example #26
    def test_input_validation(self, arg):
        window_length = 52

        with self.assertRaises(ValueError) as e:
            IchimokuKinkoHyo(**{arg: window_length + 1})

        assert_equal(
            str(e.exception),
            '%s must be <= the window_length: 53 > 52' % arg,
        )
 def test_quarter_normalization(self):
     input_yrs = pd.Series(range(2011, 2015), dtype=np.int64)
     input_qtrs = pd.Series(range(1, 5), dtype=np.int64)
     result_years, result_quarters = split_normalized_quarters(
         normalize_quarters(input_yrs, input_qtrs)
     )
     # Can't use assert_series_equal here with check_names=False
     # because that still fails due to name differences.
     assert_equal(input_yrs, result_years)
     assert_equal(input_qtrs, result_quarters)
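
One plausible encoding that would satisfy this roundtrip is packing each (year, quarter) pair into a single integer. The sketch below is an assumption about the scheme, not necessarily zipline's exact normalize_quarters/split_normalized_quarters:

import numpy as np
import pandas as pd

def normalize_quarters_sketch(years, quarters):
    # Four quarters per year: (2011, 1) -> 8044, (2011, 2) -> 8045, ...
    return years * 4 + quarters - 1

def split_normalized_quarters_sketch(normalized):
    return normalized // 4, normalized % 4 + 1

yrs = pd.Series(range(2011, 2015), dtype=np.int64)
qtrs = pd.Series(range(1, 5), dtype=np.int64)
round_yrs, round_qtrs = split_normalized_quarters_sketch(
    normalize_quarters_sketch(yrs, qtrs)
)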
Example #28
    def test_map_ignores_missing_value(self, missing):
        data = np.array([missing, 'B', 'C'], dtype=object)
        la = LabelArray(data, missing_value=missing)

        def increment_char(c):
            return chr(ord(c) + 1)

        result = la.map(increment_char)
        expected = LabelArray([missing, 'C', 'D'], missing_value=missing)
        assert_equal(result.as_string_array(), expected.as_string_array())
Example #29
    def test_price_rounding(self, frequency, field):
        equity = self.asset_finder.retrieve_asset(2)
        future = self.asset_finder.retrieve_asset(10001)
        cf = self.data_portal.asset_finder.create_continuous_future(
            'BUZ', 0, 'calendar', None,
        )
        minutes = self.nyse_calendar.minutes_for_session(self.trading_days[0])

        if frequency == '1m':
            minute = minutes[0]
            expected_equity_volume = 100
            expected_future_volume = 100
            data_frequency = 'minute'
        else:
            minute = minutes[0].normalize()
            expected_equity_volume = 100 * US_EQUITIES_MINUTES_PER_DAY
            expected_future_volume = 100 * FUTURES_MINUTES_PER_DAY
            data_frequency = 'daily'

        # Equity prices should be floored to three decimal places.
        expected_equity_values = {
            'open': 1.005,
            'high': 1.005,
            'low': 1.005,
            'close': 1.005,
            'volume': expected_equity_volume,
        }
        # Futures prices should be rounded to four decimal places.
        expected_future_values = {
            'open': 1.0055,
            'high': 1.0059,
            'low': 1.0051,
            'close': 1.0055,
            'volume': expected_future_volume,
        }

        result = self.data_portal.get_history_window(
            assets=[equity, future, cf],
            end_dt=minute,
            bar_count=1,
            frequency=frequency,
            field=field,
            data_frequency=data_frequency,
        )
        expected_result = pd.DataFrame(
            {
                equity: expected_equity_values[field],
                future: expected_future_values[field],
                cf: expected_future_values[field],
            },
            index=[minute],
            dtype=float64_dtype,
        )

        assert_equal(result, expected_result)
Example #30
    def test_aroon_basic(self, lows, highs, expected_out):
        aroon = Aroon(window_length=self.window_length)
        today = pd.Timestamp('2014', tz='utc')
        assets = pd.Index(np.arange(self.nassets, dtype=np.int64))
        shape = (self.nassets,)
        out = np.recarray(shape=shape, dtype=self.dtype,
                          buf=np.empty(shape=shape, dtype=self.dtype))

        aroon.compute(today, assets, out, lows, highs)

        assert_equal(out, expected_out)
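
For context, Aroon up/down over a window can be written as the scaled index of the windowed high and low. A sketch assuming that convention (aroon_sketch is a hypothetical helper, not the factor's code):

import numpy as np

def aroon_sketch(lows, highs):
    # Index 0 is the oldest bar; a high at the newest bar gives up == 100.
    window_length = highs.shape[0]
    up = (100.0 * highs.argmax(axis=0)) / (window_length - 1)
    down = (100.0 * lows.argmin(axis=0)) / (window_length - 1)
    return up, down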
    def test_bundle_doesnt_exist(self, fnname):
        with assert_raises(UnknownBundle) as e:
            getattr(self, fnname)('ayy', environ=self.environ)

        assert_equal(e.exception.name, 'ayy')
Example #32
    def test_user_input(self):
        zipline.extension_args = Namespace()

        runner = CliRunner()
        result = runner.invoke(main.main, [
            '-xfirst.second.a=blah1',
            '-xfirst.second.b=blah2',
            '-xfirst.third=blah3',
            '-xsecond.a.b=blah4',
            '-xsecond.b.a=blah5',
            '-xa1=value1',
            '-xb_=value2',
            'bundles',
        ])

        assert_equal(result.exit_code, 0)  # assert successful invocation
        assert_equal(zipline.extension_args.first.second.a, 'blah1')
        assert_equal(zipline.extension_args.first.second.b, 'blah2')
        assert_equal(zipline.extension_args.first.third, 'blah3')
        assert_equal(zipline.extension_args.second.a.b, 'blah4')
        assert_equal(zipline.extension_args.second.b.a, 'blah5')
        assert_equal(zipline.extension_args.a1, 'value1')
        assert_equal(zipline.extension_args.b_, 'value2')
Example #33
 def test_graph_repr(self):
     for name in ('a', 'b'):
         assert_equal(
             self.Term().alias(name).graph_repr(),
             name,
         )
Example #34
    def test_bundle(self):
        url_map = merge(
            {
                format_wiki_url(
                    self.api_key,
                    symbol,
                    self.start_date,
                    self.end_date,
                ): test_resource_path('quandl_samples', symbol + '.csv.gz')
                for symbol in self.symbols
            },
            {
                format_metadata_url(self.api_key, n): test_resource_path(
                    'quandl_samples',
                    'metadata-%d.csv.gz' % n,
                )
                for n in (1, 2)
            },
        )
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        with patch_read_csv(url_map, strict=True):
            ingest('quandl', environ=environ)

        bundle = load('quandl', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        cal = self.calendar
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            cal[cal.get_loc(self.asset_start, 'bfill')],
            cal[cal.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder, )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            cal,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
Example #35
    def test_ichimoku_kinko_hyo(self):
        window_length = 52
        today = pd.Timestamp('2014', tz='utc')
        nassets = 5
        assets = pd.Index(np.arange(nassets))
        days_col = np.arange(window_length)[:, np.newaxis]
        highs = np.arange(nassets) + 2 + days_col
        closes = np.arange(nassets) + 1 + days_col
        lows = np.arange(nassets) + days_col

        tenkan_sen_length = 9
        kijun_sen_length = 26
        chikou_span_length = 26
        ichimoku_kinko_hyo = IchimokuKinkoHyo(
            window_length=window_length,
            tenkan_sen_length=tenkan_sen_length,
            kijun_sen_length=kijun_sen_length,
            chikou_span_length=chikou_span_length,
        )

        dtype = [
            ('tenkan_sen', 'f8'),
            ('kijun_sen', 'f8'),
            ('senkou_span_a', 'f8'),
            ('senkou_span_b', 'f8'),
            ('chikou_span', 'f8'),
        ]
        out = np.recarray(
            shape=(nassets,),
            dtype=dtype,
            buf=np.empty(shape=(nassets,), dtype=dtype),
        )
        ichimoku_kinko_hyo.compute(
            today,
            assets,
            out,
            highs,
            lows,
            closes,
            tenkan_sen_length,
            kijun_sen_length,
            chikou_span_length,
        )

        expected_tenkan_sen = np.array([
            (53 + 43) / 2,
            (54 + 44) / 2,
            (55 + 45) / 2,
            (56 + 46) / 2,
            (57 + 47) / 2,
        ])
        expected_kijun_sen = np.array([
            (53 + 26) / 2,
            (54 + 27) / 2,
            (55 + 28) / 2,
            (56 + 29) / 2,
            (57 + 30) / 2,
        ])
        expected_senkou_span_a = (expected_tenkan_sen + expected_kijun_sen) / 2
        expected_senkou_span_b = np.array([
            (53 + 0) / 2,
            (54 + 1) / 2,
            (55 + 2) / 2,
            (56 + 3) / 2,
            (57 + 4) / 2,
        ])
        expected_chikou_span = np.array([
            27.0,
            28.0,
            29.0,
            30.0,
            31.0,
        ])

        assert_equal(
            out.tenkan_sen,
            expected_tenkan_sen,
            msg='tenkan_sen',
        )
        assert_equal(
            out.kijun_sen,
            expected_kijun_sen,
            msg='kijun_sen',
        )
        assert_equal(
            out.senkou_span_a,
            expected_senkou_span_a,
            msg='senkou_span_a',
        )
        assert_equal(
            out.senkou_span_b,
            expected_senkou_span_b,
            msg='senkou_span_b',
        )
        assert_equal(
            out.chikou_span,
            expected_chikou_span,
            msg='chikou_span',
        )
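
The expected arrays above follow the standard Ichimoku definitions: each line is the midpoint of the windowed high and low, senkou span A averages tenkan and kijun, and chikou is the close chikou_span_length bars back. A sketch computing them for the latest bar of a (window, nassets) panel under those standard definitions (ichimoku_sketch is illustrative):

import numpy as np

def ichimoku_sketch(highs, lows, closes,
                    tenkan_sen_length=9,
                    kijun_sen_length=26,
                    chikou_span_length=26):
    def midpoint(h, l):
        return (h.max(axis=0) + l.min(axis=0)) / 2

    tenkan_sen = midpoint(highs[-tenkan_sen_length:],
                          lows[-tenkan_sen_length:])
    kijun_sen = midpoint(highs[-kijun_sen_length:],
                         lows[-kijun_sen_length:])
    senkou_span_a = (tenkan_sen + kijun_sen) / 2
    senkou_span_b = midpoint(highs, lows)  # over the full window
    chikou_span = closes[-chikou_span_length]
    return (tenkan_sen, kijun_sen, senkou_span_a,
            senkou_span_b, chikou_span)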
Example #36
    def test_lookup_symbol_change_ticker(self):
        T = partial(pd.Timestamp, tz='utc')
        metadata = pd.DataFrame.from_records(
            [
                # sid 0
                {
                    'symbol': 'A',
                    'asset_name': 'Asset A',
                    'start_date': T('2014-01-01'),
                    'end_date': T('2014-01-05'),
                    'exchange': "TEST",
                },
                {
                    'symbol': 'B',
                    'asset_name': 'Asset B',
                    'start_date': T('2014-01-06'),
                    'end_date': T('2014-01-10'),
                    'exchange': "TEST",
                },

                # sid 1
                {
                    'symbol': 'C',
                    'asset_name': 'Asset C',
                    'start_date': T('2014-01-01'),
                    'end_date': T('2014-01-05'),
                    'exchange': "TEST",
                },
                {
                    'symbol': 'A',  # claiming the unused symbol 'A'
                    'asset_name': 'Asset A',
                    'start_date': T('2014-01-06'),
                    'end_date': T('2014-01-10'),
                    'exchange': "TEST",
                },
            ],
            index=[0, 0, 1, 1],
        )
        self.write_assets(equities=metadata)
        finder = self.asset_finder

        # note: these assertions walk forward in time, starting at assertions
        # about ownership before the start_date and ending with assertions
        # after the end_date; new assertions should be inserted in the correct
        # locations

        # no one held 'A' before 01
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('A', T('2013-12-31'))

        # no one held 'C' before 01
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('C', T('2013-12-31'))

        for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
            # from 01 through 05 sid 0 held 'A'
            A_result = finder.lookup_symbol('A', asof)
            assert_equal(
                A_result,
                finder.retrieve_asset(0),
                msg=str(asof),
            )
            # The symbol and asset_name should always be the last held values
            assert_equal(A_result.symbol, 'B')
            assert_equal(A_result.asset_name, 'Asset B')

            # from 01 through 05 sid 1 held 'C'
            C_result = finder.lookup_symbol('C', asof)
            assert_equal(
                C_result,
                finder.retrieve_asset(1),
                msg=str(asof),
            )
            # The symbol and asset_name should always be the last held values
            assert_equal(C_result.symbol, 'A')
            assert_equal(C_result.asset_name, 'Asset A')

        # no one held 'B' before 06
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('B', T('2014-01-05'))

        # no one has held 'C' since 05; however, since no one else has
        # claimed it, it still maps to sid 1
        assert_equal(
            finder.lookup_symbol('C', T('2014-01-07')),
            finder.retrieve_asset(1),
        )

        for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
            # from 06 through 10 sid 0 held 'B'
            # we test through the 11th because sid 1 is the last to hold 'B'
            # so it should ffill
            B_result = finder.lookup_symbol('B', asof)
            assert_equal(
                B_result,
                finder.retrieve_asset(0),
                msg=str(asof),
            )
            assert_equal(B_result.symbol, 'B')
            assert_equal(B_result.asset_name, 'Asset B')

            # from 06 through 10 sid 1 held 'A'
            # we test through the 11th because sid 1 is the last to hold 'A'
            # so it should ffill
            A_result = finder.lookup_symbol('A', asof)
            assert_equal(
                A_result,
                finder.retrieve_asset(1),
                msg=str(asof),
            )
            assert_equal(A_result.symbol, 'A')
            assert_equal(A_result.asset_name, 'Asset A')
    def test_bytes_array_to_native_str_object_array(self):
        a = array([b"abc", b"def"], dtype="S3")
        result = bytes_array_to_native_str_object_array(a)
        expected = array(["abc", "def"], dtype=object)

        assert_equal(result, expected)
Example #38
    def test_calculate_dividend_ratio(self):
        first_date_ix = 200
        dates = self.trading_calendar.all_sessions[
            first_date_ix:first_date_ix + 3]

        def T(n):
            return dates[n].tz_convert(None)

        close = pd.DataFrame(
            [
                [10.0, 0.5, 30.0],  # noqa
                [9.5, 0.4, np.nan],  # noqa
                [15.0, 0.6, 35.0]
            ],  # noqa
            columns=[0, 1, 2],
            index=dates,
        )

        dividends = pd.DataFrame(
            [
                # ex_date of 0 means that we cannot get the previous day's
                # close, so we should not expect to see this dividend in the
                # output
                [0, T(0), 10],

                # previous price was 0.4, meaning the dividend amount
                # is greater than the price and the ratio would be
                # negative. we should warn and drop this row
                [1, T(1), 0.51],

                # previous price was 0.4, meaning the dividend amount
                # is exactly equal to price and the ratio would be 0.
                # we should warn and drop this row
                [1, T(2), 0.4],

                # previous price is nan, so we cannot compute the ratio.
                # we should warn and drop this row
                [2, T(2), 10],

                # previous price was 10, expected ratio is 0.95
                [0, T(1), 0.5],

                # previous price was 0.4, expected ratio is 0.9
                [1, T(2), 0.04]
            ],
            columns=['sid', 'ex_date', 'amount'],
        )

        # give every extra date field a unique date so that we can make sure
        # they appear unchanged in the dividends payouts
        ix = first_date_ix
        for col in 'declared_date', 'record_date', 'pay_date':
            extra_dates = self.trading_calendar.all_sessions[ix:ix +
                                                             len(dividends)]
            ix += len(dividends)
            dividends[col] = extra_dates

        self.writer_from_close(close).write(dividends=dividends)
        dfs = self.component_dataframes()
        dividend_payouts = dfs.pop('dividend_payouts')
        dividend_ratios = dfs.pop('dividends')
        self.assert_all_empty(dfs)

        payout_sort_key = ['sid', 'ex_date', 'amount']
        dividend_payouts = dividend_payouts.sort_values(payout_sort_key)
        dividend_payouts = dividend_payouts.reset_index(drop=True)

        expected_dividend_payouts = dividend_payouts.sort_values(
            payout_sort_key, )
        expected_dividend_payouts = expected_dividend_payouts.reset_index(
            drop=True, )
        assert_equal(dividend_payouts, expected_dividend_payouts)

        expected_dividend_ratios = pd.DataFrame(
            [[T(1), 0.95, 0], [T(2), 0.90, 1]],
            columns=['effective_date', 'ratio', 'sid'],
        )
        dividend_ratios = dividend_ratios.sort_values(
            ['effective_date', 'sid'], )
        dividend_ratios = dividend_ratios.reset_index(drop=True)
        assert_equal(dividend_ratios, expected_dividend_ratios)

        self.assertTrue(
            self.log_handler.has_warning(
                "Couldn't compute ratio for dividend sid=2, ex_date=1990-10-18,"
                " amount=10.000", ))
        self.assertTrue(
            self.log_handler.has_warning(
                'Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-17,'
                ' amount=0.510', ))
        self.assertTrue(
            self.log_handler.has_warning(
                'Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-18,'
                ' amount=0.400', ))
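
The ratios being checked follow the usual back-adjustment formula, ratio = 1 - amount / previous_close, with non-positive or NaN results dropped. A small sketch of that rule (dividend_ratio_sketch is hypothetical):

def dividend_ratio_sketch(amount, previous_close):
    ratio = 1.0 - amount / previous_close
    # Non-positive ratios (amount >= price) and NaN previous closes are
    # dropped with a warning rather than written.
    if not ratio > 0.0:
        return None
    return ratio

dividend_ratio_sketch(0.5, 10.0)   # 0.95, as expected for sid 0 at T(1)
dividend_ratio_sketch(0.04, 0.4)   # 0.90, as expected for sid 1 at T(2)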
Example #39
    def test_order_equity_non_targeted(self, order_method, amount):
        # Every day, place an order for $10000 worth of sid(1)
        algotext = """
import zipline.api as api

def initialize(context):
    api.set_slippage(api.slippage.FixedSlippage(spread=0.0))
    api.set_commission(api.commission.PerShare(0))

    context.equity = api.sid(1)

    api.schedule_function(
        func=do_order,
        date_rule=api.date_rules.every_day(),
        time_rule=api.time_rules.market_open(),
    )

def do_order(context, data):
    context.ordered = True
    api.{order_func}(context.equity, {arg})
     """.format(order_func=order_method, arg=amount)
        result = self.run_algorithm(script=algotext)

        for orders in result.orders.values:
            assert_equal(len(orders), 1)
            assert_equal(orders[0]['amount'], 5000)
            assert_equal(orders[0]['sid'], self.EQUITY)

        for i, positions in enumerate(result.positions.values, start=1):
            assert_equal(len(positions), 1)
            assert_equal(positions[0]['amount'], 5000.0 * i)
            assert_equal(positions[0]['sid'], self.EQUITY)
 def do_check(independent):
     result = vectorized_pearson_r(dependent,
                                   independent,
                                   allowed_missing=0)
     assert_equal(result, np.array([1.0, 1.0, -1.0]))
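
A sketch of the row-wise Pearson correlation this helper exercises, correlating each row of dependent with the matching row of independent (vectorized_pearson_r_sketch is illustrative, not the library function):

import numpy as np

def vectorized_pearson_r_sketch(dependent, independent):
    # Demean each row, then divide the row-wise covariance by the product
    # of the row-wise standard deviations.
    d = dependent - dependent.mean(axis=1, keepdims=True)
    i = independent - independent.mean(axis=1, keepdims=True)
    return (d * i).sum(axis=1) / np.sqrt(
        (d ** 2).sum(axis=1) * (i ** 2).sum(axis=1)
    )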
Example #41
    def test_order_future_targeted(self, order_method, amount):
        # Every day, place an order for a target of $10000 worth of sid(2).
        # With no commissions or slippage, we should only place one order.
        algotext = """
import zipline.api as api

def initialize(context):
    api.set_slippage(us_futures=api.slippage.FixedSlippage(spread=0.0))
    api.set_commission(us_futures=api.commission.PerTrade(0.0))

    context.future = api.sid(2)

    api.schedule_function(
        func=do_order,
        date_rule=api.date_rules.every_day(),
        time_rule=api.time_rules.market_open(),
    )

def do_order(context, data):
    context.ordered = True
    api.{order_func}(context.future, {arg})
     """.format(order_func=order_method, arg=amount)

        result = self.run_algorithm(script=algotext)

        # We should get one order on the first day.
        assert_equal([len(ords) for ords in result.orders], [1, 0, 0, 0])
        order = result.orders.iloc[0][0]
        assert_equal(order['amount'], 500)
        assert_equal(order['sid'], self.FUTURE)

        # Our position at the end of each day should be worth $10,000.
        for positions in result.positions.values:
            assert_equal(len(positions), 1)
            assert_equal(positions[0]['amount'], 500.0)
            assert_equal(positions[0]['sid'], self.FUTURE)
    def test_clean_before_after(self):
        first = self._empty_ingest()
        assert_equal(
            self.clean(
                'bundle',
                before=self._ts_of_run(first),
                environ=self.environ,
            ),
            set(),
        )
        assert_equal(
            self._list_bundle(),
            {first},
            msg='directory should not have changed (before)',
        )

        assert_equal(
            self.clean(
                'bundle',
                after=self._ts_of_run(first),
                environ=self.environ,
            ),
            set(),
        )
        assert_equal(
            self._list_bundle(),
            {first},
            msg='directory should not have changed (after)',
        )

        assert_equal(
            self.clean(
                'bundle',
                before=self._ts_of_run(first) + _1_ns,
                environ=self.environ,
            ),
            {first},
        )
        assert_equal(
            self._list_bundle(),
            set(),
            msg='directory should now be empty (before)',
        )

        second = self._empty_ingest()
        assert_equal(
            self.clean(
                'bundle',
                after=self._ts_of_run(second) - _1_ns,
                environ=self.environ,
            ),
            {second},
        )
        assert_equal(
            self._list_bundle(),
            set(),
            msg='directory should now be empty (after)',
        )

        third = self._empty_ingest()
        fourth = self._empty_ingest()
        fifth = self._empty_ingest()
        sixth = self._empty_ingest()

        assert_equal(
            self._list_bundle(),
            {third, fourth, fifth, sixth},
            msg='larger set of ingestions did not happen correctly',
        )

        assert_equal(
            self.clean(
                'bundle',
                before=self._ts_of_run(fourth),
                after=self._ts_of_run(fifth),
                environ=self.environ,
            ),
            {third, sixth},
        )

        assert_equal(
            self._list_bundle(),
            {fourth, fifth},
            msg='did not strip first and last directories',
        )
    def test_clean_keep_last(self):
        first = self._empty_ingest()

        assert_equal(
            self.clean('bundle', keep_last=1, environ=self.environ),
            set(),
        )
        assert_equal(
            self._list_bundle(),
            {first},
            msg='directory should not have changed',
        )

        second = self._empty_ingest()
        assert_equal(
            self._list_bundle(),
            {first, second},
            msg='two ingestions are not present',
        )
        assert_equal(
            self.clean('bundle', keep_last=1, environ=self.environ),
            {first},
        )
        assert_equal(
            self._list_bundle(),
            {second},
            msg='first ingestion was not removed with keep_last=1',
        )

        third = self._empty_ingest()
        fourth = self._empty_ingest()
        fifth = self._empty_ingest()

        assert_equal(
            self._list_bundle(),
            {second, third, fourth, fifth},
            msg='larger set of ingestions did not happen correctly',
        )

        assert_equal(
            self.clean('bundle', keep_last=2, environ=self.environ),
            {second, third},
        )

        assert_equal(
            self._list_bundle(),
            {fourth, fifth},
            msg='keep_last=2 did not remove the correct number of ingestions',
        )

        with assert_raises(BadClean):
            self.clean('bundle', keep_last=-1, environ=self.environ)

        assert_equal(
            self._list_bundle(),
            {fourth, fifth},
            msg='keep_last=-1 removed some ingestions',
        )

        assert_equal(
            self.clean('bundle', keep_last=0, environ=self.environ),
            {fourth, fifth},
        )

        assert_equal(
            self._list_bundle(),
            set(),
            msg='keep_last=0 did not remove the correct number of ingestions',
        )
Example #44
    def test_parse_args(self):
        n = Namespace()

        arg_dict = {}

        arg_list = [
            'key=value',
            'arg1=test1',
            'arg2=test2',
            'arg_3=test3',
            '_arg_4_=test4',
        ]
        for arg in arg_list:
            parse_extension_arg(arg, arg_dict)
        assert_equal(
            arg_dict, {
                '_arg_4_': 'test4',
                'arg_3': 'test3',
                'arg2': 'test2',
                'arg1': 'test1',
                'key': 'value',
            })
        create_args(arg_list, n)
        assert_equal(n.key, 'value')
        assert_equal(n.arg1, 'test1')
        assert_equal(n.arg2, 'test2')
        assert_equal(n.arg_3, 'test3')
        assert_equal(n._arg_4_, 'test4')

        msg = ("invalid extension argument '1=test3', "
               "must be in key=value form")
        with assert_raises_str(ValueError, msg):
            parse_extension_arg('1=test3', {})
        msg = ("invalid extension argument 'arg4 test4', "
               "must be in key=value form")
        with assert_raises_str(ValueError, msg):
            parse_extension_arg('arg4 test4', {})
        msg = ("invalid extension argument 'arg5.1=test5', "
               "must be in key=value form")
        with assert_raises_str(ValueError, msg):
            parse_extension_arg('arg5.1=test5', {})
        msg = ("invalid extension argument 'arg6.6arg=test6', "
               "must be in key=value form")
        with assert_raises_str(ValueError, msg):
            parse_extension_arg('arg6.6arg=test6', {})
        msg = ("invalid extension argument 'arg7.-arg7=test7', "
               "must be in key=value form")
        with assert_raises_str(ValueError, msg):
            parse_extension_arg('arg7.-arg7=test7', {})
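
Taken together, the accepted and rejected inputs imply a key grammar of dot-separated Python-style identifiers. A sketch that reproduces those outcomes (the regex and parse_extension_arg_sketch are assumptions inferred from the test, not the actual parser):

import re

# Each dotted segment must start with a letter or underscore, so inputs
# like '1=test3', 'arg5.1=test5', and 'arg6.6arg=test6' are rejected.
_ARG_PATTERN = re.compile(r'^([^\d\W]\w*(\.[^\d\W]\w*)*)=(.*)$')

def parse_extension_arg_sketch(arg, arg_dict):
    match = _ARG_PATTERN.match(arg)
    if match is None:
        raise ValueError(
            "invalid extension argument %r, must be in key=value form" % arg
        )
    arg_dict[match.group(1)] = match.group(3)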
Example #45
    def test_order_equity_targeted(self, order_method, amount):
        # Every day, place an order for a target of $10000 worth of sid(1).
        # With no commissions or slippage, we should only place one order.
        algotext = """
import zipline.api as api

def initialize(context):
    api.set_slippage(api.slippage.FixedSlippage(spread=0.0))
    api.set_commission(api.commission.PerShare(0))

    context.equity = api.sid(1)

    api.schedule_function(
        func=do_order,
        date_rule=api.date_rules.every_day(),
        time_rule=api.time_rules.market_open(),
    )

def do_order(context, data):
    context.ordered = True
    api.{order_func}(context.equity, {arg})
     """.format(order_func=order_method, arg=amount)

        result = self.run_algorithm(script=algotext)

        assert_equal([len(ords) for ords in result.orders], [1, 0, 0, 0])
        order = result.orders.iloc[0][0]
        assert_equal(order["amount"], 5000)
        assert_equal(order["sid"], self.EQUITY)

        for positions in result.positions.values:
            assert_equal(len(positions), 1)
            assert_equal(positions[0]["amount"], 5000.0)
            assert_equal(positions[0]["sid"], self.EQUITY)
Example #46
    def test_benchmark_argument_handling(self):
        runner = CliRunner()

        # CLI validates that the algo file exists, so create an empty file.
        algo_path = self.tmpdir.getpath('dummy_algo.py')
        with open(algo_path, 'w'):
            pass

        def run_and_get_benchmark_spec(benchmark_args):
            """
            Run the cli, mocking out `main._run`, and return the benchmark_spec
            passed to _run.
            """
            args = [
                '--no-default-extension',
                'run',
                '-s',
                '2014-01-02',
                '-e 2015-01-02',
                '--algofile',
                algo_path,
            ] + benchmark_args

            mock_spec = mock.create_autospec(main._run)

            with mock.patch.object(main, '_run', spec=mock_spec) as mock_run:
                result = runner.invoke(main.main, args, catch_exceptions=False)

            if result.exit_code != 0:
                raise AssertionError(
                    "Cli run failed with {exc}\n\n"
                    "Output was:\n\n"
                    "{output}".format(exc=result.exception,
                                      output=result.output), )

            mock_run.assert_called_once()

            return mock_run.call_args[1]['benchmark_spec']

        spec = run_and_get_benchmark_spec([])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, None)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, False)

        spec = run_and_get_benchmark_spec(['--no-benchmark'])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, None)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, True)

        for symbol in 'AAPL', 'SPY':
            spec = run_and_get_benchmark_spec(['--benchmark-symbol', symbol])
            assert_equal(spec.benchmark_returns, None)
            assert_equal(spec.benchmark_file, None)
            assert_equal(spec.benchmark_sid, None)
            assert_equal(spec.benchmark_symbol, symbol)
            assert_equal(spec.no_benchmark, False)

        for sid in 2, 3:
            spec = run_and_get_benchmark_spec(['--benchmark-sid', str(sid)])
            assert_equal(spec.benchmark_returns, None)
            assert_equal(spec.benchmark_file, None)
            assert_equal(spec.benchmark_sid, sid)
            assert_equal(spec.benchmark_symbol, None)
            assert_equal(spec.no_benchmark, False)

        # CLI also validates the returns file exists.
        bm_path = self.tmpdir.getpath('returns.csv')
        with open(bm_path, 'w'):
            pass

        spec = run_and_get_benchmark_spec(['--benchmark-file', bm_path])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, bm_path)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, False)
Example #48
    def assert_all_empty(self, dfs):
        for k, v in dfs.items():
            assert_equal(len(v), 0, msg='%s dataframe should be empty' % k)
Example #49
    def test_unadjusted_get_value(self):
        """Test get_value() on both a price field (CLOSE) and VOLUME."""
        reader = self.daily_bar_reader

        def make_failure_msg(asset, date, field):
            return "Unexpected value for sid={}; date={}; field={}.".format(
                asset,
                date.date(),
                field
            )

        for asset in self.assets:
            # Dates to check.
            asset_start = self.asset_start(asset)

            asset_dates = self.dates_for_asset(asset)
            asset_middle = asset_dates[len(asset_dates) // 2]

            asset_end = self.asset_end(asset)

            # At beginning
            assert_equal(
                reader.get_value(asset, asset_start, CLOSE),
                expected_bar_value_with_holes(
                    asset_id=asset,
                    date=asset_start,
                    colname=CLOSE,
                    holes=self.holes,
                    missing_value=nan,
                ),
                msg=make_failure_msg(asset, asset_start, CLOSE),
            )

            # Middle
            assert_equal(
                reader.get_value(asset, asset_middle, CLOSE),
                expected_bar_value_with_holes(
                    asset_id=asset,
                    date=asset_middle,
                    colname=CLOSE,
                    holes=self.holes,
                    missing_value=nan,
                ),
                msg=make_failure_msg(asset, asset_middle, CLOSE),
            )

            # End
            assert_equal(
                reader.get_value(asset, asset_end, CLOSE),
                expected_bar_value_with_holes(
                    asset_id=asset,
                    date=asset_end,
                    colname=CLOSE,
                    holes=self.holes,
                    missing_value=nan,
                ),
                msg=make_failure_msg(asset, asset_end, CLOSE),
            )

            # Ensure that volume does not have float adjustment applied.
            assert_equal(
                reader.get_value(asset, asset_start, VOLUME),
                expected_bar_value_with_holes(
                    asset_id=asset,
                    date=asset_start,
                    colname=VOLUME,
                    holes=self.holes,
                    missing_value=0,
                ),
                msg=make_failure_msg(asset, asset_start, VOLUME),
            )
Example #50
    def test_calculate_dividend_ratio(self):
        first_date_ix = 200
        dates = self.trading_calendar.all_sessions[first_date_ix : first_date_ix + 3]

        before_pricing_data = (dates[0] - self.trading_calendar.day).tz_convert("UTC")
        one_day_past_pricing_data = (dates[-1] + self.trading_calendar.day).tz_convert(
            "UTC"
        )
        ten_days_past_pricing_data = (
            dates[-1] + self.trading_calendar.day * 10
        ).tz_convert("UTC")

        def T(n):
            return dates[n].tz_convert("UTC")

        close = pd.DataFrame(
            [
                [10.0, 0.5, 30.0],  # noqa
                [9.5, 0.4, np.nan],  # noqa
                [15.0, 0.6, np.nan],
            ],  # noqa
            columns=[0, 1, 2],
            index=dates,
        )

        dividends = pd.DataFrame(
            [
                # an ex_date on or before the first day of pricing data
                # means that we cannot get the previous day's close, so we
                # should not expect to see these dividends in the
                # output
                [0, before_pricing_data, 10],
                [0, T(0), 10],
                # previous price was 0.5, meaning the dividend amount
                # is greater than the price and the ratio would be
                # negative. we should warn and drop this row
                [1, T(1), 0.51],
                # previous price was 0.4, meaning the dividend amount
                # is exactly equal to price and the ratio would be 0.
                # we should warn and drop this row
                [1, T(2), 0.4],
                # previous price is nan, so we cannot compute the ratio.
                # we should warn and drop this row
                [2, T(2), 10],
                # previous price was 10, expected ratio is 0.95
                [0, T(1), 0.5],
                # previous price was 0.4, expected ratio is 0.9
                [1, T(2), 0.04],
                # we shouldn't crash in the process of warning/dropping this
                # row even though it is past the range of `dates`
                [2, one_day_past_pricing_data, 0.1],
                [2, ten_days_past_pricing_data, 0.1],
            ],
            columns=["sid", "ex_date", "amount"],
        )
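
        # The expected ratios asserted below are consistent with
        # ratio = 1 - amount / previous_close:
        #   sid 0 at T(1): 1 - 0.5 / 10.0 = 0.95
        #   sid 1 at T(2): 1 - 0.04 / 0.4 = 0.90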

        # give every extra date field a unique date so that we can make sure
        # they appear unchanged in the dividends payouts
        ix = first_date_ix
        for col in "declared_date", "record_date", "pay_date":
            extra_dates = self.trading_calendar.all_sessions[ix : ix + len(dividends)]
            ix += len(dividends)
            dividends[col] = extra_dates

        self.writer_from_close(close).write(dividends=dividends)
        dfs = self.component_dataframes()
        dividend_payouts = dfs.pop("dividend_payouts")
        dividend_ratios = dfs.pop("dividends")
        self.assert_all_empty(dfs)

        payout_sort_key = ["sid", "ex_date", "amount"]
        dividend_payouts = dividend_payouts.sort_values(payout_sort_key)
        dividend_payouts = dividend_payouts.reset_index(drop=True)

        # every input dividend should appear in the payouts table, even the
        # ones dropped from the ratio computation
        expected_dividend_payouts = dividends.sort_values(payout_sort_key)
        expected_dividend_payouts.reset_index(drop=True, inplace=True)
        assert_equal(dividend_payouts, expected_dividend_payouts)

        expected_dividend_ratios = pd.DataFrame(
            [[T(1), 0.95, 0], [T(2), 0.90, 1]],
            columns=["effective_date", "ratio", "sid"],
        )
        dividend_ratios.sort_values(
            ["effective_date", "sid"],
            inplace=True,
        )
        dividend_ratios.reset_index(drop=True, inplace=True)
        assert_equal(dividend_ratios, expected_dividend_ratios)

        self.assertTrue(
            self.log_handler.has_warning(
                "Couldn't compute ratio for dividend sid=2, ex_date=1990-10-18,"
                " amount=10.000",
            )
        )
        self.assertTrue(
            self.log_handler.has_warning(
                "Couldn't compute ratio for dividend sid=2, ex_date=1990-10-19,"
                " amount=0.100",
            )
        )
        self.assertTrue(
            self.log_handler.has_warning(
                "Couldn't compute ratio for dividend sid=2, ex_date=1990-11-01,"
                " amount=0.100",
            )
        )
        self.assertTrue(
            self.log_handler.has_warning(
                "Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-17,"
                " amount=0.510",
            )
        )
        self.assertTrue(
            self.log_handler.has_warning(
                "Dividend ratio <= 0 for dividend sid=1, ex_date=1990-10-18,"
                " amount=0.400",
            )
        )
Example #51
    def test_sessions(self):
        assert_equal(self.daily_bar_reader.sessions, self.sessions)
Example #52
    def test_reduce(self):
        assert_equal(
            pickle.loads(pickle.dumps(self.future)).to_dict(),
            self.future.to_dict(),
        )
Example #53
    def assert_result_contains_all_sids(self, results):
        assert_equal(
            list(map(int, results.columns)),
            self.ASSET_FINDER_EQUITY_SIDS,
        )
Example #54
    def test_ingest(self):
        start = pd.Timestamp('2014-01-06', tz='utc')
        end = pd.Timestamp('2014-01-10', tz='utc')
        trading_days = get_calendar('NYSE').all_trading_days
        calendar = trading_days[trading_days.slice_indexer(start, end)]
        minutes = get_calendar('NYSE').trading_minutes_for_days_in_range(
            calendar[0], calendar[-1]
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids,
            calendar[0],
            calendar[-1],
        )

        daily_bar_data = make_bar_data(equities, calendar)
        minute_bar_data = make_bar_data(equities, minutes)
        first_split_ratio = 0.5
        second_split_ratio = 0.1
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds('2014-01-08'),
                'ratio': first_split_ratio,
                'sid': 0,
            },
            {
                'effective_date': str_to_seconds('2014-01-09'),
                'ratio': second_split_ratio,
                'sid': 1,
            },
        ])
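
        # 2014-01-08 and 2014-01-09 are the third and fourth sessions of the
        # 2014-01-06 through 2014-01-10 calendar, so the adjustment mappings
        # asserted below are keyed by row indices 2 and 3.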

        schedule = get_calendar('NYSE').schedule

        @self.register(
            'bundle',
            calendar=calendar,
            opens=schedule.market_open[calendar[0]:calendar[-1]],
            closes=schedule.market_close[calendar[0]:calendar[-1]],
        )
        def bundle_ingest(environ,
                          asset_db_writer,
                          minute_bar_writer,
                          daily_bar_writer,
                          adjustment_writer,
                          calendar,
                          cache,
                          show_progress,
                          output_dir):
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bar_data)
            daily_bar_writer.write(daily_bar_data)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, pd.DatetimeIndex)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        actual = bundle.minute_bar_reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )

        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(minutes, equities, colname),
                msg=colname,
            )

        actual = bundle.daily_bar_reader.load_raw_arrays(
            columns,
            calendar[0],
            calendar[-1],
            sids,
        )
        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(calendar, equities, colname),
                msg=colname,
            )
        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            calendar,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                {
                    2: [Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=first_split_ratio,
                    )],
                    3: [Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=second_split_ratio,
                    )],
                },
                msg=column,
            )

        # check the volume, the value should be 1/ratio
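        # (a split that multiplies prices by `ratio` multiplies share
        # counts, and therefore volume, by 1 / ratio)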
        assert_equal(
            adjustments_for_cols[-1],
            {
                2: [Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=1 / first_split_ratio,
                )],
                3: [Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=1 / second_split_ratio,
                )],
            },
            msg='volume',
        )
Example #55
    def test_bundle(self):
        def get_symbol_from_url(url):
            params = parse_qs(urlparse(url).query)
            symbol, = params['s']
            return symbol

        def pricing_callback(request):
            headers = {
                'content-encoding': 'gzip',
                'content-type': 'text/csv',
            }
            path = test_resource_path(
                'yahoo_samples',
                get_symbol_from_url(request.url) + '.csv.gz',
            )
            with open(path, 'rb') as f:
                return (
                    200,
                    headers,
                    f.read(),
                )

        for _ in range(3):
            self.responses.add_callback(
                self.responses.GET,
                'http://ichart.finance.yahoo.com/table.csv',
                pricing_callback,
            )

        def adjustments_callback(request):
            path = test_resource_path(
                'yahoo_samples',
                get_symbol_from_url(request.url) + '.adjustments.gz',
            )
            return 200, {}, read_compressed(path)

        for _ in range(3):
            self.responses.add_callback(
                self.responses.GET,
                'http://ichart.finance.yahoo.com/x',
                adjustments_callback,
            )

        self.register(
            'bundle',
            yahoo_equities(self.symbols),
            calendar=self.calendar,
            start_session=self.asset_start,
            end_session=self.asset_end,
        )

        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
        }

        self.ingest('bundle', environ=environ, show_progress=False)
        bundle = self.load('bundle', environ=environ)

        sids = 0, 1, 2
        equities = bundle.asset_finder.retrieve_all(sids)
        for equity, expected_symbol in zip(equities, self.symbols):
            assert_equal(equity.symbol, expected_symbol)

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        sessions = self.sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data()
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            self.sessions,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
                decimal=4,
            )
Example #56
        def do_checks(cls, colnames):

            specialized = cls.specialize(domain)

            # Specializations should be memoized.
            self.assertIs(specialized, cls.specialize(domain))
            self.assertIs(specialized, specialized.specialize(domain))

            # Specializations should have the same name and module
            assert_equal(specialized.__name__, cls.__name__)
            assert_equal(specialized.__module__, cls.__module__)
            self.assertIs(specialized.domain, domain)

            for attr in colnames:
                original = getattr(cls, attr)
                new = getattr(specialized, attr)

                # We should get a new column from the specialization, which
                # should be the same object that we would get from specializing
                # the original column.
                self.assertIsNot(original, new)
                self.assertIs(new, original.specialize(domain))

                # Columns should be bound to their respective datasets.
                self.assertIs(original.dataset, cls)
                self.assertIs(new.dataset, specialized)

                # The new column should have the domain of the specialization.
                assert_equal(new.domain, domain)

                # Names, dtypes, and missing_values should match.
                assert_equal(original.name, new.name)
                assert_equal(original.dtype, new.dtype)
                assert_equal(original.missing_value, new.missing_value)
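
The assertIs checks above rely on specialize being idempotent and memoized per (dataset, domain) pair. Here is a minimal sketch of just that caching property, assuming nothing about zipline's actual DataSet metaclass:

_specializations = {}

def specialize(cls, domain):
    # Specializing a class to its own domain is a no-op, so repeated
    # specialization returns the identical object.
    if getattr(cls, 'domain', None) is domain:
        return cls
    key = (cls, domain)
    if key not in _specializations:
        # type() preserves __name__; the real implementation also carries
        # over __module__ and rebinds every column to the new dataset.
        _specializations[key] = type(cls.__name__, (cls,), {'domain': domain})
    return _specializations[key]

With this cache, specialize(Parent, domain) is specialize(Parent, domain) holds by construction, mirroring the first two identity assertions.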
Example #57
    def test_ingest(self):
        calendar = get_calendar('NYSE')
        sessions = calendar.sessions_in_range(self.START_DATE, self.END_DATE)
        minutes = calendar.minutes_for_sessions_in_range(
            self.START_DATE,
            self.END_DATE,
        )

        sids = tuple(range(3))
        equities = make_simple_equity_info(
            sids,
            self.START_DATE,
            self.END_DATE,
        )

        daily_bar_data = make_bar_data(equities, sessions)
        minute_bar_data = make_bar_data(equities, minutes)
        first_split_ratio = 0.5
        second_split_ratio = 0.1
        splits = pd.DataFrame.from_records([
            {
                'effective_date': str_to_seconds('2014-01-08'),
                'ratio': first_split_ratio,
                'sid': 0,
            },
            {
                'effective_date': str_to_seconds('2014-01-09'),
                'ratio': second_split_ratio,
                'sid': 1,
            },
        ])
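
        # the adjustment assertions below key these splits by session row
        # indices 2 and 3, which implies that, as in the ingest test above,
        # the first session here is 2014-01-06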

        @self.register(
            'bundle',
            calendar_name='NYSE',
            start_session=self.START_DATE,
            end_session=self.END_DATE,
        )
        def bundle_ingest(environ, asset_db_writer, minute_bar_writer,
                          daily_bar_writer, adjustment_writer, calendar,
                          start_session, end_session, cache, show_progress,
                          output_dir):
            assert_is(environ, self.environ)

            asset_db_writer.write(equities=equities)
            minute_bar_writer.write(minute_bar_data)
            daily_bar_writer.write(daily_bar_data)
            adjustment_writer.write(splits=splits)

            assert_is_instance(calendar, TradingCalendar)
            assert_is_instance(cache, dataframe_cache)
            assert_is_instance(show_progress, bool)

        self.ingest('bundle', environ=self.environ)
        bundle = self.load('bundle', environ=self.environ)

        assert_equal(set(bundle.asset_finder.sids), set(sids))

        columns = 'open', 'high', 'low', 'close', 'volume'

        actual = bundle.equity_minute_bar_reader.load_raw_arrays(
            columns,
            minutes[0],
            minutes[-1],
            sids,
        )

        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(minutes, equities, colname),
                msg=colname,
            )

        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            columns,
            self.START_DATE,
            self.END_DATE,
            sids,
        )
        for actual_column, colname in zip(actual, columns):
            assert_equal(
                actual_column,
                expected_bar_values_2d(sessions, equities, colname),
                msg=colname,
            )
        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
            # iterate over all the adjustments but `volume`
            assert_equal(
                adjustments,
                {
                    2: [
                        Float64Multiply(
                            first_row=0,
                            last_row=2,
                            first_col=0,
                            last_col=0,
                            value=first_split_ratio,
                        )
                    ],
                    3: [
                        Float64Multiply(
                            first_row=0,
                            last_row=3,
                            first_col=1,
                            last_col=1,
                            value=second_split_ratio,
                        )
                    ],
                },
                msg=column,
            )

        # check the volume, the value should be 1/ratio
        assert_equal(
            adjustments_for_cols[-1],
            {
                2: [
                    Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=1 / first_split_ratio,
                    )
                ],
                3: [
                    Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=1 / second_split_ratio,
                    )
                ],
            },
            msg='volume',
        )
Example #58
    def assert_identical_results(self, left, right, start_date, end_date):
        """Assert that two pipelines produce the same results."""
        left_result = self.run_pipeline(left, start_date, end_date)
        right_result = self.run_pipeline(right, start_date, end_date)
        assert_equal(left_result, right_result)
Example #59
    def test_position_weights(self):
        sids = (1, 133, 1000)
        equity_1, equity_133, future_1000 = self.asset_finder.retrieve_all(sids)

        def initialize(algo, sids_and_amounts, *args, **kwargs):
            algo.ordered = False
            algo.sids_and_amounts = sids_and_amounts
            algo.set_commission(us_equities=PerTrade(0), us_futures=PerTrade(0))
            algo.set_slippage(us_equities=FixedSlippage(0), us_futures=FixedSlippage(0))

        def handle_data(algo, data):
            if not algo.ordered:
                for s, amount in algo.sids_and_amounts:
                    algo.order(algo.sid(s), amount)
                algo.ordered = True

            algo.record(position_weights=algo.portfolio.current_portfolio_weights)

        daily_stats = self.run_algorithm(
            sids_and_amounts=zip(sids, [2, -1, 1]),
            initialize=initialize,
            handle_data=handle_data,
        )

        expected_position_weights = [
            # No positions held on the first day.
            pd.Series({}),
            # Each equity's position value is its price times the number of
            # shares held. In this example, we hold a long position in 2 shares
            # of equity_1 so its weight is (95.0 * 2) = 190.0 divided by the
            # total portfolio value. The total portfolio value is the sum of
            # cash ($905.00) plus the value of all equity positions.
            #
            # For a futures contract, its weight is the unit price times number
            # of shares held times the multiplier. For future_1000, this is
            # (2.0 * 1 * 100) = 200.0 divided by total portfolio value.
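            #
            # Concretely, the total portfolio value on that day is
            # 905.0 cash + 190.0 long - 95.0 short = 1000.0, giving weights
            # of 0.19, -0.095, and 0.20 respectively.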
            pd.Series(
                {
                    equity_1: 190.0 / (190.0 - 95.0 + 905.0),
                    equity_133: -95.0 / (190.0 - 95.0 + 905.0),
                    future_1000: 200.0 / (190.0 - 95.0 + 905.0),
                }
            ),
            pd.Series(
                {
                    equity_1: 200.0 / (200.0 - 100.0 + 905.0),
                    equity_133: -100.0 / (200.0 - 100.0 + 905.0),
                    future_1000: 200.0 / (200.0 - 100.0 + 905.0),
                }
            ),
            pd.Series(
                {
                    equity_1: 210.0 / (210.0 - 105.0 + 905.0),
                    equity_133: -105.0 / (210.0 - 105.0 + 905.0),
                    future_1000: 200.0 / (210.0 - 105.0 + 905.0),
                }
            ),
        ]

        for i, expected in enumerate(expected_position_weights):
            assert_equal(daily_stats.iloc[i]["position_weights"], expected)