コード例 #1
0
ファイル: test_history.py プロジェクト: zhoulingjun/zipline
    def test_history_container(self,
                               name,
                               specs,
                               sids,
                               dt,
                               updates,
                               expected):
        """
        Push every update into a 'minute' HistoryContainer and, after each
        one, compare the history of every spec with the expected frame.

        `expected` is indexed first by ``spec.key_str`` and then by the
        position of the update. `name` is not referenced in the body.
        """
        # Sanity check on test input: each spec needs one expected entry
        # per update.
        for spec in specs:
            self.assertEqual(len(expected[spec.key_str]), len(updates))

        specs_by_key = {}
        for spec in specs:
            specs_by_key[spec.key_str] = spec
        container = HistoryContainer(specs_by_key, sids, dt, 'minute')

        # Options shared by every frame comparison below.
        frame_checks = dict(
            check_dtype=False,
            check_column_type=True,
            check_index_type=True,
            check_frame_type=True,
        )

        for position, update in enumerate(updates):
            bar_dt = self.bar_data_dt(update)
            container.update(update, bar_dt)

            for spec in specs:
                pd.util.testing.assert_frame_equal(
                    container.get_history(spec, bar_dt),
                    expected[spec.key_str][position],
                    **frame_checks
                )
コード例 #2
0
ファイル: test_history.py プロジェクト: zhoulingjun/zipline
    def test_history_grow_length(self,
                                 freq,
                                 field,
                                 data_frequency,
                                 construct_digest):
        """
        Add specs with larger bar counts to an existing container through
        ensure_spec and check the digest panel window length each time.

        The starting spec has a bar count of 2 when `construct_digest` is
        truthy and 1 otherwise; the specs added afterwards ask for one and
        two more bars than that.
        """
        bar_count = 1
        if construct_digest:
            bar_count = 2

        # Everything except bar_count is identical across the specs.
        shared = dict(
            frequency=freq,
            field=field,
            ffill=True,
            data_frequency=data_frequency,
        )
        spec = history.HistorySpec(bar_count=bar_count, **shared)

        if data_frequency == 'minute':
            start = '2013-06-28 13:31'
        else:
            start = '2013-06-28 12:00AM'
        initial_dt = pd.Timestamp(start, tz='UTC')

        container = HistoryContainer(
            {spec.key_str: spec}, [1], initial_dt, data_frequency,
        )

        if construct_digest:
            digest = container.digest_panels[spec.frequency]
            self.assertEqual(digest.window_length, 1)

        bar_data = BarData()
        container.update(bar_data, initial_dt)

        # Both larger specs are constructed before any of them is added.
        to_add = [
            history.HistorySpec(bar_count=bar_count + extra, **shared)
            for extra in (1, 2)
        ]

        for spec in to_add:
            container.ensure_spec(spec, initial_dt, bar_data)

            digest = container.digest_panels[spec.frequency]
            self.assertEqual(digest.window_length, spec.bar_count - 1)

            self.assert_history(container, spec, initial_dt)
コード例 #3
0
    def test_multiple_specs_on_same_bar(self):
        """
        Test that a ffill spec and a non-ffill spec both get the correct
        results when they are queried on the same bar.
        """
        def make_spec(ffill):
            # The two specs differ only in their ffill flag.
            return history.HistorySpec(
                bar_count=3,
                frequency='1m',
                field='price',
                ffill=ffill,
                data_frequency='minute',
                env=self.env,
            )

        def eastern_as_utc(stamp):
            return pd.Timestamp(stamp, tz='US/Eastern').tz_convert('UTC')

        spec = make_spec(True)
        no_fill_spec = make_spec(False)

        initial_dt = eastern_as_utc('2013-06-28 9:31AM')

        container = HistoryContainer(
            {spec.key_str: spec, no_fill_spec.key_str: no_fill_spec},
            [1],
            initial_dt,
            'minute',
            env=self.env,
        )

        bar_data = BarData()
        container.update(bar_data, initial_dt)

        # Add data on bar two of first day.
        second_bar_dt = eastern_as_utc('2013-06-28 9:32AM')
        bar_data[1] = {'price': 10, 'dt': second_bar_dt}
        container.update(bar_data, second_bar_dt)

        third_bar_dt = eastern_as_utc('2013-06-28 9:33AM')

        # Third bar: sid 1 is removed from the bar data before updating.
        del bar_data[1]
        container.update(bar_data, third_bar_dt)

        filled = container.get_history(spec, third_bar_dt)
        unfilled = container.get_history(no_fill_spec, third_bar_dt)

        self.assertEqual(filled.values[-1], 10)
        self.assertTrue(np.isnan(unfilled.values[-1]),
                        "Last price should be np.nan")
コード例 #4
0
    def test_history_add_field(self, bar_count, freq, pair, data_frequency):
        """
        Start a container with a spec on the first field of `pair`, add a
        spec on the second field through ensure_spec, and check the digest
        panel for that frequency.

        For bar_count > 1 the digest panel must list the new field and
        have a window of bar_count - 1; otherwise no digest panel may
        exist for the frequency.
        """
        first, second = pair

        # Settings common to the initial spec and the one added later.
        shared = dict(
            frequency=freq,
            ffill=True,
            data_frequency=data_frequency,
            env=self.env,
        )
        spec = history.HistorySpec(bar_count=bar_count, field=first, **shared)

        if data_frequency == 'minute':
            start = '2013-06-28 13:31'
        else:
            start = '2013-06-28 12:00AM'
        initial_dt = pd.Timestamp(start, tz='UTC')

        container = HistoryContainer(
            {spec.key_str: spec},
            [1],
            initial_dt,
            data_frequency,
            env=self.env,
        )

        if bar_count > 1:
            starting_digest = container.digest_panels[spec.frequency]
            self.assertEqual(starting_digest.window_length, 1)

        bar_data = BarData()
        container.update(bar_data, initial_dt)

        new_spec = history.HistorySpec(bar_count, field=second, **shared)
        container.ensure_spec(new_spec, initial_dt, bar_data)

        if not bar_count > 1:
            self.assertNotIn(new_spec.frequency, container.digest_panels)
        else:
            digest_panel = container.digest_panels[new_spec.frequency]
            self.assertEqual(digest_panel.window_length, bar_count - 1)
            self.assertIn(second, digest_panel.items)

        # Warnings raised while checking the history are silenced.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.assert_history(container, new_spec, initial_dt)
コード例 #5
0
    def run(self, source, overwrite_sim_params=True,
            benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - pandas.Panel
                     - zipline source
                     - list of sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must consist of ints representing the
                 different sids
               * index must be DatetimeIndex
               * array contents should be price info.

            overwrite_sim_params : bool
               When true, self.sim_params is updated from the sources:
               period_start / period_end when the source exposes
               `start` / `end`, plus the set of all source sids. Switched
               off, with a UserWarning, when a list is passed.

            benchmark_return_source :
               Not referenced in this method body.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        """
        if isinstance(source, list):
            if overwrite_sim_params:
                message = (
                    "List of sources passed, will not attempt to extract "
                    "sids, and start and end\n"
                    " dates. Make sure to set the correct fields in "
                    "sim_params passed to\n"
                    " __init__()."
                )
                warnings.warn(message, UserWarning)
                overwrite_sim_params = False
        elif isinstance(source, pd.DataFrame):
            # A bare DataFrame is wrapped in a DataFrameSource.
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)

        self.set_sources(source if isinstance(source, list) else [source])

        # Override sim_params if params are provided by the source.
        if overwrite_sim_params:
            for source_attr, param_attr in (('start', 'period_start'),
                                            ('end', 'period_end')):
                if hasattr(source, source_attr):
                    setattr(self.sim_params, param_attr,
                            getattr(source, source_attr))
            self.sim_params.sids = {
                sid for s in self.sources for sid in s.sids
            }
            # Changing period_start and period_close might require updating
            # of first_open and last_close.
            self.sim_params._update_internal()

        # A history container is only built when specs were registered.
        if len(self.history_specs) > 0:
            self.history_container = HistoryContainer(
                self.history_specs,
                self.sim_params.sids,
                self.sim_params.first_open)

        # Wrap every registered transform in a StatefulTransform.
        self.transforms = []
        for label, descr in iteritems(self.registered_transforms):
            wrapped = StatefulTransform(
                descr['class'],
                *descr['args'],
                **descr['kwargs']
            )
            wrapped.namestring = label
            self.transforms.append(wrapped)

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create transforms and zipline
        self.gen = self._create_generator(self.sim_params)

        with ZiplineAPI(self):
            # Drain the generator; each item it yields is a perf dictionary.
            perfs = list(self.gen)

            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats
コード例 #6
0
    def run(self, source, sim_params=None, benchmark_return_source=None):
        """Run the algorithm.

        :Arguments:
            source : can be either:
                     - pandas.DataFrame
                     - pandas.Panel
                     - zipline source
                     - list or tuple of zipline sources

               If pandas.DataFrame is provided, it must have the
               following structure:
               * column names must consist of ints representing the
                 different sids
               * index must be DatetimeIndex
               * array contents should be price info.

            sim_params :
               Simulation parameters for this run. When None, the
               algorithm's own sim_params are used; if those are None as
               well, parameters are created from the source's start and
               end and self.capital_base.

            benchmark_return_source :
               Not referenced in this method body.

        :Returns:
            daily_stats : pandas.DataFrame
              Daily performance metrics such as returns, alpha etc.

        """
        if isinstance(source, (list, tuple)):
            # Several sources have no single start/end to derive params from.
            have_params = (self.sim_params is not None or
                           sim_params is not None)
            assert have_params, \
                """When providing a list of sources, \
                sim_params have to be specified as a parameter
                or in the constructor."""
        elif isinstance(source, pd.DataFrame):
            # A bare DataFrame is wrapped in a DataFrameSource.
            source = DataFrameSource(source)
        elif isinstance(source, pd.Panel):
            source = DataPanelSource(source)

        self.sources = (
            source if isinstance(source, (list, tuple)) else [source]
        )

        # Resolve the simulation parameters: an explicit argument wins,
        # then the algorithm's own params, and as a last resort params
        # created from the start and end of the source provided.
        if sim_params is None:
            sim_params = self.sim_params
            if sim_params is None:
                first, last = source.start, source.end
                sim_params = create_simulation_parameters(
                    start=first,
                    end=last,
                    capital_base=self.capital_base,
                )

        # update sim params to ensure it's set
        self.sim_params = sim_params
        if self.sim_params.sids is None:
            self.sim_params.sids = {
                sid for s in self.sources for sid in s.sids
            }

        # A history container is only built when specs were registered.
        if len(self.history_specs) > 0:
            self.history_container = HistoryContainer(
                self.history_specs,
                self.sim_params.sids,
                self.sim_params.first_open)

        # Wrap every registered transform in a StatefulTransform.
        self.transforms = []
        for label, descr in iteritems(self.registered_transforms):
            wrapped = StatefulTransform(
                descr['class'],
                *descr['args'],
                **descr['kwargs']
            )
            wrapped.namestring = label
            self.transforms.append(wrapped)

        # force a reset of the performance tracker, in case
        # this is a repeat run of the algorithm.
        self.perf_tracker = None

        # create transforms and zipline
        self.gen = self._create_generator(sim_params)

        with ZiplineAPI(self):
            # Drain the generator; each item it yields is a perf dictionary.
            perfs = list(self.gen)

            # convert perf dict to pandas dataframe
            daily_stats = self._create_daily_stats(perfs)

        self.analyze(daily_stats)

        return daily_stats
コード例 #7
0
ファイル: test_history.py プロジェクト: zhoulingjun/zipline
    def test_container_nans_and_daily_roll(self):
        """
        Walk a 3-bar, daily, forward-filled price history (built on minute
        data) through several days and check the values after each update.

        The container starts with no data, receives a price of 10 on the
        second bar of the first day, a bar without sid 1, a price of 20 on
        the next session, and then two more sessions without sid 1. The
        expected frames are shown in the comments before each group of
        assertions.
        """
        spec = history.HistorySpec(
            bar_count=3,
            frequency='1d',
            field='price',
            ffill=True,
            data_frequency='minute'
        )
        specs = {spec.key_str: spec}
        initial_sids = [1, ]
        initial_dt = pd.Timestamp(
            '2013-06-28 9:31AM', tz='US/Eastern').tz_convert('UTC')

        container = HistoryContainer(
            specs, initial_sids, initial_dt, 'minute'
        )

        bar_data = BarData()
        container.update(bar_data, initial_dt)
        # Nothing was backfilled (there is no db) and the first bar carried
        # no data, so every value should be NaN.
        prices = container.get_history(spec, initial_dt)
        nan_values = np.isnan(prices[1])
        self.assertTrue(all(nan_values), nan_values)

        # Add data on bar two of first day.
        second_bar_dt = pd.Timestamp(
            '2013-06-28 9:32AM', tz='US/Eastern').tz_convert('UTC')

        bar_data[1] = {
            'price': 10,
            'dt': second_bar_dt
        }
        container.update(bar_data, second_bar_dt)

        prices = container.get_history(spec, second_bar_dt)
        # Prices should be
        #                             1
        # 2013-06-26 20:00:00+00:00 NaN
        # 2013-06-27 20:00:00+00:00 NaN
        # 2013-06-28 13:32:00+00:00  10

        self.assertTrue(np.isnan(prices[1].ix[0]))
        self.assertTrue(np.isnan(prices[1].ix[1]))
        self.assertEqual(prices[1].ix[2], 10)

        third_bar_dt = pd.Timestamp(
            '2013-06-28 9:33AM', tz='US/Eastern').tz_convert('UTC')

        del bar_data[1]

        container.update(bar_data, third_bar_dt)

        prices = container.get_history(spec, third_bar_dt)
        # The 10 from the previous bar should be forward filled.

        # Prices should be
        #                             1
        # 2013-06-26 20:00:00+00:00 NaN
        # 2013-06-27 20:00:00+00:00 NaN
        # 2013-06-28 13:33:00+00:00  10

        self.assertEqual(prices[1][third_bar_dt], 10)

        # Note that we did not fill in data at the close.
        # There was a bug where a nan was being introduced because the
        # last value of 'raw' data was used, instead of a ffilled close price.

        day_two_first_bar_dt = pd.Timestamp(
            '2013-07-01 9:31AM', tz='US/Eastern').tz_convert('UTC')

        bar_data[1] = {
            'price': 20,
            'dt': day_two_first_bar_dt
        }

        container.update(bar_data, day_two_first_bar_dt)

        prices = container.get_history(spec, day_two_first_bar_dt)

        # Prices should be
        #                              1
        # 2013-06-27 20:00:00+00:00  nan
        # 2013-06-28 20:00:00+00:00   10
        # 2013-07-01 13:31:00+00:00   20

        self.assertTrue(np.isnan(prices[1].ix[0]))
        self.assertEqual(prices[1].ix[1], 10)
        self.assertEqual(prices[1].ix[2], 20)

        # Clear out the bar data

        del bar_data[1]

        day_three_first_bar_dt = pd.Timestamp(
            '2013-07-02 9:31AM', tz='US/Eastern').tz_convert('UTC')

        container.update(bar_data, day_three_first_bar_dt)

        prices = container.get_history(spec, day_three_first_bar_dt)

        # Prices should be
        #                             1
        # 2013-06-28 20:00:00+00:00  10
        # 2013-07-01 20:00:00+00:00  20
        # 2013-07-02 13:31:00+00:00  20

        # assertEqual, not assertTrue: assertTrue would take the expected
        # number as the failure message and pass for any truthy value.
        self.assertEqual(prices[1].ix[0], 10)
        self.assertEqual(prices[1].ix[1], 20)
        self.assertEqual(prices[1].ix[2], 20)

        day_four_first_bar_dt = pd.Timestamp(
            '2013-07-03 9:31AM', tz='US/Eastern').tz_convert('UTC')

        container.update(bar_data, day_four_first_bar_dt)

        prices = container.get_history(spec, day_four_first_bar_dt)

        # Prices should be
        #                             1
        # 2013-07-01 20:00:00+00:00  20
        # 2013-07-02 20:00:00+00:00  20
        # 2013-07-03 13:31:00+00:00  20

        self.assertEqual(prices[1].ix[0], 20)
        self.assertEqual(prices[1].ix[1], 20)
        self.assertEqual(prices[1].ix[2], 20)