@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
    # We need to call gc.collect before tearing down our class because we
    # have a cycle between TradingAlgorithm and AlgorithmSimulator which
    # ultimately holds a reference to the pipeline engine passed to the
    # tests here.
    # This means that we're not guaranteed to have deleted our disk-backed
    # resource readers (e.g. SQLiteAdjustmentReader) before trying to
    # delete the tempdir, which causes failures on Windows because Windows
    # doesn't allow you to delete a file if someone still has an open
    # handle to that file.
    # :(
    cls.add_class_callback(gc.collect)
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
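# A minimal sketch (assumption, not part of the original test file) of the
# ``dataframe_cache`` behavior the fixtures above rely on: it acts as a
# disk-backed MutableMapping from string keys to DataFrames. The path and
# key below are illustrative.
import pandas as pd
from zipline.utils.cache import dataframe_cache

cache = dataframe_cache("/tmp/example_perf_cache", serialization="pickle")
cache["buyapple"] = pd.DataFrame({"returns": [0.01, -0.02]})
assert cache["buyapple"]["returns"].iloc[0] == 0.01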
def _df_cache(_setup_class, request):
    request.cls.expected_perf = dataframe_cache(
        join(
            str(request.cls.tmp_path),
            "example_data",
            f"expected_perf/{request.param}",
        ),
        serialization="pickle",
    )
    request.cls.no_benchmark_expected_perf = {
        example_name: _no_benchmark_expectations_applied(expected_perf.copy())
        for example_name, expected_perf in request.cls.expected_perf.items()
    }
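# A runnable sketch (assumption: this is how a fixture like ``_df_cache`` is
# wired; names and the version string are illustrative) showing where
# ``request.param`` and ``request.cls`` come from: the fixture must be
# declared with ``params=`` and consumed by class-based tests.
import pytest

@pytest.fixture(scope="class", params=["1-4-3"])  # pandas version, "." -> "-"
def _version_param_sketch(request):
    # request.param is one entry of ``params``; request.cls is the test
    # class because the fixture is applied to a class via usefixtures.
    request.cls.expected_perf_version = request.param

@pytest.mark.usefixtures("_version_param_sketch")
class TestVersionParamSketch:
    def test_version_attached(self):
        assert self.expected_perf_version == "1-4-3"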
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
    cls.no_benchmark_expected_perf = {
        example_name: cls._no_benchmark_expectations_applied(expected_perf.copy())
        for example_name, expected_perf in cls.expected_perf.items()
    }
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))
    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' % pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )
    market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
    for data in market_data:
        update_modified_time(
            cls.tmpdir.getpath('example_data/root/data/' + data)
        )
def ingest(name, environ=os.environ, timestamp=None, show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    timestamp = timestamp.tz_convert('utc').tz_localize(None)
    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `clean_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            wd = stack.enter_context(working_dir(
                pth.data_path([], environ=environ))
            )
            daily_bars_path = wd.ensure_dir(
                *daily_equity_relative(name, timestr, environ=environ)
            )
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                nyse_cal,
                bundle.calendar[0],
                bundle.calendar[-1],
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                bundle.calendar[0],
                wd.ensure_dir(*minute_equity_relative(
                    name, timestr, environ=environ,
                )),
                bundle.opens,
                bundle.closes,
                minutes_per_day=bundle.minutes_per_day,
            )
            asset_db_writer = AssetDBWriter(
                wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ,
                ))
            )
            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ,
                    )),
                    BcolzDailyBarReader(daily_bars_path),
                    bundle.calendar,
                    overwrite=True,
                )
            )
        else:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            bundle.calendar,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    start_session = bundle.start_session
    end_session = bundle.end_session

    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    timestamp = timestamp.tz_convert('utc').tz_localize(None)

    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `clean_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            wd = stack.enter_context(working_dir(
                pth.data_path([], environ=environ))
            )
            daily_bars_path = wd.ensure_dir(
                *daily_equity_relative(name, timestr, environ=environ)
            )
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                calendar,
                start_session,
                end_session,
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                wd.ensure_dir(*minute_equity_relative(
                    name, timestr, environ=environ,
                )),
                calendar,
                start_session,
                end_session,
                minutes_per_day=bundle.minutes_per_day,
            )
            assets_db_path = wd.getpath(*asset_db_relative(
                name, timestr, environ=environ,
            ))
            asset_db_writer = AssetDBWriter(assets_db_path)

            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ,
                    )),
                    BcolzDailyBarReader(daily_bars_path),
                    calendar.all_sessions,
                    overwrite=True,
                )
            )
        else:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None
            if assets_versions:
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')

        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )

        for version in sorted(set(assets_versions), reverse=True):
            version_path = wd.getpath(*asset_db_relative(
                name, timestr, environ=environ, db_version=version,
            ))
            with working_file(version_path) as wf:
                shutil.copy2(assets_db_path, wf.path)
                downgrade(wf.path, version)
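# A minimal usage sketch for ``ingest`` above. The bundle name 'quandl' is
# illustrative; any name previously passed to ``register`` works, and
# registration must happen before this call:
if __name__ == '__main__':
    ingest('quandl', show_progress=True)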
def main(ctx, rebuild_input):
    """Rebuild the perf data for test_examples"""
    example_path = test_resource_path("example_data.tar.gz")

    with tmp_dir() as d:
        with tarfile.open(example_path) as tar:
            tar.extractall(d.path)

        # The environ here should be the same (modulo the tempdir location)
        # as we use in test_examples.py.
        environ = {"ZIPLINE_ROOT": d.getpath("example_data/root")}

        if rebuild_input:
            raise NotImplementedError(
                "We cannot rebuild input for Yahoo because of "
                "changes Yahoo made to their API, so we cannot "
                "use Yahoo data bundles anymore. This will be fixed in "
                "a future release",
            )

        # we need to register the bundle; it is already ingested and saved
        # in the example_data.tar.gz file
        @register("test")
        def nop_ingest(*args, **kwargs):
            raise NotImplementedError("we cannot rebuild the test bundle")

        new_perf_path = d.getpath(
            "example_data/new_perf/%s" % pd.__version__.replace(".", "-"),
        )
        c = dataframe_cache(
            new_perf_path,
            serialization="pickle:2",
        )
        with c:
            for name in EXAMPLE_MODULES:
                c[name] = examples.run_example(
                    EXAMPLE_MODULES,
                    name,
                    environ=environ,
                    benchmark_returns=read_checked_in_benchmark_data(),
                )

            correct_called = [False]

            console = None

            def _exit(*args, **kwargs):
                console.raw_input = eof

            def correct():
                correct_called[0] = True
                _exit()

            expected_perf_path = d.getpath(
                "example_data/expected_perf/%s"
                % pd.__version__.replace(".", "-"),
            )

            # allow users to run some analysis to make sure that the new
            # results check out
            console = InteractiveConsole(
                {
                    "correct": correct,
                    "exit": _exit,
                    "incorrect": _exit,
                    "new": c,
                    "np": np,
                    "old": dataframe_cache(
                        expected_perf_path,
                        serialization="pickle",
                    ),
                    "pd": pd,
                    "cols_to_check": examples._cols_to_check,
                    "changed_results": changed_results,
                }
            )
            console.interact(banner)

            if not correct_called[0]:
                ctx.fail(
                    "`correct()` was not called! This means that the new"
                    " results will not be written",
                )

        # move the new results to the expected path
        shutil.rmtree(expected_perf_path)
        shutil.copytree(new_perf_path, expected_perf_path)

        # Clear out all the temporary new perf so it doesn't get added to
        # the tarball.
        shutil.rmtree(d.getpath("example_data/new_perf/"))

        with tarfile.open(example_path, "w|gz") as tar:
            tar.add(d.getpath("example_data"), "example_data")
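# Usage note (inferred from the flow above): running this command opens an
# InteractiveConsole with the ``new`` and ``old`` dataframe caches bound so
# the results can be compared (e.g. via ``changed_results``). Calling
# ``correct()`` accepts the new results and rewrites example_data.tar.gz;
# ``exit()`` or ``incorrect()`` aborts without overwriting anything.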