Beispiel #1
0
    def test_lookup_future_chain(self):
        # Three 'AD' futures are registered; the chain is queried for a date
        # that falls after the first contract's expiration.
        def ad_future(symbol, expiry):
            # The fixtures differ only in symbol and expiration date.
            return {
                'symbol': symbol,
                'root_symbol': 'AD',
                'asset_type': 'future',
                'expiration_date': pd.Timestamp(expiry, tz='UTC'),
            }

        metadata = {
            2: ad_future('ADN15', '2015-06-15'),
            1: ad_future('ADV15', '2015-09-14'),
            0: ad_future('ADF16', '2015-12-14'),
        }
        query_dt = pd.Timestamp('2015-06-19', tz='UTC')
        chain = AssetFinder(metadata=metadata).lookup_future_chain(
            'AD', query_dt,
        )

        # Two contracts are expected back, sid 1 first and sid 0 second.
        # Sid 2 expired on 2015-06-15, before the query date.
        self.assertEqual(len(chain), 2)
        for position, expected_sid in enumerate((1, 0)):
            self.assertEqual(chain[position].sid, expected_sid)
Beispiel #2
0
    def test_insert_metadata(self):
        # Insert metadata for sid 0 on a fresh finder, then insert again with
        # one changed field and one new field, checking the cache each time.
        finder = AssetFinder()
        first_insert = dict(
            asset_type='equity',
            start_date='2014-01-01',
            end_date='2015-01-01',
            symbol="PLAY",
            foo_data="FOO",
        )
        finder.insert_metadata(0, **first_insert)

        # The recognised fields must be stored exactly as given.
        for field, expected in (('asset_type', 'equity'),
                                ('symbol', 'PLAY'),
                                ('end_date', '2015-01-01')):
            self.assertEqual(expected, finder.metadata_cache[0][field])

        # 'foo_data' is not a valid field and must not be cached.
        self.assertNotIn('foo_data', finder.metadata_cache[0])

        # Second insert: later end_date plus a new 'exchange' value.
        second_insert = dict(
            asset_type='equity',
            start_date='2014-01-01',
            end_date='2015-02-01',
            symbol="PLAY",
            exchange="NYSE",
        )
        finder.insert_metadata(0, **second_insert)
        for field, expected in (('end_date', '2015-02-01'),
                                ('exchange', 'NYSE'),
                                # Data from the first insert must survive.
                                ('symbol', 'PLAY')):
            self.assertEqual(expected, finder.metadata_cache[0][field])
Beispiel #3
0
def engine_from_files(daily_bar_path,
                      adjustments_path,
                      asset_db_path,
                      calendar,
                      warmup_assets=False):
    """
    Build a SimplePipelineEngine backed by local filesystem resources.

    Parameters
    ----------
    daily_bar_path : str
        Forwarded to `USEquityPricingLoader.from_files` as the daily bar
        location.
    adjustments_path : str
        Forwarded to `USEquityPricingLoader.from_files` as the adjustments
        location.
    asset_db_path : str
        Passed to `AssetFinder`.
    calendar : pd.DatetimeIndex
        Calendar handed to the engine.
    warmup_assets : bool, optional
        When true, every sid known to the finder is retrieved up front and
        the number retrieved is printed.  Intended to populate AssetFinder
        caches, trading memory for lower latency on later pipeline runs.
        Default is False.

    Returns
    -------
    engine : SimplePipelineEngine
        Engine whose loader callback always returns the pricing loader
        built here.
    """
    pricing_loader = USEquityPricingLoader.from_files(
        daily_bar_path,
        adjustments_path,
    )
    finder = AssetFinder(asset_db_path)

    if warmup_assets:
        warmed = finder.retrieve_all(finder.sids)
        print("Warmed up %d assets." % len(warmed))

    # The engine asks for a loader via a callback; the argument is ignored
    # and the same pricing loader is returned every time.
    engine = SimplePipelineEngine(
        lambda _: pricing_loader,
        calendar,
        finder,
    )
    return engine
Beispiel #4
0
    def test_yahoo_bars_to_panel_source(self):
        # Load Yahoo bars for two tickers, relabel the panel items with sids
        # and check every event produced by DataPanelSource.
        env = TradingEnvironment()
        finder = AssetFinder(env.engine)
        tickers = ['AAPL', 'GE']
        env.write_data(equities_identifiers=tickers)

        first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
        panel = factory.load_bars_from_yahoo(
            stocks=tickers,
            indexes={},
            start=first_day,
            end=last_day,
        )
        required_fields = (
            'sid', 'open', 'high', 'low', 'close', 'volume', 'price',
        )

        # Work on a copy whose items are sids instead of ticker symbols.
        relabeled = panel.copy()
        sids = finder.map_identifier_index_to_sids(
            panel.items,
            panel.major_axis[0],
        )
        relabeled.items = sids

        for event in DataPanelSource(relabeled):
            for field in required_fields:
                self.assertIn(field, event)
            self.assertIsInstance(event['volume'], integer_types)
            self.assertIn(event['sid'], sids)
Beispiel #5
0
    def test_lookup_generic_handle_missing(self):
        # lookup_generic is given a mix of symbols and a sid; identifiers it
        # cannot resolve as of the query date must be reported as missing.
        def record(sid, name, start, end, exchange=''):
            # file_name and company_name share one value in every fixture.
            return {
                'sid': sid,
                'file_name': name,
                'company_name': name,
                'start_date_nano': pd.Timestamp(start, tz='UTC'),
                'end_date_nano': pd.Timestamp(end, tz='UTC'),
                'exchange': exchange,
            }

        data = pd.DataFrame.from_records([
            record(0, 'real', '2013-1-1', '2014-1-1'),
            record(1, 'also_real', '2013-1-1', '2014-1-1'),
            # Sid whose end date is before our query date.  We should
            # still correctly find it.
            record(2, 'real_but_old', '2002-1-1', '2003-1-1'),
            # Sid whose start_date is **after** our query date.  We should
            # **not** find it.
            record(3, 'real_but_in_the_future', '2014-1-1', '2020-1-1',
                   exchange='THE FUTURE'),
        ])
        finder = AssetFinder(data)
        query_date = pd.Timestamp('2013-02-01', tz='UTC')
        requested = [
            'real', 1, 'fake', 'real_but_old', 'real_but_in_the_future',
        ]
        results, missing = finder.lookup_generic(requested, query_date)

        # Found assets, in request order.
        self.assertEqual(len(results), 3)
        expected_found = (('real', 0), ('also_real', 1), ('real_but_old', 2))
        for position, (symbol, sid) in enumerate(expected_found):
            self.assertEqual(results[position].symbol, symbol)
            self.assertEqual(results[position].sid, sid)

        # Unresolved identifiers, in request order.
        self.assertEqual(len(missing), 2)
        expected_missing = ('fake', 'real_but_in_the_future')
        for position, identifier in enumerate(expected_missing):
            self.assertEqual(missing[position], identifier)
Beispiel #6
0
    def test_lookup_future_chain(self):
        # Queries the 'AD' chain on 2015-06-15 with three different
        # knowledge dates and checks how many contracts come back.
        def ad_future(symbol, expiry, start):
            # Every fixture shares the root symbol and asset type.
            return {
                'symbol': symbol,
                'root_symbol': 'AD',
                'asset_type': 'future',
                'expiration_date': pd.Timestamp(expiry, tz='UTC'),
                'start_date': pd.Timestamp(start, tz='UTC'),
            }

        metadata = {
            # Expires today, so should be valid
            2: ad_future('ADN15', '2015-06-15', '2015-01-01'),
            1: ad_future('ADV15', '2015-09-14', '2015-01-01'),
            # Starts trading today, so should be valid.
            0: ad_future('ADF16', '2015-12-14', '2015-06-15'),
            # Copy of the above future, but starts trading in August,
            # so it isn't valid.
            3: ad_future('ADF16', '2015-12-14', '2015-08-01'),
        }

        finder = AssetFinder(metadata=metadata)
        today = pd.Timestamp('2015-06-15', tz='UTC')
        last_year = pd.Timestamp('2014-01-01', tz='UTC')
        first_day = pd.Timestamp('2015-01-01', tz='UTC')

        # Knowledge date equal to the query date: three contracts, in the
        # order sid 2, sid 1, sid 0.
        chain = finder.lookup_future_chain('AD', today, today)
        self.assertEqual(len(chain), 3)
        for position, expected_sid in enumerate((2, 1, 0)):
            self.assertEqual(chain[position].sid, expected_sid)

        # Knowledge date in the previous year: nothing is known yet.
        chain = finder.lookup_future_chain('AD', today, last_year)
        self.assertEqual(len(chain), 0)

        # Contracts that start exactly on the knowledge date are included.
        chain = finder.lookup_future_chain('AD', today, first_day)
        self.assertEqual(len(chain), 2)
Beispiel #7
0
    def test_lookup_generic_handle_missing(self):
        # Equities are written with lower-case symbols and looked up with
        # upper-case ones; unresolved identifiers must come back as missing.
        def equity(sid, symbol, start, end, exchange=''):
            return {
                'sid': sid,
                'symbol': symbol,
                'start_date': pd.Timestamp(start, tz='UTC'),
                'end_date': pd.Timestamp(end, tz='UTC'),
                'exchange': exchange,
            }

        data = pd.DataFrame.from_records([
            equity(0, 'real', '2013-1-1', '2014-1-1'),
            equity(1, 'also_real', '2013-1-1', '2014-1-1'),
            # Sid whose end date is before our query date.  We should
            # still correctly find it.
            equity(2, 'real_but_old', '2002-1-1', '2003-1-1'),
            # Sid whose start_date is **after** our query date.  We should
            # **not** find it.
            equity(3, 'real_but_in_the_future', '2014-1-1', '2020-1-1',
                   exchange='THE FUTURE'),
        ])
        self.env.write_data(equities_df=data)
        finder = AssetFinder(self.env.engine)

        as_of = pd.Timestamp('2013-02-01', tz='UTC')
        requested = [
            'REAL', 1, 'FAKE', 'REAL_BUT_OLD', 'REAL_BUT_IN_THE_FUTURE',
        ]
        results, missing = finder.lookup_generic(requested, as_of)

        # Resolved assets keep request order and report upper-case symbols.
        self.assertEqual(len(results), 3)
        expected_found = (('REAL', 0), ('ALSO_REAL', 1), ('REAL_BUT_OLD', 2))
        for position, (symbol, sid) in enumerate(expected_found):
            self.assertEqual(results[position].symbol, symbol)
            self.assertEqual(results[position].sid, sid)

        # The unknown symbol and the not-yet-started one are both missing.
        self.assertEqual(len(missing), 2)
        self.assertEqual(missing[0], 'FAKE')
        self.assertEqual(missing[1], 'REAL_BUT_IN_THE_FUTURE')
Beispiel #8
0
    def test_lookup_symbol_delimited(self):
        # Three equities named "TEST.<sid>" are written; lookups are then
        # attempted with several delimiter characters.
        as_of = pd.Timestamp("2013-01-01", tz="UTC")
        records = [
            {
                "sid": sid,
                "symbol": "TEST.%d" % sid,
                "company_name": "company%d" % sid,
                "start_date": as_of.value,
                "end_date": as_of.value,
                "exchange": uuid.uuid4().hex,
            }
            for sid in range(3)
        ]
        self.env.write_data(equities_df=pd.DataFrame.from_records(records))
        finder = AssetFinder(self.env.engine)
        # All three assets are retrieved; only sid 1 is compared against.
        retrieved = [finder.retrieve_asset(sid) for sid in range(3)]
        asset_1 = retrieved[1]

        # we do it twice to catch caching bugs
        for _ in range(2):
            # "TEST" and "TEST1" match nothing, and '@' is not a supported
            # delimiter, so all three lookups must raise.
            for unknown in ("TEST", "TEST1", "TEST@1"):
                with self.assertRaises(SymbolNotFound):
                    finder.lookup_symbol(unknown, as_of)

            # Any of these delimiters must resolve to the same asset.
            for delimiter in ["-", "/", "_", "."]:
                found = finder.lookup_symbol("TEST%s1" % delimiter, as_of)
                self.assertEqual(asset_1, found)
Beispiel #9
0
    def test_lookup_generic_handle_missing(self):
        # Writes four equities, then resolves a mixed list of symbols and a
        # sid as of 2013-02-01, checking both the found and missing lists.
        rows = [
            # (sid, symbol, start_date, end_date, exchange)
            (0, "real", "2013-1-1", "2014-1-1", ""),
            (1, "also_real", "2013-1-1", "2014-1-1", ""),
            # Sid whose end date is before our query date.  We should
            # still correctly find it.
            (2, "real_but_old", "2002-1-1", "2003-1-1", ""),
            # Sid whose start_date is **after** our query date.  We should
            # **not** find it.
            (3, "real_but_in_the_future", "2014-1-1", "2020-1-1", "THE FUTURE"),
        ]
        data = pd.DataFrame.from_records(
            [
                {
                    "sid": sid,
                    "symbol": symbol,
                    "start_date": pd.Timestamp(start, tz="UTC"),
                    "end_date": pd.Timestamp(end, tz="UTC"),
                    "exchange": exchange,
                }
                for sid, symbol, start, end, exchange in rows
            ]
        )
        self.env.write_data(equities_df=data)
        finder = AssetFinder(self.env.engine)

        identifiers = ["REAL", 1, "FAKE", "REAL_BUT_OLD", "REAL_BUT_IN_THE_FUTURE"]
        query_date = pd.Timestamp("2013-02-01", tz="UTC")
        results, missing = finder.lookup_generic(identifiers, query_date)

        # Three identifiers resolve, in the order they were requested.
        self.assertEqual(len(results), 3)
        for index, (symbol, sid) in enumerate(
            [("REAL", 0), ("ALSO_REAL", 1), ("REAL_BUT_OLD", 2)]
        ):
            self.assertEqual(results[index].symbol, symbol)
            self.assertEqual(results[index].sid, sid)

        # Two identifiers do not resolve.
        self.assertEqual(len(missing), 2)
        for index, identifier in enumerate(["FAKE", "REAL_BUT_IN_THE_FUTURE"]):
            self.assertEqual(missing[index], identifier)
Beispiel #10
0
    def test_sid_assignment(self):
        # The metadata is keyed by symbol and carries no sids, so the finder
        # is built with sid assignment enabled.
        symbols_only = {
            symbol: {'symbol': symbol} for symbol in ('PLAY', 'MSFT')
        }
        finder = AssetFinder(
            metadata=symbols_only,
            allow_sid_assignment=True,
        )

        # An asset must exist for the 'PLAY' identifier.
        play_asset = finder.retrieve_asset_by_identifier('PLAY')
        self.assertEqual('PLAY', play_asset.symbol)
Beispiel #11
0
 def setUp(self):
     """
     Create the calendar, asset index, in-memory AssetFinder and lifetimes
     mask shared by the tests.
     """
     calendar = date_range('2014', '2015', freq=trading_day)
     assets = Int64Index(arange(1, 20))

     # Every asset spans the whole calendar, first entry to last.
     asset_info = make_simple_asset_info(assets, calendar[0], calendar[-1])
     finder = AssetFinder(asset_info, db_path=':memory:', create_table=True)

     self.__calendar = calendar
     self.__assets = assets
     self.__finder = finder
     # Lifetimes are requested for the last ten calendar entries only.
     self.__mask = finder.lifetimes(calendar[-10:])
Beispiel #12
0
    def test_insert_metadata(self):
        # Write one equity (sid 0) that carries an extra "foo_data" key, then
        # read it back through a finder built on the same engine.
        play_fields = {
            "start_date": "2014-01-01",
            "end_date": "2015-01-01",
            "symbol": "PLAY",
            "foo_data": "FOO",
        }
        self.env.write_data(equities_data={0: play_fields})
        equity = AssetFinder(self.env.engine).retrieve_asset(0)

        # The valid fields come back on an Equity instance.
        self.assertIsInstance(equity, Equity)
        self.assertEqual("PLAY", equity.symbol)
        expected_end = pd.Timestamp("2015-01-01", tz="UTC")
        self.assertEqual(expected_end, equity.end_date)

        # The invalid field must not surface as an attribute.
        with self.assertRaises(AttributeError):
            equity.foo_data
Beispiel #13
0
    def test_lookup_symbol_fuzzy(self):
        # Three assets named 'TEST@<sid>' are loaded into a finder whose
        # fuzzy character is '@'; lookups run with and without fuzzy=True.
        as_of = pd.Timestamp('2013-01-01', tz='UTC')
        records = [
            {
                'sid': sid,
                'file_name': 'TEST@%d' % sid,
                'company_name': "company%d" % sid,
                'start_date_nano': as_of.value,
                'end_date_nano': as_of.value,
                'exchange': uuid.uuid4().hex,
            }
            for sid in range(3)
        ]
        finder = AssetFinder(
            pd.DataFrame.from_records(records),
            fuzzy_char='@',
        )
        # All three assets are retrieved; only sid 1 is compared against.
        retrieved = [finder.retrieve_asset(sid) for sid in range(3)]
        asset_1 = retrieved[1]

        # we do it twice to test for caching bugs
        for _ in range(2):
            self.assertIsNone(finder.lookup_symbol('test', as_of))

            exact = finder.lookup_symbol('test@1', as_of)
            self.assertEqual(asset_1, exact)

            # Adding an unnecessary fuzzy shouldn't matter.
            exact_fuzzy = finder.lookup_symbol('test@1', as_of, fuzzy=True)
            self.assertEqual(asset_1, exact_fuzzy)

            # Shouldn't find this with no fuzzy_str passed.
            self.assertIsNone(finder.lookup_symbol('test1', as_of))

            # With fuzzy matching on, the delimiter-free form resolves.
            loose = finder.lookup_symbol('test1', as_of, fuzzy=True)
            self.assertEqual(asset_1, loose)
Beispiel #14
0
    def test_sid_assignment(self):
        # The metadata is keyed by symbol and carries no sids.
        metadata = {
            symbol: {'symbol': symbol} for symbol in ('PLAY', 'MSFT')
        }

        # Sid assignment is enabled, so construction must succeed.
        finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)

        # Both symbols must resolve, each to its own sid.
        play = finder.lookup_symbol('PLAY', datetime.now())
        msft = finder.lookup_symbol('MSFT', datetime.now())
        self.assertEqual('PLAY', play.symbol)
        self.assertIsNotNone(play.sid)
        self.assertNotEqual(play.sid, msft.sid)
Beispiel #15
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 max_date=None,
                 env_trading_calendar=tradingcalendar):
        """
        Populate calendar data, benchmark returns, treasury curves and an
        empty AssetFinder.

        @load is a function that returns benchmark_returns and
        treasury_curves; load_market_data is used when it is not given.
        The treasury_curves are expected to be a DataFrame with an index of
        dates and columns of the curve names, e.g. '10year', '1month', etc.

        @max_date, when truthy, limits both the trading days and the
        treasury curves to dates on or before it.
        """
        calendar = env_trading_calendar
        self.trading_day = calendar.trading_day.copy()

        all_days = calendar.trading_days
        kept_days = all_days[all_days <= max_date] if max_date else all_days
        self.trading_days = kept_days.copy()

        self.first_trading_day = self.trading_days[0]
        self.last_trading_day = self.trading_days[-1]

        self.early_closes = calendar.get_early_closes(
            self.first_trading_day,
            self.last_trading_day,
        )
        self.open_and_closes = calendar.open_and_closes.loc[self.trading_days]

        self.prev_environment = self
        self.bm_symbol = bm_symbol

        loader = load if load else load_market_data
        benchmark_returns, treasury_curves = loader(
            self.trading_day,
            self.trading_days,
            self.bm_symbol,
        )
        self.benchmark_returns = benchmark_returns
        self.treasury_curves = treasury_curves

        if max_date:
            # Mask the treasury curves down to the current date.
            # In the case of live trading, the last date in the treasury
            # curves would be the day before the date considered to be
            # 'today'.
            on_or_before = treasury_curves.index <= max_date
            self.treasury_curves = treasury_curves[on_or_before]

        self.exchange_tz = exchange_tz

        self.asset_finder = AssetFinder()
Beispiel #16
0
    def test_sid_assignment(self):
        # Entries are keyed by symbol; no sid is supplied for either one.
        metadata = {
            'PLAY': {'symbol': 'PLAY'},
            'MSFT': {'symbol': 'MSFT'},
        }

        # The finder is allowed to assign the sids itself.
        finder = AssetFinder(
            metadata=metadata,
            allow_sid_assignment=True,
        )

        # Look both symbols up as of "now"; each lookup takes its own
        # timestamp.
        play, msft = [
            finder.lookup_symbol(symbol, datetime.now())
            for symbol in ('PLAY', 'MSFT')
        ]
        self.assertEqual('PLAY', play.symbol)
        self.assertIsNotNone(play.sid)
        self.assertNotEqual(play.sid, msft.sid)
Beispiel #17
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 trading_schedule=default_nyse_schedule,
                 asset_db_path=':memory:'):
        """
        Load benchmark and treasury data and set up the asset database.

        @load is called with the schedule's day, the schedule index and the
        benchmark symbol; load_market_data is used when it is not given.

        @asset_db_path may be a string, which is turned into a sqlite URL
        and opened with create_engine, or any other object, which is used
        as the engine directly.  When the resulting engine is None no asset
        finder is created.
        """
        self.bm_symbol = bm_symbol

        loader = load if load else load_market_data
        self.benchmark_returns, self.treasury_curves = loader(
            trading_schedule.day,
            trading_schedule.schedule.index,
            self.bm_symbol,
        )

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///%s' % asset_db_path)
        else:
            # Not a path: the caller supplied the engine (or None).
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            # Initialise the database before handing it to the finder.
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(engine)
Beispiel #18
0
    def load(name, environ=os.environ, timestamp=None):
        """Load a previously ingested bundle.

        Parameters
        ----------
        name : str
            The name of the bundle.
        environ : mapping, optional
            The environment variables. Defaults to os.environ.
        timestamp : datetime, optional
            The timestamp of the data to look up.
            Defaults to the current time.

        Returns
        -------
        bundle_data : BundleData
            The raw data readers for this bundle.
        """
        lookup_time = pd.Timestamp.utcnow() if timestamp is None else timestamp
        timestr = most_recent_data(name, lookup_time, environ=environ)

        # Each reader is opened on the path for the same bundle and timestr.
        finder = AssetFinder(
            asset_db_path(name, timestr, environ=environ),
        )
        minute_reader = BcolzMinuteBarReader(
            minute_equity_path(name, timestr, environ=environ),
        )
        daily_reader = BcolzDailyBarReader(
            daily_equity_path(name, timestr, environ=environ),
        )
        adjustments = SQLiteAdjustmentReader(
            adjustment_db_path(name, timestr, environ=environ),
        )
        return BundleData(
            asset_finder=finder,
            equity_minute_bar_reader=minute_reader,
            equity_daily_bar_reader=daily_reader,
            adjustment_reader=adjustments,
        )
Beispiel #19
0
    def setUp(self):
        """
        Build a ConstantLoader over three assets and a daily UTC date range,
        plus asset info and an AssetFinder covering the same range.
        """
        # Every day, assume every stock starts at 2, goes down to 1,
        # goes up to 4, and finishes at 3.
        constants = {
            USEquityPricing.low: 1,
            USEquityPricing.open: 2,
            USEquityPricing.close: 3,
            USEquityPricing.high: 4,
        }
        assets = [1, 2, 3]
        dates = date_range('2014-01-01', '2014-02-01', freq='D', tz='UTC')

        self.constants = constants
        self.assets = assets
        self.dates = dates
        self.loader = ConstantLoader(
            constants=constants,
            dates=dates,
            assets=assets,
        )

        # Asset lifetimes span the full date range.
        asset_info = make_simple_asset_info(
            assets,
            start_date=dates[0],
            end_date=dates[-1],
        )
        self.asset_info = asset_info
        self.asset_finder = AssetFinder(asset_info)
Beispiel #20
0
    def test_sid_assignment(self):
        # The metadata is keyed by symbol and carries no sids.
        metadata = {
            symbol: {'symbol': symbol} for symbol in ('PLAY', 'MSFT')
        }

        # A fixed, normalized date keeps the lookups deterministic.
        as_of = normalize_date(pd.Timestamp('2015-07-09', tz='UTC'))

        # Sid assignment is enabled, so construction must succeed.
        finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)

        # Both symbols must resolve, each to its own sid.
        play = finder.lookup_symbol('PLAY', as_of)
        msft = finder.lookup_symbol('MSFT', as_of)
        self.assertEqual('PLAY', play.symbol)
        self.assertIsNotNone(play.sid)
        self.assertNotEqual(play.sid, msft.sid)
Beispiel #21
0
    def setUpClass(cls):
        """
        Create the class-wide fixtures: three equities, a trading
        environment and asset finder, and bar/adjustment readers backed by
        a temporary directory.

        Everything that runs after the temporary directory is created is
        guarded, so the directory is removed if any later setup step fails.
        unittest does not call tearDownClass when setUpClass raises, so the
        cleanup has to happen here.
        """
        cls.AAPL = 1
        cls.MSFT = 2
        cls.BRK_A = 3
        cls.assets = [cls.AAPL, cls.MSFT, cls.BRK_A]
        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014'),
            Timestamp('2015'),
            ['AAPL', 'MSFT', 'BRK_A'],
        )
        cls.env = trading.TradingEnvironment()
        cls.env.write_data(equities_df=asset_info)
        cls.asset_finder = AssetFinder(cls.env.engine)
        cls.tempdir = tempdir = TempDirectory()
        tempdir.create()
        try:
            cls.raw_data, cls.bar_reader = cls.create_bar_reader(tempdir)
            cls.adj_reader = cls.create_adjustment_reader(tempdir)
            cls.ffc_loader = USEquityPricingLoader(
                cls.bar_reader, cls.adj_reader
            )
            # Computed inside the guarded region so that a failure here
            # also triggers the temporary-directory cleanup.
            cls.dates = cls.raw_data[cls.AAPL].index.tz_localize('UTC')
        except BaseException:
            # Catch everything, including interrupts; the error is always
            # re-raised once the directory has been removed.
            cls.tempdir.cleanup()
            raise
Beispiel #22
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 max_date=None,
                 env_trading_calendar=tradingcalendar,
                 asset_db_path=':memory:'):
        """
        Populate calendar data, benchmark returns, treasury curves and the
        asset database engine and finder.

        @load is a function that returns benchmark_returns and
        treasury_curves; load_market_data is used when it is not given.
        The treasury_curves are expected to be a DataFrame with an index of
        dates and columns of the curve names, e.g. '10year', '1month', etc.

        @max_date, when truthy, limits both the trading days and the
        treasury curves to dates on or before it.

        @asset_db_path may be a string, which is turned into a sqlite URL,
        opened with create_engine and initialised, or any other object,
        which is used as the engine directly.  When the resulting engine is
        None no asset finder is created.
        """
        calendar = env_trading_calendar
        self.trading_day = calendar.trading_day.copy()

        all_days = calendar.trading_days
        kept_days = all_days[all_days <= max_date] if max_date else all_days
        self.trading_days = kept_days.copy()

        self.first_trading_day = self.trading_days[0]
        self.last_trading_day = self.trading_days[-1]

        self.early_closes = calendar.get_early_closes(
            self.first_trading_day,
            self.last_trading_day,
        )
        self.open_and_closes = calendar.open_and_closes.loc[self.trading_days]

        self.bm_symbol = bm_symbol
        loader = load if load else load_market_data
        benchmark_returns, treasury_curves = loader(
            self.trading_day,
            self.trading_days,
            self.bm_symbol,
        )
        self.benchmark_returns = benchmark_returns
        self.treasury_curves = treasury_curves

        if max_date:
            # Mask the treasury curves down to the current date.
            # In the case of live trading, the last date in the treasury
            # curves would be the day before the date considered to be
            # 'today'.
            on_or_before = treasury_curves.index <= max_date
            self.treasury_curves = treasury_curves[on_or_before]

        self.exchange_tz = exchange_tz

        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///%s' % asset_db_path)
            self.engine = engine
            # Only an engine created here from a path gets initialised.
            AssetDBWriterFromDictionary().init_db(engine)
        else:
            engine = asset_db_path
            self.engine = engine

        self.asset_finder = AssetFinder(engine) if engine is not None else None
Beispiel #23
0
    def test_finder_checks_version(self):
        # AssetFinder construction must fail against a database stamped
        # with a wrong version and succeed once the version is corrected.
        version_table = self.metadata.tables['version_info']

        def restamp(version):
            # Clear the version table, write the given version, then run
            # check_version_info against that same value.
            version_table.delete().execute()
            write_version_info(self.engine, version_table, version)
            check_version_info(self.engine, version_table, version)

        # A bad version number makes the finder refuse the database.
        restamp(-2)
        with self.assertRaises(AssetDBVersionError):
            AssetFinder(engine=self.engine)

        # With the correct version in place, construction must not raise.
        restamp(ASSET_DB_VERSION)
        AssetFinder(engine=self.engine)
Beispiel #24
0
    def test_sid_assignment(self):
        # Only symbols are supplied; no sids accompany them.
        symbols = ["PLAY", "MSFT"]
        as_of = normalize_date(pd.Timestamp("2015-07-09", tz="UTC"))

        # The environment is told it may assign sids while writing.
        self.env.write_data(
            equities_identifiers=symbols,
            allow_sid_assignment=True,
        )

        # Both symbols must resolve, each to its own sid.
        finder = AssetFinder(self.env.engine)
        play, msft = [finder.lookup_symbol(symbol, as_of) for symbol in symbols]
        self.assertEqual("PLAY", play.symbol)
        self.assertIsNotNone(play.sid)
        self.assertNotEqual(play.sid, msft.sid)
Beispiel #25
0
    def test_sid_assignment_failure(self):
        # The metadata is keyed by symbol and carries no sids.
        metadata = {
            symbol: {'symbol': symbol} for symbol in ('PLAY', 'MSFT')
        }

        # With sid assignment disallowed, building the finder must raise.
        self.assertRaises(
            SidAssignmentError,
            AssetFinder,
            metadata=metadata,
            allow_sid_assignment=False,
        )
Beispiel #26
0
    def test_insert_metadata(self):
        # One equity (sid 0) is written with an extra 'foo_data' key and then
        # retrieved through a finder on the same engine.
        fields = {
            'start_date': '2014-01-01',
            'end_date': '2015-01-01',
            'symbol': "PLAY",
            'foo_data': "FOO",
        }
        self.env.write_data(equities_data={0: fields})
        finder = AssetFinder(self.env.engine)

        # The valid fields come back on an Equity instance.
        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)
        expected_end = pd.Timestamp('2015-01-01', tz='UTC')
        self.assertEqual(expected_end, equity.end_date)

        # Reading the invalid field must raise AttributeError.
        self.assertRaises(AttributeError, getattr, equity, 'foo_data')
Beispiel #27
0
    def test_lookup_symbol(self):
        # Five equities share the symbol 'existing' but have disjoint
        # lifetimes; lookups must be disambiguated by date.

        # Incrementing by two so that start and end dates for each
        # generated Asset don't overlap (each Asset's end_date is the
        # day after its start date.)
        dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
        one_day = timedelta(days=1)
        records = [
            {
                'sid': sid,
                'symbol': 'existing',
                'start_date': start.value,
                'end_date': (start + one_day).value,
                'exchange': 'NYSE',
            }
            for sid, start in enumerate(dates)
        ]
        self.env.write_data(equities_df=pd.DataFrame.from_records(records))
        finder = AssetFinder(self.env.engine)

        # Run checks twice to test for caching bugs.
        for _ in range(2):
            # An unknown symbol raises.
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol('non_existing', dates[0])

            # Without a date the shared symbol is ambiguous.
            with self.assertRaises(MultipleSymbolsFound):
                finder.lookup_symbol('existing', None)

            # Verify that we correctly resolve multiple symbols using
            # the supplied date
            for expected_sid, as_of in enumerate(dates):
                asset = finder.lookup_symbol('existing', as_of)
                self.assertEqual(asset.symbol, 'EXISTING')
                self.assertEqual(asset.sid, expected_sid)
Beispiel #28
0
    def test_consume_asset_as_identifier(self):
        """Asset objects written as identifiers come back equal from a finder."""
        equity_end = pd.Timestamp("2012-01-01", tz="UTC")
        future_end = pd.Timestamp("2008-01-01", tz="UTC")

        equity = Equity(1, symbol="TESTEQ", end_date=equity_end)
        future = Future(200, symbol="TESTFUT", end_date=future_end)

        # Hand the Asset objects themselves to the writer as identifiers.
        self.env.write_data(
            equities_identifiers=[equity],
            futures_identifiers=[future],
        )
        finder = AssetFinder(self.env.engine)

        # Retrieved assets must equal the originals, end dates included.
        self.assertEqual(equity, finder.retrieve_asset(1))
        self.assertEqual(future, finder.retrieve_asset(200))
        self.assertEqual(equity_end, finder.retrieve_asset(1).end_date)
        self.assertEqual(future_end, finder.retrieve_asset(200).end_date)
Beispiel #29
0
    def test_sid_assignment(self):
        """Writing bare symbols with sid assignment yields distinct sids."""
        # Plain symbols only -- no sids are supplied up front.
        symbols = ['PLAY', 'MSFT']
        as_of = normalize_date(pd.Timestamp('2015-07-09', tz='UTC'))

        self.env.write_data(
            equities_identifiers=symbols,
            allow_sid_assignment=True,
        )
        finder = AssetFinder(self.env.engine)

        play_asset = finder.lookup_symbol('PLAY', as_of)
        msft_asset = finder.lookup_symbol('MSFT', as_of)

        # Each symbol must resolve, and to its own sid.
        self.assertEqual('PLAY', play_asset.symbol)
        self.assertIsNotNone(play_asset.sid)
        self.assertNotEqual(play_asset.sid, msft_asset.sid)
Beispiel #30
0
    def test_sid_assignment(self):
        """A finder allowed to assign sids gives each symbol its own sid."""
        # Keyed by symbol; no sids anywhere in the metadata.
        sidless_metadata = {
            'PLAY': {'symbol': 'PLAY'},
            'MSFT': {'symbol': 'MSFT'},
        }
        as_of = normalize_date(pd.Timestamp('2015-07-09', tz='UTC'))

        finder = AssetFinder(
            metadata=sidless_metadata,
            allow_sid_assignment=True,
        )

        play_asset = finder.lookup_symbol('PLAY', as_of)
        msft_asset = finder.lookup_symbol('MSFT', as_of)

        # Both lookups must succeed and the assigned sids must differ.
        self.assertEqual('PLAY', play_asset.symbol)
        self.assertIsNotNone(play_asset.sid)
        self.assertNotEqual(play_asset.sid, msft_asset.sid)
Beispiel #31
0
    def test_insert_metadata(self):
        """Insert one equity's metadata and check what the finder retained.

        The known fields must be readable from the retrieved ``Equity``; the
        extra ``foo_data`` keyword must not appear in the metadata cache.
        """
        finder = AssetFinder()
        finder.insert_metadata(0,
                               asset_type='equity',
                               start_date='2014-01-01',
                               end_date='2015-01-01',
                               symbol="PLAY",
                               foo_data="FOO",)

        # Test proper insertion
        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)
        self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'),
                         equity.end_date)

        # Test invalid field.  assertNotIn shows the key and the container
        # when it fails, instead of a bare "True is not false".
        self.assertNotIn('foo_data', finder.metadata_cache[0])
Beispiel #32
0
    def test_insert_metadata(self):
        """Insert one equity's metadata and check what the finder retained.

        The known fields must be readable from the retrieved ``Equity``; the
        extra ``foo_data`` keyword must not appear in the metadata cache.
        """
        finder = AssetFinder()
        finder.insert_metadata(
            0,
            asset_type='equity',
            start_date='2014-01-01',
            end_date='2015-01-01',
            symbol="PLAY",
            foo_data="FOO",
        )

        # Test proper insertion
        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)
        self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'), equity.end_date)

        # Test invalid field.  assertNotIn shows the key and the container
        # when it fails, instead of a bare "True is not false".
        self.assertNotIn('foo_data', finder.metadata_cache[0])
Beispiel #33
0
    def setUp(self):
        """Create three sids, a daily UTC date index and a finder over them.

        The first and last dates of the index are passed to
        ``make_simple_asset_info`` as ``start_date`` and ``end_date``.
        """
        self.assets = [1, 2, 3]
        self.dates = date_range('2014-01-01', '2014-02-01', freq='D', tz='UTC')

        first_day, last_day = self.dates[0], self.dates[-1]
        self.asset_finder = AssetFinder(
            make_simple_asset_info(
                self.assets,
                start_date=first_day,
                end_date=last_day,
            )
        )
Beispiel #34
0
    def test_insert_metadata(self):
        """Round-trip one equity through the env's writer and a new finder."""
        sid = 0
        equities = {
            sid: {
                'start_date': '2014-01-01',
                'end_date': '2015-01-01',
                'symbol': "PLAY",
                'foo_data': "FOO",
            },
        }
        self.env.write_data(equities_data=equities)
        finder = AssetFinder(self.env.engine)

        # The stored equity should expose the symbol and a UTC end date.
        retrieved = finder.retrieve_asset(sid)
        self.assertIsInstance(retrieved, Equity)
        self.assertEqual('PLAY', retrieved.symbol)
        self.assertEqual(
            pd.Timestamp('2015-01-01', tz='UTC'),
            retrieved.end_date,
        )

        # 'foo_data' is expected to be absent from the asset's attributes.
        with self.assertRaises(AttributeError):
            retrieved.foo_data
Beispiel #35
0
    def test_map_identifier_index_to_sids(self):
        """Asset objects map to their integer sids in the order given."""
        dt = pd.Timestamp('2014-01-01', tz='UTC')
        finder = AssetFinder(self.env.engine)
        aapl = Equity(1, symbol="AAPL")
        goog = Equity(2, symbol="GOOG")
        clk15 = Future(200, symbol="CLK15")
        clm15 = Future(201, symbol="CLM15")

        # Ascending order first; every mapped value must be a plain int.
        ascending = [aapl, goog, clk15, clm15]
        mapped = finder.map_identifier_index_to_sids(ascending, dt)
        self.assertListEqual([1, 2, 200, 201], mapped)
        for sid in mapped:
            self.assertIsInstance(sid, int)

        # A shuffled input must produce the correspondingly shuffled sids.
        shuffled = [clm15, goog, clk15, aapl]
        mapped = finder.map_identifier_index_to_sids(shuffled, dt)
        self.assertListEqual([201, 2, 200, 1], mapped)
Beispiel #36
0
    def test_consume_asset_as_identifier(self):
        """Assets consumed as identifiers are returned equal by the finder."""
        equity_end = pd.Timestamp('2012-01-01', tz='UTC')
        future_end = pd.Timestamp('2008-01-01', tz='UTC')

        equity = Equity(1, symbol="TESTEQ", end_date=equity_end)
        future = Future(200, symbol="TESTFUT", end_date=future_end)

        # Feed the Asset objects in directly, then populate the cache.
        finder = AssetFinder()
        finder.consume_identifiers([equity, future])
        finder.populate_cache()

        # Retrieved assets must equal the originals, end dates included.
        self.assertEqual(equity, finder.retrieve_asset(1))
        self.assertEqual(future, finder.retrieve_asset(200))
        self.assertEqual(equity_end, finder.retrieve_asset(1).end_date)
        self.assertEqual(future_end, finder.retrieve_asset(200).end_date)
Beispiel #37
0
    def test_map_identifier_index_to_sids(self):
        """Mapping Asset objects yields their int sids, order preserved."""
        as_of = pd.Timestamp("2014-01-01", tz="UTC")
        finder = AssetFinder(self.env.engine)
        equity_1 = Equity(1, symbol="AAPL")
        equity_2 = Equity(2, symbol="GOOG")
        future_200 = Future(200, symbol="CLK15")
        future_201 = Future(201, symbol="CLM15")

        # First pass: sid order, and each result must be an int.
        result = finder.map_identifier_index_to_sids(
            [equity_1, equity_2, future_200, future_201],
            as_of,
        )
        self.assertListEqual([1, 2, 200, 201], result)
        for sid in result:
            self.assertIsInstance(sid, int)

        # Second pass: reordered input must give reordered output.
        result = finder.map_identifier_index_to_sids(
            [future_201, equity_2, future_200, equity_1],
            as_of,
        )
        self.assertListEqual([201, 2, 200, 1], result)
Beispiel #38
0
    def test_finder_checks_version(self):
        """AssetFinder rejects a db with a bad version and accepts a good one."""
        env = TradingEnvironment(load=noop_load)
        version_table = _version_table_schema(sa.MetaData(bind=env.engine))

        def stamp_version(version):
            # Replace whatever version rows exist with `version`, then
            # confirm the table reports it.
            version_table.delete().execute()
            write_version_info(version_table, version)
            check_version_info(version_table, version)

        # With a bogus version in place, building a finder must fail.
        stamp_version(-2)
        with self.assertRaises(AssetDBVersionError):
            AssetFinder(engine=env.engine)

        # Once the expected version is written, construction must succeed.
        stamp_version(ASSET_DB_VERSION)
        AssetFinder(engine=env.engine)
Beispiel #39
0
    def test_asset_finder_doesnt_silently_create_useless_empty_files(self):
        """A missing SQLite path raises ValueError and creates no file."""
        missing = self.tmpdir.getpath(self.id() + '__nothing_here')

        with self.assertRaises(ValueError) as caught:
            AssetFinder(missing)
        self.assertEqual(
            str(caught.exception),
            "SQLite file {!r} doesn't exist.".format(missing),
        )

        # sqlite3.connect creates an empty file when pointed at a path that
        # does not exist; make sure the finder did not trigger that.
        still_there = os.path.exists(missing)
        self.assertFalse(still_there)
Beispiel #40
0
    def test_compute_lifetimes(self, env=None):
        """Compare ``finder.lifetimes`` to a brute-force mask for each window.

        ``env`` defaults to None but is dereferenced immediately, so a real
        environment has to be supplied (presumably by a decorator that is
        not visible in this snippet -- TODO confirm).
        """
        asset_count = 4
        trading_day = env.trading_day
        first_start = pd.Timestamp('2015-04-01', tz='UTC')

        frame = make_rotating_asset_info(
            num_assets=asset_count,
            first_start=first_start,
            frequency=env.trading_day,
            periods_between_starts=3,
            asset_lifetime=5,
        )
        finder = AssetFinder(frame)

        all_dates = pd.date_range(
            start=first_start,
            end=frame.end_date.max(),
            freq=trading_day,
        )

        for window in all_subindices(all_dates):
            mask_shape = (len(window), asset_count)
            expected_mask = full(
                shape=mask_shape,
                fill_value=False,
                dtype=bool,
            )

            # Mark (date, asset) cells where start_date <= date <= end_date.
            for row, day in enumerate(window):
                spans = frame[['start_date', 'end_date']].itertuples()
                for col, start, end in spans:
                    if start <= day <= end:
                        expected_mask[row, col] = True

            # Label the mask with the window's dates and the frame's sids.
            expected = pd.DataFrame(
                data=expected_mask,
                index=window,
                columns=frame.sid.values,
            )
            assert_frame_equal(finder.lifetimes(window), expected)
Beispiel #41
0
    def test_consume_metadata(self):
        """Consume a dict and then a DataFrame into an existing finder.

        Checks that each consumption adds its fields to ``metadata_cache``
        and that fields stored earlier are still present afterwards.
        """
        # Test dict consumption
        finder = AssetFinder({0: {'asset_type': 'equity'}})
        dict_to_consume = {0: {'symbol': 'PLAY'},
                           1: {'symbol': 'MSFT'}}
        finder.consume_metadata(dict_to_consume)
        self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
        self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])

        # Test dataframe consumption.  Cells are set with a single .loc call:
        # chained `df[col][row] = value` may write to a temporary copy and
        # leave the frame unchanged.
        df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
        df.loc[0, 'asset_name'] = "Dave'N'Busters"
        df.loc[0, 'exchange'] = "NASDAQ"
        df.loc[1, 'asset_name'] = "Microsoft"
        df.loc[1, 'exchange'] = "NYSE"
        finder.consume_metadata(df)
        self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
        self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
        # Check that old data survived
        self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
Beispiel #42
0
    def test_lookup_symbol(self):
        """A symbol reused by five assets resolves only when a date is given."""
        # Start dates are two days apart and each asset ends one day after
        # it starts, so no two lifetimes overlap.
        dates = pd.date_range("2013-01-01", freq="2D", periods=5, tz="UTC")

        def make_record(sid, start):
            return {
                "sid": sid,
                "symbol": "existing",
                "start_date": start.value,
                "end_date": (start + timedelta(days=1)).value,
                "exchange": "NYSE",
            }

        equities = pd.DataFrame.from_records(
            [make_record(sid, start) for sid, start in enumerate(dates)]
        )
        self.env.write_data(equities_df=equities)
        finder = AssetFinder(self.env.engine)

        # Repeat everything once so the second round goes through caches.
        for _ in range(2):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol("NON_EXISTING", dates[0])

            with self.assertRaises(MultipleSymbolsFound):
                finder.lookup_symbol("EXISTING", None)

            # With a date, the lookup must land on the asset alive that day.
            for expected_sid, as_of in enumerate(dates):
                match = finder.lookup_symbol("EXISTING", as_of)
                self.assertEqual(match.symbol, "EXISTING")
                self.assertEqual(match.sid, expected_sid)
Beispiel #43
0
    def test_insert_metadata(self):
        """Insert metadata for sid 0, then update it with a second insert.

        Checks that known fields land in ``metadata_cache``, that the extra
        ``foo_data`` keyword does not, and that a second insert overwrites
        and adds fields while keeping the ones already stored.
        """
        finder = AssetFinder()
        finder.insert_metadata(
            0,
            asset_type='equity',
            start_date='2014-01-01',
            end_date='2015-01-01',
            symbol="PLAY",
            foo_data="FOO",
        )

        # Test proper insertion
        self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
        self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
        self.assertEqual('2015-01-01', finder.metadata_cache[0]['end_date'])

        # Test invalid field.  assertNotIn shows the key and the container
        # when it fails, instead of a bare "True is not false".
        self.assertNotIn('foo_data', finder.metadata_cache[0])

        # Test updating fields
        finder.insert_metadata(
            0,
            asset_type='equity',
            start_date='2014-01-01',
            end_date='2015-02-01',
            symbol="PLAY",
            exchange="NYSE",
        )
        self.assertEqual('2015-02-01', finder.metadata_cache[0]['end_date'])
        self.assertEqual('NYSE', finder.metadata_cache[0]['exchange'])

        # Check that old data survived
        self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
Beispiel #44
0
    def test_lookup_symbol(self):
        """Lookups of a shared symbol need a date to pick a single asset."""
        # Consecutive start dates are two days apart while each lifetime
        # lasts one day, so the generated assets never overlap.
        dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
        lifetime = timedelta(days=1)
        rows = []
        for sid, start in enumerate(dates):
            rows.append({
                'sid': sid,
                'symbol':  'existing',
                'start_date': start.value,
                'end_date': (start + lifetime).value,
                'exchange': 'NYSE',
            })
        self.env.write_data(equities_df=pd.DataFrame.from_records(rows))
        finder = AssetFinder(self.env.engine)

        # The checks run twice; the repeat is there to catch caching bugs.
        for _ in range(2):
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol('NON_EXISTING', dates[0])

            with self.assertRaises(MultipleSymbolsFound):
                finder.lookup_symbol('EXISTING', None)

            # Each date must select the asset whose position matches it.
            for position, as_of in enumerate(dates):
                asset = finder.lookup_symbol('EXISTING', as_of)
                self.assertEqual(asset.symbol, 'EXISTING')
                self.assertEqual(asset.sid, position)
Beispiel #45
0
    def __init__(self,
                 load=None,
                 bm_symbol='^GSPC',
                 exchange_tz="US/Eastern",
                 min_date=None,
                 max_date=None,
                 env_trading_calendar=tradingcalendar,
                 asset_db_path=':memory:'):
        """Set up calendar slices, market data and the asset finder.

        ``load`` is called as ``load(trading_day, trading_days, bm_symbol)``
        and must return ``(benchmark_returns, treasury_curves)``; a falsy
        value selects ``load_market_data``.  ``min_date`` and ``max_date``
        bound the trading days taken from ``env_trading_calendar``, and a
        truthy ``max_date`` also truncates the treasury curves.
        ``asset_db_path`` may be a string, which is turned into a
        ``sqlite:///`` URL for ``create_engine``, or any other object, which
        is used as the engine as-is; ``None`` leaves ``asset_finder`` unset.
        """
        calendar = env_trading_calendar
        self.trading_day = calendar.trading_day.copy()

        # Restrict the calendar's trading days to [min_date, max_date].
        all_days = calendar.trading_days
        window = all_days.slice_indexer(min_date, max_date)
        self.trading_days = all_days[window]

        self.first_trading_day = self.trading_days[0]
        self.last_trading_day = self.trading_days[-1]

        self.early_closes = calendar.get_early_closes(
            self.first_trading_day,
            self.last_trading_day,
        )
        self.open_and_closes = calendar.open_and_closes.loc[self.trading_days]

        self.bm_symbol = bm_symbol
        loader = load if load else load_market_data
        self.benchmark_returns, self.treasury_curves = loader(
            self.trading_day,
            self.trading_days,
            self.bm_symbol,
        )

        if max_date:
            # Mask the treasury curves down to the current date.  In live
            # trading the last curve date would be the day before the date
            # considered to be 'today'.
            curves = self.treasury_curves
            self.treasury_curves = curves[curves.index <= max_date]

        self.exchange_tz = exchange_tz

        # A string is a database path; anything else is taken as an engine.
        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///%s' % asset_db_path)
        else:
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(engine)
Beispiel #46
0
 def setUp(self):
     """Build the calendar, sids 1..19, an in-memory finder and a mask.

     The mask is the finder's ``lifetimes`` result for the last ten
     calendar entries.
     """
     calendar = date_range('2014', '2015', freq=trading_day)
     self.__calendar = calendar

     assets = Int64Index(arange(1, 20))
     self.__assets = assets

     # Assets span from the first to the last calendar entry.
     asset_info = make_simple_asset_info(assets, calendar[0], calendar[-1])
     self.__finder = AssetFinder(
         asset_info,
         db_path=':memory:',
         create_table=True,
     )

     last_ten = self.__calendar[-10:]
     self.__mask = self.__finder.lifetimes(last_ten)
Beispiel #47
0
    def test_consume_metadata(self):
        """Write equities as a dict, then as a DataFrame, and read them back.

        The DataFrame half uses a fresh ``TradingEnvironment`` so that it
        does not write into the database used by the dict half.
        """
        # Test dict consumption
        dict_to_consume = {0: {"symbol": "PLAY"}, 1: {"symbol": "MSFT"}}
        self.env.write_data(equities_data=dict_to_consume)
        finder = AssetFinder(self.env.engine)

        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual("PLAY", equity.symbol)

        # Test dataframe consumption.  Cells are set with a single .loc call:
        # chained `df[col][row] = value` may write to a temporary copy and
        # leave the frame unchanged.
        df = pd.DataFrame(columns=["asset_name", "exchange"], index=[0, 1])
        df.loc[0, "asset_name"] = "Dave'N'Busters"
        df.loc[0, "exchange"] = "NASDAQ"
        df.loc[1, "asset_name"] = "Microsoft"
        df.loc[1, "exchange"] = "NYSE"
        self.env = TradingEnvironment(load=noop_load)
        self.env.write_data(equities_df=df)
        finder = AssetFinder(self.env.engine)
        self.assertEqual("NASDAQ", finder.retrieve_asset(0).exchange)
        self.assertEqual("Microsoft", finder.retrieve_asset(1).asset_name)
Beispiel #48
0
    def test_compute_lifetimes(self, env=None):
        """Check ``finder.lifetimes`` against a hand-built boolean mask.

        ``env`` defaults to None but its ``trading_day`` is read right away,
        so callers must supply one (presumably via a decorator not visible
        in this snippet -- TODO confirm).
        """
        num_assets = 4
        trading_day = env.trading_day
        first_start = pd.Timestamp('2015-04-01', tz='UTC')

        frame = make_rotating_asset_info(
            num_assets=num_assets,
            first_start=first_start,
            frequency=env.trading_day,
            periods_between_starts=3,
            asset_lifetime=5,
        )
        finder = AssetFinder(frame)

        last_end = frame.end_date.max()
        all_dates = pd.date_range(
            start=first_start,
            end=last_end,
            freq=trading_day,
        )

        for dates in all_subindices(all_dates):
            expected_mask = full(
                shape=(len(dates), num_assets),
                fill_value=False,
                dtype=bool,
            )

            # Set a cell when the date lies inside the asset's
            # [start_date, end_date] interval, both ends included.
            for date_idx, date in enumerate(dates):
                lifetimes = frame[['start_date', 'end_date']].itertuples()
                for asset_idx, start, end in lifetimes:
                    if start <= date <= end:
                        expected_mask[date_idx, asset_idx] = True

            # Wrap the mask in a frame indexed by date, with sid columns.
            expected_result = pd.DataFrame(
                data=expected_mask,
                index=dates,
                columns=frame.sid.values,
            )
            assert_frame_equal(finder.lifetimes(dates), expected_result)
Beispiel #49
0
    def __init__(
        self,
        load=None,
        bm_symbol='^GSPC',
        exchange_tz="US/Eastern",
        max_date=None,
        env_trading_calendar=tradingcalendar
    ):
        """Set up calendar data, benchmark/treasury data and an asset finder.

        ``load`` is a function returning ``benchmark_returns`` and
        ``treasury_curves``; it is called with the trading day, the trading
        days and ``bm_symbol``, and a falsy value selects
        ``load_market_data``.  The treasury curves are expected to be a
        DataFrame with an index of dates and columns of the curve names,
        e.g. '10year', '1month', etc.  A truthy ``max_date`` cuts both the
        trading days and the treasury curves off after that date.
        """
        calendar = env_trading_calendar
        self.trading_day = calendar.trading_day.copy()

        # Keep a private copy of the (optionally truncated) trading days.
        all_days = calendar.trading_days
        selected = all_days[all_days <= max_date] if max_date else all_days
        self.trading_days = selected.copy()

        self.first_trading_day = self.trading_days[0]
        self.last_trading_day = self.trading_days[-1]

        self.early_closes = calendar.get_early_closes(
            self.first_trading_day,
            self.last_trading_day,
        )
        self.open_and_closes = calendar.open_and_closes.loc[self.trading_days]

        self.prev_environment = self
        self.bm_symbol = bm_symbol

        loader = load if load else load_market_data
        self.benchmark_returns, self.treasury_curves = loader(
            self.trading_day,
            self.trading_days,
            self.bm_symbol,
        )

        if max_date:
            # Mask the treasury curves down to the current date.  In live
            # trading the last curve date would be the day before the date
            # considered to be 'today'.
            curves = self.treasury_curves
            self.treasury_curves = curves[curves.index <= max_date]

        self.exchange_tz = exchange_tz

        self.asset_finder = AssetFinder()
Beispiel #50
0
    def test_consume_metadata(self):
        """Write equities as a dict, then as a DataFrame, and read them back.

        The DataFrame half uses a fresh ``TradingEnvironment`` so that it
        does not write into the database used by the dict half.
        """
        # Test dict consumption
        dict_to_consume = {0: {'symbol': 'PLAY'},
                           1: {'symbol': 'MSFT'}}
        self.env.write_data(equities_data=dict_to_consume)
        finder = AssetFinder(self.env.engine)

        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)

        # Test dataframe consumption.  Cells are set with a single .loc call:
        # chained `df[col][row] = value` may write to a temporary copy and
        # leave the frame unchanged.
        df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
        df.loc[0, 'asset_name'] = "Dave'N'Busters"
        df.loc[0, 'exchange'] = "NASDAQ"
        df.loc[1, 'asset_name'] = "Microsoft"
        df.loc[1, 'exchange'] = "NYSE"
        self.env = TradingEnvironment(load=noop_load)
        self.env.write_data(equities_df=df)
        finder = AssetFinder(self.env.engine)
        self.assertEqual('NASDAQ', finder.retrieve_asset(0).exchange)
        self.assertEqual('Microsoft', finder.retrieve_asset(1).asset_name)
Beispiel #51
0
def engine_from_files(daily_bar_path,
                      adjustments_path,
                      asset_db_path,
                      calendar,
                      warmup_assets=False):
    """
    Build a SimplePipelineEngine backed by files on the local filesystem.

    Parameters
    ----------
    daily_bar_path : str
        Path forwarded to `USEquityPricingLoader.from_files` as the daily
        bar location.
    adjustments_path : str
        Path forwarded to `USEquityPricingLoader.from_files` as the
        adjustments location.
    asset_db_path : str
        Location of the asset database for `AssetFinder`.  The prefix
        "sqlite:///" is prepended unless the value already starts with
        "sqlite:".
    calendar : pd.DatetimeIndex
        Calendar handed to the engine.
    warmup_assets : bool, optional
        When true, every asset known to the finder is retrieved up front
        and the number retrieved is printed.  This can speed up initial
        latency on subsequent pipeline runs, at the cost of extra memory
        consumption.  Default is False.

    Returns
    -------
    SimplePipelineEngine
        Engine whose loader callback always returns the pricing loader
        built here.
    """
    pricing_loader = USEquityPricingLoader.from_files(
        daily_bar_path,
        adjustments_path,
    )

    if asset_db_path.startswith("sqlite:"):
        db_url = asset_db_path
    else:
        db_url = "sqlite:///" + asset_db_path
    finder = AssetFinder(db_url)

    if warmup_assets:
        warmed = finder.retrieve_all(finder.sids)
        print("Warmed up %d assets." % len(warmed))

    def get_loader(_):
        # The same loader serves every request, whatever the argument.
        return pricing_loader

    return SimplePipelineEngine(get_loader, calendar, finder)
Beispiel #52
0
    def test_consume_asset_as_identifier(self):
        """Consuming Asset objects lets the finder return equal assets."""
        equity_end = pd.Timestamp('2012-01-01', tz='UTC')
        future_end = pd.Timestamp('2008-01-01', tz='UTC')

        equity = Equity(1, symbol="TESTEQ", end_date=equity_end)
        future = Future(200, symbol="TESTFUT", end_date=future_end)

        # The assets themselves serve as the identifiers to consume.
        finder = AssetFinder()
        finder.consume_identifiers([equity, future])
        finder.populate_cache()

        expectations = (
            (1, equity, equity_end),
            (200, future, future_end),
        )

        # First compare whole assets, then their end dates, in sid order.
        for sid, asset, _ in expectations:
            self.assertEqual(asset, finder.retrieve_asset(sid))
        for sid, _, end_date in expectations:
            self.assertEqual(end_date, finder.retrieve_asset(sid).end_date)
Beispiel #53
0
    def test_yahoo_bars_to_panel_source(self):
        """Events from a DataPanelSource over Yahoo bars have the right shape.

        The panel's items are replaced by the sids the finder maps them to;
        every event must then contain all expected fields, an integer
        volume, and a sid from that mapping.
        """
        finder = AssetFinder()
        stocks = ['AAPL', 'GE']
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
        data = factory.load_bars_from_yahoo(stocks=stocks,
                                            indexes={},
                                            start=start,
                                            end=end)

        check_fields = ['sid', 'open', 'high', 'low', 'close',
                        'volume', 'price']

        # Work on a copy so the loaded panel keeps its original items.
        copy_panel = data.copy()
        sids = finder.map_identifier_index_to_sids(
            data.items, data.major_axis[0]
        )
        copy_panel.items = sids
        source = DataPanelSource(copy_panel)
        for event in source:
            for check_field in check_fields:
                self.assertIn(check_field, event)
            # The dedicated assertions report the offending value on
            # failure, which assertTrue on a precomputed bool cannot do.
            self.assertIsInstance(event['volume'], integer_types)
            self.assertIn(event['sid'], sids)
Beispiel #54
0
    def test_lookup_future_by_expiration(self):
        """The contract returned depends on the reference date supplied."""

        def ad_future(symbol, expiration):
            # Metadata for one 'AD' future expiring on `expiration` (UTC).
            return {
                'symbol': symbol,
                'root_symbol': 'AD',
                'asset_type': 'future',
                'expiration_date': pd.Timestamp(expiration, tz='UTC'),
            }

        metadata = {
            2: ad_future('ADN15', '2015-06-15'),
            1: ad_future('ADV15', '2015-09-14'),
            0: ad_future('ADF16', '2015-12-14'),
        }

        finder = AssetFinder(metadata=metadata)
        dt = pd.Timestamp('2015-06-19', tz='UTC')

        # Reference dates, each the first day of its month.
        may_15 = pd.Timestamp('2015-05-01', tz='UTC')
        june_15 = pd.Timestamp('2015-06-01', tz='UTC')
        sept_15 = pd.Timestamp('2015-09-01', tz='UTC')
        dec_15 = pd.Timestamp('2015-12-01', tz='UTC')
        jan_16 = pd.Timestamp('2016-01-01', tz='UTC')

        # ADV15 (sid 1) is the next valid contract as of `dt`, so every
        # reference date before 9/14/15 must resolve to it.
        for ref_date in (may_15, june_15, sept_15):
            found = finder.lookup_future_by_expiration('AD', dt, ref_date)
            self.assertEqual(found.sid, 1)

        # From 12/1/15 the next expiration belongs to ADF16 (sid 0).
        found = finder.lookup_future_by_expiration('AD', dt, dec_15)
        self.assertEqual(found.sid, 0)

        # Nothing expires after 12/14/2015, so the lookup yields None.
        self.assertIsNone(finder.lookup_future_by_expiration('AD', dt, jan_16))
Beispiel #55
0
    def __init__(
        self,
        load=None,
        bm_symbol='SPY',
        exchange_tz="US/Eastern",
        trading_calendar=None,
        trading_day=None,
        trading_days=None,
        asset_db_path=':memory:',
        future_chain_predicates=CHAIN_PREDICATES,
        environ=None,
    ):
        """Load benchmark/treasury data and set up the asset database.

        ``load`` is called as ``load(trading_day, trading_days, bm_symbol)``
        and must return ``(benchmark_returns, treasury_curves)``; a falsy
        value selects ``load_market_data`` bound to ``environ``.  A missing
        ``trading_day`` or ``trading_days`` is read from
        ``trading_calendar``, which falls back to the "NYSE" calendar when
        falsy.  ``asset_db_path`` may be a string, which gets the
        ``sqlite:///`` prefix and goes to ``create_engine``, or any other
        object, which is used as the engine as-is; ``None`` leaves
        ``asset_finder`` unset.
        """
        self.bm_symbol = bm_symbol
        loader = load if load else partial(load_market_data, environ=environ)

        # Only consult a calendar for the values the caller did not give.
        if trading_day is None:
            trading_calendar = trading_calendar or get_calendar("NYSE")
            trading_day = trading_calendar.day
        if trading_days is None:
            trading_calendar = trading_calendar or get_calendar("NYSE")
            trading_days = trading_calendar.schedule.index

        market_data = loader(trading_day, trading_days, self.bm_symbol)
        self.benchmark_returns, self.treasury_curves = market_data

        self.exchange_tz = exchange_tz

        # A string is a database path; anything else is taken as an engine.
        if isinstance(asset_db_path, string_types):
            engine = create_engine('sqlite:///' + asset_db_path)
        else:
            engine = asset_db_path
        self.engine = engine

        if engine is None:
            self.asset_finder = None
        else:
            AssetDBWriter(engine).init_db()
            self.asset_finder = AssetFinder(
                engine,
                future_chain_predicates=future_chain_predicates,
            )
Beispiel #56
0
    def test_consume_metadata(self):
        """Consume a dict into one finder and a DataFrame into another."""
        # Test dict consumption
        finder = AssetFinder()
        dict_to_consume = {0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}}
        finder.consume_metadata(dict_to_consume)

        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)

        # A fresh finder keeps the two halves of the test independent.
        finder = AssetFinder()

        # Test dataframe consumption.  Cells are set with a single .loc call:
        # chained `df[col][row] = value` may write to a temporary copy and
        # leave the frame unchanged.
        df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
        df.loc[0, 'asset_name'] = "Dave'N'Busters"
        df.loc[0, 'exchange'] = "NASDAQ"
        df.loc[1, 'asset_name'] = "Microsoft"
        df.loc[1, 'exchange'] = "NYSE"
        finder.consume_metadata(df)
        self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
        self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
Beispiel #57
0
    def test_lookup_future_by_expiration(self):
        """Three 'AD' futures; the reference date decides which one is found."""
        shared_start = '2015-01-01'

        def ad_future(symbol, expiration):
            # Metadata for one 'AD' future; all contracts start the same day.
            return {
                'symbol': symbol,
                'root_symbol': 'AD',
                'asset_type': 'future',
                'expiration_date': pd.Timestamp(expiration, tz='UTC'),
                'start_date': pd.Timestamp(shared_start, tz='UTC'),
            }

        metadata = {
            2: ad_future('ADN15', '2015-06-15'),
            1: ad_future('ADV15', '2015-09-14'),
            0: ad_future('ADF16', '2015-12-14'),
        }

        finder = AssetFinder(metadata=metadata)
        dt = pd.Timestamp('2015-06-19', tz='UTC')

        # Reference dates, each the first day of its month.
        may_15 = pd.Timestamp('2015-05-01', tz='UTC')
        june_15 = pd.Timestamp('2015-06-01', tz='UTC')
        sept_15 = pd.Timestamp('2015-09-01', tz='UTC')
        dec_15 = pd.Timestamp('2015-12-01', tz='UTC')
        jan_16 = pd.Timestamp('2016-01-01', tz='UTC')

        # ADV15 (sid 1) is the next valid contract as of `dt`, so every
        # reference date before 9/14/15 must resolve to it.
        for ref_date in (may_15, june_15, sept_15):
            contract = finder.lookup_future_by_expiration('AD', dt, ref_date)
            self.assertEqual(contract.sid, 1)

        # From 12/1/15 the next expiration belongs to ADF16 (sid 0).
        contract = finder.lookup_future_by_expiration('AD', dt, dec_15)
        self.assertEqual(contract.sid, 0)

        # Nothing expires after 12/14/2015, so the lookup yields None.
        self.assertIsNone(finder.lookup_future_by_expiration('AD', dt, jan_16))
Beispiel #58
0
    def test_lookup_symbol_fuzzy(self):
        """Check lookup_symbol with and without the ``fuzzy`` argument.

        Three assets are registered with file names 'TEST@0' .. 'TEST@2'.
        The assertions are run twice so that a result cached by the first
        pass cannot mask a bug in the second.
        """
        as_of = pd.Timestamp('2013-01-01', tz='UTC')

        records = []
        for sid in range(3):
            records.append({
                'sid': sid,
                'file_name':  'TEST@%d' % sid,
                'company_name': "company%d" % sid,
                'start_date_nano': as_of.value,
                'end_date_nano': as_of.value,
                'exchange': uuid.uuid4().hex,
            })
        frame = pd.DataFrame.from_records(records)

        finder = AssetFinder(frame)
        # Retrieve all three assets up front; only the middle one is
        # compared against below.
        assets = [finder.retrieve_asset(sid) for sid in range(3)]
        asset_1 = assets[1]

        for _ in range(2):  # second pass guards against caching bugs
            # A bare prefix matches nothing.
            self.assertIsNone(finder.lookup_symbol('test', as_of))

            # The exact symbol resolves without any fuzzy character...
            exact = finder.lookup_symbol('test@1', as_of)
            self.assertEqual(asset_1, exact)

            # ...and passing a fuzzy character it does not need changes
            # nothing.
            exact_with_fuzzy = finder.lookup_symbol(
                'test@1', as_of, fuzzy='@'
            )
            self.assertEqual(asset_1, exact_with_fuzzy)

            # With the '@' dropped from the symbol: no fuzzy -> no match.
            self.assertIsNone(finder.lookup_symbol('test1', as_of))
            # Wrong fuzzy character -> still no match.
            self.assertIsNone(finder.lookup_symbol('test1', as_of, fuzzy='*'))
            # Correct fuzzy character -> the asset is found.
            fuzzy_hit = finder.lookup_symbol('test1', as_of, fuzzy='@')
            self.assertEqual(asset_1, fuzzy_hit)
Beispiel #59
0
    def test_lookup_symbol_delimited(self):
        """Check lookup_symbol against symbols that contain a delimiter.

        Three equities with symbols 'TEST.0' .. 'TEST.2' are written through
        ``self.env`` and looked up via an AssetFinder built on its engine.
        The assertions are run twice to catch caching bugs.
        """
        as_of = pd.Timestamp('2013-01-01', tz='UTC')

        records = []
        for sid in range(3):
            records.append({
                'sid': sid,
                'symbol':  'TEST.%d' % sid,
                'company_name': "company%d" % sid,
                'start_date': as_of.value,
                'end_date': as_of.value,
                'exchange': uuid.uuid4().hex
            })
        frame = pd.DataFrame.from_records(records)

        self.env.write_data(equities_df=frame)
        finder = AssetFinder(self.env.engine)
        # Retrieve all three assets up front; only the middle one is
        # compared against below.
        assets = [finder.retrieve_asset(sid) for sid in range(3)]
        asset_1 = assets[1]

        for _ in range(2):
            # None of these should resolve: a bare prefix, the symbol with
            # its delimiter removed, and the symbol using '@', which is not
            # a supported delimiter.
            for unknown in ('TEST', 'TEST1', 'TEST@1'):
                with self.assertRaises(SymbolNotFound):
                    finder.lookup_symbol(unknown, as_of)

            # Each of these delimiters between 'TEST' and '1' should
            # resolve to the asset stored as 'TEST.1'.
            for delimiter in ('-', '/', '_', '.'):
                found = finder.lookup_symbol('TEST%s1' % delimiter, as_of)
                self.assertEqual(asset_1, found)
Beispiel #60
0
    def setUp(self):
        """Create the asset ids, date index and AssetFinder fixture.

        Sets ``self.assets``, ``self.dates`` and ``self.asset_finder``.
        """
        # Take the date frequency from the shared TradingEnvironment so the
        # index below is spaced by its ``trading_day`` offset.
        env = TradingEnvironment.instance()
        day = env.trading_day

        self.assets = Int64Index([1, 2, 3])
        # UTC dates from 2015-01-01 through 2015-01-31 at ``day`` frequency.
        self.dates = date_range(
            '2015-01-01',
            '2015-01-31',
            freq=day,
            tz='UTC',
        )

        # Asset info for the ids above, with start/end dates set to the
        # first and last entries of ``self.dates``.
        asset_info = make_simple_asset_info(
            self.assets,
            start_date=self.dates[0],
            end_date=self.dates[-1],
        )
        self.asset_finder = AssetFinder(asset_info)