def test_nan_insertion(self):
    '''When the external data source returns only a subset of the requested
    dates, NaNs are inserted into the database for the dates not returned.
    '''
    connection = SQLiteTimeseries.connect(':memory:')
    driver = SQLiteTimeseries(connection=connection,
                              table='price',
                              metric='Adj Close')
    symbol = 'MSFT'
    missing_date = datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC)
    returned_dates = {
        datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC),
    }
    requested_dates = {missing_date} | returned_dates

    # The stubbed data source yields a value for every requested date
    # except missing_date.
    mock_yahoo = mock.Mock()
    mock_yahoo.return_value = ((date, 10.) for date in returned_dates)
    cache = FinancialDataTimeSeriesCache(gets_data=mock_yahoo,
                                         database=driver)
    cached_values = list(cache.get(symbol=symbol,
                                   dates=list(requested_dates)))

    # The builtin next() works on Python 2.6+ and Python 3, whereas the
    # cursor's .next() method only exists on Python 2.
    db_val = next(connection.execute(
        "SELECT value FROM price WHERE date = '{}'".format(missing_date)))
    self.assertEqual(db_val['value'], 'NaN')

    cache_value_dict = dict(cached_values)
    # assertTrue rather than a bare assert, so the check survives `python -O`.
    self.assertTrue(np.isnan(cache_value_dict[missing_date]))
def build_sqlite_price_cache(cls, sqlite_file_path, table='prices', metric='Adj Close'):
    '''Build a cache instance backed by an SQLite timeseries database.

    Connects to the SQLite database at ``sqlite_file_path``, wraps the
    connection in an ``SQLiteTimeseries`` for the given ``table`` and
    ``metric``, and returns ``cls`` wired to fetch uncached data with
    ``prices.get_prices_from_yahoo``.
    '''
    timeseries_db = SQLiteTimeseries(
        connection=SQLiteTimeseries.connect(sqlite_file_path),
        table=table,
        metric=metric,
    )
    return cls(gets_data=prices.get_prices_from_yahoo, database=timeseries_db)
def test_datetime_type_storage(self):
    '''Check that a tz-aware datetime can be stored in and read from sqlite.'''
    conn = self.connection
    table_name = 'test_table'
    test_value = 'test_value'
    symbol = 'ABC'
    record_date = datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC)

    # Write a single record through the driver.
    driver = SQLiteTimeseries(conn, table_name, metric=test_value)
    driver.set(symbol=symbol, records=[(record_date, 100.)])

    # The stored date must come back as a UTC datetime, not a string.
    select_all = 'select * from {}'.format(table_name)
    first_row = conn.cursor().execute(select_all).fetchone()
    stored_date = first_row['date']
    self.assertIsInstance(stored_date, datetime.datetime)
    self.assertEqual(stored_date.tzinfo, pytz.UTC)

    # A datetime must also work as a query parameter.
    select_by_date = 'select value from {} where date = ?'.format(table_name)
    matching_rows = conn.cursor().execute(select_by_date,
                                          (record_date,)).fetchall()
    self.assertEqual(len(list(matching_rows)), 1)
def test_nan_insertion(self):
    '''When the external data source returns only a subset of the requested
    dates, NaNs are inserted into the database for the dates not returned.
    '''
    connection = SQLiteTimeseries.connect(':memory:')
    driver = SQLiteTimeseries(connection=connection,
                              table='price',
                              metric='Adj Close')
    symbol = 'MSFT'
    missing_date = datetime.datetime(2012, 12, 4, tzinfo=pytz.UTC)
    returned_dates = {
        datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
        datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC),
    }
    requested_dates = {missing_date} | returned_dates

    # The stubbed data source yields a value for every requested date
    # except missing_date.
    mock_yahoo = mock.Mock()
    mock_yahoo.return_value = ((date, 10.) for date in returned_dates)
    cache = FinancialDataTimeSeriesCache(gets_data=mock_yahoo,
                                         database=driver)
    cached_values = list(cache.get(symbol=symbol,
                                   dates=list(requested_dates)))

    # The builtin next() works on Python 2.6+ and Python 3, whereas the
    # cursor's .next() method only exists on Python 2.
    db_val = next(connection.execute(
        "SELECT value FROM price WHERE date = '{}'".format(missing_date)))
    self.assertEqual(db_val['value'], 'NaN')

    cache_value_dict = dict(cached_values)
    # assertTrue rather than a bare assert, so the check survives `python -O`.
    self.assertTrue(np.isnan(cache_value_dict[missing_date]))
def setUp(self):
    '''Run the parent setUp, then build the driver under test.

    The driver is created from this test case's ``connection``, ``table``
    and ``metric`` attributes and stored on ``self.driver``.
    '''
    super(SQLiteTimeseriesTestCase, self).setUp()
    driver_options = {
        'connection': self.connection,
        'table': self.table,
        'metric': self.metric,
    }
    self.driver = SQLiteTimeseries(**driver_options)
class SQLiteTimeseriesTestCase(SQLiteTestCase):
    '''Tests for reading and writing values through SQLiteTimeseries.

    Fixture rows are inserted straight into ``self.table`` with raw SQL so
    that ``driver.get`` is exercised independently of ``driver.set``.
    '''

    table = 'price'
    metric = 'Adj Close'

    def setUp(self):
        '''Run the parent setUp, then build the driver under test.'''
        super(SQLiteTimeseriesTestCase, self).setUp()
        self.driver = SQLiteTimeseries(connection=self.connection,
                                       table=self.table,
                                       metric=self.metric)

    def _insert_value(self, symbol, date, value):
        '''Insert one row for ``self.metric`` directly, bypassing the driver.'''
        qry = ('INSERT INTO {} (symbol, date, metric, value) '
               'VALUES (?, ?, ?, ?)').format(self.table)
        self.connection.execute(qry, (symbol, date, self.metric, value))

    def insert_date_combos(self, symbol_date_combos):
        '''Insert a random integer price for each (symbol, date) pair.

        Returns a ``{symbol: {date: price}}`` mapping of what was inserted.
        '''
        test_vals = defaultdict(dict)
        for symbol, date in symbol_date_combos:
            price = random.randint(0, 1000)
            self._insert_value(symbol, date, price)
            test_vals[symbol][date] = price
        return test_vals

    def test_single_get(self):
        '''A single stored value is returned with its date.'''
        symbol = 'ABC'
        date = datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC)
        price = 6.5
        self._insert_value(symbol, date, price)
        # The builtin next() works on Python 2.6+ and Python 3; the .next()
        # method only exists on Python 2 iterators.
        cache_date, cache_price = next(
            self.driver.get(symbol=symbol, dates=[date]))
        self.assertEqual(cache_price, price)
        self.assertEqual(cache_date, date)

    def test_multiple_get(self):
        '''Several dates for several symbols come back per symbol.'''
        symbols = ['ABC', 'XYZ']
        dates = [
            datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
            datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
            datetime.datetime(2012, 12, 15, tzinfo=pytz.UTC),
        ]
        symbol_date_combos = [(symbol, date)
                              for symbol in symbols
                              for date in dates]
        price_dict = self.insert_date_combos(symbol_date_combos)
        for symbol in symbols:
            cached_values = list(self.driver.get(symbol=symbol, dates=dates))
            cache_dict = dict(cached_values)
            self.assertDictEqual(price_dict[symbol], cache_dict)

    def test_date_query(self):
        '''assert we only get the dates we want.'''
        symbols = ['ABC', 'XYZ']
        dates = [
            datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC),
            datetime.datetime(2012, 12, 2, tzinfo=pytz.UTC),
            datetime.datetime(2012, 12, 3, tzinfo=pytz.UTC),
        ]
        symbol_date_combos = [(symbol, date)
                              for symbol in symbols
                              for date in dates]
        prices = self.insert_date_combos(symbol_date_combos)
        # Extra row on a date that is NOT requested below. It is tz-aware
        # like every other fixture date, so naive and aware datetimes are
        # never mixed in the table.
        self.insert_date_combos(
            [('ABC', datetime.datetime(2012, 12, 15, tzinfo=pytz.UTC))])
        for symbol in symbols:
            cached_values = self.driver.get(symbol=symbol, dates=dates)
            cache_dict = dict(cached_values)
            self.assertDictEqual(prices[symbol], cache_dict)

    @unittest.skip('slow')
    def test_volume(self):
        '''make sure a larger number of records doesn't choke it somehow.'''
        symbols = S_P_500_TICKERS[:200]
        datetimeindex = get_trading_days(
            start=datetime.datetime(2012, 1, 1, tzinfo=pytz.UTC),
            end=datetime.datetime(2012, 7, 4, tzinfo=pytz.UTC))
        # Rebuild each trading day as a midnight-UTC datetime.
        dates = []
        for d in datetimeindex:
            day = d.date()
            dates.append(datetime.datetime(day.year, day.month, day.day,
                                           tzinfo=pytz.UTC))
        symbol_date_combos = [(symbol, date)
                              for symbol in symbols
                              for date in dates]
        test_vals = self.insert_date_combos(symbol_date_combos)
        for symbol in symbols:
            cached_values = self.driver.get(symbol=symbol, dates=dates)
            cache_dict = dict(cached_values)
            self.assertDictEqual(test_vals[symbol], cache_dict)

    def test_set(self):
        '''A record written through the driver lands in the table.'''
        symbol = 'ABC'
        date = datetime.datetime(2012, 12, 1, tzinfo=pytz.UTC)
        price = 6.5
        self.driver.set(symbol=symbol, records=[(date, price)])
        qry = 'SELECT * FROM {}'.format(self.table)
        row = self.connection.execute(qry).fetchone()
        self.assertEqual(row['symbol'], symbol)
        self.assertEqual(row['metric'], self.metric)
        self.assertEqual(row['value'], price)

    def test_lots_of_dates(self):
        '''sqlite can only handle 999 variables.'''
        start = datetime.datetime(1990, 1, 1, tzinfo=pytz.UTC)
        end = datetime.datetime.now(pytz.UTC)
        dates = list(get_trading_days(start, end).to_pydatetime())
        for price, date in enumerate(dates):
            self._insert_value('ABC', date, price)
        # No assertion: the test passes as long as fetching every date
        # in one call does not raise.
        list(self.driver.get(symbol='ABC', dates=dates))