def test_existing_hdf_file(self):
    """Load eoddata into a temporary HDF file after adding one extra input file.

    A copy of ``INDEX_20180731.txt`` is written as ``INDEX_20000000.txt`` in
    the source directory before loading; the test then checks the row counts
    stored under ``data/eod`` and ``updated/eod``.

    NOTE(review): the copied file is not removed by this test. Other tests
    may rely on its presence or absence -- confirm before adding cleanup.
    """
    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        # The extra file is created after the loader is constructed but
        # before any data is loaded.
        original_file = "tests/data/eoddata/INDEX_20180731.txt"
        duplicate_file = "tests/data/eoddata/INDEX_20000000.txt"
        shutil.copy2(original_file, duplicate_file)
        loader.load_data()

        data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        self.assertEqual(data_rows, 12053)
        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        self.assertEqual(updated_rows, 6)
def test_HDF_rename_columns():
    """Check that ``columns=rename`` yields the expected column names in HDF mode.

    Loads ``tests/data/eoddata`` into a temporary HDF file, verifies the row
    counts under ``data/eod`` and ``updated/eod``, and then checks that the
    stored frame has exactly the expected columns in the expected order.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader('tests/data/eoddata', engine=fp.name, mode='HDF', tablename='eod')
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, 'data/eod')
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, 'updated/eod')) == 5
        cols = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
        # Compare the full list rather than zipping pairwise: zip() stops at
        # the shorter input, so missing or extra columns would go unnoticed.
        assert list(df.columns) == cols
def test_HDF_rename_columns():
    """Check that ``columns=rename`` yields the expected column names in HDF mode.

    Loads ``tests/data/eoddata`` into a temporary HDF file, verifies the row
    counts under ``data/eod`` and ``updated/eod``, and then checks that the
    stored frame has exactly the expected columns in the expected order.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader("tests/data/eoddata", engine=fp.name, mode="HDF", tablename="eod")
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, "data/eod")
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, "updated/eod")) == 5
        cols = ["symbol", "date", "open", "high", "low", "close", "vol"]
        # Compare the full list rather than zipping pairwise: zip() stops at
        # the shorter input, so missing or extra columns would go unnoticed.
        assert list(df.columns) == cols
def test_collate_data():
    """Compare ``collate_data`` output with what ``DataLoader`` writes to SQL.

    The same directory is read once through ``collate_data`` and once through
    a ``DataLoader`` in SQL mode backed by a ``sqlite://`` engine. Both frames
    are sorted by date and symbol, must have equal length, and must satisfy
    ``compare``.
    """
    source_dir = "tests/data/NASDAQ/data"

    collated = (
        collate_data(source_dir, parse_dates=["Date"])
        .rename(lambda name: name.lower(), axis="columns")
        .sort_values(by=["date", "symbol"])
    )

    engine = create_engine("sqlite://")
    loader = DataLoader(
        directory=source_dir,
        mode="SQL",
        engine=engine,
        tablename="eod",
    )
    loader.load_data()
    from_sql = pd.read_sql_table("eod", engine).sort_values(by=["date", "symbol"])

    assert len(collated) == len(from_sql)
    # NOTE(review): compare() is called 100 times with the same arguments;
    # presumably it samples rows at random -- confirm against its definition.
    for _ in range(100):
        assert compare(collated, from_sql)
def test_wrong_mode(self):
    """Check that mismatched engine/mode combinations raise.

    Two cases are exercised: calling ``load_data`` on a loader built with
    mode 'SQL' but a file path as the engine must raise some exception, and
    constructing a loader with mode 'CSV' must raise ``TypeError``.
    """
    data_dir = 'tests/data/eoddata'
    with tempfile.NamedTemporaryFile() as fp:
        # A temporary file name is supplied as the engine while mode is 'SQL'.
        loader = DataLoader(data_dir, engine=fp.name, mode='SQL', tablename='eod')
        self.assertRaises(Exception, loader.load_data)
        # Here the constructor itself is expected to fail.
        self.assertRaises(
            TypeError,
            DataLoader,
            data_dir,
            engine='some_random_mode',
            mode='CSV',
            tablename='eod',
        )
def test_collate_data():
    """Compare ``collate_data`` output with what ``DataLoader`` writes to SQL.

    The same directory is read once through ``collate_data`` and once through
    a ``DataLoader`` in SQL mode backed by a ``sqlite://`` engine. Both frames
    are sorted by date and symbol, must have equal length, and must satisfy
    ``compare``.
    """
    source_dir = 'tests/data/NASDAQ/data'

    collated = (
        collate_data(source_dir, parse_dates=['Date'])
        .rename(lambda name: name.lower(), axis='columns')
        .sort_values(by=['date', 'symbol'])
    )

    engine = create_engine('sqlite://')
    loader = DataLoader(
        directory=source_dir,
        mode='SQL',
        engine=engine,
        tablename='eod',
    )
    loader.load_data()
    from_sql = pd.read_sql_table('eod', engine).sort_values(by=['date', 'symbol'])

    assert len(collated) == len(from_sql)
    # NOTE(review): compare() is called 100 times with the same arguments;
    # presumably it samples rows at random -- confirm against its definition.
    for _ in range(100):
        assert compare(collated, from_sql)
def test_wrong_mode(self):
    """Check that mismatched engine/mode combinations raise.

    Two cases are exercised: calling ``load_data`` on a loader built with
    mode "SQL" but a file path as the engine must raise some exception, and
    constructing a loader with mode "CSV" must raise ``TypeError``.
    """
    data_dir = "tests/data/eoddata"
    with tempfile.NamedTemporaryFile() as fp:
        # A temporary file name is supplied as the engine while mode is "SQL".
        loader = DataLoader(data_dir, engine=fp.name, mode="SQL", tablename="eod")
        self.assertRaises(Exception, loader.load_data)
        # Here the constructor itself is expected to fail.
        self.assertRaises(
            TypeError,
            DataLoader,
            data_dir,
            engine="some_random_mode",
            mode="CSV",
            tablename="eod",
        )
def test_SQL_post_func():
    """Check that a ``postfunc`` can add columns when loading in SQL mode.

    The post-processing callback adds a ``filename`` column and an
    ``avgprice`` column (mean of open and close). After loading, the table
    read back from the ``sqlite://`` engine must have nine columns, include
    both new columns, and store ``date`` with dtype ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # The third argument is accepted but not used.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    loaded = pd.read_sql_table("eod", engine)
    assert loaded.dtypes["date"] == dtype("<M8[ns]")
    assert loaded.shape[1] == 9
    for added_column in ("filename", "avgprice"):
        assert added_column in loaded.columns
def test_HDF_post_func():
    """Check that a ``postfunc`` can add columns when loading in HDF mode.

    The post-processing callback adds a ``filename`` column and an
    ``avgprice`` column (mean of open and close). After loading, the frame
    read back from ``data/eod`` must have nine columns, include both new
    columns, and store ``date`` with dtype ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # The third argument is accepted but not used.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        loaded = pd.read_hdf(hdf_path, "data/eod")
        assert loaded.dtypes["date"] == dtype("<M8[ns]")
        assert loaded.shape[1] == 9
        for added_column in ("filename", "avgprice"):
            assert added_column in loaded.columns
def test_SQL_post_func():
    """Check that a ``postfunc`` can add columns when loading in SQL mode.

    The post-processing callback adds a ``filename`` column and an
    ``avgprice`` column (mean of open and close). After loading, the table
    read back from the ``sqlite://`` engine must have nine columns, include
    both new columns, and store ``date`` with dtype ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # The third argument is accepted but not used.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    engine = create_engine('sqlite://')
    loader = DataLoader(
        'tests/data/eoddata',
        engine=engine,
        mode='SQL',
        tablename='eod',
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    loaded = pd.read_sql_table('eod', engine)
    assert loaded.dtypes['date'] == dtype('<M8[ns]')
    assert loaded.shape[1] == 9
    for added_column in ('filename', 'avgprice'):
        assert added_column in loaded.columns
def test_HDF_post_func():
    """Check that a ``postfunc`` can add columns when loading in HDF mode.

    The post-processing callback adds a ``filename`` column and an
    ``avgprice`` column (mean of open and close). After loading, the frame
    read back from ``data/eod`` must have nine columns, include both new
    columns, and store ``date`` with dtype ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # The third argument is accepted but not used.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        loader = DataLoader(
            'tests/data/eoddata',
            engine=hdf_path,
            mode='HDF',
            tablename='eod',
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        loaded = pd.read_hdf(hdf_path, 'data/eod')
        assert loaded.dtypes['date'] == dtype('<M8[ns]')
        assert loaded.shape[1] == 9
        for added_column in ('filename', 'avgprice'):
            assert added_column in loaded.columns
def test_run_loader_multiple_times(self):
    """Check that repeated ``load_data`` calls give the same row count.

    ``load_data`` is invoked five times on an HDF-backed loader and five
    times on a SQL-backed loader (``sqlite://`` engine). The resulting row
    counts must match each other and equal 12053.
    """
    data_dir = "tests/data/eoddata"
    repeats = 5
    with tempfile.NamedTemporaryFile() as fp:
        loader = DataLoader(data_dir, engine=fp.name, mode="HDF", tablename="eod")
        for _ in range(repeats):
            loader.load_data()

        engine = create_engine("sqlite://")
        # The same local name is reused for the SQL loader.
        loader = DataLoader(data_dir, engine=engine, mode="SQL", tablename="eod")
        for _ in range(repeats):
            loader.load_data()

        hdf_frame = pd.read_hdf(fp.name, "data/eod")
        shape_hdf = len(hdf_frame)
        sql_frame = pd.read_sql_table("eod", engine)
        shape_sql = len(sql_frame)
        self.assertEqual(shape_hdf, shape_sql)
        self.assertEqual(shape_hdf, 12053)
def test_run_loader_multiple_times(self):
    """Check that repeated ``load_data`` calls give the same row count.

    ``load_data`` is invoked five times on an HDF-backed loader and five
    times on a SQL-backed loader (``sqlite://`` engine). The resulting row
    counts must match each other and equal 12053.
    """
    data_dir = 'tests/data/eoddata'
    repeats = 5
    with tempfile.NamedTemporaryFile() as fp:
        loader = DataLoader(data_dir, engine=fp.name, mode='HDF', tablename='eod')
        for _ in range(repeats):
            loader.load_data()

        engine = create_engine('sqlite://')
        # The same local name is reused for the SQL loader.
        loader = DataLoader(data_dir, engine=engine, mode='SQL', tablename='eod')
        for _ in range(repeats):
            loader.load_data()

        hdf_frame = pd.read_hdf(fp.name, 'data/eod')
        shape_hdf = len(hdf_frame)
        sql_frame = pd.read_sql_table('eod', engine)
        shape_sql = len(sql_frame)
        self.assertEqual(shape_hdf, shape_sql)
        self.assertEqual(shape_hdf, 12053)