Ejemplo n.º 1
0
def test_apply_split_HDF_dataloader():
    """Compare split-adjusted HDF data with the stored expected results.

    Loads 'NASDAQ/data' into a temporary HDF file, applies the adjustments
    found in 'NASDAQ/adjustments/', and then, for every row of splits.csv,
    compares the stored rows of that symbol with 'NASDAQ/nasdaq_results.csv'
    cell by cell. Price/volume columns are compared with an absolute
    tolerance of 0.015; every other column must match exactly.
    """
    tolerant_columns = ['open', 'high', 'low', 'close', 'volume']
    with tempfile.NamedTemporaryFile() as fp:
        engine = fp.name
        dl = DataLoader(directory='NASDAQ/data',
                        mode='HDF',
                        engine=engine,
                        tablename='eod')
        dl.load_data()
        dl.apply_splits(directory='NASDAQ/adjustments/')

        df = pd.read_hdf(engine, 'data/eod')
        result = pd.read_csv('NASDAQ/nasdaq_results.csv',
                             parse_dates=['date'])
        splits = pd.read_csv('NASDAQ/adjustments/splits.csv',
                             parse_dates=['date'])

        for _, split in splits.iterrows():
            sym = split.at['symbol']
            cond = 'symbol == "{}"'.format(sym)
            # Sort by date and renumber from 0 so both frames can be
            # addressed with the same positional label.
            actual = df.query(cond).sort_values(by='date')
            actual = actual.reset_index(drop=True)
            expected = result.query(cond).sort_values(by='date')
            expected = expected.reset_index(drop=True)

            for pos in range(len(actual)):
                for col in actual.columns:
                    if col not in tolerant_columns:
                        assert actual.loc[pos, col] == expected.loc[pos, col]
                        continue
                    a = actual.loc[pos, col]
                    b = expected.loc[pos, col]
                    print(a, b, sym)
                    assert isclose(a, b, abs_tol=0.015)
Ejemplo n.º 2
0
 def test_existing_database(self):
     """Load 'eoddata' into SQL after adding one extra input file.

     Copies INDEX_20180731.txt to INDEX_20000000.txt inside 'eoddata'
     before loading, then expects 12053 rows in 'eod' and 6 rows in
     'updated_eod'.
     """
     engine = create_engine('sqlite://')
     dl = DataLoader('eoddata',
                     engine=engine,
                     mode='SQL',
                     tablename='eod')
     # NOTE(review): the copy is never removed, so it stays in 'eoddata'
     # after this test -- confirm whether other tests rely on that.
     source_file = 'eoddata/INDEX_20180731.txt'
     extra_file = 'eoddata/INDEX_20000000.txt'
     shutil.copy2(source_file, extra_file)
     dl.load_data()

     loaded = pd.read_sql_table('eod', engine)
     self.assertEqual(len(loaded), 12053)
     updated = pd.read_sql_table('updated_eod', engine)
     self.assertEqual(len(updated), 6)
Ejemplo n.º 3
0
def test_SQL_rename_columns():
    """Load 'eoddata' into SQL with renamed columns and check the result.

    Passes the module-level ``rename`` value as ``columns`` and expects
    10030 rows whose leading column names match the expected list.
    """
    expected_names = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
    engine = create_engine('sqlite://')
    dl = DataLoader('eoddata',
                    engine=engine,
                    mode='SQL',
                    tablename='eod')
    dl.load_data(columns=rename)

    df = pd.read_sql_table('eod', engine)
    assert len(df) == 10030
    # zip stops at the shorter sequence, so only the overlapping prefix
    # of the two name lists is compared.
    for actual_name, expected_name in zip(df.columns, expected_names):
        assert actual_name == expected_name
Ejemplo n.º 4
0
 def test_create_hdf_file(self):
     """Load 'eoddata' into a temporary HDF file and check row counts.

     Expects 10030 rows under 'data/eod' and 5 rows under 'updated/eod'.
     """
     with tempfile.NamedTemporaryFile() as fp:
         hdf_path = fp.name
         dl = DataLoader('eoddata', engine=hdf_path, mode='HDF',
                         tablename='eod')
         dl.load_data()

         data_rows = len(pd.read_hdf(hdf_path, 'data/eod'))
         self.assertEqual(data_rows, 10030)
         updated_rows = len(pd.read_hdf(hdf_path, 'updated/eod'))
         self.assertEqual(updated_rows, 5)
Ejemplo n.º 5
0
def test_HDF_rename_columns():
    """Load 'eoddata' into HDF with renamed columns and check the result.

    Passes the module-level ``rename`` value as ``columns`` and expects
    10030 rows under 'data/eod', 5 rows under 'updated/eod', and leading
    column names that match the expected list.
    """
    expected_names = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        dl = DataLoader('eoddata',
                        engine=hdf_path,
                        mode='HDF',
                        tablename='eod')
        dl.load_data(columns=rename)

        df = pd.read_hdf(hdf_path, 'data/eod')
        assert len(df) == 10030
        updated = pd.read_hdf(hdf_path, 'updated/eod')
        assert len(updated) == 5
        # zip stops at the shorter sequence, so only the overlapping
        # prefix of the two name lists is compared.
        for actual_name, expected_name in zip(df.columns, expected_names):
            assert actual_name == expected_name
Ejemplo n.º 6
0
 def test_existing_hdf_file(self):
     """Load 'eoddata' into HDF after adding one extra input file.

     Copies INDEX_20180731.txt to INDEX_20000000.txt inside 'eoddata'
     before loading, then expects 12053 rows under 'data/eod' and 6 rows
     under 'updated/eod'.
     """
     with tempfile.NamedTemporaryFile() as fp:
         hdf_path = fp.name
         dl = DataLoader('eoddata', engine=hdf_path, mode='HDF',
                         tablename='eod')
         # NOTE(review): the copy is never removed, so it stays in
         # 'eoddata' after this test -- confirm whether other tests rely
         # on that.
         source_file = 'eoddata/INDEX_20180731.txt'
         extra_file = 'eoddata/INDEX_20000000.txt'
         shutil.copy2(source_file, extra_file)
         dl.load_data()

         data_rows = len(pd.read_hdf(hdf_path, 'data/eod'))
         self.assertEqual(data_rows, 12053)
         updated_rows = len(pd.read_hdf(hdf_path, 'updated/eod'))
         self.assertEqual(updated_rows, 6)
Ejemplo n.º 7
0
    def test_wrong_mode(self):
        """Check that mismatched or unknown modes are rejected.

        First case: a file path is given as ``engine`` while ``mode`` is
        'SQL'; ``load_data`` must raise. Second case: constructing a
        loader with ``mode='CSV'`` must raise ``TypeError``.
        """
        with tempfile.NamedTemporaryFile() as fp:
            dl = DataLoader(
                'eoddata',
                engine=fp.name,
                mode='SQL',
                tablename='eod',
            )
            with self.assertRaises(Exception):
                dl.load_data()

        # Callable form of assertRaises: the constructor itself must fail.
        self.assertRaises(
            TypeError,
            DataLoader,
            'eoddata',
            engine='some_random_mode',
            mode='CSV',
            tablename='eod',
        )
Ejemplo n.º 8
0
def test_collate_data_function():
    """Check ``collate_data`` output against what ``DataLoader`` stores.

    Collates 'NASDAQ/data' directly, lower-cases its column names and
    sorts by date and symbol; loads the same directory through a SQL
    ``DataLoader``; then requires equal lengths and 100 successful
    ``compare`` calls on the two frames.
    """
    sort_keys = ['date', 'symbol']

    df = collate_data('NASDAQ/data', parse_dates=['Date'])
    df = df.rename(str.lower, axis='columns')
    df = df.sort_values(by=sort_keys)

    engine = create_engine('sqlite://')
    dl = DataLoader(directory='NASDAQ/data',
                    mode='SQL',
                    engine=engine,
                    tablename='eod')
    dl.load_data()
    stored = pd.read_sql_table('eod', engine)
    df2 = stored.sort_values(by=sort_keys)

    assert len(df) == len(df2)
    # NOTE(review): compare is called repeatedly with identical
    # arguments -- presumably it samples rows at random; confirm.
    for _ in range(100):
        assert compare(df, df2)
Ejemplo n.º 9
0
def test_SQL_post_func():
    """Check that a ``postfunc`` callback can add columns in SQL mode.

    The callback adds 'filename' and 'avgprice' columns; the stored table
    must then have 9 columns, contain both new ones, and keep 'date' as
    a datetime64[ns] column.
    """
    engine = create_engine('sqlite://')
    dl = DataLoader('eoddata',
                    engine=engine,
                    mode='SQL',
                    tablename='eod')

    def add_filename(x, y, z):
        """Add the second argument and an open/close average to ``x``."""
        # The third argument is accepted but not used.
        x['filename'] = y
        price_sum = x['open'] + x['close']
        x['avgprice'] = price_sum / 2
        return x

    dl.load_data(columns=rename, postfunc=add_filename)

    df = pd.read_sql_table('eod', engine)
    date_dtype = df.dtypes['date']
    assert date_dtype == dtype('<M8[ns]')
    assert len(df.columns) == 9
    for added in ('filename', 'avgprice'):
        assert added in df.columns
Ejemplo n.º 10
0
def test_HDF_post_func():
    """Check that a ``postfunc`` callback can add columns in HDF mode.

    The callback adds 'filename' and 'avgprice' columns; the frame stored
    under 'data/eod' must then have 9 columns, contain both new ones, and
    keep 'date' as a datetime64[ns] column.
    """
    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        dl = DataLoader('eoddata',
                        engine=hdf_path,
                        mode='HDF',
                        tablename='eod')

        def add_filename(x, y, z):
            """Add the second argument and an open/close average to ``x``."""
            # The third argument is accepted but not used.
            x['filename'] = y
            price_sum = x['open'] + x['close']
            x['avgprice'] = price_sum / 2
            return x

        dl.load_data(columns=rename, postfunc=add_filename)

        df = pd.read_hdf(hdf_path, 'data/eod')
        date_dtype = df.dtypes['date']
        assert date_dtype == dtype('<M8[ns]')
        assert len(df.columns) == 9
        for added in ('filename', 'avgprice'):
            assert added in df.columns
Ejemplo n.º 11
0
 def test_run_loader_multiple_times(self):
     """Check that repeated ``load_data`` calls give matching row counts.

     Runs ``load_data`` five times against a temporary HDF file and five
     times against a SQL engine, then expects both stores to hold the
     same number of rows, namely 12053.
     """
     repeats = 5
     with tempfile.NamedTemporaryFile() as fp:
         hdf_path = fp.name
         loader = DataLoader('eoddata', engine=hdf_path, mode='HDF',
                             tablename='eod')
         for _ in range(repeats):
             loader.load_data()

         engine = create_engine('sqlite://')
         # The same name is rebound on purpose so the HDF loader is
         # released here, exactly as before the rewrite.
         loader = DataLoader('eoddata', engine=engine, mode='SQL',
                             tablename='eod')
         for _ in range(repeats):
             loader.load_data()

         shape_hdf = len(pd.read_hdf(hdf_path, 'data/eod'))
         shape_sql = len(pd.read_sql_table('eod', engine))
         self.assertEqual(shape_hdf, shape_sql)
         self.assertEqual(shape_hdf, 12053)
Ejemplo n.º 12
0
 def test_create_database(self):
     """Load 'eoddata' into a SQL engine and check row counts.

     Expects 10030 rows in 'eod' and 5 rows in 'updated_eod'.
     """
     engine = create_engine('sqlite://')
     dl = DataLoader('eoddata',
                     engine=engine,
                     mode='SQL',
                     tablename='eod')
     dl.load_data()

     loaded = pd.read_sql_table('eod', engine)
     self.assertEqual(len(loaded), 10030)
     updated = pd.read_sql_table('updated_eod', engine)
     self.assertEqual(len(updated), 5)
Ejemplo n.º 13
0
def test_SQL_parse_dates_auto():
    """Check that 'date' is stored as datetime64[ns] in SQL mode.

    No date-parsing option is passed to ``load_data``; only the
    module-level ``rename`` value is supplied as ``columns``.
    """
    engine = create_engine('sqlite://')
    dl = DataLoader('eoddata',
                    engine=engine,
                    mode='SQL',
                    tablename='eod')
    dl.load_data(columns=rename)

    df = pd.read_sql_table('eod', engine)
    date_dtype = df.dtypes['date']
    assert date_dtype == dtype('<M8[ns]')
Ejemplo n.º 14
0
def test_HDF_parse_dates_auto():
    """Check that 'date' is stored as datetime64[ns] in HDF mode.

    No date-parsing option is passed to ``load_data``; only the
    module-level ``rename`` value is supplied as ``columns``.
    """
    with tempfile.NamedTemporaryFile() as fp:
        hdf_path = fp.name
        dl = DataLoader('eoddata',
                        engine=hdf_path,
                        mode='HDF',
                        tablename='eod')
        dl.load_data(columns=rename)

        df = pd.read_hdf(hdf_path, 'data/eod')
        date_dtype = df.dtypes['date']
        assert date_dtype == dtype('<M8[ns]')