Beispiel #1
0
def custom_index(data, on, window=30, function='median', num=30, sort_mode=False):
    """
    Generate a custom index

    A rolling ``function`` of the ``on`` column is added to the data as the
    ``custom_index`` column (requested with ``lag=1``). The rows are then
    grouped by ``timestamp``, each group is sorted on ``custom_index`` in
    ascending order and ``num`` rows of it are kept.

    data
        dataframe with symbol and timestamp columns
    on
        column on which the index is to be generated
    window
        look back window
    function
        function to be applied
    num
        number of stocks to pick each day
    sort_mode
        if True, keep the first ``num`` rows of each sorted group
        (``head``); otherwise keep the last ``num`` rows (``tail``)
    """
    from fastbt.datasource import DataSource

    source = DataSource(data)
    source.add_rolling(on=on, window=window, function=function,
                       lag=1, col_name='custom_index')

    def pick(frame):
        # Sort one timestamp's rows and take them from the requested end.
        ranked = frame.sort_values(by='custom_index')
        if sort_mode:
            return ranked.head(num)
        return ranked.tail(num)

    per_timestamp = source.data.groupby('timestamp')
    return per_timestamp.apply(pick).reset_index(drop=True)
Beispiel #2
0
    def test_single_symbol(self):
        """Compare the ``sma`` indicator column for symbol "one" with talib's SMA."""
        single = self.df.query('symbol=="one"')
        source = DataSource(single)
        source.add_indicator('SMA', period=3, col_name='sma')
        assert len(source.data) == 6

        expected = talib.SMA(single.close.values, timeperiod=3)
        # If both are equal, there should be no differences
        difference = source.data.sma - expected
        assert difference.sum() == 0
Beispiel #3
0
 def test_initialize_column_rename(self):
     """Check the first two columns are named ``timestamp`` and ``symbol``
     when the source is built from ``TS`` / ``TRADINGSYMBOL`` columns."""
     upper_names = [
         'TS', 'TRADINGSYMBOL', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME',
         'PREVCLOSE'
     ]
     frame = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     frame.columns = upper_names
     # Tell DataSource which of the renamed columns play the two key roles.
     self.ds = DataSource(data=frame, timestamp='TS', symbol='TRADINGSYMBOL')
     for position, expected in enumerate(['timestamp', 'symbol']):
         self.assertEqual(self.ds.data.columns[position], expected)
Beispiel #4
0
def test_rolling_zscore():
    """Spot-check the ``rol_zscore_close_5`` column for symbols A, B and C."""
    # Fixed seed so the random prices, and the expected values, are repeatable.
    np.random.seed(100)
    prices = pd.DataFrame(np.random.randn(100, 4), columns=["open", "high", "low", "close"])
    prices["symbol"] = list("ABCD") * 25
    prices["timestamp"] = list(pd.date_range(end="2018-04-25", periods=25)) * 4
    from fastbt.datasource import DataSource

    source = DataSource(prices)
    source.add_rolling(on="close", window=5, function="zscore")

    column = "rol_zscore_close_5"
    # (query selecting one symbol, positional row within it, expected value)
    expectations = [
        ('symbol=="A"', 8, 0.12),
        ('symbol=="B"', -7, 0.17),
        ('symbol=="C"', -6, -0.48),
    ]
    for query, position, expected in expectations:
        assert source.data.query(query).iloc[position][column].round(2) == expected
Beispiel #5
0
 def test_initialize_column_rename(self):
     """Check the first two columns are named ``timestamp`` and ``symbol``
     when the source is built from ``TS`` / ``TRADINGSYMBOL`` columns."""
     frame = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     frame.columns = "TS TRADINGSYMBOL OPEN HIGH LOW CLOSE VOLUME PREVCLOSE".split()
     # Tell DataSource which of the renamed columns play the two key roles.
     self.ds = DataSource(data=frame, timestamp="TS", symbol="TRADINGSYMBOL")
     for position, expected in enumerate(["timestamp", "symbol"]):
         self.assertEqual(self.ds.data.columns[position], expected)
Beispiel #6
0
def test_rolling_zscore():
    """Spot-check the ``rol_zscore_close_5`` column for symbols A, B and C."""
    # Fixed seed so the random prices, and the expected values, are repeatable.
    np.random.seed(100)
    prices = pd.DataFrame(np.random.randn(100, 4),
                          columns=['open', 'high', 'low', 'close'])
    prices['symbol'] = list('ABCD') * 25
    prices['timestamp'] = list(pd.date_range(end='2018-04-25', periods=25)) * 4
    from fastbt.datasource import DataSource
    source = DataSource(prices)
    source.add_rolling(on='close', window=5, function='zscore')

    column = 'rol_zscore_close_5'
    # (query selecting one symbol, positional row within it, expected value)
    expectations = [
        ('symbol=="A"', 8, 0.12),
        ('symbol=="B"', -7, 0.17),
        ('symbol=="C"', -6, -0.48),
    ]
    for query, position, expected in expectations:
        value = source.data.query(query).iloc[position][column]
        assert value.round(2) == expected
Beispiel #7
0
 def test_reindex_different_fills(self):
     """Check ``reindex`` results for ``method=None`` and ``method='bfill'``."""
     keys = ['symbol', 'timestamp']

     # With method=None the ('C', 3) row is expected to hold a null high.
     unfilled = DataSource(self.df)
     unfilled.reindex([1, 2, 3], method=None)
     print(unfilled.data)
     by_key = unfilled.data.set_index(keys)
     assert pd.isnull(by_key.at[('C', 3), 'high'])

     # With method='bfill' the ('B', 2) close is expected to be 19.
     backfilled = DataSource(self.df)
     backfilled.reindex([1, 2, 3, 4], method='bfill')
     by_key = backfilled.data.set_index(keys)
     assert by_key.at[('B', 2), 'close'] == 19
Beispiel #8
0
 def test_reindex_different_fills(self):
     """Check ``reindex`` results for ``method=None`` and ``method="bfill"``."""
     keys = ["symbol", "timestamp"]

     # With method=None the ("C", 3) row is expected to hold a null high.
     unfilled = DataSource(self.df)
     unfilled.reindex([1, 2, 3], method=None)
     print(unfilled.data)
     by_key = unfilled.data.set_index(keys)
     assert pd.isnull(by_key.at[("C", 3), "high"])

     # With method="bfill" the ("B", 2) close is expected to be 19.
     backfilled = DataSource(self.df)
     backfilled.reindex([1, 2, 3, 4], method="bfill")
     by_key = backfilled.data.set_index(keys)
     assert by_key.at[("B", 2), "close"] == 19
Beispiel #9
0
def prepare_data(data, columns=None, dropna=True):
    """
    Optionally drop NaN rows, then add the requested columns

    data
        source dataframe
    columns
        columns in the required format to be
        added as a datasource; when empty or None
        the (possibly NaN-dropped) data is returned as is
    dropna
        whether to drop NaN's before adding columns
    """
    frame = data.dropna() if dropna else data
    if not columns:
        # Nothing to add: hand back the frame without building a DataSource.
        return frame
    return DataSource(frame).batch_process(columns)
Beispiel #10
0
def transform(data):
    """
    Return the data with two derived columns added

    ``ret`` is added through ``add_pct_change`` (requested with ``lag=1``)
    and ``pret`` through the formula ``(open/prevclose)-1``.
    """
    source = DataSource(data)
    source.add_pct_change(lag=1, col_name='ret')
    source.add_formula('(open/prevclose)-1', col_name='pret')
    transformed = source.data
    return transformed
Beispiel #11
0
 def test_reindex(self):
     """Check row counts and three cell values after ``reindex``."""
     source = DataSource(self.df)
     source.reindex([1, 2, 3])
     assert len(source.data) == 9
     # Check values
     by_key = source.data.set_index(["symbol", "timestamp"])
     expectations = [
         (("A", 1), "open", 0),
         (("B", 2), "close", 7),
         (("C", 3), "high", 9),
     ]
     for key, column, expected in expectations:
         assert by_key.at[key, column] == expected
     # Reindexing the same source to four timestamps should give 12 rows.
     source.reindex([1, 2, 3, 4])
     assert len(source.data) == 12
Beispiel #12
0
 def test_reindex(self):
     """Check row counts and three cell values after ``reindex``."""
     source = DataSource(self.df)
     source.reindex([1, 2, 3])
     assert len(source.data) == 9
     # Check values
     by_key = source.data.set_index(['symbol', 'timestamp'])
     expectations = [
         (('A', 1), 'open', 0),
         (('B', 2), 'close', 7),
         (('C', 3), 'high', 9),
     ]
     for key, column, expected in expectations:
         assert by_key.at[key, column] == expected
     # Reindexing the same source to four timestamps should give 12 rows.
     source.reindex([1, 2, 3, 4])
     assert len(source.data) == 12
Beispiel #13
0
class TestDataSource(unittest.TestCase):
    """Tests for ``DataSource`` driven by ``tests/data/sample.csv``."""

    @staticmethod
    def _read_sample():
        """Read the sample CSV, parsing the ``timestamp`` column as dates."""
        return pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])

    def _assert_cells(self, expectations, ndigits=None):
        """Assert cell values of ``self.ds.data`` keyed by (timestamp, symbol).

        ``expectations`` is an iterable of ``(key, column, expected)``; when
        ``ndigits`` is given the actual value is rounded before comparing.
        """
        by_key = self.ds.data.set_index(['timestamp', 'symbol'])
        for key, column, expected in expectations:
            actual = by_key.at[key, column]
            if ndigits is not None:
                actual = round(actual, ndigits)
            self.assertEqual(actual, expected)

    def setUp(self):
        """Build ``self.ds`` from the sample file."""
        self.ds = DataSource(data=self._read_sample())

    def test_data(self):
        """Spot-check three cells of the data by position."""
        cells = [((20, 1), 'five'), ((14, 3), 112), ((24, 7), 10.54)]
        for position, expected in cells:
            self.assertEqual(self.ds.data.iloc[position], expected)

    def test_data_without_sort(self):
        """Spot-check cells when the source is built with ``sort=False``."""
        self.ds = DataSource(data=self._read_sample(), sort=False)
        frame = self.ds.data
        self.assertEqual(frame.iloc[9, 4], 999)
        self.assertEqual(frame.iloc[24, 6], 41688)
        self.assertEqual(frame.at[4, 'close'], 10.6)

    def test_initialize_case(self):
        """Upper-case input columns should come out lower-case."""
        raw = self._read_sample()
        raw.columns = [name.upper() for name in raw.columns]
        self.assertEqual(raw.columns[0], 'TIMESTAMP')
        self.ds = DataSource(data=raw)
        self.assertEqual(self.ds.data.columns[0], 'timestamp')

    def test_initialize_column_rename(self):
        """``TS`` / ``TRADINGSYMBOL`` should become ``timestamp`` / ``symbol``."""
        raw = self._read_sample()
        raw.columns = 'TS TRADINGSYMBOL OPEN HIGH LOW CLOSE VOLUME PREVCLOSE'.split()
        self.ds = DataSource(data=raw, timestamp='TS', symbol='TRADINGSYMBOL')
        for position, expected in enumerate(['timestamp', 'symbol']):
            self.assertEqual(self.ds.data.columns[position], expected)

    def test_add_lag(self):
        """``add_lag`` adds default-named columns and keeps the row count."""
        rows_before = len(self.ds.data)
        self.ds.add_lag(on='close')
        self.ds.add_lag(on='volume', period=2)
        self._assert_cells([
            (('2018-01-04', 'one'), 'lag_close_1', 11),
            (('2018-01-06', 'six'), 'lag_volume_2', 86014),
        ])
        self.assertEqual(len(self.ds.data.columns), 10)
        self.assertEqual(len(self.ds.data), rows_before)

    def test_add_lag_column_rename(self):
        """``add_lag`` with ``col_name`` stores the lag under that name."""
        self.ds.add_lag(on='close')
        self.ds.add_lag(on='close', col_name='some_col')
        self._assert_cells([
            (('2018-01-04', 'one'), 'lag_close_1', 11),
            (('2018-01-04', 'one'), 'some_col', 11),
            (('2018-01-05', 'three'), 'some_col', 109),
        ])

    def test_add_pct_change(self):
        """Check ``add_pct_change`` values, naming and resulting column count."""
        self.ds.add_pct_change(on='close')
        self.ds.add_pct_change(on='close', period=2)
        self.ds.add_pct_change(on='close', period=2, col_name='new_col')
        self._assert_cells([
            (('2018-01-05', 'three'), 'chg_close_1', -0.07),
            (('2018-01-06', 'five'), 'chg_close_1', 0.17),
            (('2018-01-05', 'four'), 'chg_close_2', 0.05),
            (('2018-01-05', 'four'), 'new_col', 0.05),
            (('2018-01-03', 'six'), 'new_col', -0.1),
        ], ndigits=2)
        by_key = self.ds.data.set_index(['timestamp', 'symbol'])
        missing = pd.isna(by_key.at[('2018-01-02', 'one'), 'new_col'])
        self.assertEqual(missing, True)
        self.assertEqual(len(self.ds.data.columns), 11)

    def test_add_pct_change_lag(self):
        """Check ``add_pct_change`` values when a ``lag`` is requested."""
        self.ds.add_pct_change(on='close', period=2, lag=1)
        self.ds.add_pct_change(on='close', period=1, lag=2)
        self._assert_cells([
            (('2018-01-04', 'four'), 'chg_close_2', 0.09),
            (('2018-01-04', 'four'), 'chg_close_1', 0.01),
            (('2018-01-06', 'three'), 'chg_close_1', -0.01),
        ], ndigits=2)

    def test_add_pct_change_lag_col_name(self):
        """Check lagged ``add_pct_change`` with default and custom names."""
        self.ds.add_pct_change(on='high', period=2, lag=1)
        self.ds.add_pct_change(on='close', period=1, lag=2,
                               col_name='lagged_2')
        self._assert_cells([
            (('2018-01-05', 'six'), 'chg_high_2', -0.04),
            (('2018-01-04', 'four'), 'lagged_2', 0.01),
        ], ndigits=2)

    def test_formula_add_col_name(self):
        """``add_formula`` stores the result under the given column name."""
        self.ds.add_formula('open+close', 'new_col')
        self.ds.add_formula('volume/close', 'new_col_2')
        self._assert_cells([
            (('2018-01-04', 'four'), 'new_col', 336),
            (('2018-01-06', 'one'), 'new_col_2', 77755.77),
        ], ndigits=2)

    def test_formula_case_insensitive(self):
        """Upper-case formulas and column names give lower-case columns."""
        self.ds.add_formula('OPEN+CLOSE', 'new_col')
        self.ds.add_formula('volume/close', 'NEW_COL_2')
        self._assert_cells([
            (('2018-01-04', 'four'), 'new_col', 336),
            (('2018-01-06', 'one'), 'new_col_2', 77755.77),
        ], ndigits=2)

    def test_formula_calculated_column(self):
        """A formula may reference columns added by earlier formulas."""
        self.ds.add_formula('(open+close)*100', 'new_col_1')
        self.ds.add_formula('volume/100', 'new_col_2')
        self.ds.add_formula('new_col_1+new_col_2', 'new_col_3')
        self._assert_cells([
            (('2018-01-06', 'one'), 'new_col_3', 10190.6),
            (('2018-01-05', 'two'), 'new_col_3', 200389.97),
        ], ndigits=2)

    def test_rolling_simple(self):
        """Compare ``add_rolling(2)`` with a pandas rolling mean of ``close``."""
        only_one = 'symbol == "one"'
        reference = self._read_sample().query(only_one)
        reference['r2'] = reference['close'].rolling(2).mean()
        self.ds.add_rolling(2, col_name='r2')
        actual = self.ds.data.query(only_one)
        print('RESULT', reference['r2'], actual['r2'])
        for want, got in zip(reference['r2'], actual['r2']):
            # Rows where the reference has no value yet are not compared.
            if pd.isna(want):
                continue
            assert want == got

    def test_rolling_values(self):
        """Check rolling max of ``volume`` with no lag, lag 1 and lag 2."""
        self.ds.add_rolling(4, on='volume', function='max')
        self._assert_cells([
            (('2018-01-05', 'five'), 'rol_max_volume_4', 971704),
            (('2018-01-05', 'six'), 'rol_max_volume_4', 195539),
            (('2018-01-04', 'three'), 'rol_max_volume_4', 433733),
        ])
        # Adding lag and testing
        self.ds.add_rolling(4, on='volume', function='max', lag=1)
        self._assert_cells([
            (('2018-01-06', 'five'), 'rol_max_volume_4', 971704),
            (('2018-01-06', 'six'), 'rol_max_volume_4', 195539),
            (('2018-01-05', 'three'), 'rol_max_volume_4', 433733),
        ])
        # Testing for 2 lags and column name
        self.ds.add_rolling(4, on='volume', function='max', lag=2,
                            col_name='check')
        self._assert_cells([(('2018-01-06', 'three'), 'check', 433733)])

    def test_batch(self):
        """Run a mixed batch and check column counts and the row count."""
        rows_before = len(self.ds.data)
        steps = [
            {'P': {'on': 'close', 'period': 1, 'lag': 1}},
            {'L': {'on': 'volume', 'period': 1}},
            {'F': {'formula': '(open+close)/2', 'col_name': 'AvgPrice'}},
            {'I': {'indicator': 'SMA', 'period': 3, 'lag': 1,
                   'col_name': 'SMA3'}},
            {'F': {'formula': 'avgprice + sma3', 'col_name': 'final'}},
            {'R': {'window': 3, 'function': 'mean'}},
        ]
        result = self.ds.batch_process(steps)
        by_key = result.set_index(['timestamp', 'symbol'])
        self.assertEqual(len(by_key.columns), 12)
        self.assertEqual(len(self.ds.data.columns), 14)
        self.assertEqual(len(self.ds.data), rows_before)

    def test_raise_error_if_not_dataframe(self):
        """Placeholder: contains no assertions yet."""
        pass
Beispiel #14
0
 def setUp(self):
     """Read the sample CSV and store a ``DataSource`` built from it on ``self.ds``."""
     sample = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     self.ds = DataSource(data=sample)
Beispiel #15
0
 def test_initialize_case(self):
     """Upper-case input columns should come out lower-case."""
     raw = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     raw.columns = [name.upper() for name in raw.columns]
     # Sanity-check the renaming before handing the frame to DataSource.
     self.assertEqual(raw.columns[0], "TIMESTAMP")
     self.ds = DataSource(data=raw)
     first_column = self.ds.data.columns[0]
     self.assertEqual(first_column, "timestamp")
Beispiel #16
0
 def test_initialize_case(self):
     """Upper-case input columns should come out lower-case."""
     raw = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     raw.columns = [name.upper() for name in raw.columns]
     # Sanity-check the renaming before handing the frame to DataSource.
     self.assertEqual(raw.columns[0], 'TIMESTAMP')
     self.ds = DataSource(data=raw)
     first_column = self.ds.data.columns[0]
     self.assertEqual(first_column, 'timestamp')
Beispiel #17
0
class TestDataSource(unittest.TestCase):
    """Tests for ``DataSource`` driven by ``tests/data/sample.csv``."""

    @staticmethod
    def _read_sample():
        """Read the sample CSV, parsing the ``timestamp`` column as dates."""
        return pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])

    def _assert_cells(self, expectations, ndigits=None):
        """Assert cell values of ``self.ds.data`` keyed by (timestamp, symbol).

        ``expectations`` is an iterable of ``(key, column, expected)``; when
        ``ndigits`` is given the actual value is rounded before comparing.
        """
        by_key = self.ds.data.set_index(["timestamp", "symbol"])
        for key, column, expected in expectations:
            actual = by_key.at[key, column]
            if ndigits is not None:
                actual = round(actual, ndigits)
            self.assertEqual(actual, expected)

    def setUp(self):
        """Build ``self.ds`` from the sample file."""
        self.ds = DataSource(data=self._read_sample())

    def test_data(self):
        """Spot-check three cells of the data by position."""
        cells = [((20, 1), "five"), ((14, 3), 112), ((24, 7), 10.54)]
        for position, expected in cells:
            self.assertEqual(self.ds.data.iloc[position], expected)

    def test_data_without_sort(self):
        """Spot-check cells when the source is built with ``sort=False``."""
        self.ds = DataSource(data=self._read_sample(), sort=False)
        frame = self.ds.data
        self.assertEqual(frame.iloc[9, 4], 999)
        self.assertEqual(frame.iloc[24, 6], 41688)
        self.assertEqual(frame.at[4, "close"], 10.6)

    def test_initialize_case(self):
        """Upper-case input columns should come out lower-case."""
        raw = self._read_sample()
        raw.columns = [name.upper() for name in raw.columns]
        self.assertEqual(raw.columns[0], "TIMESTAMP")
        self.ds = DataSource(data=raw)
        self.assertEqual(self.ds.data.columns[0], "timestamp")

    def test_initialize_column_rename(self):
        """``TS`` / ``TRADINGSYMBOL`` should become ``timestamp`` / ``symbol``."""
        raw = self._read_sample()
        raw.columns = "TS TRADINGSYMBOL OPEN HIGH LOW CLOSE VOLUME PREVCLOSE".split()
        self.ds = DataSource(data=raw, timestamp="TS", symbol="TRADINGSYMBOL")
        for position, expected in enumerate(["timestamp", "symbol"]):
            self.assertEqual(self.ds.data.columns[position], expected)

    def test_add_lag(self):
        """``add_lag`` adds default-named columns and keeps the row count."""
        rows_before = len(self.ds.data)
        self.ds.add_lag(on="close")
        self.ds.add_lag(on="volume", period=2)
        self._assert_cells(
            [
                (("2018-01-04", "one"), "lag_close_1", 11),
                (("2018-01-06", "six"), "lag_volume_2", 86014),
            ]
        )
        self.assertEqual(len(self.ds.data.columns), 10)
        self.assertEqual(len(self.ds.data), rows_before)

    def test_add_lag_column_rename(self):
        """``add_lag`` with ``col_name`` stores the lag under that name."""
        self.ds.add_lag(on="close")
        self.ds.add_lag(on="close", col_name="some_col")
        self._assert_cells(
            [
                (("2018-01-04", "one"), "lag_close_1", 11),
                (("2018-01-04", "one"), "some_col", 11),
                (("2018-01-05", "three"), "some_col", 109),
            ]
        )

    def test_add_pct_change(self):
        """Check ``add_pct_change`` values, naming and resulting column count."""
        self.ds.add_pct_change(on="close")
        self.ds.add_pct_change(on="close", period=2)
        self.ds.add_pct_change(on="close", period=2, col_name="new_col")
        self._assert_cells(
            [
                (("2018-01-05", "three"), "chg_close_1", -0.07),
                (("2018-01-06", "five"), "chg_close_1", 0.17),
                (("2018-01-05", "four"), "chg_close_2", 0.05),
                (("2018-01-05", "four"), "new_col", 0.05),
                (("2018-01-03", "six"), "new_col", -0.1),
            ],
            ndigits=2,
        )
        by_key = self.ds.data.set_index(["timestamp", "symbol"])
        missing = pd.isna(by_key.at[("2018-01-02", "one"), "new_col"])
        self.assertEqual(missing, True)
        self.assertEqual(len(self.ds.data.columns), 11)

    def test_add_pct_change_lag(self):
        """Check ``add_pct_change`` values when a ``lag`` is requested."""
        self.ds.add_pct_change(on="close", period=2, lag=1)
        self.ds.add_pct_change(on="close", period=1, lag=2)
        self._assert_cells(
            [
                (("2018-01-04", "four"), "chg_close_2", 0.09),
                (("2018-01-04", "four"), "chg_close_1", 0.01),
                (("2018-01-06", "three"), "chg_close_1", -0.01),
            ],
            ndigits=2,
        )

    def test_add_pct_change_lag_col_name(self):
        """Check lagged ``add_pct_change`` with default and custom names."""
        self.ds.add_pct_change(on="high", period=2, lag=1)
        self.ds.add_pct_change(on="close", period=1, lag=2, col_name="lagged_2")
        self._assert_cells(
            [
                (("2018-01-05", "six"), "chg_high_2", -0.04),
                (("2018-01-04", "four"), "lagged_2", 0.01),
            ],
            ndigits=2,
        )

    def test_formula_add_col_name(self):
        """``add_formula`` stores the result under the given column name."""
        self.ds.add_formula("open+close", "new_col")
        self.ds.add_formula("volume/close", "new_col_2")
        self._assert_cells(
            [
                (("2018-01-04", "four"), "new_col", 336),
                (("2018-01-06", "one"), "new_col_2", 77755.77),
            ],
            ndigits=2,
        )

    def test_formula_case_insensitive(self):
        """Upper-case formulas and column names give lower-case columns."""
        self.ds.add_formula("OPEN+CLOSE", "new_col")
        self.ds.add_formula("volume/close", "NEW_COL_2")
        self._assert_cells(
            [
                (("2018-01-04", "four"), "new_col", 336),
                (("2018-01-06", "one"), "new_col_2", 77755.77),
            ],
            ndigits=2,
        )

    def test_formula_calculated_column(self):
        """A formula may reference columns added by earlier formulas."""
        self.ds.add_formula("(open+close)*100", "new_col_1")
        self.ds.add_formula("volume/100", "new_col_2")
        self.ds.add_formula("new_col_1+new_col_2", "new_col_3")
        self._assert_cells(
            [
                (("2018-01-06", "one"), "new_col_3", 10190.6),
                (("2018-01-05", "two"), "new_col_3", 200389.97),
            ],
            ndigits=2,
        )

    def test_rolling_simple(self):
        """Compare ``add_rolling(2)`` with a pandas rolling mean of ``close``."""
        only_one = 'symbol == "one"'
        reference = self._read_sample().query(only_one)
        reference["r2"] = reference["close"].rolling(2).mean()
        self.ds.add_rolling(2, col_name="r2")
        actual = self.ds.data.query(only_one)
        print("RESULT", reference["r2"], actual["r2"])
        for want, got in zip(reference["r2"], actual["r2"]):
            # Rows where the reference has no value yet are not compared.
            if pd.isna(want):
                continue
            assert want == got

    def test_rolling_values(self):
        """Check rolling max of ``volume`` with no lag, lag 1 and lag 2."""
        self.ds.add_rolling(4, on="volume", function="max")
        self._assert_cells(
            [
                (("2018-01-05", "five"), "rol_max_volume_4", 971704),
                (("2018-01-05", "six"), "rol_max_volume_4", 195539),
                (("2018-01-04", "three"), "rol_max_volume_4", 433733),
            ]
        )
        # Adding lag and testing
        self.ds.add_rolling(4, on="volume", function="max", lag=1)
        self._assert_cells(
            [
                (("2018-01-06", "five"), "rol_max_volume_4", 971704),
                (("2018-01-06", "six"), "rol_max_volume_4", 195539),
                (("2018-01-05", "three"), "rol_max_volume_4", 433733),
            ]
        )
        # Testing for 2 lags and column name
        self.ds.add_rolling(4, on="volume", function="max", lag=2, col_name="check")
        self._assert_cells([(("2018-01-06", "three"), "check", 433733)])

    def test_batch(self):
        """Run a mixed batch and check column counts and the row count."""
        rows_before = len(self.ds.data)
        # Each step is a one-key dict: operation code -> keyword arguments.
        steps = [
            {"P": dict(on="close", period=1, lag=1)},
            {"L": dict(on="volume", period=1)},
            {"F": dict(formula="(open+close)/2", col_name="AvgPrice")},
            {"I": dict(indicator="SMA", period=3, lag=1, col_name="SMA3")},
            {"F": dict(formula="avgprice + sma3", col_name="final")},
            {"R": dict(window=3, function="mean")},
        ]
        result = self.ds.batch_process(steps)
        by_key = result.set_index(["timestamp", "symbol"])
        self.assertEqual(len(by_key.columns), 12)
        self.assertEqual(len(self.ds.data.columns), 14)
        self.assertEqual(len(self.ds.data), rows_before)

    def test_raise_error_if_not_dataframe(self):
        """Placeholder: contains no assertions yet."""
        pass
Beispiel #18
0
 def test_data_without_sort(self):
     """Spot-check cells when the source is built with ``sort=False``."""
     raw = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     self.ds = DataSource(data=raw, sort=False)
     frame = self.ds.data
     # Two positional lookups, then one by index label and column name.
     for position, expected in [((9, 4), 999), ((24, 6), 41688)]:
         self.assertEqual(frame.iloc[position], expected)
     self.assertEqual(frame.at[4, 'close'], 10.6)
Beispiel #19
0
 def setUp(self):
     """Read the sample CSV and store a ``DataSource`` built from it on ``self.ds``."""
     sample = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     self.ds = DataSource(data=sample)
Beispiel #20
0
def transform(data):
    """
    Apply the necessary transformation to the given data

    The data is wrapped in a ``DataSource`` whose timestamp column is
    ``date``, and the following columns are added, in this order:

    rmax2..rmax7, rmin2..rmin7
        rolling max of ``high`` / min of ``low`` per window (interleaved)
    pret, idret, qtrd
        formula columns
    ret1..ret3
        percentage change of ``close`` over 1 to 3 periods
    vol2, vol3
        rolling sum of ``tottrdval``
    prev_tottrdval, prev_perdel, prev_qtrd
        one-period lags

    Every rolling and percentage-change call is made with ``lag=1``.
    """
    source = DataSource(data, timestamp='date')

    # The max/min calls stay interleaved per window to keep column order.
    for window in range(2, 8):
        for column, func in (('high', 'max'), ('low', 'min')):
            source.add_rolling(on=column, window=window,
                               col_name='r' + func + str(window),
                               function=func, lag=1)

    formulas = (
        ('pret', '(open/prevclose)-1'),
        ('idret', '(close/open)-1'),
        ('qtrd', '(tottrdval/totaltrades)'),
    )
    for name, formula in formulas:
        source.add_formula(formula, col_name=name)

    for period in (1, 2, 3):
        source.add_pct_change(on='close', period=period,
                              col_name='ret' + str(period), lag=1)

    for window in (2, 3):
        source.add_rolling(on='tottrdval', window=window,
                           col_name='vol' + str(window),
                           function='sum', lag=1)

    # qtrd is created by the formula step above, so it can be lagged here.
    for column in ('tottrdval', 'perdel', 'qtrd'):
        source.add_lag(on=column, period=1, col_name='prev_' + column)

    return source.data