コード例 #1
0
def custom_index(data, on, window=30, function='median', num=30, sort_mode=False):
    """
    Generate a custom index
    data
        dataframe with symbol and timestamp columns
    on
        column on which the index is to be generated
    window
        look back window
    function
        function to be applied
    num
        number of stocks to pick each day
    sort_mode
        whether to pick top stocks or bottom stocks.
        True picks the ``num`` rows with the smallest ``custom_index``
        value for each timestamp; False (default) picks the ``num``
        rows that sort last, i.e. the largest values
    returns
        dataframe holding the selected rows of every timestamp,
        including the added ``custom_index`` column, with a fresh
        integer index
    """
    from fastbt.datasource import DataSource

    ds = DataSource(data)
    # The rolling statistic is stored in the 'custom_index' column;
    # lag=1 is presumably there to avoid using the current day's value
    # (TODO confirm against DataSource.add_rolling).
    ds.add_rolling(on=on, window=window, function=function,
                   lag=1, col_name='custom_index')

    def pick(day):
        # Ascending sort: head() is the low end, tail() the high end.
        # NOTE(review): with pandas' default na_position, NaN values sort
        # last and can therefore end up in the tail() selection.
        ranked = day.sort_values(by='custom_index')
        return ranked.head(num) if sort_mode else ranked.tail(num)

    return ds.data.groupby('timestamp').apply(pick).reset_index(drop=True)
コード例 #2
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
    def test_single_symbol(self):
        # An SMA added through DataSource for a single symbol is compared
        # against talib.SMA computed directly on the same close prices.
        single = self.df.query('symbol=="one"')
        source = DataSource(single)
        source.add_indicator('SMA', period=3, col_name='sma')
        assert len(source.data) == 6

        expected = talib.SMA(single.close.values, timeperiod=3)
        # If both are equal, there should be no differences
        difference = source.data.sma - expected
        assert difference.sum() == 0
コード例 #3
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def test_initialize_column_rename(self):
     # Columns named via the timestamp/symbol arguments are expected to
     # come back as 'timestamp' and 'symbol' in the first two positions.
     frame = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     renamed = [
         'TS', 'TRADINGSYMBOL', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME',
         'PREVCLOSE'
     ]
     frame.columns = renamed
     self.ds = DataSource(data=frame, timestamp='TS', symbol='TRADINGSYMBOL')
     for position, expected in enumerate(('timestamp', 'symbol')):
         self.assertEqual(self.ds.data.columns[position], expected)
コード例 #4
0
def test_rolling_zscore():
    """Check spot values of a 5-period rolling z-score on seeded random data."""
    from fastbt.datasource import DataSource

    np.random.seed(100)
    frame = pd.DataFrame(np.random.randn(100, 4), columns=["open", "high", "low", "close"])
    frame["symbol"] = list("ABCD") * 25
    frame["timestamp"] = list(pd.date_range(end="2018-04-25", periods=25)) * 4

    source = DataSource(frame)
    source.add_rolling(on="close", window=5, function="zscore")

    column = "rol_zscore_close_5"
    # (query selecting one symbol, positional row, expected rounded z-score)
    checks = (
        ('symbol=="A"', 8, 0.12),
        ('symbol=="B"', -7, 0.17),
        ('symbol=="C"', -6, -0.48),
    )
    for query, position, expected in checks:
        rows = source.data.query(query)
        assert rows.iloc[position][column].round(2) == expected
コード例 #5
0
 def test_initialize_column_rename(self):
     # The source columns named by the timestamp= and symbol= arguments
     # should appear as "timestamp" and "symbol" at positions 0 and 1.
     upper_names = [
         "TS",
         "TRADINGSYMBOL",
         "OPEN",
         "HIGH",
         "LOW",
         "CLOSE",
         "VOLUME",
         "PREVCLOSE",
     ]
     sample = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     sample.columns = upper_names
     self.ds = DataSource(data=sample, timestamp="TS", symbol="TRADINGSYMBOL")
     self.assertEqual(self.ds.data.columns[0], "timestamp")
     self.assertEqual(self.ds.data.columns[1], "symbol")
コード例 #6
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
def test_rolling_zscore():
    """Verify selected rolling z-score values (window 5) on seeded random prices."""
    from fastbt.datasource import DataSource

    np.random.seed(100)
    prices = pd.DataFrame(np.random.randn(100, 4),
                          columns=['open', 'high', 'low', 'close'])
    prices['symbol'] = list('ABCD') * 25
    prices['timestamp'] = list(pd.date_range(end='2018-04-25', periods=25)) * 4

    source = DataSource(prices)
    source.add_rolling(on='close', window=5, function='zscore')

    def zscore_at(query, position):
        # Rounded z-score at a positional row within one symbol's rows.
        row = source.data.query(query).iloc[position]
        return row['rol_zscore_close_5'].round(2)

    assert zscore_at('symbol=="A"', 8) == 0.12
    assert zscore_at('symbol=="B"', -7) == 0.17
    assert zscore_at('symbol=="C"', -6) == -0.48
コード例 #7
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def test_reindex_different_fills(self):
     # With method=None the ('C', 3) 'high' entry is expected to be null;
     # with method='bfill' the ('B', 2) 'close' entry is expected to be 19.
     keys = ['symbol', 'timestamp']

     unfilled = DataSource(self.df)
     unfilled.reindex([1, 2, 3], method=None)
     print(unfilled.data)
     missing = unfilled.data.set_index(keys).at[('C', 3), 'high']
     assert pd.isnull(missing)

     backfilled = DataSource(self.df)
     backfilled.reindex([1, 2, 3, 4], method='bfill')
     filled = backfilled.data.set_index(keys).at[('B', 2), 'close']
     assert filled == 19
コード例 #8
0
 def test_reindex_different_fills(self):
     # Compare two fill methods of reindex: None should leave the
     # ("C", 3) "high" value null, "bfill" should give 19 for ("B", 2) "close".
     index_columns = ["symbol", "timestamp"]

     source = DataSource(self.df)
     source.reindex([1, 2, 3], method=None)
     print(source.data)
     by_key = source.data.set_index(index_columns)
     assert pd.isnull(by_key.at[("C", 3), "high"])

     source = DataSource(self.df)
     source.reindex([1, 2, 3, 4], method="bfill")
     by_key = source.data.set_index(index_columns)
     assert by_key.at[("B", 2), "close"] == 19
コード例 #9
0
ファイル: rapid.py プロジェクト: rahulmr/fastbt
def prepare_data(data, columns=None, dropna=True):
    """
    Optionally drop NaN rows and add derived columns
    data
        source dataframe
    columns
        columns in the required format to be
        added as a datasource; when empty or None
        no columns are added
    dropna
        whether to drop NaN's before adding columns
    returns
        the (possibly NaN-dropped) dataframe when no columns
        are requested, otherwise the result of
        DataSource.batch_process for the given columns
    """
    frame = data.dropna() if dropna else data
    if not columns:
        return frame
    return DataSource(frame).batch_process(columns)
コード例 #10
0
ファイル: simple.py プロジェクト: uberdeveloper/fastbt
def transform(data):
    """
    Return the data with two derived columns added:
    'ret' (percentage change, lag 1) and
    'pret' (open relative to prevclose, minus one)
    """
    source = DataSource(data)
    source.add_pct_change(lag=1, col_name='ret')
    source.add_formula('(open/prevclose)-1', col_name='pret')
    transformed = source.data
    return transformed
コード例 #11
0
 def test_reindex(self):
     # Reindexing to 3 timestamps should give 9 rows, to 4 timestamps
     # 12 rows; a few cell values are spot-checked in between.
     source = DataSource(self.df)
     source.reindex([1, 2, 3])
     assert len(source.data) == 9
     # Check values
     by_key = source.data.set_index(["symbol", "timestamp"])
     assert by_key.at[("A", 1), "open"] == 0
     assert by_key.at[("B", 2), "close"] == 7
     assert by_key.at[("C", 3), "high"] == 9
     source.reindex([1, 2, 3, 4])
     assert len(source.data) == 12
コード例 #12
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def test_reindex(self):
     # Row counts after reindexing to 3 and then 4 timestamps are
     # expected to be 9 and 12, with individual cells spot-checked.
     source = DataSource(self.df)
     source.reindex([1, 2, 3])
     assert len(source.data) == 9
     # Check values
     expected_cells = (
         (('A', 1), 'open', 0),
         (('B', 2), 'close', 7),
         (('C', 3), 'high', 9),
     )
     for key, column, value in expected_cells:
         indexed = source.data.set_index(['symbol', 'timestamp'])
         assert indexed.at[key, column] == value
     source.reindex([1, 2, 3, 4])
     assert len(source.data) == 12
コード例 #13
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
class TestDataSource(unittest.TestCase):
    """Tests for DataSource built on the tests/data/sample.csv fixture.

    Each test starts from a fresh DataSource created in setUp and checks
    individual cell values, column names and column counts after calling
    the add_* / batch_process methods.
    """

    def setUp(self):
        # Fresh DataSource over the sample fixture for every test.
        df = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
        self.ds = DataSource(data=df)

    def test_data(self):
        # Positional spot checks on the data held by the default DataSource.
        self.assertEqual(self.ds.data.iloc[20, 1], 'five')
        self.assertEqual(self.ds.data.iloc[14, 3], 112)
        self.assertEqual(self.ds.data.iloc[24, 7], 10.54)

    def test_data_without_sort(self):
        # Same kind of spot checks, but with sort=False passed to DataSource.
        df = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
        self.ds = DataSource(data=df, sort=False)
        self.assertEqual(self.ds.data.iloc[9, 4], 999)
        self.assertEqual(self.ds.data.iloc[24, 6], 41688)
        self.assertEqual(self.ds.data.at[4, 'close'], 10.6)

    def test_initialize_case(self):
        # Upper-cased input columns are expected to come back lower-cased.
        df = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
        df.columns = [x.upper() for x in df.columns]
        self.assertEqual(df.columns[0], 'TIMESTAMP')
        self.ds = DataSource(data=df)
        self.assertEqual(self.ds.data.columns[0], 'timestamp')

    def test_initialize_column_rename(self):
        # Columns named by timestamp=/symbol= should end up as
        # 'timestamp' and 'symbol' in the first two positions.
        df = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
        df.columns = [
            'TS', 'TRADINGSYMBOL', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME',
            'PREVCLOSE'
        ]
        self.ds = DataSource(data=df, timestamp='TS', symbol='TRADINGSYMBOL')
        self.assertEqual(self.ds.data.columns[0], 'timestamp')
        self.assertEqual(self.ds.data.columns[1], 'symbol')

    def test_add_lag(self):
        # Two lag columns with default names (lag_<on>_<period>) are added;
        # the row count must not change and 8 + 2 = 10 columns are expected.
        length = len(self.ds.data)
        idx = pd.IndexSlice
        self.ds.add_lag(on='close')
        self.ds.add_lag(on='volume', period=2)
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        self.assertEqual(d.at[idx['2018-01-04', 'one'], 'lag_close_1'], 11)
        self.assertEqual(d.at[idx['2018-01-06', 'six'], 'lag_volume_2'], 86014)
        self.assertEqual(len(self.ds.data.columns), 10)
        self.assertEqual(len(self.ds.data), length)

    def test_add_lag_column_rename(self):
        # The same lag stored under the default name and under a custom
        # col_name should hold the same value.
        idx = pd.IndexSlice
        self.ds.add_lag(on='close')
        self.ds.add_lag(on='close', col_name='some_col')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        self.assertEqual(d.at[idx['2018-01-04', 'one'], 'lag_close_1'], 11)
        self.assertEqual(d.at[idx['2018-01-04', 'one'], 'some_col'], 11)
        self.assertEqual(d.at[idx['2018-01-05', 'three'], 'some_col'], 109)

    def test_add_pct_change(self):
        # Default names are chg_<on>_<period>; three added columns give
        # 8 + 3 = 11 columns. Values are compared after rounding to 2 places.
        idx = pd.IndexSlice
        self.ds.add_pct_change(on='close')
        self.ds.add_pct_change(on='close', period=2)
        self.ds.add_pct_change(on='close', period=2, col_name='new_col')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-05', 'three'], 'chg_close_1']),
                         -0.07)
        self.assertEqual(R(d.at[idx['2018-01-06', 'five'], 'chg_close_1']),
                         0.17)
        self.assertEqual(R(d.at[idx['2018-01-05', 'four'], 'chg_close_2']),
                         0.05)
        self.assertEqual(R(d.at[idx['2018-01-05', 'four'], 'new_col']), 0.05)
        self.assertEqual(R(d.at[idx['2018-01-03', 'six'], 'new_col']), -0.1)
        # The 2-period change on 2018-01-02 is expected to be missing.
        self.assertEqual(pd.isna(d.at[idx['2018-01-02', 'one'], 'new_col']),
                         True)
        self.assertEqual(len(self.ds.data.columns), 11)

    def test_add_pct_change_lag(self):
        # The lag argument does not show up in the default column name:
        # only the period does (chg_close_2, chg_close_1).
        idx = pd.IndexSlice
        self.ds.add_pct_change(on='close', period=2, lag=1)
        self.ds.add_pct_change(on='close', period=1, lag=2)
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-04', 'four'], 'chg_close_2']),
                         0.09)
        self.assertEqual(R(d.at[idx['2018-01-04', 'four'], 'chg_close_1']),
                         0.01)
        self.assertEqual(R(d.at[idx['2018-01-06', 'three'], 'chg_close_1']),
                         -0.01)

    def test_add_pct_change_lag_col_name(self):
        # Lagged percentage change with a default name and a custom name.
        idx = pd.IndexSlice
        self.ds.add_pct_change(on='high', period=2, lag=1)
        self.ds.add_pct_change(on='close',
                               period=1,
                               lag=2,
                               col_name='lagged_2')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-05', 'six'], 'chg_high_2']),
                         -0.04)
        self.assertEqual(R(d.at[idx['2018-01-04', 'four'], 'lagged_2']), 0.01)

    def test_formula_add_col_name(self):
        # Formula results are stored under the given column names.
        idx = pd.IndexSlice
        self.ds.add_formula('open+close', 'new_col')
        self.ds.add_formula('volume/close', 'new_col_2')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-04', 'four'], 'new_col']), 336)
        self.assertEqual(R(d.at[idx['2018-01-06', 'one'], 'new_col_2']),
                         77755.77)

    def test_formula_case_insensitive(self):
        # Upper-case names in the formula and in col_name are expected to
        # behave like their lower-case forms ('NEW_COL_2' is read back as
        # 'new_col_2').
        idx = pd.IndexSlice
        self.ds.add_formula('OPEN+CLOSE', 'new_col')
        self.ds.add_formula('volume/close', 'NEW_COL_2')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-04', 'four'], 'new_col']), 336)
        self.assertEqual(R(d.at[idx['2018-01-06', 'one'], 'new_col_2']),
                         77755.77)

    def test_formula_calculated_column(self):
        # A formula may reference columns created by earlier formulas.
        idx = pd.IndexSlice
        self.ds.add_formula('(open+close)*100', 'new_col_1')
        self.ds.add_formula('volume/100', 'new_col_2')
        self.ds.add_formula('new_col_1+new_col_2', 'new_col_3')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx['2018-01-06', 'one'], 'new_col_3']),
                         10190.6)
        self.assertEqual(R(d.at[idx['2018-01-05', 'two'], 'new_col_3']),
                         200389.97)

    def test_rolling_simple(self):
        # Compares add_rolling(2) for symbol "one" with a plain pandas
        # rolling(2).mean() on 'close'.
        # NOTE(review): this presumably relies on add_rolling defaulting to
        # on='close' and function='mean' -- confirm against DataSource.
        from pandas import isna
        q = 'symbol == "one"'
        df = pd.read_csv('tests/data/sample.csv',
                         parse_dates=['timestamp']).query(q)
        df['r2'] = df['close'].rolling(2).mean()
        self.ds.add_rolling(2, col_name='r2')
        df2 = self.ds.data.query(q)
        print('RESULT', df['r2'], df2['r2'])
        # Only positions where the reference value is not NaN are compared.
        for a, b in zip(df['r2'], df2['r2']):
            if not (isna(a)):
                assert a == b

    def test_rolling_values(self):
        # Rolling max of volume over 4 periods, without lag, with lag=1
        # and with lag=2 under a custom column name.
        idx = pd.IndexSlice
        self.ds.add_rolling(4, on='volume', function='max')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        # NOTE(review): R is defined but never used in this test.
        R = lambda x: round(x, 2)
        self.assertEqual(d.at[idx['2018-01-05', 'five'], 'rol_max_volume_4'],
                         971704)
        self.assertEqual(d.at[idx['2018-01-05', 'six'], 'rol_max_volume_4'],
                         195539)
        self.assertEqual(d.at[idx['2018-01-04', 'three'], 'rol_max_volume_4'],
                         433733)
        # Adding lag and testing
        # The lagged call is read back under the same default column name;
        # the expected values are the ones above, one day later.
        self.ds.add_rolling(4, on='volume', function='max', lag=1)
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        self.assertEqual(d.at[idx['2018-01-06', 'five'], 'rol_max_volume_4'],
                         971704)
        self.assertEqual(d.at[idx['2018-01-06', 'six'], 'rol_max_volume_4'],
                         195539)
        self.assertEqual(d.at[idx['2018-01-05', 'three'], 'rol_max_volume_4'],
                         433733)
        # Testing for 2 lags and column name
        self.ds.add_rolling(4,
                            on='volume',
                            function='max',
                            lag=2,
                            col_name='check')
        d = self.ds.data.set_index(['timestamp', 'symbol'])
        self.assertEqual(d.at[idx['2018-01-06', 'three'], 'check'], 433733)

    def test_batch(self):
        # Runs six operations through batch_process. Each entry is a
        # one-key dict whose key selects the operation and whose value
        # holds its keyword arguments; judging by the arguments, the keys
        # stand for pct change (P), lag (L), formula (F), indicator (I)
        # and rolling (R) -- confirm against DataSource.batch_process.
        length = len(self.ds.data)
        batch = [{
            'P': {
                'on': 'close',
                'period': 1,
                'lag': 1
            }
        }, {
            'L': {
                'on': 'volume',
                'period': 1
            }
        }, {
            'F': {
                'formula': '(open+close)/2',
                'col_name': 'AvgPrice'
            }
        }, {
            'I': {
                'indicator': 'SMA',
                'period': 3,
                'lag': 1,
                'col_name': 'SMA3'
            }
        }, {
            'F': {
                # Refers to 'AvgPrice' and 'SMA3' in lower case.
                'formula': 'avgprice + sma3',
                'col_name': 'final'
            }
        }, {
            'R': {
                'window': 3,
                'function': 'mean'
            }
        }]
        d = self.ds.batch_process(batch).set_index(['timestamp', 'symbol'])
        # 8 original + 6 added = 14 columns; 12 remain once timestamp and
        # symbol have been moved into the index.
        self.assertEqual(len(d.columns), 12)
        self.assertEqual(len(self.ds.data.columns), 14)
        self.assertEqual(len(self.ds.data), length)

    def test_raise_error_if_not_dataframe(self):
        # TODO: placeholder -- no assertions yet, so this always passes.
        pass
コード例 #14
0
 def setUp(self):
     """Build a fresh DataSource from the sample CSV fixture before each test."""
     sample = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     self.ds = DataSource(data=sample)
コード例 #15
0
 def test_initialize_case(self):
     # Upper-cased input column names are expected to come back
     # lower-cased from DataSource.
     sample = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
     sample.columns = [name.upper() for name in sample.columns]
     first_input_column = sample.columns[0]
     self.assertEqual(first_input_column, "TIMESTAMP")
     self.ds = DataSource(data=sample)
     first_output_column = self.ds.data.columns[0]
     self.assertEqual(first_output_column, "timestamp")
コード例 #16
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def test_initialize_case(self):
     # Feed DataSource upper-case column names and expect the first
     # column to be reported as lower-case 'timestamp'.
     frame = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     upper_cased = [column.upper() for column in frame.columns]
     frame.columns = upper_cased
     self.assertEqual(frame.columns[0], 'TIMESTAMP')
     self.ds = DataSource(data=frame)
     self.assertEqual(self.ds.data.columns[0], 'timestamp')
コード例 #17
0
class TestDataSource(unittest.TestCase):
    """Tests for DataSource built on the tests/data/sample.csv fixture.

    Each test starts from a fresh DataSource created in setUp and checks
    individual cell values, column names and column counts after calling
    the add_* / batch_process methods.
    """

    def setUp(self):
        # Fresh DataSource over the sample fixture for every test.
        df = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
        self.ds = DataSource(data=df)

    def test_data(self):
        # Positional spot checks on the data held by the default DataSource.
        self.assertEqual(self.ds.data.iloc[20, 1], "five")
        self.assertEqual(self.ds.data.iloc[14, 3], 112)
        self.assertEqual(self.ds.data.iloc[24, 7], 10.54)

    def test_data_without_sort(self):
        # Same kind of spot checks, but with sort=False passed to DataSource.
        df = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
        self.ds = DataSource(data=df, sort=False)
        self.assertEqual(self.ds.data.iloc[9, 4], 999)
        self.assertEqual(self.ds.data.iloc[24, 6], 41688)
        self.assertEqual(self.ds.data.at[4, "close"], 10.6)

    def test_initialize_case(self):
        # Upper-cased input columns are expected to come back lower-cased.
        df = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
        df.columns = [x.upper() for x in df.columns]
        self.assertEqual(df.columns[0], "TIMESTAMP")
        self.ds = DataSource(data=df)
        self.assertEqual(self.ds.data.columns[0], "timestamp")

    def test_initialize_column_rename(self):
        # Columns named by timestamp=/symbol= should end up as
        # "timestamp" and "symbol" in the first two positions.
        df = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"])
        df.columns = [
            "TS",
            "TRADINGSYMBOL",
            "OPEN",
            "HIGH",
            "LOW",
            "CLOSE",
            "VOLUME",
            "PREVCLOSE",
        ]
        self.ds = DataSource(data=df, timestamp="TS", symbol="TRADINGSYMBOL")
        self.assertEqual(self.ds.data.columns[0], "timestamp")
        self.assertEqual(self.ds.data.columns[1], "symbol")

    def test_add_lag(self):
        # Two lag columns with default names (lag_<on>_<period>) are added;
        # the row count must not change and 8 + 2 = 10 columns are expected.
        length = len(self.ds.data)
        idx = pd.IndexSlice
        self.ds.add_lag(on="close")
        self.ds.add_lag(on="volume", period=2)
        d = self.ds.data.set_index(["timestamp", "symbol"])
        self.assertEqual(d.at[idx["2018-01-04", "one"], "lag_close_1"], 11)
        self.assertEqual(d.at[idx["2018-01-06", "six"], "lag_volume_2"], 86014)
        self.assertEqual(len(self.ds.data.columns), 10)
        self.assertEqual(len(self.ds.data), length)

    def test_add_lag_column_rename(self):
        # The same lag stored under the default name and under a custom
        # col_name should hold the same value.
        idx = pd.IndexSlice
        self.ds.add_lag(on="close")
        self.ds.add_lag(on="close", col_name="some_col")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        self.assertEqual(d.at[idx["2018-01-04", "one"], "lag_close_1"], 11)
        self.assertEqual(d.at[idx["2018-01-04", "one"], "some_col"], 11)
        self.assertEqual(d.at[idx["2018-01-05", "three"], "some_col"], 109)

    def test_add_pct_change(self):
        # Default names are chg_<on>_<period>; three added columns give
        # 8 + 3 = 11 columns. Values are compared after rounding to 2 places.
        idx = pd.IndexSlice
        self.ds.add_pct_change(on="close")
        self.ds.add_pct_change(on="close", period=2)
        self.ds.add_pct_change(on="close", period=2, col_name="new_col")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-05", "three"], "chg_close_1"]), -0.07)
        self.assertEqual(R(d.at[idx["2018-01-06", "five"], "chg_close_1"]), 0.17)
        self.assertEqual(R(d.at[idx["2018-01-05", "four"], "chg_close_2"]), 0.05)
        self.assertEqual(R(d.at[idx["2018-01-05", "four"], "new_col"]), 0.05)
        self.assertEqual(R(d.at[idx["2018-01-03", "six"], "new_col"]), -0.1)
        # The 2-period change on 2018-01-02 is expected to be missing.
        self.assertEqual(pd.isna(d.at[idx["2018-01-02", "one"], "new_col"]), True)
        self.assertEqual(len(self.ds.data.columns), 11)

    def test_add_pct_change_lag(self):
        # The lag argument does not show up in the default column name:
        # only the period does (chg_close_2, chg_close_1).
        idx = pd.IndexSlice
        self.ds.add_pct_change(on="close", period=2, lag=1)
        self.ds.add_pct_change(on="close", period=1, lag=2)
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-04", "four"], "chg_close_2"]), 0.09)
        self.assertEqual(R(d.at[idx["2018-01-04", "four"], "chg_close_1"]), 0.01)
        self.assertEqual(R(d.at[idx["2018-01-06", "three"], "chg_close_1"]), -0.01)

    def test_add_pct_change_lag_col_name(self):
        # Lagged percentage change with a default name and a custom name.
        idx = pd.IndexSlice
        self.ds.add_pct_change(on="high", period=2, lag=1)
        self.ds.add_pct_change(on="close", period=1, lag=2, col_name="lagged_2")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-05", "six"], "chg_high_2"]), -0.04)
        self.assertEqual(R(d.at[idx["2018-01-04", "four"], "lagged_2"]), 0.01)

    def test_formula_add_col_name(self):
        # Formula results are stored under the given column names.
        idx = pd.IndexSlice
        self.ds.add_formula("open+close", "new_col")
        self.ds.add_formula("volume/close", "new_col_2")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-04", "four"], "new_col"]), 336)
        self.assertEqual(R(d.at[idx["2018-01-06", "one"], "new_col_2"]), 77755.77)

    def test_formula_case_insensitive(self):
        # Upper-case names in the formula and in col_name are expected to
        # behave like their lower-case forms ("NEW_COL_2" is read back as
        # "new_col_2").
        idx = pd.IndexSlice
        self.ds.add_formula("OPEN+CLOSE", "new_col")
        self.ds.add_formula("volume/close", "NEW_COL_2")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-04", "four"], "new_col"]), 336)
        self.assertEqual(R(d.at[idx["2018-01-06", "one"], "new_col_2"]), 77755.77)

    def test_formula_calculated_column(self):
        # A formula may reference columns created by earlier formulas.
        idx = pd.IndexSlice
        self.ds.add_formula("(open+close)*100", "new_col_1")
        self.ds.add_formula("volume/100", "new_col_2")
        self.ds.add_formula("new_col_1+new_col_2", "new_col_3")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        R = lambda x: round(x, 2)
        self.assertEqual(R(d.at[idx["2018-01-06", "one"], "new_col_3"]), 10190.6)
        self.assertEqual(R(d.at[idx["2018-01-05", "two"], "new_col_3"]), 200389.97)

    def test_rolling_simple(self):
        # Compares add_rolling(2) for symbol "one" with a plain pandas
        # rolling(2).mean() on "close".
        # NOTE(review): this presumably relies on add_rolling defaulting to
        # on="close" and function="mean" -- confirm against DataSource.
        from pandas import isna

        q = 'symbol == "one"'
        df = pd.read_csv("tests/data/sample.csv", parse_dates=["timestamp"]).query(q)
        df["r2"] = df["close"].rolling(2).mean()
        self.ds.add_rolling(2, col_name="r2")
        df2 = self.ds.data.query(q)
        print("RESULT", df["r2"], df2["r2"])
        # Only positions where the reference value is not NaN are compared.
        for a, b in zip(df["r2"], df2["r2"]):
            if not (isna(a)):
                assert a == b

    def test_rolling_values(self):
        # Rolling max of volume over 4 periods, without lag, with lag=1
        # and with lag=2 under a custom column name.
        idx = pd.IndexSlice
        self.ds.add_rolling(4, on="volume", function="max")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        # NOTE(review): R is defined but never used in this test.
        R = lambda x: round(x, 2)
        self.assertEqual(d.at[idx["2018-01-05", "five"], "rol_max_volume_4"], 971704)
        self.assertEqual(d.at[idx["2018-01-05", "six"], "rol_max_volume_4"], 195539)
        self.assertEqual(d.at[idx["2018-01-04", "three"], "rol_max_volume_4"], 433733)
        # Adding lag and testing
        # The lagged call is read back under the same default column name;
        # the expected values are the ones above, one day later.
        self.ds.add_rolling(4, on="volume", function="max", lag=1)
        d = self.ds.data.set_index(["timestamp", "symbol"])
        self.assertEqual(d.at[idx["2018-01-06", "five"], "rol_max_volume_4"], 971704)
        self.assertEqual(d.at[idx["2018-01-06", "six"], "rol_max_volume_4"], 195539)
        self.assertEqual(d.at[idx["2018-01-05", "three"], "rol_max_volume_4"], 433733)
        # Testing for 2 lags and column name
        self.ds.add_rolling(4, on="volume", function="max", lag=2, col_name="check")
        d = self.ds.data.set_index(["timestamp", "symbol"])
        self.assertEqual(d.at[idx["2018-01-06", "three"], "check"], 433733)

    def test_batch(self):
        # Runs six operations through batch_process. Each entry is a
        # one-key dict whose key selects the operation and whose value
        # holds its keyword arguments; judging by the arguments, the keys
        # stand for pct change (P), lag (L), formula (F), indicator (I)
        # and rolling (R) -- confirm against DataSource.batch_process.
        length = len(self.ds.data)
        batch = [
            {"P": {"on": "close", "period": 1, "lag": 1}},
            {"L": {"on": "volume", "period": 1}},
            {"F": {"formula": "(open+close)/2", "col_name": "AvgPrice"}},
            {"I": {"indicator": "SMA", "period": 3, "lag": 1, "col_name": "SMA3"}},
            # Refers to "AvgPrice" and "SMA3" in lower case.
            {"F": {"formula": "avgprice + sma3", "col_name": "final"}},
            {"R": {"window": 3, "function": "mean"}},
        ]
        d = self.ds.batch_process(batch).set_index(["timestamp", "symbol"])
        # 8 original + 6 added = 14 columns; 12 remain once timestamp and
        # symbol have been moved into the index.
        self.assertEqual(len(d.columns), 12)
        self.assertEqual(len(self.ds.data.columns), 14)
        self.assertEqual(len(self.ds.data), length)

    def test_raise_error_if_not_dataframe(self):
        # TODO: placeholder -- no assertions yet, so this always passes.
        pass
コード例 #18
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def test_data_without_sort(self):
     # Spot-check cells of a DataSource created with sort=False:
     # two by position (iloc) and one by index label (at).
     frame = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     self.ds = DataSource(data=frame, sort=False)
     positional_checks = (((9, 4), 999), ((24, 6), 41688))
     for (row, column), expected in positional_checks:
         self.assertEqual(self.ds.data.iloc[row, column], expected)
     self.assertEqual(self.ds.data.at[4, 'close'], 10.6)
コード例 #19
0
ファイル: test_datasource.py プロジェクト: rahulmr/fastbt
 def setUp(self):
     """Create self.ds from the sample CSV fixture ahead of every test."""
     fixture = pd.read_csv('tests/data/sample.csv', parse_dates=['timestamp'])
     self.ds = DataSource(data=fixture)
コード例 #20
0
ファイル: main.py プロジェクト: uberdeveloper/btzoo
def transform(data):
    """
    Apply the necessary transformation to the given data

    Wraps data in a DataSource (timestamp column 'date') and adds,
    in this order: lagged rolling max of high / min of low for
    windows 2 to 7, the pret, idret and qtrd formulas, lagged
    1 to 3 period returns on close, lagged rolling sums of
    tottrdval for windows 2 and 3, and one-period lags of
    tottrdval, perdel and qtrd. Returns the resulting data.
    """
    source = DataSource(data, timestamp='date')

    # Rolling extremes of high/low, each lagged by one period.
    for window in range(2, 8):
        suffix = str(window)
        source.add_rolling(on='high', window=window, col_name='rmax' + suffix,
                           function='max', lag=1)
        source.add_rolling(on='low', window=window, col_name='rmin' + suffix,
                           function='min', lag=1)

    # Formula columns; 'qtrd' must exist before it is lagged below.
    formulas = (
        ('(open/prevclose)-1', 'pret'),
        ('(close/open)-1', 'idret'),
        ('(tottrdval/totaltrades)', 'qtrd'),
    )
    for expression, name in formulas:
        source.add_formula(expression, col_name=name)

    for period in (1, 2, 3):
        source.add_pct_change(on='close', period=period,
                              col_name='ret' + str(period), lag=1)

    for window in (2, 3):
        source.add_rolling(on='tottrdval', window=window,
                           col_name='vol' + str(window),
                           function='sum', lag=1)

    for column in ('tottrdval', 'perdel', 'qtrd'):
        source.add_lag(on=column, period=1, col_name='prev_' + column)

    return source.data