Example #1
0
 def __init__(self):
     """Load settings from ``config.env`` and create the helper objects."""
     # Locate the env file first, then load it before any helper is built.
     env_file = find_dotenv('config.env')
     load_dotenv(env_file)

     # Name of the data provider; a plain string, independent of the helpers.
     self.provider = 'iexcloud'

     # File and date helpers, constructed in the same order as before.
     self.writer, self.reader, self.finder, self.traveller = (
         FileWriter(),
         FileReader(),
         PathFinder(),
         TimeTraveller(),
     )
Example #2
0
    def __init__(self, usr=None, pwd=None, mfa=None):
        """Log in through ``rh`` and create the file helper objects.

        Args:
            usr: Username; when falsy, ``RH_USERNAME`` is read from the
                environment.
            pwd: Password; when falsy, ``RH_PASSWORD`` is read from the
                environment.
            mfa: Multi-factor code; when falsy, a TOTP code is generated
                from the ``RH_2FA`` environment variable.

        Raises:
            KeyError: If a needed environment variable is not set.
        """
        # Populate os.environ from a .env file before reading credentials.
        load_dotenv()
        env = os.environ

        # Explicit arguments win; the environment is only consulted for
        # credentials that were not supplied.
        username = usr if usr else env['RH_USERNAME']
        password = pwd if pwd else env['RH_PASSWORD']
        mfa_code = mfa if mfa else pyotp.TOTP(env['RH_2FA']).now()

        rh.login(username, password, mfa_code=mfa_code)
        self.api = rh

        self.writer, self.reader, self.finder = (
            FileWriter(),
            FileReader(),
            PathFinder(),
        )
Example #3
0
class MarketData:
    """Reads and writes cached market data (symbols, dividends) as CSV."""

    def __init__(self, broker=None):
        """Create the file helpers. ``broker`` is accepted but not used."""
        self.writer, self.reader, self.finder = (
            FileWriter(),
            FileReader(),
            PathFinder(),
        )

    def get_symbols(self):
        """Return the cached symbols as a list (the ``C.SYMBOL`` column)."""
        cached = self.reader.load_csv(self.finder.get_symbols_path())
        return list(cached[C.SYMBOL])

    def get_dividends(self, symbol, timeframe=None):
        """Return the cached dividends dataframe for ``symbol``.

        ``timeframe`` is accepted but not used by this implementation.
        """
        dividends_path = self.finder.get_dividends_path(symbol)
        return self.reader.load_csv(dividends_path)

    def save_dividends(self, symbol, timeframe=None):
        """Fetch the dividend history of ``symbol`` and write it to its CSV.

        ``timeframe`` is only forwarded to ``get_dividends`` when truthy, so
        that method's own default applies otherwise.
        """
        fetch_args = (symbol, timeframe) if timeframe else (symbol,)
        history = self.get_dividends(*fetch_args)
        target = self.finder.get_dividends_path(symbol)
        self.writer.update_csv(target, history)
Example #4
0
class MarketData:
    """Access to cached market datasets stored as CSV files.

    ``get_*`` methods load a CSV through ``self.reader`` and filter it to a
    timeframe; ``standardize_*`` methods rename raw columns to the ``C``
    column constants; ``save_*`` methods replace the CSV on disk with the
    result of the matching ``get_*`` call and return the written path.
    """

    def __init__(self):
        """Load settings from ``config.env`` and create the helpers."""
        load_dotenv(find_dotenv('config.env'))
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()
        self.traveller = TimeTraveller()
        self.provider = 'iexcloud'

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _remove_file(filename):
        """Delete ``filename`` if it exists; do nothing otherwise."""
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass

    @staticmethod
    def _existing_path(filename):
        """Return ``filename`` if it exists on disk, else ``None``."""
        return filename if os.path.exists(filename) else None

    @staticmethod
    def _select_columns(df, columns):
        """Return ``df`` limited to those of ``columns`` it contains.

        The selection is built as a list in the order of ``columns``:
        pandas >= 2.0 rejects ``set`` indexers, and a set would also make
        the resulting column order arbitrary.
        """
        return df[[col for col in columns if col in df.columns]]

    @staticmethod
    def _merge_on_time(left, right):
        """Outer-merge two frames on ``C.TIME``, tolerating empty inputs.

        Returns the non-empty frame when only one has rows, and ``None``
        when both are empty.
        """
        if left.empty and right.empty:
            return None
        if left.empty:
            return right
        if right.empty:
            return left
        return left.merge(right, how="outer", on=C.TIME)

    def _save_frame(self, filename, fetch, time_col, *fmt):
        """Replace ``filename`` with the frame produced by ``fetch()``.

        Args:
            filename: CSV path to overwrite.
            fetch: Zero-argument callable returning the new dataframe.
            time_col: Time column name passed to ``reader.update_df``.
            *fmt: Optional extra positional argument(s) (a date format)
                forwarded to ``reader.update_df``.

        Returns:
            ``filename`` if the file exists after writing, else ``None``.
        """
        # NOTE(review): the old file is removed before fetch() runs, so a
        # failing fetch leaves no cached file behind. Order kept as-is.
        self._remove_file(filename)
        df = self.reader.update_df(filename, fetch(), time_col, *fmt)
        self.writer.update_csv(filename, df)
        return self._existing_path(filename)

    # ------------------------------------------------------------------
    # generic utilities
    # ------------------------------------------------------------------

    def try_again(self, func, **kwargs):
        """Call ``func`` repeatedly until it succeeds.

        Args:
            func: Callable to invoke.
            **kwargs: ``retries`` (number of attempts, default
                ``C.DEFAULT_RETRIES``) and ``delay`` (value passed to
                ``sleep`` between attempts, default ``C.DEFAULT_DELAY``)
                are consumed here; everything else is forwarded to
                ``func`` as keyword arguments.

        Returns:
            Whatever ``func`` returns on its first successful call.

        Raises:
            Exception: The error from the final attempt, re-raised as-is.
        """
        retries = (kwargs['retries']
                   if 'retries' in kwargs else C.DEFAULT_RETRIES)
        delay = (kwargs['delay'] if 'delay' in kwargs else C.DEFAULT_DELAY)
        func_args = {
            k: v
            for k, v in kwargs.items() if k not in {'retries', 'delay'}
        }
        # Always make at least one attempt: with retries <= 0 the loop used
        # to be skipped entirely and None was returned without calling func.
        attempts = max(retries, 1)
        for attempt in range(attempts):
            try:
                return func(**func_args)
            except Exception:
                if attempt == attempts - 1:
                    raise
                sleep(delay)

    def standardize(self, df, full_mapping, filename, columns, default):
        """Rename raw columns and normalise the value columns to floats.

        Args:
            df: Raw dataframe.
            full_mapping: Raw column name -> standard column name. Only
                keys present in ``df`` are used; other columns are dropped.
            filename: CSV path passed to ``reader.update_df``.
            columns: Required standard columns; the first is the time
                column, the rest are value columns.
            default: Replacement for falsy values (e.g. ``None``, ``''``
                or ``0``) in the value columns.

        Returns:
            The renamed dataframe. Only when the time column and all value
            columns are present is it also passed through
            ``reader.update_df``, sorted by time and converted to floats.
        """
        mapping = {k: v for k, v in full_mapping.items() if k in df}

        df = df[list(mapping)].rename(columns=mapping)
        time_col, val_cols = columns[0], columns[1:]

        if time_col in df and set(val_cols).issubset(df.columns):
            df = self.reader.update_df(filename, df,
                                       time_col).sort_values(by=[time_col])
            # since time col is pd.datetime,
            # consider converting to YYYY-MM-DD str format
            for val_col in val_cols:
                df[val_col] = df[val_col].apply(lambda val: float(val)
                                                if val else default)

        return df

    # ------------------------------------------------------------------
    # symbols
    # ------------------------------------------------------------------

    def get_symbols(self):
        """Return the cached symbols as a list (the ``C.SYMBOL`` column)."""
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    # ------------------------------------------------------------------
    # dividends
    # ------------------------------------------------------------------

    def get_dividends(self, symbol, timeframe='max'):
        """Return cached dividends for ``symbol`` filtered on ``C.EX``."""
        df = self.reader.load_csv(
            self.finder.get_dividends_path(symbol, self.provider))
        return self.reader.data_in_timeframe(df, C.EX, timeframe)

    def standardize_dividends(self, symbol, df):
        """Map raw dividend columns to ``C.EX/PAY/DEC/DIV`` (default 0)."""
        full_mapping = {
            'exDate': C.EX,
            'paymentDate': C.PAY,
            'declaredDate': C.DEC,
            'amount': C.DIV,
        }
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.DIV], 0)

    def save_dividends(self, **kwargs):
        """Replace the dividends CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_dividends``. Returns
        the written path, or ``None`` if no file exists afterwards.
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self._save_frame(filename,
                                lambda: self.get_dividends(**kwargs),
                                C.EX, C.DATE_FMT)

    # ------------------------------------------------------------------
    # splits
    # ------------------------------------------------------------------

    def get_splits(self, symbol, timeframe='max'):
        """Return cached splits for ``symbol`` filtered on ``C.EX``."""
        df = self.reader.load_csv(
            self.finder.get_splits_path(symbol, self.provider))
        return self.reader.data_in_timeframe(df, C.EX, timeframe)

    def standardize_splits(self, symbol, df):
        """Map raw split columns to ``C.EX/PAY/DEC/RATIO`` (default 1)."""
        full_mapping = {
            'exDate': C.EX,
            'paymentDate': C.PAY,
            'declaredDate': C.DEC,
            'ratio': C.RATIO,
        }
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.RATIO], 1)

    def save_splits(self, **kwargs):
        """Replace the splits CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_splits``. Returns the
        written path, or ``None`` if no file exists afterwards.
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self._save_frame(filename,
                                lambda: self.get_splits(**kwargs),
                                C.EX, C.DATE_FMT)

    # ------------------------------------------------------------------
    # OHLC
    # ------------------------------------------------------------------

    def standardize_ohlc(self, symbol, df, filename=None):
        """Map raw OHLC columns to the ``C`` constants.

        Price columns go through ``standardize`` (default 0). ``C.VOL`` and
        ``C.TRADES``, when present, are converted to ints with nulls
        replaced by 0. ``filename`` defaults to the symbol's OHLC path.
        """
        full_mapping = {
            'date': C.TIME,
            'open': C.OPEN,
            'high': C.HIGH,
            'low': C.LOW,
            'close': C.CLOSE,
            'volume': C.VOL,
            'average': C.AVG,
            'trades': C.TRADES,
        }

        filename = filename or self.finder.get_ohlc_path(symbol, self.provider)

        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE], 0)

        for col in (C.VOL, C.TRADES):
            if col in df:
                df[col] = df[col].apply(lambda val: 0
                                        if pd.isnull(val) else int(val))

        return df

    def get_ohlc(self, symbol, timeframe='max'):
        """Return cached OHLC data for ``symbol`` filtered on ``C.TIME``."""
        df = self.reader.load_csv(
            self.finder.get_ohlc_path(symbol, self.provider))
        return self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def save_ohlc(self, **kwargs):
        """Replace the OHLC CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_ohlc``. Returns the
        written path, or ``None`` if no file exists afterwards.
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_ohlc_path(symbol, self.provider)
        return self._save_frame(filename,
                                lambda: self.get_ohlc(**kwargs),
                                C.TIME, C.DATE_FMT)

    # ------------------------------------------------------------------
    # social sentiment / volume
    # ------------------------------------------------------------------

    def get_social_sentiment(self, symbol, timeframe='max'):
        """Return cached ``C.TIME/POS/NEG`` sentiment rows for ``symbol``."""
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        return self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.POS, C.NEG]]

    def get_social_volume(self, symbol, timeframe='max'):
        """Return cached ``C.TIME/VOL/DELTA`` volume rows for ``symbol``."""
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        return self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VOL, C.DELTA]]

    def save_social_sentiment(self, **kwargs):
        """Replace the sentiment CSV of ``kwargs['symbol']``.

        Sentiment and volume are fetched separately (all keyword arguments
        are forwarded to both getters) and outer-merged on ``C.TIME``.
        Returns the written path, or ``None`` when both frames are empty or
        no file exists afterwards.
        """
        symbol = kwargs['symbol']
        # NOTE(review): the standardize_* methods pass self.provider to
        # get_sentiment_path but this call does not - confirm intended.
        filename = self.finder.get_sentiment_path(symbol)
        self._remove_file(filename)

        sen_df = self._select_columns(
            self.reader.update_df(filename,
                                  self.get_social_sentiment(**kwargs),
                                  C.TIME),
            (C.TIME, C.POS, C.NEG))
        vol_df = self._select_columns(
            self.reader.update_df(filename,
                                  self.get_social_volume(**kwargs),
                                  C.TIME),
            (C.TIME, C.VOL, C.DELTA))

        df = self._merge_on_time(sen_df, vol_df)
        if df is None:
            return None
        self.writer.update_csv(filename, df)
        return self._existing_path(filename)

    def standardize_sentiment(self, symbol, df):
        """Map raw sentiment columns to ``C.TIME/POS/NEG`` (default 0)."""
        full_mapping = {
            'timestamp': C.TIME,
            'bullish': C.POS,
            'bearish': C.NEG,
        }
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.POS, C.NEG], 0)
        return self._select_columns(df, (C.TIME, C.POS, C.NEG))

    def standardize_volume(self, symbol, df):
        """Map raw volume columns to ``C.TIME/VOL/DELTA`` (default 0)."""
        full_mapping = {
            'timestamp': C.TIME,
            'volume_score': C.VOL,
            'volume_change': C.DELTA,
        }
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.VOL, C.DELTA], 0)
        return self._select_columns(df, (C.TIME, C.VOL, C.DELTA))

    # ------------------------------------------------------------------
    # intraday
    # ------------------------------------------------------------------

    def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=False):
        """Yield one cached intraday dataframe per date in ``timeframe``.

        ``min`` and ``extra_hrs`` are accepted but not used here.
        """
        # implement way to transform 1 min dataset to 5 min data
        #  or 30 or 60 should be flexible soln
        # implement way to only get market hours
        dates = self.traveller.dates_in_range(timeframe)
        for date in dates:
            df = self.reader.load_csv(
                self.finder.get_intraday_path(symbol, date, self.provider))
            yield self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def save_intraday(self, **kwargs):
        """Replace the per-date intraday CSVs of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_intraday``. Each
        yielded frame is written to the file for the date of its first
        ``C.TIME`` value; empty frames are skipped.

        Returns:
            List of paths that exist after writing.
        """
        symbol = kwargs['symbol']
        save_fmt = f'{C.DATE_FMT} {C.TIME_FMT}'
        filenames = []

        for df in self.get_intraday(**kwargs):
            # An empty frame has no first row to take the date from.
            if df.empty:
                continue
            date = df[C.TIME].iloc[0].strftime(C.DATE_FMT)
            filename = self.finder.get_intraday_path(symbol, date,
                                                     self.provider)
            self._remove_file(filename)
            df = self.reader.update_df(filename, df, C.TIME, save_fmt)
            self.writer.update_csv(filename, df)
            if os.path.exists(filename):
                filenames.append(filename)
        return filenames

    # ------------------------------------------------------------------
    # unemployment
    # ------------------------------------------------------------------

    def get_unemployment_rate(self, timeframe='max'):
        """Return the cached unemployment data filtered on ``C.TIME``."""
        df = self.reader.load_csv(self.finder.get_unemployment_path())
        return self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def standardize_unemployment(self, df):
        """Map raw columns to ``C.TIME`` and ``C.UN_RATE`` (default 0)."""
        full_mapping = {'time': C.TIME, 'value': C.UN_RATE}
        filename = self.finder.get_unemployment_path()
        return self.standardize(df, full_mapping, filename,
                                [C.TIME, C.UN_RATE], 0)

    def save_unemployment_rate(self, **kwargs):
        """Replace the unemployment CSV, saved with the ``'%Y-%m'`` format.

        All keyword arguments are forwarded to ``get_unemployment_rate``.
        Returns the written path, or ``None`` if no file exists afterwards.
        """
        filename = self.finder.get_unemployment_path()
        return self._save_frame(filename,
                                lambda: self.get_unemployment_rate(**kwargs),
                                C.TIME, '%Y-%m')

    # ------------------------------------------------------------------
    # stock-to-flow (s2f)
    # ------------------------------------------------------------------

    def standardize_s2f_ratio(self, df):
        """Map raw s2f columns to ``C.TIME/HALVING/RATIO`` (default 0)."""
        full_mapping = {
            't': C.TIME,
            'o.daysTillHalving': C.HALVING,
            'o.ratio': C.RATIO,
        }
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.HALVING, C.RATIO], 0)
        return self._select_columns(df, (C.TIME, C.HALVING, C.RATIO))

    def get_s2f_ratio(self, timeframe='max'):
        """Return cached ``C.TIME/HALVING/RATIO`` rows in ``timeframe``."""
        df = self.reader.load_csv(self.finder.get_s2f_path())
        return self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.HALVING, C.RATIO]]

    def standardize_s2f_deflection(self, df):
        """Map raw s2f columns to ``C.TIME`` and ``C.VAL`` (default 1)."""
        full_mapping = {'t': C.TIME, 'v': C.VAL}
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename, [C.TIME, C.VAL], 1)
        return self._select_columns(df, (C.TIME, C.VAL))

    def get_s2f_deflection(self, timeframe='max'):
        """Return cached ``C.TIME`` and ``C.VAL`` rows in ``timeframe``."""
        df = self.reader.load_csv(self.finder.get_s2f_path())
        return self.reader.data_in_timeframe(df, C.TIME,
                                             timeframe)[[C.TIME, C.VAL]]

    def save_s2f(self, **kwargs):
        """Replace the s2f CSV with merged ratio and deflection data.

        All keyword arguments are forwarded to ``get_s2f_ratio`` and
        ``get_s2f_deflection``; the two frames are outer-merged on
        ``C.TIME``. Returns the written path, or ``None`` when both frames
        are empty or no file exists afterwards.
        """
        filename = self.finder.get_s2f_path()
        self._remove_file(filename)

        rat_df = self._select_columns(
            self.reader.update_df(filename, self.get_s2f_ratio(**kwargs),
                                  C.TIME, C.DATE_FMT),
            (C.TIME, C.HALVING, C.RATIO))
        def_df = self._select_columns(
            self.reader.update_df(filename,
                                  self.get_s2f_deflection(**kwargs),
                                  C.TIME, C.DATE_FMT),
            (C.TIME, C.VAL))

        df = self._merge_on_time(rat_df, def_df)
        if df is None:
            return None
        self.writer.update_csv(filename, df)
        return self._existing_path(filename)
Example #5
0
 def __init__(self, broker=None):
     """Create the file helper objects. ``broker`` is accepted but unused."""
     # Right-hand side is evaluated left to right, so the helpers are still
     # constructed in writer, reader, finder order.
     self.writer, self.reader, self.finder = (
         FileWriter(),
         FileReader(),
         PathFinder(),
     )
Example #6
0
    'symbol': 'NVDA',
    'open': 445.00,
    'volume': 102265,
    'date': '2015-01-15'
}
# Fresh list holding every row of `data` followed by the extra `snippet` row.
data_ = [*data, snippet]

# CSV fixture paths under dir_path.
csv_path1 = os.path.join(dir_path, 'test1.csv')
csv_path2 = os.path.join(dir_path, 'test2.csv')

# DataFrame fixtures: base rows, base rows + snippet, snippet only, no rows.
test_df = pd.DataFrame(data)
big_df = pd.DataFrame(data_)
small_df = pd.DataFrame([snippet])
empty_df = pd.DataFrame()

# Shared reader/writer instances used by the tests below.
reader = FileReader()
writer = FileWriter()

# Path derived from the symbols path with a _TEST suffix.
symbols_path = reader.store.finder.get_symbols_path()
test_path = f'{symbols_path}_TEST'


class TestFileWriter:
    def test_init(self):
        """Check the shared writer's class name and the ``store`` attribute."""
        assert writer.__class__.__name__ == 'FileWriter'
        # NOTE(review): this inspects `reader`, not `writer`, inside the
        # FileWriter tests - possibly a copy/paste slip; confirm the target.
        assert hasattr(reader, 'store')

    def test_save_json(self):
        # save empty json object
        writer.save_json(json_path1, {})
        assert os.path.exists(json_path1)