def __init__(self):
    """Set up file/path helpers and select the IEX Cloud data provider."""
    # Pull settings from config.env into the process environment first,
    # so any helpers constructed below can read them.
    load_dotenv(find_dotenv('config.env'))
    self.writer, self.reader = FileWriter(), FileReader()
    self.finder = PathFinder()
    self.traveller = TimeTraveller()
    # Provider tag used to namespace cached data paths.
    self.provider = 'iexcloud'
def __init__(self, usr=None, pwd=None, mfa=None):
    """Authenticate with Robinhood and set up file/path helpers.

    Credentials fall back to the RH_USERNAME / RH_PASSWORD / RH_2FA
    environment variables (loaded via dotenv) when not passed in.
    """
    load_dotenv()
    # Explicit args win; env vars are only read when the arg is falsy.
    user = usr if usr else os.environ['RH_USERNAME']
    secret = pwd if pwd else os.environ['RH_PASSWORD']
    otp = mfa if mfa else pyotp.TOTP(os.environ['RH_2FA']).now()
    rh.login(user, secret, mfa_code=otp)
    self.api = rh
    self.writer = FileWriter()
    self.reader = FileReader()
    self.finder = PathFinder()
class MarketData:
    """Read and refresh market data cached as CSV files on disk."""

    def __init__(self, broker=None):
        # `broker` is accepted for interface compatibility; it is not
        # used by this implementation.
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols(self):
        """Return the cached list of ticker symbols."""
        path = self.finder.get_symbols_path()
        frame = self.reader.load_csv(path)
        return list(frame[C.SYMBOL])

    def get_dividends(self, symbol, timeframe=None):
        """Return the cached dividend dataframe for ``symbol``.

        ``timeframe`` is accepted but not applied to the cached data.
        """
        path = self.finder.get_dividends_path(symbol)
        return self.reader.load_csv(path)

    def save_dividends(self, symbol, timeframe=None):
        """Write the dividend history for ``symbol`` back to its cache."""
        df = (self.get_dividends(symbol, timeframe) if timeframe
              else self.get_dividends(symbol))
        self.writer.update_csv(self.finder.get_dividends_path(symbol), df)
class MarketData:
    """Fetch, standardize, and cache market data for one provider.

    CSV caches are read/written via FileReader/FileWriter, cache paths are
    resolved via PathFinder, and date ranges are expanded via TimeTraveller.
    Column names, date formats, and retry defaults come from the constants
    module ``C``. Cached datasets include dividends, splits, OHLC, social
    sentiment/volume, intraday bars, unemployment rate, and stock-to-flow.
    """

    def __init__(self):
        # Load provider credentials/settings from config.env into os.environ.
        load_dotenv(find_dotenv('config.env'))
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()
        self.traveller = TimeTraveller()
        # Provider tag used to namespace cached data paths.
        self.provider = 'iexcloud'

    def try_again(self, func, **kwargs):
        """Call ``func`` with retry-on-exception.

        ``retries`` and ``delay`` are stripped from kwargs (defaulting to
        C.DEFAULT_RETRIES / C.DEFAULT_DELAY); every other kwarg is forwarded
        to ``func``. The exception from the final attempt is re-raised.
        """
        retries = (kwargs['retries']
                   if 'retries' in kwargs
                   else C.DEFAULT_RETRIES)
        delay = (kwargs['delay'] if 'delay' in kwargs else C.DEFAULT_DELAY)
        # Forward everything except the retry-control arguments.
        func_args = {
            k: v
            for k, v in kwargs.items() if k not in {'retries', 'delay'}
        }
        for retry in range(retries):
            try:
                return func(**func_args)
            except Exception as e:
                if retry == retries - 1:
                    # Out of attempts: surface the last error to the caller.
                    raise e
                else:
                    sleep(delay)

    def get_symbols(self):
        # get cached list of symbols
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    def get_dividends(self, symbol, timeframe='max'):
        # given a symbol, return a cached dataframe of dividends
        # restricted to `timeframe` (filtered on the ex-dividend column)
        df = self.reader.load_csv(
            self.finder.get_dividends_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize(self, df, full_mapping, filename, columns, default):
        """Rename provider columns to canonical names and coerce values.

        ``full_mapping`` maps provider column names to canonical ones; only
        columns present in ``df`` are kept. ``columns`` is [time_col,
        *val_cols]; when all are present, the frame is merged with the cached
        file, sorted by time, and each value column is coerced to float with
        falsy entries replaced by ``default``.
        """
        mapping = {k: v for k, v in full_mapping.items() if k in df}
        df = df[list(mapping)].rename(columns=mapping)
        time_col, val_cols = columns[0], columns[1:]
        if time_col in df and set(val_cols).issubset(df.columns):
            df = self.reader.update_df(
                filename, df, time_col).sort_values(by=[time_col])
            # since time col is pd.datetime,
            # consider converting to YYYY-MM-DD str format
            for val_col in val_cols:
                # NOTE(review): falsy values (0, '', None) become `default`;
                # NaN is truthy in Python and passes through float() as NaN.
                df[val_col] = df[val_col].apply(
                    lambda val: float(val) if val else default)
        return df

    def standardize_dividends(self, symbol, df):
        """Standardize a provider dividends frame for ``symbol``."""
        full_mapping = dict(
            zip(['exDate', 'paymentDate', 'declaredDate', 'amount'],
                [C.EX, C.PAY, C.DEC, C.DIV]))
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.DIV], 0)

    def save_dividends(self, **kwargs):
        # given a symbol, save its dividend history
        symbol = kwargs['symbol']
        filename = self.finder.get_dividends_path(symbol, self.provider)
        # Rebuild the cache file from scratch.
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_dividends(**kwargs), C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        # Return the path only if the write actually produced a file.
        if os.path.exists(filename):
            return filename

    def get_splits(self, symbol, timeframe='max'):
        # given a symbol, return a cached dataframe of splits
        df = self.reader.load_csv(
            self.finder.get_splits_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize_splits(self, symbol, df):
        """Standardize a provider splits frame; missing ratios default to 1."""
        full_mapping = dict(
            zip(['exDate', 'paymentDate', 'declaredDate', 'ratio'],
                [C.EX, C.PAY, C.DEC, C.RATIO]))
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.RATIO], 1)

    def save_splits(self, **kwargs):
        # given a symbol, save its splits history
        symbol = kwargs['symbol']
        filename = self.finder.get_splits_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_splits(**kwargs), C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_ohlc(self, symbol, df, filename=None):
        """Standardize a provider OHLC frame; volume/trades become ints."""
        full_mapping = dict(
            zip(['date', 'open', 'high', 'low', 'close', 'volume',
                 'average', 'trades'],
                [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL,
                 C.AVG, C.TRADES]))
        filename = filename or self.finder.get_ohlc_path(symbol,
                                                         self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE], 0)
        for col in [C.VOL, C.TRADES]:
            if col in df:
                # Counts are integers; NaN (unknown) is recorded as 0.
                df[col] = df[col].apply(
                    lambda val: 0 if pd.isnull(val) else int(val))
        return df

    def get_ohlc(self, symbol, timeframe='max'):
        """Return the cached OHLC frame for ``symbol`` within ``timeframe``."""
        df = self.reader.load_csv(
            self.finder.get_ohlc_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def save_ohlc(self, **kwargs):
        """Rewrite the cached OHLC file for ``kwargs['symbol']``."""
        symbol = kwargs['symbol']
        filename = self.finder.get_ohlc_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_ohlc(**kwargs), C.TIME, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def get_social_sentiment(self, symbol, timeframe='max'):
        # given a symbol, return a cached dataframe of sentiment scores
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.POS, C.NEG]]
        return filtered

    def get_social_volume(self, symbol, timeframe='max'):
        # given a symbol, return a cached dataframe of social volume
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VOL, C.DELTA]]
        return filtered

    def save_social_sentiment(self, **kwargs):
        # given a symbol, save its sentiment data; sentiment and volume
        # frames are merged on time when both are non-empty
        symbol = kwargs['symbol']
        filename = self.finder.get_sentiment_path(symbol)
        if os.path.exists(filename):
            os.remove(filename)
        sen_df = self.reader.update_df(
            filename, self.get_social_sentiment(**kwargs), C.TIME)
        # NOTE(review): indexing a DataFrame with a set is deprecated in
        # newer pandas — consider list(...) of the intersection instead.
        sen_df = sen_df[{C.TIME, C.POS, C.NEG}.intersection(sen_df.columns)]
        vol_df = self.reader.update_df(
            filename, self.get_social_volume(**kwargs), C.TIME)
        vol_df = vol_df[{C.TIME, C.VOL, C.DELTA}.intersection(vol_df.columns)]
        if sen_df.empty and not vol_df.empty:
            df = vol_df
        elif not sen_df.empty and vol_df.empty:
            df = sen_df
        elif not sen_df.empty and not vol_df.empty:
            df = sen_df.merge(vol_df, how="outer", on=C.TIME)
        else:
            # Nothing to save: both frames are empty.
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_sentiment(self, symbol, df):
        """Standardize a provider sentiment frame for ``symbol``."""
        full_mapping = dict(
            zip(['timestamp', 'bullish', 'bearish'], [C.TIME, C.POS, C.NEG]))
        # NOTE(review): called with (symbol, provider) here but
        # save_social_sentiment uses get_sentiment_path(symbol) — confirm
        # both resolve to the same cache path.
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.POS, C.NEG], 0)
        return df[{C.TIME, C.POS, C.NEG}.intersection(df.columns)]

    def standardize_volume(self, symbol, df):
        """Standardize a provider social-volume frame for ``symbol``."""
        full_mapping = dict(
            zip(['timestamp', 'volume_score', 'volume_change'],
                [C.TIME, C.VOL, C.DELTA]))
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.VOL, C.DELTA], 0)
        return df[{C.TIME, C.VOL, C.DELTA}.intersection(df.columns)]

    def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=False):
        """Yield one cached intraday frame per date in ``timeframe``.

        NOTE(review): ``min`` shadows the builtin and, like ``extra_hrs``,
        is currently unused — see the TODOs below.
        """
        # implement way to transform 1 min dataset to 5 min data
        # or 30 or 60 should be flexible soln
        # implement way to only get market hours
        # given a symbol, return a cached dataframe
        dates = self.traveller.dates_in_range(timeframe)
        for date in dates:
            df = self.reader.load_csv(
                self.finder.get_intraday_path(symbol, date, self.provider))
            yield self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def save_intraday(self, **kwargs):
        """Rewrite the per-date intraday caches; return the written paths."""
        symbol = kwargs['symbol']
        dfs = self.get_intraday(**kwargs)
        filenames = []
        for df in dfs:
            # The file date is taken from the first row's timestamp.
            date = df[C.TIME].iloc[0].strftime(C.DATE_FMT)
            filename = self.finder.get_intraday_path(
                symbol, date, self.provider)
            if os.path.exists(filename):
                os.remove(filename)
            # Intraday rows keep both the date and the time component.
            save_fmt = f'{C.DATE_FMT} {C.TIME_FMT}'
            df = self.reader.update_df(filename, df, C.TIME, save_fmt)
            self.writer.update_csv(filename, df)
            if os.path.exists(filename):
                filenames.append(filename)
        return filenames

    def get_unemployment_rate(self, timeframe='max'):
        # given a timeframe, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_unemployment_path())
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def standardize_unemployment(self, df):
        """Standardize a provider unemployment-rate frame."""
        full_mapping = dict(zip(['time', 'value'], [C.TIME, C.UN_RATE]))
        filename = self.finder.get_unemployment_path()
        return self.standardize(
            df, full_mapping, filename, [C.TIME, C.UN_RATE], 0)

    def save_unemployment_rate(self, **kwargs):
        # save the cached unemployment-rate series (monthly granularity)
        filename = self.finder.get_unemployment_path()
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_unemployment_rate(**kwargs), C.TIME, '%Y-%m')
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_s2f_ratio(self, df):
        """Standardize a stock-to-flow ratio frame (flat 'o.*' keys)."""
        full_mapping = dict(
            zip(['t', 'o.daysTillHalving', 'o.ratio'],
                [C.TIME, C.HALVING, C.RATIO]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.HALVING, C.RATIO], 0)
        return df[{C.TIME, C.HALVING, C.RATIO}.intersection(df.columns)]

    def get_s2f_ratio(self, timeframe='max'):
        # return the cached stock-to-flow ratio dataframe
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.HALVING, C.RATIO]]
        return filtered

    def standardize_s2f_deflection(self, df):
        """Standardize a stock-to-flow deflection frame; default value 1."""
        full_mapping = dict(zip(['t', 'v'], [C.TIME, C.VAL]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename, [C.TIME, C.VAL], 1)
        return df[{C.TIME, C.VAL}.intersection(df.columns)]

    def get_s2f_deflection(self, timeframe='max'):
        # return the cached stock-to-flow deflection dataframe
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VAL]]
        return filtered

    def save_s2f(self, **kwargs):
        # save the stock-to-flow data; ratio and deflection frames are
        # merged on time when both are non-empty
        filename = self.finder.get_s2f_path()
        if os.path.exists(filename):
            os.remove(filename)
        rat_df = self.reader.update_df(
            filename, self.get_s2f_ratio(**kwargs), C.TIME, C.DATE_FMT)
        rat_df = rat_df[
            {C.TIME, C.HALVING, C.RATIO}.intersection(rat_df.columns)]
        def_df = self.reader.update_df(
            filename, self.get_s2f_deflection(**kwargs), C.TIME, C.DATE_FMT)
        def_df = def_df[{C.TIME, C.VAL}.intersection(def_df.columns)]
        if rat_df.empty and not def_df.empty:
            df = def_df
        elif not rat_df.empty and def_df.empty:
            df = rat_df
        elif not rat_df.empty and not def_df.empty:
            df = rat_df.merge(def_df, how="outer", on=C.TIME)
        else:
            # Nothing to save: both frames are empty.
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename
def __init__(self, broker=None):
    """Set up the file-access helpers.

    ``broker`` is accepted for interface compatibility but is not used
    by this initializer.
    """
    self.writer = FileWriter()
    self.reader = FileReader()
    self.finder = PathFinder()
# NOTE(review): this chunk begins mid-dict-literal — the opening of the
# `snippet` fixture (and earlier names such as `data`, `dir_path`,
# `json_path1`) is defined above this view; their shapes cannot be
# confirmed from here. The remainder builds module-level pandas test
# fixtures and a pytest-style TestFileWriter class.
'symbol': 'NVDA', 'open': 445.00, 'volume': 102265, 'date': '2015-01-15' } data_ = data[:] data_.append(snippet) csv_path1 = os.path.join(dir_path, 'test1.csv') csv_path2 = os.path.join(dir_path, 'test2.csv') test_df = pd.DataFrame(data) big_df = pd.DataFrame(data_) small_df = pd.DataFrame([snippet]) empty_df = pd.DataFrame() reader = FileReader() writer = FileWriter() symbols_path = reader.store.finder.get_symbols_path() test_path = f'{symbols_path}_TEST' class TestFileWriter: def test_init(self): assert type(writer).__name__ == 'FileWriter' assert hasattr(reader, 'store') def test_save_json(self): # save empty json object writer.save_json(json_path1, {}) assert os.path.exists(json_path1)