Example no. 1
0
class MarketData:
    """Read and write cached market data (symbols, dividends) on disk."""

    def __init__(self, broker=None):
        # `broker` is accepted for interface compatibility but is not
        # used by this implementation.
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols(self):
        """Return the cached list of symbols."""
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    def get_dividends(self, symbol, timeframe=None):
        """Return the cached dividend DataFrame for `symbol`.

        NOTE(review): `timeframe` is currently ignored — confirm whether
        filtering was intended here.
        """
        return self.reader.load_csv(self.finder.get_dividends_path(symbol))

    def save_dividends(self, symbol, timeframe=None):
        """Save `symbol`'s dividend history back to its cache file."""
        # get_dividends defaults `timeframe` to None, so the previous
        # if/else that branched on `timeframe` was redundant.
        df = self.get_dividends(symbol, timeframe)
        self.writer.update_csv(self.finder.get_dividends_path(symbol), df)
Example no. 2
0
class Robinhood:
    """Broker operations backed by the `rh` (robin_stocks) API."""

    def __init__(self, usr=None, pwd=None, mfa=None):
        # Authentication: credentials come from the arguments or from
        # the environment (RH_USERNAME / RH_PASSWORD / RH_2FA).
        load_dotenv()

        username = usr or os.environ['RH_USERNAME']
        password = pwd or os.environ['RH_PASSWORD']
        mfa_code = mfa or pyotp.TOTP(os.environ['RH_2FA']).now()

        rh.login(username, password, mfa_code=mfa_code)
        self.api = rh
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols_from_instruments(self, instruments):
        # given a list of instrument URLs,
        # return a list of corresponding symbols
        return [
            self.api.get_symbol_by_url(instrument)
            for instrument in instruments
        ]

    def flatten(self, xxs):
        # flattens a 2D list into a 1D list
        return [x for xs in xxs for x in xs]

    def get_hists(self, symbols, span='year', interval='day', save=False):
        """Return a DataFrame of historical data for `symbols`;
        optionally save it to 'data/data.csv'."""
        hists = [
            self.api.get_stock_historicals(symbol, interval, span)
            for symbol in symbols
        ]
        # the API returns [None] for symbols with no data; drop those
        clean = [hist for hist in hists if hist != [None]]
        df = pd.DataFrame.from_records(self.flatten(clean))
        # guard: with no usable records there is no 'begins_at' column,
        # and the original code raised KeyError here
        if not df.empty:
            # look into diff b/w tz_localize and tz_convert w param
            # 'US/Eastern'; ideally store utc time
            df['begins_at'] = pd.to_datetime(
                df['begins_at']).apply(lambda x: x.tz_localize(None))
        if save:
            self.writer.save_csv('data/data.csv', df)
        return df

    def get_names(self, symbols):
        # given a list of stock symbols,
        # return a list of company names
        return [self.api.get_name_by_symbol(symbol) for symbol in symbols]

    def save_symbols(self):
        """Save all the portfolio symbols (with company names) in a table."""
        if not hasattr(self, 'symbols'):
            self.load_portfolio()
        symbols = list(self.symbols)
        names = self.get_names(symbols)
        df = pd.DataFrame({C.SYMBOL: symbols, C.NAME: names})
        self.writer.update_csv(self.finder.get_symbols_path(), df)

    def load_portfolio(self):
        """Fetch positions/holdings and build instrument<->symbol lookups
        plus historical data for the whole portfolio."""
        start = time.time()
        # Data acquisition
        self.positions = self.api.get_all_positions()
        self.holdings = self.api.build_holdings()

        # Create lookup table instrument -> symbol and vice versa
        instruments = [position['instrument'] for position in self.positions]
        symbols = self.get_symbols_from_instruments(instruments)

        self.instruments = dict(zip(instruments, symbols))
        self.symbols = dict(map(reversed, self.instruments.items()))

        # Get historical data for all instruments
        self.hist = self.get_hists(symbols)
        end = time.time()
        print(f'Successfully loaded portfolio in {round(end-start, 2)}s.')
Example no. 3
0
class Robinhood:
    """Broker operations backed by the `rh` (robin_stocks) API."""

    def __init__(self, usr=None, pwd=None, mfa=None):
        # Authentication: credentials come from the arguments or from
        # config.env (RH_USERNAME / RH_PASSWORD / RH_2FA).
        load_dotenv(find_dotenv('config.env'))

        username = usr or os.environ['RH_USERNAME']
        password = pwd or os.environ['RH_PASSWORD']
        mfa_code = mfa or pyotp.TOTP(os.environ['RH_2FA']).now()

        rh.login(username, password, mfa_code=mfa_code)
        self.api = rh
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def flatten(self, xxs):
        # flattens a 2D list into a 1D list
        return [x for xs in xxs for x in xs]

    def get_hists(self, symbols, span='year', interval='day', save=False):
        """Return a DataFrame of historical data for `symbols`;
        optionally save it to 'data/data.csv'."""
        hists = [
            self.api.get_stock_historicals(symbol, interval, span)
            for symbol in symbols
        ]
        # the API returns [None] for symbols with no data; drop those
        clean = [hist for hist in hists if hist != [None]]
        df = pd.DataFrame.from_records(self.flatten(clean))
        # guard: with no usable records there is no 'begins_at' column,
        # and the original code raised KeyError here
        if not df.empty:
            # look into diff b/w tz_localize and tz_convert w param
            # 'US/Eastern'; ideally store utc time
            df['begins_at'] = pd.to_datetime(
                df['begins_at']).apply(lambda x: x.tz_localize(None))
        if save:
            self.writer.save_csv('data/data.csv', df)
        return df

    def get_names(self, symbols):
        """Given a list of stock symbols, return a list of company names."""
        names = []
        for symbol in symbols:
            if hasattr(self, 'holdings') and symbol in self.holdings:
                # prefer the cached holdings name to avoid an API call
                names.append(self.holdings[symbol]['name'])
            else:
                names.append(self.api.get_name_by_symbol(symbol))
        return names

    def save_symbols(self):
        """Save all the portfolio symbols (with company names) in a table."""
        symbols = self.get_symbols()
        names = self.get_names(symbols)
        df = pd.DataFrame({C.SYMBOL: symbols, C.NAME: names})
        self.writer.save_csv(self.finder.get_symbols_path(), df)

    def get_holdings(self):
        """Lazily fetch and cache the account holdings."""
        if not hasattr(self, 'holdings'):
            self.holdings = self.api.build_holdings()
        return self.holdings

    def get_symbols(self):
        """Return the portfolio symbols (the holdings' keys)."""
        # get_holdings already handles the lazy fetch, so the extra
        # hasattr check the original performed here was redundant
        return list(self.get_holdings())
Example no. 4
0
class MarketData:
    """Read and write cached market data on disk: dividends, splits, OHLC,
    social sentiment/volume, intraday bars, unemployment rate, and
    stock-to-flow series, using the file reader/writer and path finder."""

    def __init__(self):
        load_dotenv(find_dotenv('config.env'))
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()
        self.traveller = TimeTraveller()
        # default data provider used when building cache paths
        self.provider = 'iexcloud'

    def try_again(self, func, **kwargs):
        """Call func with the remaining kwargs, retrying up to `retries`
        times with `delay` seconds between attempts; the last failure's
        exception is re-raised."""
        retries = (kwargs['retries']
                   if 'retries' in kwargs else C.DEFAULT_RETRIES)
        delay = (kwargs['delay'] if 'delay' in kwargs else C.DEFAULT_DELAY)
        # strip the retry-control kwargs before forwarding to func
        func_args = {
            k: v
            for k, v in kwargs.items() if k not in {'retries', 'delay'}
        }
        for retry in range(retries):
            try:
                return func(**func_args)
            except Exception as e:
                if retry == retries - 1:
                    raise e
                else:
                    sleep(delay)

    def get_symbols(self):
        """Return the cached list of symbols."""
        # get cached list of symbols
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    def get_dividends(self, symbol, timeframe='max'):
        """Return the cached dividend DataFrame for `symbol`, filtered by
        `timeframe` on the ex-date column."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(
            self.finder.get_dividends_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize(self, df, full_mapping, filename, columns, default):
        """Rename provider columns per `full_mapping`, merge with the cache
        at `filename` on the time column, sort by time, and coerce the value
        columns to float (falsy values become `default`).

        `columns` is [time_col, *val_cols]; rows are only merged/coerced
        when all of those columns are present after renaming.
        """
        # keep only the mapping entries whose source column exists in df
        mapping = {k: v for k, v in full_mapping.items() if k in df}

        df = df[list(mapping)].rename(columns=mapping)
        time_col, val_cols = columns[0], columns[1:]

        if time_col in df and set(val_cols).issubset(df.columns):
            df = self.reader.update_df(filename, df,
                                       time_col).sort_values(by=[time_col])
            # since time col is pd.datetime,
            # consider converting to YYYY-MM-DD str format
            for val_col in val_cols:
                df[val_col] = df[val_col].apply(lambda val: float(val)
                                                if val else default)

        return df

    def standardize_dividends(self, symbol, df):
        """Standardize a provider dividends DataFrame (default amount 0)."""
        full_mapping = dict(
            zip(['exDate', 'paymentDate', 'declaredDate', 'amount'],
                [C.EX, C.PAY, C.DEC, C.DIV]))
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.DIV], 0)

    def save_dividends(self, **kwargs):
        """Save `symbol`'s dividend history; returns the filename on
        success, else None. The existing cache file is rebuilt from
        scratch (removed first)."""
        # given a symbol, save its dividend history
        symbol = kwargs['symbol']
        filename = self.finder.get_dividends_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(filename, self.get_dividends(**kwargs),
                                   C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def get_splits(self, symbol, timeframe='max'):
        """Return the cached splits DataFrame for `symbol`, filtered by
        `timeframe` on the ex-date column."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(
            self.finder.get_splits_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize_splits(self, symbol, df):
        """Standardize a provider splits DataFrame (default ratio 1)."""
        full_mapping = dict(
            zip(['exDate', 'paymentDate', 'declaredDate', 'ratio'],
                [C.EX, C.PAY, C.DEC, C.RATIO]))
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.RATIO], 1)

    def save_splits(self, **kwargs):
        """Save `symbol`'s splits history; returns the filename on
        success, else None."""
        # given a symbol, save its splits history
        symbol = kwargs['symbol']
        filename = self.finder.get_splits_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(filename, self.get_splits(**kwargs), C.EX,
                                   C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_ohlc(self, symbol, df, filename=None):
        """Standardize a provider OHLC DataFrame; volume/trades are coerced
        to int with NaN -> 0. `filename` overrides the default cache path."""
        full_mapping = dict(
            zip([
                'date', 'open', 'high', 'low', 'close', 'volume', 'average',
                'trades'
            ], [
                C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL, C.AVG, C.TRADES
            ]))

        filename = filename or self.finder.get_ohlc_path(symbol, self.provider)

        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE], 0)

        for col in [C.VOL, C.TRADES]:
            if col in df:
                df[col] = df[col].apply(lambda val: 0
                                        if pd.isnull(val) else int(val))

        return df

    def get_ohlc(self, symbol, timeframe='max'):
        """Return the cached OHLC DataFrame for `symbol`, filtered by
        `timeframe` on the time column."""
        df = self.reader.load_csv(
            self.finder.get_ohlc_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def save_ohlc(self, **kwargs):
        """Save `symbol`'s OHLC history; returns the filename on success,
        else None."""
        symbol = kwargs['symbol']
        filename = self.finder.get_ohlc_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(filename, self.get_ohlc(**kwargs), C.TIME,
                                   C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def get_social_sentiment(self, symbol, timeframe='max'):
        """Return cached social sentiment (time, positive, negative) for
        `symbol`, filtered by `timeframe`."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.POS, C.NEG]]
        return filtered

    def get_social_volume(self, symbol, timeframe='max'):
        """Return cached social volume (time, volume, delta) for `symbol`,
        filtered by `timeframe`."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VOL, C.DELTA]]
        return filtered

    def save_social_sentiment(self, **kwargs):
        """Save `symbol`'s sentiment data, merging the sentiment and volume
        frames on the time column; returns the filename on success, None if
        both frames are empty."""
        # given a symbol, save its sentiment data
        symbol = kwargs['symbol']
        filename = self.finder.get_sentiment_path(symbol)

        if os.path.exists(filename):
            os.remove(filename)

        sen_df = self.reader.update_df(filename,
                                       self.get_social_sentiment(**kwargs),
                                       C.TIME)
        sen_df = sen_df[{C.TIME, C.POS, C.NEG}.intersection(sen_df.columns)]

        vol_df = self.reader.update_df(filename,
                                       self.get_social_volume(**kwargs),
                                       C.TIME)
        vol_df = vol_df[{C.TIME, C.VOL, C.DELTA}.intersection(vol_df.columns)]

        # keep whichever frame has data; merge when both do
        if sen_df.empty and not vol_df.empty:
            df = vol_df
        elif not sen_df.empty and vol_df.empty:
            df = sen_df
        elif not sen_df.empty and not vol_df.empty:
            df = sen_df.merge(vol_df, how="outer", on=C.TIME)
        else:
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_sentiment(self, symbol, df):
        """Standardize a provider sentiment DataFrame to (time, pos, neg).

        NOTE(review): this passes self.provider to get_sentiment_path while
        get_social_sentiment/save_social_sentiment call it without the
        provider — confirm the paths are meant to match.
        """
        full_mapping = dict(
            zip(['timestamp', 'bullish', 'bearish'], [C.TIME, C.POS, C.NEG]))
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.POS, C.NEG], 0)
        return df[{C.TIME, C.POS, C.NEG}.intersection(df.columns)]

    def standardize_volume(self, symbol, df):
        """Standardize a provider volume DataFrame to (time, vol, delta).

        NOTE(review): same provider-argument inconsistency as
        standardize_sentiment — confirm against get_social_volume.
        """
        full_mapping = dict(
            zip(['timestamp', 'volume_score', 'volume_change'],
                [C.TIME, C.VOL, C.DELTA]))
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.VOL, C.DELTA], 0)
        return df[{C.TIME, C.VOL, C.DELTA}.intersection(df.columns)]

    def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=False):
        """Yield one cached intraday DataFrame per date in `timeframe`.

        NOTE: `min` shadows the builtin but is kept for interface
        compatibility; `min` and `extra_hrs` are currently unused here.
        """
        # implement way to transform 1 min dataset to 5 min data
        #  or 30 or 60 should be flexible soln
        # implement way to only get market hours
        # given a symbol, return a cached dataframe
        dates = self.traveller.dates_in_range(timeframe)
        for date in dates:
            df = self.reader.load_csv(
                self.finder.get_intraday_path(symbol, date, self.provider))
            yield self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def save_intraday(self, **kwargs):
        """Save `symbol`'s intraday data, one cache file per date; returns
        the list of filenames written."""
        symbol = kwargs['symbol']
        dfs = self.get_intraday(**kwargs)
        filenames = []

        for df in dfs:
            # assumes the time column holds datetimes — TODO confirm
            date = df[C.TIME].iloc[0].strftime(C.DATE_FMT)
            filename = self.finder.get_intraday_path(symbol, date,
                                                     self.provider)
            if os.path.exists(filename):
                os.remove(filename)
            save_fmt = f'{C.DATE_FMT} {C.TIME_FMT}'
            df = self.reader.update_df(filename, df, C.TIME, save_fmt)
            self.writer.update_csv(filename, df)
            if os.path.exists(filename):
                filenames.append(filename)
        return filenames

    def get_unemployment_rate(self, timeframe='max'):
        """Return the cached unemployment-rate DataFrame, filtered by
        `timeframe`."""
        # given a timeframe, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_unemployment_path())
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def standardize_unemployment(self, df):
        """Standardize a provider unemployment DataFrame (default rate 0)."""
        full_mapping = dict(zip(['time', 'value'], [C.TIME, C.UN_RATE]))
        filename = self.finder.get_unemployment_path()
        return self.standardize(df, full_mapping, filename,
                                [C.TIME, C.UN_RATE], 0)

    def save_unemployment_rate(self, **kwargs):
        """Save the unemployment-rate history (monthly '%Y-%m' format);
        returns the filename on success, else None."""
        filename = self.finder.get_unemployment_path()
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(filename,
                                   self.get_unemployment_rate(**kwargs),
                                   C.TIME, '%Y-%m')
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_s2f_ratio(self, df):
        """Standardize a provider stock-to-flow ratio DataFrame to
        (time, days-till-halving, ratio)."""
        full_mapping = dict(
            zip(['t', 'o.daysTillHalving', 'o.ratio'],
                [C.TIME, C.HALVING, C.RATIO]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.HALVING, C.RATIO], 0)
        return df[{C.TIME, C.HALVING, C.RATIO}.intersection(df.columns)]

    def get_s2f_ratio(self, timeframe='max'):
        """Return the cached stock-to-flow ratio DataFrame, filtered by
        `timeframe`."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.HALVING, C.RATIO]]
        return filtered

    def standardize_s2f_deflection(self, df):
        """Standardize a provider s2f deflection DataFrame (default 1)."""
        full_mapping = dict(zip(['t', 'v'], [C.TIME, C.VAL]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename, [C.TIME, C.VAL], 1)
        return df[{C.TIME, C.VAL}.intersection(df.columns)]

    def get_s2f_deflection(self, timeframe='max'):
        """Return the cached s2f deflection DataFrame, filtered by
        `timeframe`."""
        # given a symbol, return a cached dataframe
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(df, C.TIME,
                                                 timeframe)[[C.TIME, C.VAL]]
        return filtered

    def save_s2f(self, **kwargs):
        """Save the stock-to-flow data, merging the ratio and deflection
        frames on the time column; returns the filename on success, None if
        both frames are empty."""
        # given a symbol, save its s2f data
        filename = self.finder.get_s2f_path()

        if os.path.exists(filename):
            os.remove(filename)

        rat_df = self.reader.update_df(filename, self.get_s2f_ratio(**kwargs),
                                       C.TIME, C.DATE_FMT)
        rat_df = rat_df[{C.TIME, C.HALVING,
                         C.RATIO}.intersection(rat_df.columns)]

        def_df = self.reader.update_df(filename,
                                       self.get_s2f_deflection(**kwargs),
                                       C.TIME, C.DATE_FMT)
        def_df = def_df[{C.TIME, C.VAL}.intersection(def_df.columns)]

        # keep whichever frame has data; merge when both do
        if rat_df.empty and not def_df.empty:
            df = def_df
        elif not rat_df.empty and def_df.empty:
            df = rat_df
        elif not rat_df.empty and not def_df.empty:
            df = rat_df.merge(def_df, how="outer", on=C.TIME)
        else:
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename