def get_consensuses(self, start_date=None, end_date=None, assets=None) -> pd.DataFrame:
    """Return the Estimize consensus time series, optionally filtered.

    Parses consensus.csv from the data directory on first use, memoizes
    the parsed frame in the cache service, and filters the cached copy by
    date range and assets on every call.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: filtered consensus DataFrame.
    """
    logger.info('get_consensuses: start')
    cache_key = 'estimize_consensuses'
    consensuses = self.cache_service.get(cache_key)
    if consensuses is None:
        # Cache miss: parse the CSV from disk once and memoize the result.
        # NOTE(review): '%y-%m-%d' expects a two-digit year — confirm the
        # 'date' column in consensus.csv actually uses that format.
        consensuses = self.csv_data_service.get_from_file(
            filename=os.path.join(cfg.data_dir(), 'consensus.csv'),
            pre_func=self._pre_func,
            post_func=self._post_func,
            date_column='date',
            date_format='%y-%m-%d',
            timezone='US/Eastern',
            symbol_column='ticker'
        )
        self.cache_service.put(cache_key, consensuses)
    result = dfutils.filter(consensuses, start_date, end_date, assets)
    logger.info('get_consensuses: end')
    return result
def get_market_factors(self, start_date=None, end_date=None, assets=None, use_cache=True) -> pd.DataFrame:
    """Return market factor data downloaded from the root data URL.

    The downloaded frame is memoized under a cache key; subsequent calls
    filter the cached copy by date range and assets.

    Fix: ``use_cache`` was previously accepted but never consulted. It now
    controls whether the memoized frame is read; ``use_cache=False`` forces
    a re-download (the fresh frame still refreshes the cache). The default
    (``True``) preserves the original behavior.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :param use_cache: when False, bypass the cached frame and re-download.
    :return: filtered market factor DataFrame.
    """
    logger.info('get_market_factors: start')
    cache_key = 'market_factors'
    # Honor use_cache: skip the cache read when a refresh is requested.
    df = self.cache_service.get(cache_key) if use_cache else None
    if df is None:
        df = self.csv_data_service.get_from_url(
            url='{}/market_factors.csv'.format(cfg.ROOT_DATA_URL),
            post_func=self._post_func,
            date_column='as_of_date',
            timezone='US/Eastern',
            symbol_column='ticker'
        )
        self.cache_service.put(cache_key, df)
    df = dfutils.filter(df, start_date, end_date, assets)
    logger.info('get_market_factors: end')
    return df
def get_signals(self, start_date=None, end_date=None, assets=None) -> pd.DataFrame:
    """Return the Estimize signal time series, optionally filtered.

    Parses signal_time_series.csv (timestamps in UTC, ISO-8601 format) on
    first use, memoizes the parsed frame, and filters the cached copy by
    date range and assets on every call.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: filtered signals DataFrame.
    """
    logger.info('get_signals: start')
    cache_key = 'estimize_signals'
    signals = self.cache_service.get(cache_key)
    if signals is None:
        # Cache miss: parse the CSV once and memoize the result.
        signals = self.csv_data_service.get_from_file(
            filename='{}/signal_time_series.csv'.format(cfg.data_dir()),
            pre_func=self._pre_func,
            post_func=self._post_func,
            date_column='as_of',
            date_format='%Y-%m-%dT%H:%M:%S',
            timezone='UTC',
            symbol_column='ticker'
        )
        self.cache_service.put(cache_key, signals)
    result = dfutils.filter(signals, start_date, end_date, assets)
    logger.info('get_signals: end')
    return result
def get_market_caps(self, start_date=None, end_date=None, assets=None):
    """Return daily market caps (and size buckets) indexed by (as_of_date, asset).

    On a cache miss the raw caps are downloaded, pivoted to a dense
    date-by-asset grid, forward-filled over all trading days, re-stacked,
    and bucketed into Micro/Small/Mid/Large/Mega by capitalization.

    Fix: the caller's ``end_date`` parameter was previously overwritten by
    the data's maximum date while building the cache, so on a cold cache
    the final filter ignored the requested end date. A local variable is
    used instead, and ``dfutils.filter`` now always honors the arguments.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: DataFrame with 'market_cap' and 'market_cap_type' columns.
    """
    cache_key = 'market_caps'
    df = self.cache_service.get(cache_key)
    if df is None:
        df = self.csv_data_service.get_from_url(
            url='{}/market_caps.csv'.format(cfg.ROOT_DATA_URL),
            post_func=self._post_func,
            date_column='as_of_date',
            symbol_column='ticker'
        )
        df.reset_index(inplace=True)
        # Dense date x asset grid of caps.
        df = pd.pivot_table(df, index='as_of_date', columns='asset', values='market_cap')
        # Last observed date in the data — a local name, NOT the end_date
        # parameter (bug fix: the parameter was clobbered here before).
        last_observed_date = df.index.get_level_values('as_of_date').max()
        dates = self.calendar_service.get_trading_days_between(
            cfg.DEFAULT_START_DATE, last_observed_date)
        dates = pd.DataFrame([], index=dates)
        # Reindex onto every trading day and carry caps forward over gaps.
        df = dates.join(df, how='left')
        df.ffill(inplace=True)
        # Back to a long (as_of_date, asset) index with one cap column.
        df = df.stack().to_frame()
        df.reset_index(inplace=True)
        df.rename(columns={
            'level_0': 'as_of_date',
            'level_1': 'asset',
            df.columns[-1]: 'market_cap'
        }, inplace=True)
        df.set_index(['as_of_date', 'asset'], inplace=True)
        # Size buckets. Series.between is inclusive on both ends, so a cap
        # exactly on a boundary (e.g. 300e6) matches two masks and ends up
        # in the later (larger) bucket because later assignments win.
        df.loc[df['market_cap'].between(0, 300e6), 'market_cap_type'] = 'Micro'
        df.loc[df['market_cap'].between(300e6, 2e9), 'market_cap_type'] = 'Small'
        df.loc[df['market_cap'].between(2e9, 10e9), 'market_cap_type'] = 'Mid'
        df.loc[df['market_cap'].between(10e9, 200e9), 'market_cap_type'] = 'Large'
        df.loc[df['market_cap'].between(200e9, 12e12), 'market_cap_type'] = 'Mega'
        self.cache_service.put(cache_key, df)
    df = dfutils.filter(df, start_date, end_date, assets)
    return df
def get_asset_info(self, assets=None):
    """Return static instrument metadata, optionally restricted to assets.

    Parses instruments.csv from the data directory on first use, memoizes
    the parsed frame, and applies the asset filter on every call. There is
    no date dimension here, so only ``assets`` is passed to the filter.

    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: asset info DataFrame.
    """
    cache_key = 'asset_info'
    info = self.cache_service.get(cache_key)
    if info is None:
        # Cache miss: parse the CSV once and memoize the result.
        info = self.csv_data_service.get_from_file(
            filename=os.path.join(cfg.data_dir(), 'instruments.csv'),
            pre_func=self._pre_func,
            post_func=self._post_func,
            symbol_column='ticker'
        )
        self.cache_service.put(cache_key, info)
    return dfutils.filter(info, assets=assets)
def get_final_consensuses(self, start_date=None, end_date=None, assets=None) -> pd.DataFrame:
    """Return only the final consensus rows, i.e. those whose as-of date
    equals the report date of the release they estimate.

    Derived from get_consensuses() on a cache miss and memoized separately.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: filtered final-consensus DataFrame.
    """
    logger.info('get_final_consensuses: start')
    cache_key = 'estimize_final_consensuses'
    df = self.cache_service.get(cache_key)
    if df is None:
        df = self.get_consensuses()
        # Keep rows where the index's 'as_of_date' level matches the row's
        # reports_at_date, i.e. the last consensus published for a release.
        # NOTE(review): selecting with .iloc and a mask built from an
        # Index == Series comparison relies on positional alignment; .loc
        # is the usual spelling — confirm this behaves as intended.
        df = df.iloc[df.index.get_level_values('as_of_date') == pd.to_datetime(df['reports_at_date'])]
        self.cache_service.put(cache_key, df)
    df = dfutils.filter(df, start_date, end_date, assets)
    logger.info('get_final_consensuses: end')
    return df
def get_releases(self, start_date=None, end_date=None, assets=None) -> pd.DataFrame:
    """Return earnings release records indexed by (as_of_date, asset).

    On a cache miss, releases.csv is joined against the asset info service
    (instrument_id -> asset), and each UTC ``reports_at`` timestamp is
    converted to a US/Eastern calendar date for the index. The result is
    memoized and filtered by date range and assets on every call.

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :return: filtered releases DataFrame.
    """
    cache_key = 'releases'
    releases = self.cache_service.get(cache_key)
    if releases is None:
        # instrument_id -> asset lookup table from the asset info service.
        asset_map = self.asset_info_service.get_asset_info().reset_index()[['asset', 'instrument_id']]
        asset_map = asset_map.set_index('instrument_id')
        releases = pd.read_csv(os.path.join(cfg.data_dir(), 'releases.csv'))
        releases = releases.rename(columns={'id': 'release_id'})
        releases = releases.set_index('instrument_id')
        # Inner join drops releases whose instrument is unknown.
        releases = releases.join(asset_map, how='inner')
        releases = releases.reset_index()
        # UTC report timestamp -> US/Eastern calendar date.
        reports_at = pd.to_datetime(releases['reports_at'], format='%Y-%m-%dT%H:%M:%S')
        releases['as_of_date'] = reports_at.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.date
        releases.set_index(['as_of_date', 'asset'], inplace=True)
        self.cache_service.put(cache_key, releases)
    return dfutils.filter(releases, start_date, end_date, assets)
def get_market_factors(self, start_date=None, end_date=None, assets=None, use_cache=True) -> pd.DataFrame:
    """Return market factor data, downloading it or regenerating it locally.

    Tries the hosted market_factors.csv first; if the download fails, the
    factors are rebuilt from the market factor model over the final-consensus
    universe and persisted to the local data directory for future runs.

    Fixes:
    - bare ``except:`` narrowed to ``except Exception`` (and logged) so
      KeyboardInterrupt/SystemExit still propagate and failures are visible;
    - the fallback path no longer overwrites the caller's ``assets``
      argument, so the final filter honors the requested universe;
    - ``use_cache`` was previously accepted but never consulted; it now
      controls whether the memoized frame is read (``False`` forces a
      rebuild; the fresh frame still refreshes the cache).

    :param start_date: inclusive lower bound passed to dfutils.filter, or None.
    :param end_date: inclusive upper bound passed to dfutils.filter, or None.
    :param assets: asset universe passed to dfutils.filter, or None for all.
    :param use_cache: when False, bypass the cached frame and rebuild.
    :return: filtered market factor DataFrame.
    """
    logger.info('get_market_factors: start')
    cache_key = 'market_factors'
    # Honor use_cache: skip the cache read when a refresh is requested.
    df = self.cache_service.get(cache_key) if use_cache else None
    if df is None:
        try:
            df = self.csv_data_service.get_from_url(
                url='{}/market_factors.csv'.format(cfg.ROOT_DATA_URL),
                post_func=self._post_func,
                date_column='as_of_date',
                timezone='US/Eastern',
                symbol_column='ticker'
            )
        except Exception:
            logger.exception('get_market_factors: download failed; regenerating from model')
            # Rebuild locally over the final-consensus universe. Use a local
            # name rather than clobbering the caller's `assets` (bug fix).
            universe = dfutils.unique_assets(self.estimize_consensus_service.get_final_consensuses())
            df = MarketFactorModelQuery(
                asset_service=self.asset_service,
                calendar_service=self.calendar_service,
                assets=universe
            ).results()

            # Persist the generated factors so future runs can load the CSV.
            csv_df = df.reset_index()
            csv_df['ticker'] = csv_df['asset'].map(lambda a: a.symbol)
            csv_df.drop(['asset'], axis=1, inplace=True)
            csv_df.set_index(['as_of_date', 'ticker'], inplace=True)
            csv_df.to_csv('{}/market_factors.csv'.format(cfg.data_dir()))

        self.cache_service.put(cache_key, df)
    df = dfutils.filter(df, start_date, end_date, assets)
    logger.info('get_market_factors: end')
    return df