def try_fix_missing(ticker):
    """Attempt to fix missing OHLC values in a market DataFrame by using rules.

    Rule 1: when three of Open/High/Low/Close are missing, the remaining value
    is copied to the three missing ones.
    Rule 2: when exactly Open and Close are missing, Open takes the value of
    Low and Close takes the value of High.

    Only the Open/High/Low/Close columns are written; any other column of the
    row is left untouched. The DataFrame is saved back after processing.

    :param ticker: str - Customized ticker
    :return: Number of missing values left (0 if no data is found for ticker)
    """
    log = logging.getLogger(__name__)
    # Get DataFrame
    df = odu.get_market_df(ticker)
    if df is None:
        log.warning('No data found for: {}'.format(ticker))
        return 0
    before = df.isnull().sum().sum()
    log.debug('Missing values before: {}'.format(before))
    fields = ['Open', 'High', 'Low', 'Close']
    # Positional column locations: writing through a single df.iloc[row, col]
    # call is what guarantees the value lands in df (chained indexing such as
    # df.iloc[idx]['Open'] = x assigns to a temporary copy of the row).
    field_cols = [df.columns.get_loc(field) for field in fields]
    open_col = df.columns.get_loc('Open')
    close_col = df.columns.get_loc('Close')
    for idx in range(len(df)):
        rvals = df.iloc[idx][fields]
        nulls = rvals.isnull()
        missing = nulls.sum()
        if missing == 3:
            # Rule 1: If 3 missing values, then all values should be equal!
            value = rvals.dropna().iloc[0]
            df.iloc[idx, field_cols] = value
        elif missing == 2 and nulls['Open'] and nulls['Close']:
            # Rule 2: If Open and Close are missing, then Low goes to Open
            # and High to Close
            df.iloc[idx, open_col] = rvals['Low']
            df.iloc[idx, close_col] = rvals['High']
    # Save DataFrame
    odu.save_market_df(ticker, df)
    after = df.isnull().sum().sum()
    log.warning('Missing values after: {} - fixed: {}'.format(after, before - after))
    return after
def calculate_stds(df, length=20):
    """Build a per-spread table of volatility and COT figures (used in Excel).

    For every row of the universe, the standard deviation of the last
    ``length`` closes is computed (0 when no market data is available) and
    combined with the latest HPS/HPH values from the COT data of the stem.

    :param df: DataFrame universe of traded symbols
    :param length: Length for the standard deviation
    :return: DataFrame with results
    """
    log = logging.getLogger(__name__)
    columns = ['Ticker', 'LastDate', 'DaysTo', 'HPS', 'HPH', 'PVol', 'IVol']
    records = []
    for _, spread in df.iterrows():
        ticker = spread['Ticker']
        stem = ticker[0:2]
        log.debug('Process spread: {}'.format(ticker))
        # STD over the most recent closes, 0 when there is no market data
        market = odu.get_market_df(ticker)
        std = 0 if market is None else market.tail(length)['Close'].std()
        # COT: only the latest row is used
        latest_cot = odc.cot_data(stem).iloc[-1]
        hps = latest_cot['HPS']
        hph = latest_cot['HPH']
        # Add data
        record = {
            'Ticker': ticker,
            'LastDate': spread['LastDate'],
            'DaysTo': spread['DaysTo'],
            'HPS': hps,
            'HPH': hph,
            'PVol': std,
            'IVol': std * i.get(stem, 'Point'),
        }
        records.append(record)
    # Return dataframe
    return pd.DataFrame(records, columns=columns)
def test_roll_yield():
    """Check that the roll yield is a Series that can be attached to the data."""
    ticker = 'LBS2H18'
    market = odu.get_market_df(ticker)
    roll = odu.roll_yield(ticker, market)
    assert isinstance(roll, pd.Series)
    # NOTE: `combined` is the same object as `market`, not a copy
    combined = market
    combined['RollYield'] = roll
    assert isinstance(combined, pd.DataFrame)
    for position in (0, -1):
        assert market.index[position] == combined.index[position]
    for preview in (combined.head, combined.tail):
        print(preview())
def initialize_data(self, days_back=90, extra_days=True, partial=False):
    """Get an ordered dictionary of DataFrames with the corresponding data.

    Contracts with no data file, with not enough data (unless ``partial``) or,
    for ActiveLive status, with no data on or after the start date are removed
    from ``self.chain`` and ``self.contracts`` and are not part of the result.

    :param days_back: int - Number of days back for the data (default: 90)
    :param extra_days: bool - Option to add a day before and the last day
                       (LTD or FND) - Used to load data
    :param partial: bool - Option to load data even if length is smaller
                    than days_back
    :return: OrderedDict - Ticker -> DataFrame (also stored in self.data)
    """
    self.days_back = days_back
    if self.chain is None:
        self.initialize_contracts(Status.Expired)
    # Get all the dataframes
    dfs = co.OrderedDict()
    # Chain rows to remove; dropped after the loop so that the chain is not
    # modified while it is being iterated.
    rejected = []
    for index, row in self.chain.iterrows():
        ticker = row['Ticker']
        sdf = odu.get_market_df(ticker)
        if sdf is None or (len(sdf) < self.days_back and not partial):
            rejected.append(index)
            self.contracts.remove(ticker)
            self.log.warning(
                'File not found or not enough data for: {}!'.format(ticker))
            continue
        # Roll Yield for Spreads
        if self.future_type == oci.FutureType.Spread:
            sdf['RollYield'] = odu.roll_yield(ticker, sdf)
        last_date = pd.to_datetime(row['LastDate'])
        if extra_days:
            sdf = sdf[sdf.index <= last_date]
            selected = sdf.iloc[-(days_back + 1):]
        else:
            # LTD or FND should be excluded as they can be volatile
            # (and can't trade FND)
            sdf = sdf[sdf.index < last_date]
            selected = sdf.iloc[-days_back:]
        # Find start date
        if self.status == Status.ActiveLive:
            start = odu.get_start_date(row['LastDate'], days_back=days_back)
            selected = sdf[sdf.index >= start]
            if len(selected) == 0:
                # Too early: the contract must not be left in the result
                rejected.append(index)
                self.contracts.remove(ticker)
                self.log.warning('Removing {} as too early!'.format(ticker))
                continue
        dfs[ticker] = selected
    # Remove rejected contracts from the chain DataFrame
    if rejected:
        self.chain.drop(rejected, inplace=True)
    self.chain.reset_index(inplace=True, drop=True)
    self.data = dfs
    return self.data
def download_list(status, symbols=None, interval='daily', override_last=False):
    """Download all data for a specific list as defined in the list module.

    For each ticker, the download starts 3 business days before the last
    stored entry (or from 1900-01-01 when nothing usable is stored) and ends
    today. A ticker whose last stored entry is today is skipped unless
    ``override_last`` is set.

    :param status: Enum Status - Active/ActivePlus/All/Expired Status
    :param symbols: List of symbols to download (Stem should be provided) -
                    If not provided, will go through the universe
    :param interval: String data interval, possible values: 'minute', 'daily'
                     or both
    :param override_last: bool - To force re-downloading data for current day
    """
    log = logging.getLogger(__name__)
    log.info('Download {}'.format(interval))
    symbols = oci.json_db if symbols is None else symbols
    date_format = '%Y-%m-%d'
    for m in symbols:
        log.debug('Download data for {}'.format(m))
        # Generate tickers for download
        sdf = ocl.generate_tickers_df(m, status)
        # Go through all tickers
        for _, row in sdf.iterrows():
            ticker = row['Ticker']
            # Evaluated once per ticker so that the skip check and the
            # download end date always agree
            today = dt.datetime.today().strftime(date_format)
            # Get last entry (an empty stored frame is treated as no data)
            odf = odu.get_market_df(ticker)
            last_date = '1900-01-01'
            start = '1900-01-01'
            if odf is not None and len(odf) > 0:
                last_entry = odf.index[-1]
                last_date = last_entry.strftime(date_format)
                start = (last_entry - o.BDay(3)).strftime(date_format)
            if last_date != today or override_last:
                # Download Data & Save to the file
                df = get_ohlcv_data(ticker, interval, start, today)
                if df is not None:
                    odu.save_market_df(ticker, df)
            else:
                log.info(
                    'Do not download {} as it has already been downloaded'.
                    format(ticker))
def inspect_files(stems=None, future_type=None):
    """Scan the Daily data folder and report tickers with missing values.

    The ticker is taken from the file name (text before the first dot) and its
    first two characters are compared against ``stems``.

    :param stems: list - List of Futures to inspect
    :param future_type: enum FutureType - Type of Futures to inspect
                        (Outright, Spread, etc...)
    :return: dict - Problematic tickers to be checked (ticker -> number of
             missing values)
    """
    log = logging.getLogger(__name__)
    daily_folder = op.join(oc.cfg['default']['data'], 'Daily')
    flagged = {}
    for _root, _dirs, names in os.walk(daily_folder):
        for name in names:
            ticker = name.split('.')[0]
            # Filter on the stem first, then on the type of future
            if stems is not None:
                if ticker[0:2] not in stems:
                    continue
            if future_type is not None:
                if oci.get_future_type(ticker) != future_type:
                    continue
            frame = odu.get_market_df(ticker)
            if frame is None:
                continue
            count = frame.isnull().sum().sum()
            if count > 0:
                flagged[ticker] = count
    return flagged
def get_forecasts_and_data():
    """Build the ED/FF closes together with a forecast column, month by month.

    For every month produced by ``months_generator`` from 2016-01-01, the ED
    and the three FF contracts are selected through ``rules_maturities``, a
    forecast is computed from their last values and a frame with the columns
    ED, FF1, FF2, FF3 and FC (forecast minus LIBORFF in percent) is built for
    the rows after the month date. Months where all FF values are null are
    skipped.

    :return: DataFrame - Concatenation of the monthly frames (rows with
             missing values dropped)
    """
    start = dt.datetime(2016, 1, 1)
    liborff = calculate_liborffm()
    fcs = []
    monthly_frames = []
    ed_next_year = ('09', '10', '11', '12')
    ff_next_year = ('06', '07', '08', '09', '10', '11', '12')
    one_month = rd.relativedelta(months=1)
    for m in months_generator(start):
        date = m.strftime('%Y-%m-%d')
        m_rule = m.strftime('%m')
        year = date[0:4]
        if debug:
            print('Process Month with ending: {}!'.format(date))

        # ED: contract month, rolled to the next year for the late months
        ed_rule = rules_maturities[m_rule]['ED']
        ed_year = int(year) + (1 if m_rule in ed_next_year else 0)
        ed_ctrmth = int('{}{}'.format(ed_year, ed_rule))
        if debug:
            print('ED {} {}'.format(date, ed_ctrmth))

        # FF: one contract month per rule
        ff_rule = rules_maturities[m_rule]['FF']
        ff_ctrmth = []
        for rule in ff_rule:
            ff_year = int(year) + (1 if m_rule in ff_next_year else 0)
            ff_ctrmth.append(int('{}{}'.format(ff_year, rule)))
        if debug:
            print('FF {} {}'.format(date, ff_ctrmth))

        # Get dataframes for the window around the month
        start_m = (m - one_month).strftime('%Y-%m-%d')
        next_m = (m + one_month).strftime('%Y-%m-%d')
        ed_ticker = 'ED{}'.format(i.ym_maturity(ed_ctrmth))
        ed_df = odu.get_market_df(ed_ticker, start_m, next_m)
        ff_df = []
        for ctrmth in ff_ctrmth:
            ff_ticker = 'FF{}'.format(i.ym_maturity(ctrmth))
            ff_df.append(odu.get_market_df(ff_ticker, start_m, next_m))

        # Calculate Forecast
        ed_value = get_last_value(ed_df, date, ed_ctrmth)
        if ed_value == 0:
            print('Null value for ED!')
        ff_values = [
            get_last_value(ff_df[pos], date, ff_ctrmth[pos])
            for pos in range(3)
        ]
        if sum(ff_values) == 0:
            print('All values are null for date: {} - see: {}!'.format(
                date, ff_values))
            continue

        # Calculate forecast (only positive FF values are averaged) and add
        # to the list
        ff_values = [value for value in ff_values if value > 0]
        ff_average = sum(ff_values) / len(ff_values)
        fc = (100 - ed_value) - (100 - ff_average)
        fcs.append([date, fc])
        if debug:
            print('Values: {} {} - Forecast: {:.2f} - LIBORFF: {:.2f}'.format(
                ed_value, ff_values, fc, liborff[date] * 100))

        # Construct DataFrame and add to the list
        sfc = pd.Series(
            data=[(fc - liborff[date] * 100) for _ in ed_df.index],
            index=ed_df.index)
        closes = [ed_df['Close']]
        closes.extend(ff_df[pos]['Close'] for pos in range(3))
        closes.append(sfc)
        frame = pd.concat(closes, axis=1)
        frame.columns = ['ED', 'FF1', 'FF2', 'FF3', 'FC']
        frame = frame[frame.index > date]
        if debug:
            print(frame.head())
        monthly_frames.append(frame.dropna())
    # DataFrame
    return pd.concat(monthly_frames)
def test_fix_settlement():
    """Run fix_settlement on a stored ticker and print the result."""
    ticker = 'FCS1V12'
    market = odu.get_market_df(ticker)
    print(odd.fix_settlement(ticker, market, False))
def test_save_market_df():
    """Load a stored ticker and save it back unchanged."""
    ticker = 'FCF10'
    odu.save_market_df(ticker, odu.get_market_df(ticker))
def test_get_market_df():
    """Check that a stored ticker is loaded as a DataFrame."""
    market = odu.get_market_df('EDM18')
    assert isinstance(market, pd.DataFrame)
    preview = market.head()
    print(preview)