def fake_data(df):
    """Expand a single-column price frame into an OHLCV-style frame.

    The only column of ``df`` is relabelled ``OPEN`` and copied into ``HIGH``,
    ``LOW``, ``CLOSE`` and ``ADJUSTED``; ``VOLUME`` is set to ``sign(price) * 1e9``.
    Rows are then added for every date returned by ``get_business_date_list``
    that lies between the first index entry of ``df`` and today, and gaps are
    filled forward, then backward, by at most three rows each.

    Note:
        ``df`` is modified in place (columns renamed/added) before the
        returned frame is built, so the caller's object changes as well.

    Args:
        df: Frame with exactly one column and an index whose entries provide
            ``strftime`` (assigning ``['OPEN']`` raises for any other width).

    Returns:
        A new frame sorted by index, without duplicated index labels.
    """
    # Relabel the single input column and clone it into the other price fields.
    df.columns = ['OPEN']
    price = df.iloc[:, 0]
    df["HIGH"] = price
    df["LOW"] = price
    df["CLOSE"] = price
    df["VOLUME"] = np.sign(price) * 1e9
    df["ADJUSTED"] = price

    dt_fmt = '%Y/%m/%d'
    sd = df.index[0].strftime(dt_fmt)
    # The grid deliberately runs up to today rather than the last index entry.
    ed = date.today().strftime(dt_fmt)

    # NOTE(review): assumes get_business_date_list returns an array-like of
    # date strings in `dt_fmt` that supports elementwise comparison and
    # boolean-mask indexing -- confirm against zutils.
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])

    # Rows for business dates absent from `df` come back as all-NaN here.
    newdf = df.copy(deep=True).reindex(short_bd_list)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    df = pd.concat([df, newdf])
    df.sort_index(inplace=True)
    # Dates present in both frames appear twice after the concat; keep one.
    df = df[~df.index.duplicated(keep='first')]
    df.ffill(limit=3, inplace=True)
    df.bfill(limit=3, inplace=True)
    return df
def amend_daily_data(i, sd, ed, dk, ded):
    """Fetch the rows that table ``i`` lacks for the window ``sd`` .. ``ed``.

    The ``date`` column of table ``i`` is read through ``ded``. When the table
    cannot be read or holds no dates, the whole window is fetched in one call.
    Otherwise the stored dates are compared with the business dates strictly
    between ``sd`` and ``ed`` and the differing dates are fetched span by span.

    Args:
        i: Table name; also passed as the first argument to the fetchers.
        sd: Window start, compared against ``'%Y%m%d'`` business dates.
        ed: Window end, compared against ``'%Y%m%d'`` business dates.
        dk: Data key; ``'fund_nav'`` selects ``fetch_fund_data`` for a full
            fetch, anything else uses ``fetch_daily_data``.
        ded: Connection handed to ``pd.read_sql_table`` as ``con``.

    Returns:
        The fetched data (whatever the fetcher returns on a full fetch, a
        concatenated DataFrame otherwise), or ``None`` when the stored dates
        and the business calendar do not differ.
    """
    stored_dates = None
    try:
        stored_dates = pd.read_sql_table(table_name=i, con=ded)['date'].sort_values()
    except Exception as e:
        # Best effort: an unreadable table is handled like an empty one below.
        print(e)

    if stored_dates is None or stored_dates.empty:
        if dk == 'fund_nav':
            df = fetch_fund_data(i, sd, ed, dk)
        else:
            df = fetch_daily_data(i, sd, ed, dk)
    else:
        dt_set = set(stored_dates)
        bd_list = get_business_date_list(fmt='%Y%m%d')
        print('sd/ed', sd, ed)
        bd_list = bd_list[(bd_list > sd) & (bd_list < ed)]
        bd_set = set(bd_list)

        # Dates found in exactly one of the two sets (symmetric difference).
        # NOTE(review): sorting assumes stored dates and business dates are
        # mutually comparable (e.g. both '%Y%m%d' strings) -- confirm.
        missing_dates = sorted(dt_set ^ bd_set)
        df = pd.DataFrame()
        # sorted() never yields None, so the old `is None` test could not
        # fire and an empty list crashed on missing_dates[0].
        if not missing_dates:
            return None
        print('missing_dates', (missing_dates))
        print('dt_series', stored_dates)
        print('bd_list', bd_list)

        dt_begin = missing_dates[0]
        fast_mode = False  # local switch: when on, skips dates < 7 days after dt_begin
        # NOTE(review): the final entry only ever serves as a span end, so it
        # is never used as the start of a fetch -- confirm this is intended.
        for dt in missing_dates[1:]:
            # NOTE(review): the right-hand operand of this comparison was
            # missing in the source (`if dt > :`); `ed` is the presumed bound.
            if dt > ed:
                continue
            pd_dt = pd.to_datetime(dt)
            dt_diff = pd_dt - pd.to_datetime(dt_begin)
            if fast_mode and dt_diff < timedelta(7):
                continue
            # Fetch up to the day before `dt`; for gaps longer than 31 days
            # only the single day `dt_begin` is requested.
            dt_end = (pd_dt - timedelta(1)).strftime('%Y%m%d')
            if dt_diff > timedelta(31):
                dt_end = dt_begin
            tmpdf = fetch_daily_data(i, dt_begin, dt_end, dk)
            print('amending date:', dt_begin, dt_end, dt_diff)
            dt_begin = dt
            time.sleep(0.10)  # pause between consecutive fetches
            if tmpdf is None:
                continue
            print(tmpdf)
            df = pd.concat([df, tmpdf]).drop_duplicates()

    print('amend_daily_data', i)
    print(df)
    return df
def fill_missing_data(fin, fout, index_col, zfix):
    """Re-grid a CSV time series onto business dates and write it to ``fout``.

    The CSV is read with ``index_col`` as a parsed date index, de-duplicated
    on the index, re-indexed with forward fill onto the business dates between
    its earliest and latest entry, and written with the index named ``date``
    and values rounded to 7 decimals.

    Args:
        fin: Path of the input CSV; the part after the last ``/`` is matched
            against ``.*FX\\.csv$``.
        fout: Path of the CSV to write.
        index_col: Column passed to ``pd.read_csv`` as ``index_col``.
        zfix: When truthy, the last row is duplicated onto the first business
            date after the latest input date.

    Returns:
        ``False`` when the input cannot be read or has no rows, else ``True``.
    """
    try:
        df = pd.read_csv(fin, index_col=index_col, parse_dates=True)
    except Exception as err:
        # Previously execution continued with df=None and crashed on df.shape.
        print(str(err))
        return False

    dt_fmt = '%Y-%m-%d'
    if df.shape[1] < 2:
        df.columns = ['open']
    if df.empty:
        return False

    # The frame is only sorted further down, so use min/max instead of the
    # first/last row to get the true date bounds of unsorted input.
    sd = df.index.min().strftime(dt_fmt)
    ed = df.index.max().strftime(dt_fmt)

    # NOTE(review): assumes get_business_date_list returns an array-like of
    # date strings in `dt_fmt` that supports elementwise comparison and
    # boolean-mask indexing -- confirm against zutils.
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])
    print('jzcheck', df.iloc[-10:, ])
    print(short_bd_list)

    df.sort_index(inplace=True)
    try:
        df = df[~df.index.duplicated()]
    except Exception as err:
        print('jzerror:', str(err))
    print('jzcheck2', df.iloc[-10:, ])
    df.sort_index(inplace=True)

    df = df.reindex(short_bd_list, method='ffill')
    # fillna(method='ffill') is deprecated; DataFrame.ffill() is equivalent.
    df = df.ffill()

    if df.shape[1] < 2:
        df = fake_data(df)
    elif re.match(r'.*FX\.csv$', fin.split('/')[-1]):
        # NOTE(review): this call passes `adjflag`; verify that the
        # fake_data in scope accepts that keyword.
        df = fake_data(df, adjflag=True)

    if zfix:
        # NOTE(review): raises IndexError when the calendar has no date
        # after `ed` -- confirm the calendar always extends past the data.
        zfix_dt = pd.to_datetime(bd_list[bd_list > ed][0])
        last_row = df.iloc[-1].rename(zfix_dt)
        # DataFrame.append was removed in pandas 2.0; concatenate a one-row
        # frame built from the last row instead.
        df = pd.concat([df, last_row.to_frame().T.infer_objects()])
        print('zfix: appended extra row', zfix_dt)

    df.index.names = ['date']
    df.sort_index().round(7).to_csv(fout, index=True, date_format=dt_fmt, na_rep='')
    return True
def fill_missing_data(fin, fout, index_col, zfix):
    """Re-grid a CSV time series onto business dates and write it to ``fout``.

    The CSV is read with ``index_col`` as a parsed date index, re-indexed with
    forward fill onto the business dates between its first and last row, and
    written with the index named ``date``, dates formatted as ``%Y%m%d`` and
    values rounded to 7 decimals. A single-column input is relabelled
    ``close`` and expanded through ``fake_data``.

    Args:
        fin: Path of the input CSV.
        fout: Path of the CSV to write.
        index_col: Column passed to ``pd.read_csv`` as ``index_col``.
        zfix: When truthy, the last row is duplicated onto the first business
            date after the last input date.

    Returns:
        ``False`` when the input cannot be read or has no rows, else ``True``.
    """
    try:
        df = pd.read_csv(fin, index_col=index_col, parse_dates=True)
    except Exception as err:
        # Previously execution continued with df=None and crashed on df.shape.
        print(str(err))
        return False

    dt_fmt = '%Y%m%d'
    if df.shape[1] < 2:
        df.columns = ['close']
    if df.empty:
        return False

    # First/last row are used as bounds; reindex(method='ffill') below
    # requires a monotonic index anyway.
    sd = df.index[0].strftime(dt_fmt)
    ed = df.index[-1].strftime(dt_fmt)

    # NOTE(review): assumes get_business_date_list returns an array-like of
    # date strings in `dt_fmt` that supports elementwise comparison and
    # boolean-mask indexing -- confirm against zutils.
    bd_list = get_business_date_list(fmt=dt_fmt)
    print(sd, ed, type(bd_list))
    short_bd_list = pd.to_datetime(bd_list[(bd_list >= sd) & (bd_list <= ed)])

    df = df.reindex(short_bd_list, method='ffill')
    # fillna(method='ffill') is deprecated; DataFrame.ffill() is equivalent.
    df = df.ffill()

    if df.shape[1] < 2:
        df = fake_data(df)

    if zfix:
        # NOTE(review): raises IndexError when the calendar has no date
        # after `ed` -- confirm the calendar always extends past the data.
        zfix_dt = pd.to_datetime(bd_list[bd_list > ed][0])
        last_row = df.iloc[-1].rename(zfix_dt)
        # DataFrame.append was removed in pandas 2.0; concatenate a one-row
        # frame built from the last row instead.
        df = pd.concat([df, last_row.to_frame().T.infer_objects()])
        print('zfix: appended extra row', zfix_dt)

    df.index.names = ['date']
    df.sort_index().round(7).to_csv(fout, index=True, date_format=dt_fmt, na_rep='')
    return True
def main():
    """Parse the command line and pull database data for one data key.

    Options:
        -d, --datakey KEY  data key (default ``opt``)
        -u NAME            user name used for the input path
        -e DATE            end date (default: ``get_prev_business_date(today, -1)``)
        -o                 passed to ``get_db_data`` as ``oflag``
        -l                 passed to ``get_db_data`` as ``lflag``
        -c, -v             accepted; not used by the code in this function
        -h, --help         print usage and exit

    For the keys ``opt``, ``fut``, ``fund``, ``fund_nav``, ``index`` and
    ``stock`` it calls ``get_db_data`` once per data type: first every entry
    of ``fs_list`` (``stock``) or ``ix_list`` (``index``), then ``basic`` and
    ``daily``. Any other key results in no calls.
    """
    import getopt
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], "d:u:e:hoclv", ["datakey=", "help"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    uname = pwd.getpwuid(os.getuid()).pw_name
    # The helper library is looked up under the invoking user's tree; a later
    # -u only changes the input path. NOTE(review): confirm this is intended.
    sys.path.append('/work/' + uname + '/project/zlib/')
    from zutils import get_prev_business_date, get_business_date_list
    bdl = get_business_date_list(fmt='%Y%m%d')

    output_flag = False
    conv_flag = False   # set by -c; not read anywhere in this function
    link_flag = False
    verbose = False     # set by -v; not read anywhere in this function
    dkey = 'opt'
    edate = None
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-d", "--datakey"):
            dkey = a
        elif o == '-u':
            uname = a
        elif o == '-o':
            output_flag = True
        elif o == '-c':
            conv_flag = True
        elif o == '-e':
            edate = a
        elif o == '-l':
            link_flag = True
        elif o in ("-h", "--help"):
            # Declared to getopt but previously fell through to the assert.
            usage()
            sys.exit(0)
        else:
            # Explicit raise so the check is not stripped under `python -O`.
            raise AssertionError('unhandled option')

    print(dkey)
    edate = get_prev_business_date(date.today(), -1) if edate is None else edate
    sdate = get_prev_business_date(date.today() - timedelta(7), -1)
    print(sdate, edate)
    input_path = '/work/' + uname + '/input/' + dkey + '/'

    if dkey in ('opt', 'fut', 'fund', 'fund_nav', 'index', 'stock'):
        # `dkey in ('stock')` was a substring test on a plain string; use
        # equality for the key-specific data types.
        d_types = []
        if dkey == 'stock':
            d_types.extend(fs_list)
        elif dkey == 'index':
            d_types.extend(ix_list)
        # NOTE(review): nesting reconstructed from a collapsed source line --
        # 'basic' and 'daily' are taken to run for every supported key.
        d_types.extend(['basic', 'daily'])
        for d_type in d_types:
            get_db_data(input_path, sdate, edate, uname, bdt_list=bdl, dk=dkey,
                        d_type=d_type, oflag=output_flag, lflag=link_flag)