def calc_target(start_date, end_date, type, ticker1, lagLst=lagLst, periodLst=periodLst, filedir=filedir, database='db_corr'):
    """Compute one-day lagged correlations against ``ticker1`` and store them.

    For each day returned by ``corr.generateDayLst()`` and each
    (lag, period) combination, the ``ticker1`` column is shifted by
    ``-lag`` seconds, the frame is resampled with ``corr.sampledata`` and
    the correlation of every column with ``ticker1`` is written to the
    ``tb_corr`` table via ``REPLACE INTO``.

    Args:
        start_date: forwarded to ``corrlab.corrAna`` as ``start_date``.
        end_date: forwarded to ``corrlab.corrAna`` as ``end_date``.
        type: forwarded to ``corrlab.corrAna`` and stored in the ``type``
            column (formatted with ``%d``).
        ticker1: column name of the reference series; its first two
            characters are used as the key into ``corr.symbolDict[day]``.
        lagLst: lag strings whose last character is dropped before
            ``int()`` conversion (e.g. ``'5s'`` -> 5).
        periodLst: period strings, parsed the same way as ``lagLst`` for
            storage and passed unchanged to ``corr.sampledata``.
        filedir: forwarded to ``corrlab.corrAna``.
        database: name of the MySQL database selected on the connection.

    Returns:
        None. Results are written to the database.
    """
    corr = corrlab.corrAna(filedir=filedir, start_date=start_date, end_date=end_date, type=type)
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    conn = MySQLdb.connect(host='localhost', user='******', passwd='hhui123456')
    # Built once; the text is identical to the statement previously inlined
    # in the innermost loop.
    # NOTE(review): values are interpolated into the SQL text, so a ticker or
    # symbol containing a quote character would break the statement.
    sql_template = (
        """REPLACE INTO tb_corr( start_date, end_date, ticker1, symbol1, """
        """ticker2, symbol2, type, period, lag, corr) """
        """VALUES ( '%s', '%s','%s','%s','%s','%s','%d','%d','%d','%.6f' ) """
    )
    try:
        cursor = conn.cursor()
        conn.select_db(database)
        dayLst = corr.generateDayLst()
        for day in dayLst:
            # The time span of each computation is a single day.
            data = corr.concatdata([day])
            symbol1 = corr.symbolDict[day][ticker1[:2]]
            for lag in lagLst:
                for period in periodLst:
                    res = pd.DataFrame()
                    temp = data.copy()
                    # Move ticker1 by -lag seconds, then re-align it to the
                    # base index so it can be written back positionally.
                    shifted = temp[ticker1].shift(-int(lag[:-1]), 's')
                    align_base = corr.get_align_base(data)
                    _, align_shifted = align_base.align(shifted, join='left', axis=0)
                    # assumes align_base has the same length as temp -- TODO confirm
                    temp[ticker1] = align_shifted.values
                    temp = corr.sampledata(temp, period=period)
                    # Fill gaps forward first, then backward for leading NaNs.
                    temp.fillna(method='ffill', inplace=True)
                    temp.fillna(method='bfill', inplace=True)
                    temp_corr = temp.corr().sort_index()
                    # NOTE(review): the rename below presumably relies on
                    # concat labelling the appended series column 0; this
                    # depends on the pandas version -- verify before upgrading.
                    res = pd.concat([res, temp_corr[ticker1]])
                    res.rename(columns={0: day}, inplace=True)
                    # Undefined correlations are stored as the sentinel -2.
                    res.fillna(-2, inplace=True)
                    for column in temp_corr.index.values:
                        corr_value = res[day][column]
                        # Keep only the part of the column name before '_'.
                        ticker2 = column.split('_')[0]
                        symbol2 = corr.symbolDict[day][ticker2[:2]]
                        cursor.execute(sql_template % (
                            day, day, ticker1, symbol1, ticker2, symbol2,
                            type, int(period[:-1]), int(lag[:-1]), corr_value))
                    conn.commit()
    finally:
        # Always release the connection, including when a step above raises.
        conn.close()
def __init__(self, train_start, train_end, test_start, test_end, period, lag, target, type=0, filedir='/hdd/ctp/day/'):
    """Store the run configuration and build the correlation helper.

    Every argument is kept on the instance under the same name. A
    ``corrlab.corrAna`` object is then created from ``filedir``, the
    training start/end dates and ``type`` and stored as ``self.corr``.
    """
    # Boundaries of the training and testing windows.
    self.train_start, self.train_end = train_start, train_end
    self.test_start, self.test_end = test_start, test_end
    # Remaining settings, stored exactly as given.
    self.period, self.lag, self.target = period, lag, target
    self.type, self.filedir = type, filedir
    # The helper is constructed over the training window only.
    self.corr = corrlab.corrAna(
        filedir=self.filedir,
        start_date=self.train_start,
        end_date=self.train_end,
        type=self.type,
    )
def calc_target_week(start_date, end_date, type, ticker1, lagLst=lagLst, periodLst=periodLst, filedir=filedir, database='db_corr'):
    """Compute lagged correlations over rolling 5-day windows and store them.

    The day list from ``corr.generateDayLst()`` is walked with a window of
    five consecutive entries. For each window and each (lag, period)
    combination, the ``ticker1`` column is shifted by ``-lag`` seconds, the
    frame is resampled with ``corr.sampledata`` and the correlation of
    every column with ``ticker1`` is written to ``tb_corr`` via
    ``INSERT INTO``, with the window's first and last day as
    ``start_date`` / ``end_date``.

    Args:
        start_date: forwarded to ``corrlab.corrAna`` as ``start_date``.
        end_date: forwarded to ``corrlab.corrAna`` as ``end_date``.
        type: forwarded to ``corrlab.corrAna`` and stored in the ``type``
            column (formatted with ``%d``).
        ticker1: column name of the reference series; its first two
            characters are used as the key into the window's entry of
            ``corr.symbolDict``.
        lagLst: lag strings whose last character is dropped before
            ``int()`` conversion (e.g. ``'5s'`` -> 5).
        periodLst: period strings, parsed the same way as ``lagLst`` for
            storage and passed unchanged to ``corr.sampledata``.
        filedir: forwarded to ``corrlab.corrAna``.
        database: name of the MySQL database selected on the connection.

    Returns:
        None. Results are written to the database.
    """
    corr = corrlab.corrAna(filedir=filedir, start_date=start_date, end_date=end_date, type=type)
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    conn = MySQLdb.connect(host='localhost', user='******', passwd='hhui123456')
    # Built once; the text is identical to the statement previously inlined
    # in the innermost loop.
    # NOTE(review): this uses INSERT, so re-running over the same range may
    # add duplicate rows or fail, depending on the table's keys -- confirm.
    sql_template = (
        """INSERT INTO tb_corr( start_date, end_date, ticker1, symbol1, """
        """ticker2, symbol2, type, period, lag, corr) """
        """VALUES ( '%s', '%s','%s','%s','%s','%s','%d','%d','%d','%.6f' ) """
    )
    try:
        cursor = conn.cursor()
        conn.select_db(database)
        dayLst = corr.generateDayLst()
        length = len(dayLst)
        # The upper bound is length + 1 so the final window
        # dayLst[length - 5:length], which contains the last day, is
        # processed too; range(5, length) silently skipped it.
        for i in range(5, length + 1):
            lst = dayLst[i - 5:i]
            # "<first day>-<last day>" labels the window in symbolDict and
            # in the intermediate result frame.
            window_label = lst[0] + '-' + lst[-1]
            # Single-argument call form prints the same text on Python 2 and 3.
            print('processing  ' + window_label)
            data = corr.concatdata(lst)
            symbol1 = corr.symbolDict[window_label][ticker1[:2]]
            for lag in lagLst:
                for period in periodLst:
                    res = pd.DataFrame()
                    temp = data.copy()
                    # Move ticker1 by -lag seconds, then re-align it to the
                    # base index so it can be written back positionally.
                    shifted = temp[ticker1].shift(-int(lag[:-1]), 's')
                    align_base = corr.get_align_base(data)
                    _, align_shifted = align_base.align(shifted, join='left', axis=0)
                    # assumes align_base has the same length as temp -- TODO confirm
                    temp[ticker1] = align_shifted.values
                    temp = corr.sampledata(temp, period=period)
                    # Fill gaps forward first, then backward for leading NaNs.
                    temp.fillna(method='ffill', inplace=True)
                    temp.fillna(method='bfill', inplace=True)
                    temp_corr = temp.corr().sort_index()
                    # NOTE(review): the rename below presumably relies on
                    # concat labelling the appended series column 0; this
                    # depends on the pandas version -- verify before upgrading.
                    res = pd.concat([res, temp_corr[ticker1]])
                    res.rename(columns={0: window_label}, inplace=True)
                    # Undefined correlations are stored as the sentinel -2.
                    res.fillna(-2, inplace=True)
                    for column in temp_corr.index.values:
                        corr_value = res[window_label][column]
                        # Keep only the part of the column name before '_'.
                        ticker2 = column.split('_')[0]
                        symbol2 = corr.symbolDict[window_label][ticker2[:2]]
                        cursor.execute(sql_template % (
                            lst[0], lst[-1], ticker1, symbol1, ticker2, symbol2,
                            type, int(period[:-1]), int(lag[:-1]), corr_value))
                    conn.commit()
    finally:
        # Always release the connection, including when a step above raises.
        conn.close()
# ---- Run configuration ----------------------------------------------------
# Date range handed to corrlab.corrAna below.
start_date = '20171101'
end_date = '20171215'
type = 0  # 1 for aggravated, 0 for rolling, 2 for both
ticker1 = 'ru0'

# Lags and sampling periods share the same values but are separate lists.
lagLst = ['1s', '5s', '10s', '30s', '60s']
periodLst = list(lagLst)

outputdir = u'/home/hui/Documents/corr output/'

typelst = [
    'noble',
    'nonferrous',
    'black',
    'farm',
    'chemical',
    'futures',
    'loan',
]
analst = ['ru', 'zn', 'rb', 'jm', 'j1']  # appointed analyst

# ---- Shared objects -------------------------------------------------------
# `filedir` is not assigned in this section; it must already be defined.
corr = corrlab.corrAna(
    filedir=filedir,
    start_date=start_date,
    end_date=end_date,
    type=type,
)
dayLst = corr.generateDayLst()

# Empty frames created up front; nothing in this section fills them.
entire = pd.DataFrame()
entire_1s = pd.DataFrame()
entire_5s = pd.DataFrame()
entire_10s = pd.DataFrame()

# Disabled per-day loop, kept as found:
# for day in dayLst:
#     lst = []
#     lst.append(day)
#
#     data = corr.concatdata(lst)
#     print (day, 'calculate done')
#     target = corr.getsymbol(data, ticker1)
#     one_sec = corr.sampledata(data,period = '1s')