def download(startDt, endDt):
    dt = startDt
    filenames = [os.path.join(config.LOCAL_PATH_RAW, dn + '.csv')
                 for dn in DATA_NAMES]
    cntDt = 0
    lastMon = ''
    while dt <= endDt:
        for dataname, filename in zip(DATA_NAMES, filenames):
            if dt[:7] != lastMon:
                lastMon = dt[:7]
                print(lastMon)
            # call the fetch function named by `dataname` for this date
            df = eval(dataname + '(dt)')
            if df.shape[0] > 0:
                # only the very first non-empty frame is written fresh with a
                # header; every later frame is appended without one
                if cntDt > 0:
                    fmode = 'a'
                    wheader = False
                else:
                    fmode = 'w'
                    wheader = True
                df.to_csv(filename, mode=fmode, header=wheader, index=False)
                cntDt += 1
        dt = utils_common.dtAdd(dt, 1)
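# Usage sketch (hypothetical invocation, not in the original source). Assumes
# every name in DATA_NAMES refers to a fetch function defined in this module
# that takes a 'YYYY-MM-DD' string and returns a DataFrame, as download()
# expects above.
if __name__ == '__main__':
    download('2016-01-01', '2016-01-31')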
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath, testStartDt):
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    idxFilter = np.asarray([st in secFilter for st in df.index])
    df = df[idxFilter]
    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values
    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        # redirect stdout so daily_test's report lands in the per-day file
        stdout = sys.stdout
        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            daily_test(feature, dts, index, initMoney, testStartDt,
                       os.path.join(modelPath, filename), strategy)
            sys.stdout = stdout
        dt = utils_common.dtAdd(dt, 1)
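# Alternative sketch (assumption: daily_test writes its report only via print):
# contextlib.redirect_stdout gives the same per-day redirection as the manual
# sys.stdout swap above, and restores stdout even if daily_test raises. The
# helper name run_daily_report is hypothetical.
import contextlib

def run_daily_report(reportPath, filename, *daily_test_args):
    with open(os.path.join(reportPath, filename), 'w') as fout, \
            contextlib.redirect_stdout(fout):
        daily_test(*daily_test_args)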
def main(dtStart, dtEnd, savePath):
    label, feature, dts = daily_train.loadData()
    dt = dtStart
    while dt <= dtEnd:
        filename = 'xgb_' + dt
        print(dt)
        daily_train.daily_train(feature, label, dts, dt,
                                os.path.join(savePath, filename))
        dt = utils_common.dtAdd(dt, 1)
def main(dtStart, dtEnd, savePath):
    label, feature, dts = loadDataReg()
    dt = dtStart
    while dt <= dtEnd:
        filename = 'xgb_' + dt
        print(dt)
        daily_train_reg(feature, label, dts, dt,
                        os.path.join(savePath, filename))
        dt = utils_common.dtAdd(dt, 1)
def getDecFactors(dtStart, dtEnd):
    ret = dict()
    decFactors = dict()
    # start a week early so the first requested date has a preceding factor
    dt = utils_common.dtAdd(dtStart, -7)
    while dt <= dtEnd:
        decFactor = getDecFactorSingle(dt)
        if decFactor is not None:
            decFactors[dt] = decFactor
        dt = utils_common.dtAdd(dt, 1)
    # shift dt: re-key each factor to the next available date
    sortedDt = sorted(decFactors.keys())
    mapDt = {sortedDt[i]: sortedDt[i + 1] for i in range(len(sortedDt) - 1)}
    for k, v in mapDt.items():
        ret[v] = decFactors[k]
    return ret
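# Worked example of the date shift above (hypothetical data): each factor is
# re-keyed to the next covered date, so a factor computed on day D is only
# consumed on the following day, and the last day's factor is dropped.
def _shiftDemo():
    factors = {'2016-01-04': 'a', '2016-01-05': 'b', '2016-01-06': 'c'}
    dts = sorted(factors)
    shifted = {dts[i + 1]: factors[dts[i]] for i in range(len(dts) - 1)}
    return shifted  # {'2016-01-05': 'a', '2016-01-06': 'b'}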
def daily_train(feature, label, dts, dtEnd, savePath):
    # train on the config.TRAINING_DAYS window that ends the day before dtEnd
    idx = (dts < dtEnd) & (dts >= utils_common.dtAdd(dtEnd, -config.TRAINING_DAYS))
    feature = feature[idx]
    label = label[idx]
    weight = utils_common.getWeight(dts[idx], config.WEIGHTER)
    cls = xgb.XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=500)
    print('training sample: {0}'.format(feature.shape))
    print(weight.shape)
    cls.fit(feature, label, sample_weight=weight)
    print('train end')
    # pickle needs a binary file handle
    with open(savePath, 'wb') as fout:
        pickle.dump(cls, fout)
def daily_train_reg(feature, label, dts, dtEnd, savePath):
    # train on the config.TRAINING_DAYS window that ends the day before dtEnd
    idx = (dts < dtEnd) & (dts >= utils_common.dtAdd(dtEnd, -config.TRAINING_DAYS))
    feature = feature[idx]
    label = label[idx]
    weight = utils_common.getWeight(dts[idx], config.WEIGHTER)
    cls = xgb.XGBRegressor(max_depth=11, learning_rate=0.04, n_estimators=200)
    print('training sample: {0}'.format(feature.shape))
    print(weight.shape)
    cls.fit(feature, label, sample_weight=weight)
    print('train end')
    # pickle needs a binary file handle
    with open(savePath, 'wb') as fout:
        pickle.dump(cls, fout)
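# Usage sketch (helper name is hypothetical): models pickled by daily_train /
# daily_train_reg above can be restored with a binary read and scored on a
# feature matrix with the same columns as config.FEATURE_SELECT.
def loadModel(savePath):
    with open(savePath, 'rb') as fin:
        return pickle.load(fin)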
def completeFundETFConsGet():
    # fill missing (secID, tradeDate) rows in the ETF constituents table by
    # copying forward the most recent available row for that security
    df = getdf('FundETFConsGet')
    dfMarket = getdf('MktEqudAdjAfGet')
    secIDs = df.index.get_level_values('secID').values
    tradeDates = df.index.get_level_values('tradeDate').values
    usecs = np.unique(secIDs)
    udtMarket = np.unique(dfMarket.index.get_level_values('tradeDate').values)
    dfComp = df.copy()
    for secID in usecs:
        idx = secIDs == secID
        dts = tradeDates[idx]
        minDt = np.min(dts)
        missDt = {dt for dt in udtMarket if dt not in dts and dt >= minDt}
        print('{0}:{1}'.format(secID, json.dumps(list(missDt))))
        for dt in missDt:
            # look back up to 20 calendar days for the previous covered date
            for i in range(20):
                preDt = utils_common.dtAdd(dt, -i)
                if preDt in dts:
                    break
            if preDt not in dts:
                print('cannot find previous date, sec: {0}'.format(secID))
                break
            addDf = df[idx & (tradeDates == preDt)]
            addDf.reset_index(inplace=True)
            addDf.loc[:, 'tradeDate'] = dt
            addDf.set_index(config.DATA_NAMES['FundETFConsGet'], inplace=True)
            dfComp = dfComp.append(addDf)
    dfComp.to_csv(os.path.join(getdb(), 'FundETFConsGet'))
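# Note (assumption: a pandas release >= 2.0, where DataFrame.append was
# removed): the same completion can collect the per-date frames in a list and
# build dfComp with a single concat, e.g.
#     addDfs.append(addDf)              # inside the missing-date loop
#     dfComp = pd.concat([df] + addDfs)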
def backtest(initMoney, dtStart, dtEnd, strategy, decFactor, outputfile=None):
    print('dt start: ' + dtStart + ', dt end: ' + dtEnd)
    totals = []
    gains = []
    tReturns = []
    trader = BacktestTrader(initMoney)
    totals.append(trader.getMarketValue())
    numDt = 0
    dt = dtStart
    # clamp the requested range to the dates covered by decFactor, ignoring
    # the '9999-99-99' sentinel used for the latest factor
    dts = list(decFactor.keys())
    if '9999-99-99' in dts:
        dts.remove('9999-99-99')
    maxDt = max(dts)
    dtEnd = min(dtEnd, maxDt)
    dt = max(dt, min(dts))
    if outputfile is not None:
        outputfile = open(outputfile, 'w')
    while dt <= dtEnd:
        if not trader.isTradingDay(dt):
            dt = utils_common.dtAdd(dt, 1)
            continue
        decFactorDt = decFactor[dt] if dt in decFactor else None
        trader.setTime(dt, 'open')
        strategy.handle(dt, 'open', trader, decFactorDt)
        #print(decFactorDt.shape)
        #print('open: ' + str(len(trader.getPositions())) + ', cache: ' + str(trader.cache_))
        trader.setTime(dt, 'close')
        strategy.handle(dt, 'close', trader, decFactorDt)
        #print('close: ' + str(len([pos['secID'] for pos in trader.position_])) + ', cache: ' + str(trader.cache_))
        #print('close: ' + str(trader.getPositions()) + ', cache: ' + str(trader.cache_))
        totals.append(trader.getMarketValue())
        gains.append((totals[-1] - totals[-2]) / totals[-2])
        tReturns.append((totals[-1] - totals[0]) / totals[0])
        print('dt: {0}, value: {1}, gains: {2}, Sharpe: {3}'.format(
            dt, totals[-1], gains[-1],
            utils_common.sharpRatio(tReturns, 0.1 / 200)))
        if outputfile is not None:
            outputfile.write('{0},{1}\n'.format(dt, totals[-1]))
        dt = utils_common.dtAdd(dt, 1)
        numDt += 1
    print('total value: ' + str(totals[-1]))
    print('max loss: ' + str(utils_common.minSumSubList(gains)))
    posDays = len([i for i in gains if i > 0])
    totalDays = numDt
    print('positive days: {0}/{1}'.format(float(posDays) / totalDays, totalDays))
    print('Sharpe ratio: {0}'.format(utils_common.sharpRatio(tReturns, 0.1 / 200)))
    print(totalDays)
    if outputfile is not None:
        outputfile.close()
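# Usage sketch (hypothetical wiring; the dates, initial money and output path
# are placeholders): build the date-shifted decision factors with
# getDecFactors above, then replay a strategy object over them.
def runBacktestExample(strategy):
    decFactor = getDecFactors('2016-01-01', '2016-06-30')
    backtest(1000000.0, '2016-01-01', '2016-06-30', strategy, decFactor,
             outputfile='backtest_2016H1.csv')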
#turnoverFilter = dataio.getHS300Filter()
print(df.shape[0])
print('prepare...')
dt = df.index.get_level_values('tradeDate')
label = np.squeeze(df[[LABEL]].values)
feature = df[FEATURE_SELECT].values
idxFilter = np.asarray([st in secFilter for st in df.index])
print('begin')
# walk-forward validation: YEARS entries may be 'YYYY' or full 'YYYY-MM-DD'
for iy, year in enumerate(YEARS[:-1]):
    if len(year) < 10:
        dtTrainStart = utils_common.dtAdd(year + '-01-01', -config.TRAINING_DAYS)
    else:
        dtTrainStart = utils_common.dtAdd(year, -config.TRAINING_DAYS)
    idxTrain = (dt < year) & (dt >= dtTrainStart)  #&idxFilter
    idxTest = (dt >= year) & (dt < YEARS[iy + 1])  #&idxFilter
    labelTrain = label[idxTrain]
    labelTest = label[idxTest]
    featureTrain = feature[idxTrain]
    featureTest = feature[idxTest]
    dtTest = dt[idxTest]
    dtTrain = dt[idxTrain]
    weightTrain = getWeight(dtTrain)
    validate(labelTrain, labelTest, featureTrain, featureTest, dtTest, weightTrain)
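# Assumed shape of YEARS (hypothetical values, not from the original source):
# a sorted list of split points; each fold trains on the config.TRAINING_DAYS
# window before YEARS[iy] and tests on [YEARS[iy], YEARS[iy + 1]). For example:
#     YEARS = ['2015', '2016', '2017-01-01']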
def extract(self):
    df = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
    df = df[df[self.labelName_] > -1]
    df = dataio.joinTurnoverRank(df)
    dt = '2014-01-01'
    dts = df.index.get_level_values('tradeDate')
    indices = df.index.values
    label = np.squeeze(df[[self.labelName_]].values)
    feature = df[config.FEATURE_SELECT].values
    #dtMax = '2016-01-01'
    dtMax = np.max(dts.values)
    prs = []
    while dt <= dtMax:
        dtStart = dt
        dtEnd = utils_common.dtAdd(dtStart, self.iterval_)
        print('start: {0}, end: {1}'.format(dtStart, dtEnd))
        idxTrain = (dts < dtStart) & (dts >= utils_common.dtAdd(
            dtStart, -config.TRAINING_DAYS))
        idxTest = (dts >= dtStart) & (dts <= dtEnd)
        labelTrain = label[idxTrain]
        featureTrain = feature[idxTrain]
        featureTest = feature[idxTest]
        weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)
        pred = self.trainTestReg(labelTrain, featureTrain, featureTest, weight)
        #print(utils_common.topNPosRate(dts[idxTest], label[idxTest], pred, 5))
        df = pd.DataFrame(pred,
                          index=pd.MultiIndex.from_tuples(
                              indices[idxTest], names=['secID', 'tradeDate']),
                          columns=[self.__class__.__name__])
        prs.append(df)
        dt = utils_common.dtAdd(dtEnd, 1)
    df = pd.concat(prs)
    # shift tradeDate to the next covered date; the latest date has no
    # successor and is marked with the '9999-99-99' sentinel
    udt = np.sort(np.unique(df.index.get_level_values('tradeDate').values))
    dtMap = {udt[i]: udt[i + 1] for i in range(udt.shape[0] - 1)}
    maxDt = np.max(udt)
    df.reset_index(inplace=True)
    arr = df.values
    idxDt = df.columns.get_loc('tradeDate')
    print(df.shape)
    for i in range(df.shape[0]):
        dt = arr[i, idxDt]
        if dt == maxDt:
            arr[i, idxDt] = '9999-99-99'
        else:
            arr[i, idxDt] = dtMap[dt]
    df = pd.DataFrame(arr, columns=df.columns)
    df.set_index(['secID', 'tradeDate'], inplace=True)
    return df
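# Usage sketch (hypothetical subclass and join target): extract() expects the
# instance to provide labelName_, iterval_ and trainTestReg(); the frame it
# returns is indexed by (secID, tradeDate) with predictions already shifted to
# the next trading day, so it can be left-joined onto other per-day factor
# frames by index.
#     factorDf = MyFactorExtractor().extract()
#     merged = baseFactors.join(factorDf, how='left')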