def main(modelPath, outpath, dt=None):
    """Score one trade date with a pickled model and write the ranking to CSV.

    Args:
        modelPath: Path of the pickled model. The unpickled object must
            expose ``predict(feature)``.
        outpath: Destination path of the CSV file.
        dt: Trade date to score. When None, the largest ``tradeDate`` that
            remains after filtering is used.

    The CSV has one ``score`` column, sorted in descending order. Rows whose
    first index element is listed in ``config.BLACK_LIST`` are left out.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    # Keep only the index entries accepted by both filters (combined with &).
    secFilter = (dataio.getTurnoverRankFilter()
                 & dataio.getMaxContinousCloseDayFilter())
    idxFilter = np.asarray([st in secFilter for st in df.index], dtype=bool)
    df = df[idxFilter]

    dts = df.index.get_level_values('tradeDate').values
    if dt is None:
        dt = np.max(dts)
    df = df[dts == dt]

    feature = df[config.FEATURE_SELECT].values

    # NOTE(security): unpickling runs arbitrary code, so modelPath must come
    # from a trusted source. Binary mode is required for pickle data, and the
    # context manager guarantees the handle is closed.
    with open(modelPath, 'rb') as fin:
        cls = pickle.load(fin)

    pred = np.squeeze(cls.predict(feature))
    dfout = pd.DataFrame(pred, index=df.index, columns=['score'])

    # Plain boolean mask instead of Index.map(...).get_values(), which no
    # longer exists in current pandas releases.
    keep = np.array([key[0] not in config.BLACK_LIST for key in dfout.index],
                    dtype=bool)
    dfout = dfout[keep]

    # Rebind rather than sort in place: dfout is a filtered selection here.
    dfout = dfout.sort_values(['score'], ascending=False)
    dfout.to_csv(outpath, float_format='%g')
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath,
         testStartDt):
    """Run ``daily_test`` for every date from startDt to endDt inclusive.

    For each date ``dt`` the model file ``<modelPath>/xgb_<dt>`` is passed to
    ``daily_test`` and everything it prints is captured in the report file
    ``<reportPath>/xgb_<dt>``.

    Args:
        modelPath: Directory holding the per-date model files.
        startDt: First date to process; compared and concatenated as a string.
        endDt: Last date to process (inclusive).
        initMoney: Forwarded unchanged to ``daily_test``.
        strategy: Forwarded unchanged to ``daily_test``.
        reportPath: Directory receiving the per-date report files.
        testStartDt: Forwarded unchanged to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    # Keep only the index entries accepted by both filters (combined with &).
    secFilter = (dataio.getTurnoverRankFilter()
                 & dataio.getMaxContinousCloseDayFilter())
    idxFilter = np.asarray([st in secFilter for st in df.index], dtype=bool)
    df = df[idxFilter]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        savedStdout = sys.stdout
        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           os.path.join(modelPath, filename), strategy)
            finally:
                # Always restore stdout, even when daily_test raises;
                # otherwise later prints would target a closed file.
                sys.stdout = savedStdout
        dt = utils_common.dtAdd(dt, 1)
def main(modelPath, outpath, dt=None):
    """Score one trade date with a pickled model and write the ranking to CSV.

    Args:
        modelPath: Path of the pickled model. The unpickled object must
            expose ``predict(feature)``.
        outpath: Destination path of the CSV file.
        dt: Trade date to score. When None, the largest ``tradeDate`` that
            remains after filtering is used.

    The CSV has one ``score`` column, sorted in descending order. Rows whose
    first index element is listed in ``config.BLACK_LIST`` are left out.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    # Keep only the index entries accepted by both filters (combined with &).
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    df = df[np.asarray([st in secFilter for st in df.index], dtype=bool)]

    dts = df.index.get_level_values('tradeDate').values
    if dt is None:
        dt = np.max(dts)
    df = df[dts == dt]

    feature = df[config.FEATURE_SELECT].values

    # NOTE(security): unpickling runs arbitrary code, so modelPath must come
    # from a trusted source. Binary mode is required for pickle data, and the
    # context manager guarantees the handle is closed.
    with open(modelPath, 'rb') as modelFile:
        cls = pickle.load(modelFile)

    pred = np.squeeze(cls.predict(feature))
    dfout = pd.DataFrame(pred, index=df.index, columns=['score'])

    # Plain boolean mask instead of Index.map(...).get_values(), which no
    # longer exists in current pandas releases.
    notBlacklisted = np.array(
        [key[0] not in config.BLACK_LIST for key in dfout.index], dtype=bool)
    dfout = dfout[notBlacklisted]

    # Rebind rather than sort in place: dfout is a filtered selection here.
    dfout = dfout.sort_values(['score'], ascending=False)
    dfout.to_csv(outpath, float_format='%g')
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath,
         testStartDt):
    """Run ``daily_test`` for every date from startDt to endDt inclusive.

    For each date ``dt`` the model file ``<modelPath>/xgb_<dt>`` is passed to
    ``daily_test`` and everything it prints is captured in the report file
    ``<reportPath>/xgb_<dt>``.

    Args:
        modelPath: Directory holding the per-date model files.
        startDt: First date to process; compared and concatenated as a string.
        endDt: Last date to process (inclusive).
        initMoney: Forwarded unchanged to ``daily_test``.
        strategy: Forwarded unchanged to ``daily_test``.
        reportPath: Directory receiving the per-date report files.
        testStartDt: Forwarded unchanged to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    # Keep only the index entries accepted by both filters (combined with &).
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    df = df[np.asarray([st in secFilter for st in df.index], dtype=bool)]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        reportFile = os.path.join(reportPath, filename)
        modelFile = os.path.join(modelPath, filename)
        originalStdout = sys.stdout
        with open(reportFile, 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           modelFile, strategy)
            finally:
                # Always restore stdout, even when daily_test raises;
                # otherwise later prints would target a closed file.
                sys.stdout = originalStdout
        dt = utils_common.dtAdd(dt, 1)
def loadData():
    """Load the binary-classification training set.

    Rows with a label not greater than -1 are discarded, the frame is passed
    through ``dataio.joinTurnoverRank``, and only samples whose label is
    below ``config.FILT_DOWN`` or at least ``config.FILT_UP`` are kept.

    Returns:
        A tuple ``(labelBin, feature, dts)``: ``labelBin`` is 1 where the
        label is at least ``config.FILT_UP`` and 0 otherwise, ``feature`` is
        the matching ``config.FEATURE_SELECT`` value matrix, and ``dts`` holds
        the matching ``tradeDate`` index values.
    """
    frame = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    hasLabel = frame[config.LABEL] > -1
    frame = dataio.joinTurnoverRank(frame[hasLabel])

    tradeDates = frame.index.get_level_values('tradeDate')
    rawLabel = np.squeeze(frame[[config.LABEL]].values)
    rawFeature = frame[config.FEATURE_SELECT].values

    # Samples between the two thresholds are dropped.
    isDown = rawLabel < config.FILT_DOWN
    isUp = rawLabel >= config.FILT_UP
    keep = isDown | isUp

    keptLabel = rawLabel[keep]
    labelBin = np.where(keptLabel >= config.FILT_UP, 1, 0)
    return labelBin, rawFeature[keep], tradeDates[keep]
def loadDataReg():
    """Load the regression training set.

    Rows with a label not greater than -1 are discarded, then only index
    entries contained in the combination (``&``) of the turnover-rank filter
    and the max-continuous-close-day filter are kept.

    Returns:
        A tuple ``(label, feature, dts)``: the squeezed label values, the
        ``config.FEATURE_SELECT`` value matrix, and the ``tradeDate`` index
        values of the remaining rows.
    """
    frame = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    frame = frame[frame[config.LABEL] > -1]

    allowed = (dataio.getTurnoverRankFilter()
               & dataio.getMaxContinousCloseDayFilter())
    membership = [key in allowed for key in frame.index]
    frame = frame[np.asarray(membership)]

    tradeDates = frame.index.get_level_values('tradeDate')
    labelValues = np.squeeze(frame[[config.LABEL]].values)
    featureValues = frame[config.FEATURE_SELECT].values
    return labelValues, featureValues, tradeDates
def loadDataReg():
    """Build the (label, feature, dates) triple used for regression.

    Only rows with a label greater than -1 survive, and of those only the
    index entries that are members of ``turnoverFilter & closeDayFilter``.

    Returns:
        ``(label, feature, dts)`` where ``label`` is the squeezed
        ``config.LABEL`` column, ``feature`` the ``config.FEATURE_SELECT``
        values, and ``dts`` the ``tradeDate`` level of the filtered index.
    """
    data = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    labelled = data[config.LABEL] > -1
    data = data[labelled]

    turnoverOk = dataio.getTurnoverRankFilter()
    closeDayOk = dataio.getMaxContinousCloseDayFilter()
    accepted = turnoverOk & closeDayOk
    rowMask = np.asarray([entry in accepted for entry in data.index])
    data = data[rowMask]

    return (
        np.squeeze(data[[config.LABEL]].values),
        data[config.FEATURE_SELECT].values,
        data.index.get_level_values('tradeDate'),
    )
def extract(self):
    """Produce walk-forward predictions, re-keyed to the following trade date.

    Starting at '2014-01-01', the data is processed in consecutive windows
    of ``self.iterval_`` (as advanced by ``utils_common.dtAdd``). For each
    window a model is fitted through ``self.trainTestReg`` on the rows of the
    preceding ``config.TRAINING_DAYS`` and applied to the rows inside the
    window.

    Each prediction is then moved to the next trade date present in the
    result; predictions for the last date get the placeholder '9999-99-99'.

    Returns:
        A DataFrame indexed by ``(secID, tradeDate)`` with a single column
        named after this class.
    """
    source = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
    source = source[source[self.labelName_] > -1]
    source = dataio.joinTurnoverRank(source)

    dts = source.index.get_level_values('tradeDate')
    indices = source.index.values
    label = np.squeeze(source[[self.labelName_]].values)
    feature = source[config.FEATURE_SELECT].values

    columnName = self.__class__.__name__
    dt = '2014-01-01'
    dtMax = np.max(dts.values)
    prs = []
    while dt <= dtMax:
        dtStart = dt
        dtEnd = utils_common.dtAdd(dtStart, self.iterval_)
        print('start: {0}, end: {1}'.format(dtStart, dtEnd))

        trainFrom = utils_common.dtAdd(dtStart, -config.TRAINING_DAYS)
        idxTrain = (dts < dtStart) & (dts >= trainFrom)
        idxTest = (dts >= dtStart) & (dts <= dtEnd)

        weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)
        pred = self.trainTestReg(label[idxTrain], feature[idxTrain],
                                 feature[idxTest], weight)

        # Separate name so the source frame is not shadowed inside the loop.
        windowIndex = pd.MultiIndex.from_tuples(
            indices[idxTest], names=['secID', 'tradeDate'])
        prs.append(pd.DataFrame(pred, index=windowIndex,
                                columns=[columnName]))
        dt = utils_common.dtAdd(dtEnd, 1)

    result = pd.concat(prs)

    # Map every trade date to its successor; np.unique returns sorted values.
    udt = np.unique(result.index.get_level_values('tradeDate').values)
    nextDt = dict(zip(udt[:-1], udt[1:]))

    result.reset_index(inplace=True)
    print(result.shape)
    # Only the latest date has no successor, so it is the only lookup miss.
    # Rewriting the column in place (instead of rebuilding the frame from an
    # object array row by row) keeps the other columns' dtypes intact.
    result['tradeDate'] = [nextDt.get(d, '9999-99-99')
                           for d in result['tradeDate']]
    result.set_index(['secID', 'tradeDate'], inplace=True)
    return result
pred = cls.predict_proba(featureTest.reshape(1, -1))[:, cls.classes_ == 1][0][0] #pred = cls.predict_proba(featureTest.reshape(1,-1))[0]; #print((labelTest,pred)); #print(featureTest.reshape(1,-1)); return labelTest, pred if __name__ == '__main__': print('load data...') print('feature length: ' + str(len(FEATURE_SELECT))) print(feaNames) df = dataio.getLabelAndFeature(LABEL, feaNames) df = df[df[LABEL] > -1] print('prepare...') dt = df.index.get_level_values('tradeDate') label = np.squeeze(df[[LABEL]].values) feature = df[feaNames].values idxChoose = ((label < FILT_DOWN) | (label >= FILT_UP)) & (~np.any(np.isnan(feature), axis=1)) label = label[idxChoose] feature = feature[idxChoose] print('begin')
from config import dataio
import config
import numpy as np
import pandas as pd
import sys

# Export script: dumps the labelled feature table to the CSV file named by
# the first command-line argument, with the label column renamed to 'label'.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <output.csv>'.format(sys.argv[0]))

    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    # Keep only rows whose label is greater than -1.
    df = df[df[config.LABEL] > -1]

    # NOTE: the turnover-rank / max-continuous-close-day filter is not applied
    # to this export. The previous version loaded both filters and built a
    # row mask but never used it, so that dead computation was dropped.
    df.rename(columns={config.LABEL: 'label'}, inplace=True)
    df.to_csv(sys.argv[1])
from config import dataio
import config
import numpy as np
import pandas as pd
import sys

# Export script: writes the labelled feature table to the CSV path given as
# the first command-line argument; the label column is renamed to 'label'.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <output.csv>'.format(sys.argv[0]))
    outputPath = sys.argv[1]

    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    # Keep only rows whose label is greater than -1.
    df = df[df[config.LABEL] > -1]

    # NOTE: the turnover-rank / max-continuous-close-day filter is not applied
    # to this export. The previous version loaded both filters and built a
    # row mask but never used it, so that dead computation was dropped.
    df.rename(columns={config.LABEL: 'label'}, inplace=True)
    df.to_csv(outputPath)
def extract(self):
    """Produce walk-forward predictions, re-keyed to the following trade date.

    Starting at '2014-01-01', the data is processed in consecutive windows
    of ``self.iterval_`` (as advanced by ``utils_common.dtAdd``). For each
    window a model is fitted through ``self.trainTestReg`` on the rows of the
    preceding ``config.TRAINING_DAYS`` and applied to the rows inside the
    window.

    Each prediction is then moved to the next trade date present in the
    result; predictions for the last date get the placeholder '9999-99-99'.

    Returns:
        A DataFrame indexed by ``(secID, tradeDate)`` with a single column
        named after this class.
    """
    source = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
    source = source[source[self.labelName_] > -1]
    source = dataio.joinTurnoverRank(source)

    dts = source.index.get_level_values('tradeDate')
    indices = source.index.values
    label = np.squeeze(source[[self.labelName_]].values)
    feature = source[config.FEATURE_SELECT].values

    dt = '2014-01-01'
    dtMax = np.max(dts.values)
    prs = []
    while dt <= dtMax:
        dtStart = dt
        dtEnd = utils_common.dtAdd(dtStart, self.iterval_)
        print('start: {0}, end: {1}'.format(dtStart, dtEnd))

        trainFrom = utils_common.dtAdd(dtStart, -config.TRAINING_DAYS)
        idxTrain = (dts < dtStart) & (dts >= trainFrom)
        idxTest = (dts >= dtStart) & (dts <= dtEnd)
        labelTrain = label[idxTrain]
        featureTrain = feature[idxTrain]
        featureTest = feature[idxTest]

        weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)
        pred = self.trainTestReg(labelTrain, featureTrain, featureTest,
                                 weight)

        # Separate name so the source frame is not shadowed inside the loop.
        windowFrame = pd.DataFrame(
            pred,
            index=pd.MultiIndex.from_tuples(indices[idxTest],
                                            names=['secID', 'tradeDate']),
            columns=[self.__class__.__name__])
        prs.append(windowFrame)
        dt = utils_common.dtAdd(dtEnd, 1)

    shifted = pd.concat(prs)

    # Map every trade date to its successor; np.unique returns sorted values.
    udt = np.unique(shifted.index.get_level_values('tradeDate').values)
    dtMap = dict(zip(udt[:-1], udt[1:]))

    shifted.reset_index(inplace=True)
    print(shifted.shape)
    # Only the latest date has no successor, so it is the only lookup miss.
    # Rewriting the column in place (instead of rebuilding the frame from an
    # object array row by row) keeps the other columns' dtypes intact.
    shifted['tradeDate'] = [dtMap.get(d, '9999-99-99')
                            for d in shifted['tradeDate']]
    shifted.set_index(['secID', 'tradeDate'], inplace=True)
    return shifted
import pandas as pd; import common_proc; LABEL = 'LabelEvery1DayTrade'; NUMBER = 30; FILT_UP = 0.02; FILT_DOWN = 0.01; if __name__=='__main__': print('load data...'); FEATURE_SELECT = dataio.getAllFeatureNames(); print('feature length: ' + str(len(FEATURE_SELECT))); df = dataio.getLabelAndFeature(LABEL,FEATURE_SELECT); df = df[df[LABEL]>-1]; #df = df.apply(partial(pd.to_numeric, errors='coerse')); print('prepare...'); dt = df.index.get_level_values('tradeDate'); label = np.squeeze(df[[LABEL]].values); feature = df[FEATURE_SELECT].values; #feature[np.isnan(feature)] = 0; idxChoose = ((label<FILT_DOWN) | (label>=FILT_UP)); label = label[idxChoose]; feature = feature[idxChoose,:]; weight = common_proc.getWeight(dt);