예제 #1
0
def main(modelPath,outpath,dt=None):
    """Score one trade date with a pickled model and write the ranking to CSV.

    The label/feature table is restricted to index entries contained in the
    ``&`` combination of the turnover-rank and max-continuous-close-day
    filters, then to the rows of a single trade date. The model's predictions
    go into a ``score`` column, rows whose first index element is listed in
    ``config.BLACK_LIST`` are dropped, and the remainder is written to
    ``outpath`` sorted by descending score.

    Args:
        modelPath: Path to the pickled model; the loaded object must provide
            ``predict``.
        outpath: Path of the CSV file to write.
        dt: Trade date to score. Defaults to the largest ``tradeDate`` that
            survives the filters.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # Force a boolean dtype so an empty frame still yields a valid row mask
    # instead of an empty float array.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)
    df = df[idxFilter]

    dts = df.index.get_level_values('tradeDate').values
    if dt is None:
        dt = np.max(dts)
    df = df[dts == dt]

    feature = df[config.FEATURE_SELECT].values

    # Pickles are binary data: open in 'rb' and close the handle promptly.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point modelPath at trusted model files.
    with open(modelPath, 'rb') as fin:
        cls = pickle.load(fin)
    pred = np.squeeze(cls.predict(feature))

    dfout = pd.DataFrame(pred, index=df.index, columns=['score'])

    # Index.get_values() no longer exists in pandas >= 1.0; build the
    # blacklist mask directly from the index tuples instead.
    keep = np.array([i[0] not in config.BLACK_LIST for i in dfout.index],
                    dtype=bool)
    dfout = dfout[keep].sort_values(['score'], ascending=False)

    dfout.to_csv(outpath, float_format='%g')
예제 #2
0
def main(modelPath,startDt,endDt,initMoney,strategy,reportPath,testStartDt):
    """Run ``daily_test`` for every date from ``startDt`` through ``endDt``.

    For each date a report file named ``xgb_<date>`` is created under
    ``reportPath`` and ``sys.stdout`` is redirected into it while
    ``daily_test`` runs. The model path handed to ``daily_test`` is
    ``modelPath/xgb_<date>``.

    Args:
        modelPath: Directory holding the per-date model files.
        startDt: First date of the loop (string; compared with ``<=``).
        endDt: Last date of the loop, inclusive.
        initMoney: Forwarded unchanged to ``daily_test``.
        strategy: Forwarded unchanged to ``daily_test``.
        reportPath: Directory receiving the per-date report files.
        testStartDt: Forwarded unchanged to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)

    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # Boolean dtype keeps the mask valid even when the frame is empty.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)
    df = df[idxFilter]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        stdout = sys.stdout

        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           os.path.join(modelPath, filename),
                           strategy)
            finally:
                # Always restore stdout: otherwise an exception would leave
                # it bound to a file that the ``with`` block is closing.
                sys.stdout = stdout
        # presumably advances dt by one day -- confirm in utils_common.dtAdd
        dt = utils_common.dtAdd(dt, 1)
예제 #3
0
def main(modelPath, outpath, dt=None):
    """Predict scores for a single trade date and dump them, ranked, to CSV.

    Steps: load the label/feature frame, keep index entries found in both the
    turnover-rank and the max-continuous-close-day filters (combined with
    ``&``), select the rows for ``dt``, run the pickled model's ``predict``
    on the selected features, remove rows whose first index element is in
    ``config.BLACK_LIST`` and write the ``score`` column sorted descending.

    Args:
        modelPath: Path of the pickled model file.
        outpath: Output CSV path.
        dt: Trade date to score; when ``None`` the maximum ``tradeDate``
            remaining after filtering is used.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # dtype=bool guarantees a usable row mask for an empty index as well.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)

    df = df[idxFilter]

    dts = df.index.get_level_values('tradeDate').values
    if dt is None:
        dt = np.max(dts)

    df = df[dts == dt]

    feature = df[config.FEATURE_SELECT].values
    # Open the pickle in binary mode (required on Python 3) and make sure the
    # file handle is closed.
    # NOTE(review): unpickling runs arbitrary code -- use trusted files only.
    with open(modelPath, 'rb') as modelFile:
        cls = pickle.load(modelFile)
    pred = np.squeeze(cls.predict(feature))

    dfout = pd.DataFrame(pred, index=df.index, columns=['score'])

    # Replaces Index.map(...).get_values(), which pandas 1.0 removed.
    notBlacklisted = np.array(
        [i[0] not in config.BLACK_LIST for i in dfout.index], dtype=bool)
    dfout = dfout[notBlacklisted]
    dfout = dfout.sort_values(['score'], ascending=False)

    dfout.to_csv(outpath, float_format='%g')
예제 #4
0
def main(modelPath, startDt, endDt, initMoney, strategy, reportPath,
         testStartDt):
    """Produce one ``daily_test`` report per date in ``[startDt, endDt]``.

    The filtered feature matrix, trade dates and index values are computed
    once. For each date the output of ``daily_test`` is captured by pointing
    ``sys.stdout`` at ``reportPath/xgb_<date>``; the model location passed on
    is ``modelPath/xgb_<date>``.

    Args:
        modelPath: Directory containing the per-date model files.
        startDt: First date (string) to process.
        endDt: Last date (string) to process, inclusive.
        initMoney: Passed through to ``daily_test``.
        strategy: Passed through to ``daily_test``.
        reportPath: Directory in which the report files are created.
        testStartDt: Passed through to ``daily_test``.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # Explicit bool dtype: an empty comprehension would otherwise be float64.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)

    df = df[idxFilter]

    feature = df[config.FEATURE_SELECT].values
    dts = df.index.get_level_values('tradeDate')
    index = df.index.values

    dt = startDt
    while dt <= endDt:
        print(dt)
        filename = 'xgb_' + dt
        stdout = sys.stdout

        with open(os.path.join(reportPath, filename), 'w') as fout:
            sys.stdout = fout
            try:
                daily_test(feature, dts, index, initMoney, testStartDt,
                           os.path.join(modelPath, filename), strategy)
            finally:
                # Restore stdout even when daily_test fails, so later prints
                # do not target the report file being closed.
                sys.stdout = stdout
        # NOTE(review): assumed to step to the next date -- see dtAdd.
        dt = utils_common.dtAdd(dt, 1)
예제 #5
0
def loadData():
    """Load the arrays used for binary classification.

    Rows whose label is not greater than -1 are discarded and the frame is
    passed through ``dataio.joinTurnoverRank``. Only rows whose label is
    below ``config.FILT_DOWN`` or at/above ``config.FILT_UP`` are kept.

    Returns:
        tuple: ``(labelBin, feature, dts)`` where ``labelBin`` is 1 for
        labels ``>= config.FILT_UP`` and 0 otherwise, ``feature`` holds the
        ``config.FEATURE_SELECT`` columns of the kept rows, and ``dts`` the
        matching ``tradeDate`` index values.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    df = df[df[config.LABEL] > -1]
    df = dataio.joinTurnoverRank(df)

    dts = df.index.get_level_values('tradeDate')

    # Flatten the (n, 1) label column explicitly. np.squeeze would collapse a
    # single-row frame to a 0-d array, and the resulting 0-d mask would add
    # an axis to ``feature`` instead of selecting rows.
    label = df[[config.LABEL]].values.reshape(-1)
    feature = df[config.FEATURE_SELECT].values

    idxChoose = (label < config.FILT_DOWN) | (label >= config.FILT_UP)
    label = label[idxChoose]
    feature = feature[idxChoose]
    dts = dts[idxChoose]

    labelBin = np.where(label >= config.FILT_UP, 1, 0)
    return labelBin, feature, dts
예제 #6
0
def loadData():
    """Build ``(labelBin, feature, dts)`` for the up/down classifier.

    The label/feature frame is limited to rows with label ``> -1``, joined
    with turnover rank via ``dataio.joinTurnoverRank``, and then reduced to
    rows whose label lies outside ``[config.FILT_DOWN, config.FILT_UP)``.

    Returns:
        tuple: binary labels (1 when the label is ``>= config.FILT_UP``,
        else 0), the ``config.FEATURE_SELECT`` feature matrix, and the
        ``tradeDate`` index values, all restricted to the kept rows.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    df = df[df[config.LABEL] > -1]
    df = dataio.joinTurnoverRank(df)

    dts = df.index.get_level_values('tradeDate')
    feature = df[config.FEATURE_SELECT].values
    # reshape(-1) keeps the labels 1-D even for a single row, where
    # np.squeeze would return a 0-d array and break the row masks below.
    label = df[[config.LABEL]].values.reshape(-1)

    isUp = label >= config.FILT_UP
    idxChoose = (label < config.FILT_DOWN) | isUp

    feature = feature[idxChoose]
    dts = dts[idxChoose]
    labelBin = np.where(label[idxChoose] >= config.FILT_UP, 1, 0)
    return labelBin, feature, dts
예제 #7
0
def loadDataReg():
    """Load the arrays used for regression.

    Rows whose label is not greater than -1 are dropped, then only index
    entries contained in the ``&`` combination of the turnover-rank and
    max-continuous-close-day filters are kept.

    Returns:
        tuple: ``(label, feature, dts)`` -- the raw label values, the
        ``config.FEATURE_SELECT`` feature matrix and the ``tradeDate`` index
        values of the remaining rows.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    df = df[df[config.LABEL] > -1]

    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # dtype=bool: with no rows left, a plain np.asarray([]) is float64 and
    # pandas would treat it as a (empty) column selection, not a row mask.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)

    df = df[idxFilter]

    dts = df.index.get_level_values('tradeDate')

    # Flatten explicitly so a single remaining row still gives a 1-D label
    # array aligned with ``feature`` (np.squeeze would return a 0-d array).
    label = df[[config.LABEL]].values.reshape(-1)
    feature = df[config.FEATURE_SELECT].values
    return label, feature, dts
예제 #8
0
def loadDataReg():
    """Return ``(label, feature, dts)`` for the regression models.

    The label/feature frame is restricted to rows with label ``> -1`` and to
    index entries present in both the turnover-rank filter and the
    max-continuous-close-day filter (combined with ``&``).

    Returns:
        tuple: label values, ``config.FEATURE_SELECT`` feature matrix and
        ``tradeDate`` index values for the rows that pass the filters.
    """
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    df = df[df[config.LABEL] > -1]
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    # Build the mask with an explicit bool dtype so that an empty frame is
    # still filtered by rows rather than misread as a column selection.
    idxFilter = np.array([st in secFilter for st in df.index], dtype=bool)

    df = df[idxFilter]

    dts = df.index.get_level_values('tradeDate')
    feature = df[config.FEATURE_SELECT].values
    # reshape(-1) instead of np.squeeze: the latter yields a 0-d array when
    # exactly one row remains, which no longer lines up with ``feature``.
    label = df[[config.LABEL]].values.reshape(-1)
    return label, feature, dts
예제 #9
0
    def extract(self):
        """Generate walk-forward predictions keyed by the following trade date.

        Starting at '2014-01-01', the data is processed in consecutive
        windows of ``self.iterval_`` (window end computed by
        ``utils_common.dtAdd``). For each window the model is fitted through
        ``self.trainTestReg`` on rows dated before the window start and not
        earlier than ``dtAdd(start, -config.TRAINING_DAYS)``, weighted by
        ``utils_common.getWeight``, and predictions are made for the rows
        inside the window.

        Afterwards every prediction's ``tradeDate`` is replaced by the next
        distinct trade date found among the predictions; rows on the last
        date receive the sentinel '9999-99-99'.

        Returns:
            pandas.DataFrame indexed by ``(secID, tradeDate)`` with a single
            column named after this class.
        """
        frame = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
        frame = frame[frame[self.labelName_] > -1]
        frame = dataio.joinTurnoverRank(frame)

        dts = frame.index.get_level_values('tradeDate')
        indices = frame.index.values
        label = np.squeeze(frame[[self.labelName_]].values)
        feature = frame[config.FEATURE_SELECT].values

        lastDate = np.max(dts.values)
        pieces = []
        windowStart = '2014-01-01'
        while windowStart <= lastDate:
            windowEnd = utils_common.dtAdd(windowStart, self.iterval_)

            print('start: {0}, end: {1}'.format(windowStart, windowEnd))

            trainFrom = utils_common.dtAdd(windowStart, -config.TRAINING_DAYS)
            inTrain = (dts < windowStart) & (dts >= trainFrom)
            inTest = (dts >= windowStart) & (dts <= windowEnd)

            weight = utils_common.getWeight(dts[inTrain], config.WEIGHTER)
            pred = self.trainTestReg(label[inTrain], feature[inTrain],
                                     feature[inTest], weight)

            testIndex = pd.MultiIndex.from_tuples(
                indices[inTest], names=['secID', 'tradeDate'])
            pieces.append(pd.DataFrame(pred, index=testIndex,
                                       columns=[self.__class__.__name__]))

            windowStart = utils_common.dtAdd(windowEnd, 1)

        result = pd.concat(pieces)

        # Map each predicted trade date to its successor among the dates seen.
        udt = np.sort(np.unique(
            result.index.get_level_values('tradeDate').values))
        nextDate = dict(zip(udt[:-1], udt[1:]))
        maxDt = np.max(udt)

        result = result.reset_index()
        columns = result.columns
        arr = result.values
        idxDt = columns.get_loc('tradeDate')

        print(result.shape)
        for row in range(result.shape[0]):
            current = arr[row, idxDt]
            # The last date has no successor; mark it with a sentinel.
            arr[row, idxDt] = ('9999-99-99' if current == maxDt
                               else nextDate[current])

        shifted = pd.DataFrame(arr, columns=columns)
        return shifted.set_index(['secID', 'tradeDate'])
예제 #10
0
    pred = cls.predict_proba(featureTest.reshape(1,
                                                 -1))[:,
                                                      cls.classes_ == 1][0][0]
    #pred = cls.predict_proba(featureTest.reshape(1,-1))[0];
    #print((labelTest,pred));
    #print(featureTest.reshape(1,-1));

    return labelTest, pred


# Script entry point: loads the label/feature table and prepares the label and
# feature arrays. FEATURE_SELECT, feaNames, LABEL, FILT_DOWN and FILT_UP are
# defined outside this excerpt.
if __name__ == '__main__':
    print('load data...')
    # NOTE(review): the length printed is that of FEATURE_SELECT, while the
    # columns actually loaded below are feaNames -- confirm they agree.
    print('feature length: ' + str(len(FEATURE_SELECT)))

    print(feaNames)
    df = dataio.getLabelAndFeature(LABEL, feaNames)
    # Keep only rows whose label is greater than -1.
    df = df[df[LABEL] > -1]

    print('prepare...')
    # Trade dates of all rows (taken before the idxChoose filter below).
    dt = df.index.get_level_values('tradeDate')

    label = np.squeeze(df[[LABEL]].values)
    feature = df[feaNames].values

    # Keep rows whose label is below FILT_DOWN or at/above FILT_UP and whose
    # feature row contains no NaN.
    idxChoose = ((label < FILT_DOWN) |
                 (label >= FILT_UP)) & (~np.any(np.isnan(feature), axis=1))
    label = label[idxChoose]
    feature = feature[idxChoose]

    print('begin')
예제 #11
0
from config import dataio;
import config;
import numpy as np;
import pandas as pd;
import sys;

if __name__ == '__main__':
    # Export the label/feature table (rows with label > -1) to the CSV path
    # given as the first command-line argument, with the label column renamed
    # to 'label'.
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    hasLabel = df[config.LABEL] > -1
    df = df[hasLabel]

    # The security filter is still computed, but it is not applied to the
    # exported frame (the filtering step is disabled).
    turnoverFilter = dataio.getTurnoverRankFilter()
    maxContinousCloseDayFilter = dataio.getMaxContinousCloseDayFilter()
    secFilter = turnoverFilter & maxContinousCloseDayFilter
    idxFilter = np.asarray([st in secFilter for st in df.index])

    df = df.rename(columns={config.LABEL: 'label'})
    # df = df[idxFilter]

    df.to_csv(sys.argv[1])
예제 #12
0
from config import dataio
import config
import numpy as np
import pandas as pd
import sys

if __name__ == '__main__':
    # Dump every row with label > -1 to the CSV file named by sys.argv[1];
    # the label column is written under the name 'label'.
    df = dataio.getLabelAndFeature(config.LABEL, config.FEATURE_SELECT)
    df = df[df[config.LABEL] > -1]

    # Filter mask is built but intentionally left unused: the line applying
    # it is commented out below.
    secFilter = (dataio.getTurnoverRankFilter()
                 & dataio.getMaxContinousCloseDayFilter())
    idxFilter = np.asarray([entry in secFilter for entry in df.index])

    df = df.rename(columns={config.LABEL: 'label'})
    # df = df[idxFilter]

    df.to_csv(sys.argv[1])
예제 #13
0
    def extract(self):
        """Run rolling train/predict windows and shift results to the next date.

        Windows start at '2014-01-01' and advance until the start passes the
        largest ``tradeDate`` in the data; each window ends at
        ``utils_common.dtAdd(start, self.iterval_)``. Training rows are those
        dated before the window start and on/after
        ``utils_common.dtAdd(start, -config.TRAINING_DAYS)``; test rows are
        those inside the window. ``self.trainTestReg`` produces the
        predictions, using weights from ``utils_common.getWeight``.

        Once all windows are concatenated, each row's ``tradeDate`` is
        rewritten to the next distinct date present in the predictions, and
        rows on the final date are labelled '9999-99-99'.

        Returns:
            pandas.DataFrame with index ``(secID, tradeDate)`` and one column
            whose name is this class's name.
        """
        data = dataio.getLabelAndFeature(self.labelName_, config.FEATURE_SELECT)
        data = data[data[self.labelName_] > -1]
        data = dataio.joinTurnoverRank(data)

        dts = data.index.get_level_values('tradeDate')
        indices = data.index.values
        label = np.squeeze(data[[self.labelName_]].values)
        feature = data[config.FEATURE_SELECT].values
        colName = self.__class__.__name__

        dtMax = np.max(dts.values)
        prs = []
        dtStart = '2014-01-01'
        while dtStart <= dtMax:
            dtEnd = utils_common.dtAdd(dtStart, self.iterval_)
            print('start: {0}, end: {1}'.format(dtStart, dtEnd))

            dtTrainFrom = utils_common.dtAdd(dtStart, -config.TRAINING_DAYS)
            idxTrain = (dts < dtStart) & (dts >= dtTrainFrom)
            idxTest = (dts >= dtStart) & (dts <= dtEnd)

            weight = utils_common.getWeight(dts[idxTrain], config.WEIGHTER)
            pred = self.trainTestReg(label[idxTrain], feature[idxTrain],
                                     feature[idxTest], weight)

            prs.append(
                pd.DataFrame(pred,
                             index=pd.MultiIndex.from_tuples(
                                 indices[idxTest],
                                 names=['secID', 'tradeDate']),
                             columns=[colName]))
            dtStart = utils_common.dtAdd(dtEnd, 1)

        merged = pd.concat(prs)

        # Successor lookup over the sorted distinct prediction dates.
        udt = np.sort(
            np.unique(merged.index.get_level_values('tradeDate').values))
        dtMap = {}
        for pos in range(udt.shape[0] - 1):
            dtMap[udt[pos]] = udt[pos + 1]
        maxDt = np.max(udt)

        merged = merged.reset_index()
        arr = merged.values
        idxDt = merged.columns.get_loc('tradeDate')

        print(merged.shape)
        for i in range(merged.shape[0]):
            value = arr[i, idxDt]
            if value != maxDt:
                arr[i, idxDt] = dtMap[value]
            else:
                # No later date exists for the final day: use the sentinel.
                arr[i, idxDt] = '9999-99-99'

        out = pd.DataFrame(arr, columns=merged.columns)
        return out.set_index(['secID', 'tradeDate'])
예제 #14
0
import pandas as pd;
import common_proc;

# Name of the label column requested from dataio and read from the frame.
LABEL = 'LabelEvery1DayTrade';
# Not used in the visible part of this script.
NUMBER = 30;


# Label thresholds: rows with label >= FILT_UP or label < FILT_DOWN are kept;
# rows in between are discarded by idxChoose below.
FILT_UP = 0.02;
FILT_DOWN = 0.01;

# Script entry point: loads all features with the label and prepares the
# label, feature and weight arrays.
# NOTE(review): `dataio` and `np` are used here but their imports are not
# visible in this excerpt -- confirm they are imported elsewhere in the file.
if __name__=='__main__':
    print('load data...');
    FEATURE_SELECT = dataio.getAllFeatureNames();
    print('feature length: ' + str(len(FEATURE_SELECT)));
    
    df = dataio.getLabelAndFeature(LABEL,FEATURE_SELECT);
    # Keep only rows whose label is greater than -1.
    df = df[df[LABEL]>-1];

    #df = df.apply(partial(pd.to_numeric, errors='coerse'));
    
    print('prepare...');
    # Trade dates of all rows, taken before the idxChoose filter below.
    dt = df.index.get_level_values('tradeDate');

    label = np.squeeze(df[[LABEL]].values);
    feature = df[FEATURE_SELECT].values;

    #feature[np.isnan(feature)] = 0;
    # Keep rows whose label lies outside [FILT_DOWN, FILT_UP).
    idxChoose = ((label<FILT_DOWN) | (label>=FILT_UP));
    label = label[idxChoose];
    feature = feature[idxChoose,:];
    # NOTE(review): `dt` is not filtered by idxChoose, so the weights may not
    # line up with the filtered label/feature rows -- confirm against
    # common_proc.getWeight and its consumers.
    weight = common_proc.getWeight(dt);