Ejemplo n.º 1
0
def main(args):
    exec "import main.pandas_talib.sig_%s as conf" % args.signame
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df2 = df[df["ta_sig_%s" % args.signame] > 0]
    print ana(df, df2)
    print len(df), len(df2), len(df2[df2["label5"] > 1]) * 1.0 / len(df2), len(
        df[df["label5"] > 1]) * 1.0 / len(df)
    dfT1 = df[df["date"] <= '2009-12-31']
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0]
    print len(dfT1), len(dfT2), len(
        dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len(
            dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1)

    dfT1 = df[df["date"] > '2009-12-31']
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0]
    print len(dfT1), len(dfT2), len(
        dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len(
            dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1)

    dfT1 = df[(df["date"] > '2001-12-31') & (df['date'] <= '2009-12-31')]
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0]
    print len(dfT1), len(dfT2), len(
        dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len(
            dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1)
Ejemplo n.º 2
0
def one_work(cls, taName, label, date_range, th):
    df = base.get_merged(base.dir_ta(taName))
    df = get_range(df, date_range[0], date_range[1])
    m = joblib.load(os.path.join(root, 'data', 'models',"model_" + cls + ".pkl"))
    s = joblib.load(os.path.join(root, 'data', 'models',"scaler_" + cls + ".pkl"))
    feat_names = base.get_feat_names(df)
    npFeat = df.loc[:,feat_names].values
    #npPred = cls.predict_proba(npFeat)[:,1]
    #prent npPred
    res = ""
    for i, npPred in enumerate(m.staged_predict_proba(s.transform(npFeat))):
        #if i % 1 != 0:
        #    continue
        re =  "%s\t%s\t%s\t%s\t%s\t%f\t" % (cls, taName, label, date_range[0], date_range[1],th)
        df["pred"] = npPred[:,1]
        dacc =  accu(df, label, th)
        re += "%d\t%d\t%d\t" % (i, dacc["trueInPos"], dacc["pos"])
        if dacc["pos"] > 0:
            re += "%f" % (dacc["trueInPos"]*1.0 / dacc["pos"])
        else :
            re += "0.0"
        re += "\n"
        print re
        res += re
    return re
Ejemplo n.º 3
0
def main(args):
    exec "import main.pandas_talib.sig_%s as conf" % args.signame
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df.to_csv("ta.csv")

    tree = DecisionTreeClassifier()

    feat_names = base.get_feat_names(df)

    dfTrain = df[(df.date >= '1970-01-01') & (df.date <= '2009-12-31')]
    npTrainFeat = dfTrain.loc[:, feat_names].values.copy()
    npTrainLabel = dfTrain.loc[:, "label5"].values.copy()
    npTrainLabel[npTrainLabel > 1.0] = 1
    npTrainLabel[npTrainLabel < 1.0] = 0

    tree.fit(npTrainFeat, npTrainLabel)
    joblib.dump(tree, "tree.pkl", compress=3)

    dfTest = df[(df.date >= '2010-01-01') & (df.date <= '2099-12-31')]
    npTestFeat = dfTest.loc[:, feat_names].values.copy()

    npPred = tree.predict_proba(npTestFeat)

    dfTest.loc[:, "pred"] = npPred[:, 1]

    print dfTest['pred'].head()

    dfPos = dfTest[dfTest['pred'] > 0.55]
    print 1.0 * len(dfPos[dfPos['label5'] > 1]) / len(dfPos)
    print 1.0 * len(dfTest[dfTest['label5'] > 1]) / len(dfTest)
Ejemplo n.º 4
0
def main(args):
    exec "import main.pandas_talib.sig_%s as conf" % args.signame
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df.to_csv("ta.csv")

    tree = DecisionTreeClassifier() 
    
    feat_names = base.get_feat_names(df)

    dfTrain = df[(df.date>='1970-01-01') & (df.date <='2009-12-31')]
    npTrainFeat = dfTrain.loc[:,feat_names].values.copy()
    npTrainLabel = dfTrain.loc[:,"label5"].values.copy()
    npTrainLabel[npTrainLabel >  1.0] = 1
    npTrainLabel[npTrainLabel <  1.0] = 0

    tree.fit(npTrainFeat, npTrainLabel)
    joblib.dump(tree, "tree.pkl", compress = 3)
    
    dfTest = df[(df.date>='2010-01-01') & (df.date <='2099-12-31')]
    npTestFeat = dfTest.loc[:, feat_names].values.copy()
    
    npPred = tree.predict_proba(npTestFeat)

    dfTest.loc[:,"pred"] = npPred[:,1]
    
    print dfTest['pred'].head()

    dfPos = dfTest[ dfTest['pred'] > 0.55 ]
    print 1.0 * len(dfPos[dfPos['label5']>1])  / len(dfPos)
    print 1.0 * len(dfTest[dfTest['label5']>1])  / len(dfTest)
Ejemplo n.º 5
0
def main(args):
    lsym = getattr(yeod, "get_%s" % args.setname)()
    dfTa = base.get_merged(args.taname, lsym)
    if dfTa is None:
        print "can not merge " % args.setname
        sys.exit(1)
    dfTa = base.get_range(dfTa, args.start, args.end)
    print dfTa.shape
    #if args.filter:
    #    dfTa = filter_trend(dfTa)
    #print dfTa.shape

    cls = joblib.load(os.path.join(base.dir_model(), args.model))
    feat_names = base.get_feat_names(dfTa)
    npFeat = dfTa.loc[:, feat_names].values
    if isscaler:
        scaler = get_scaler(clsName)
        npFeatScaler = scaler.transform(npFeat)
    else:
        npFeatScaler = npFeat
    #for i, npPred in enumerate(cls.staged_predict_proba(npFeatScaler)):
    #    if i == args.stage:
    #        break
    npPred = cls.predict_proba(npFeat)
    dfTa["pred"] = npPred[:, 1]
    dfTa = dfTa.sort_values(['pred'], ascending=False)
    freport, fpred = base.file_paper(args)
    dfTa.to_csv(fpred)

    ana.main([fpred, args.top, args.thresh, freport, args.level])
    print freport
Ejemplo n.º 6
0
def one_work(cls, taName, label, date_range, th):
    df = base.get_merged(base.dir_ta(taName))
    df = get_range(df, date_range[0], date_range[1])
    m = joblib.load(
        os.path.join(root, 'data', 'models', "model_" + cls + ".pkl"))
    s = joblib.load(
        os.path.join(root, 'data', 'models', "scaler_" + cls + ".pkl"))
    feat_names = base.get_feat_names(df)
    npFeat = df.loc[:, feat_names].values
    #npPred = cls.predict_proba(npFeat)[:,1]
    #prent npPred
    res = ""
    for i, npPred in enumerate(m.staged_predict_proba(s.transform(npFeat))):
        #if i % 1 != 0:
        #    continue
        re = "%s\t%s\t%s\t%s\t%s\t%f\t" % (cls, taName, label, date_range[0],
                                           date_range[1], th)
        df["pred"] = npPred[:, 1]
        dacc = accu(df, label, th)
        re += "%d\t%d\t%d\t" % (i, dacc["trueInPos"], dacc["pos"])
        if dacc["pos"] > 0:
            re += "%f" % (dacc["trueInPos"] * 1.0 / dacc["pos"])
        else:
            re += "0.0"
        re += "\n"
        print re
        res += re
    return re
Ejemplo n.º 7
0
def main(args):
    cls = params_set.d_model[args.clsname]
    file_model, file_ipt = base.file_model(args)

    if os.path.isfile(file_model):
        print "%s already exists!" % file_model
        return
    dfTa = base.get_merged(args.taname, getattr(yeod, "get_%s" % args.setname)(),args.start,args.end)
    if dfTa is None:
        return None
    dfTrain = dfTa # build_trains(dfTa, args.start, args.end)

    if args.sample:
        print "sampling ..."
        sample = len(dfTrain)/sample
        rows = random.sample(range(len(dfTrain)), sample)
        print len(rows)
        dfTrain = dfTrain.reset_index(drop=True)
        dfTrain = dfTrain.ix[rows]

    if args.repeat:
        print "repeat ..."
        toAppends = []
        for i in range(1,3):
            dfTmp = dfTrain[dfTrain.label5>=1+i/20.0]
            toAppends.append(dfTmp)
        print dfTrain.shape
        dfTrain = dfTrain.append(toAppends)
        print dfTrain.shape

    if args.sw:
        dfTrain = getattr(sw, "sw_%s" % args.sw)(dfTrain)

    feat_names = base.get_feat_names(dfTrain)
    npTrainFeat = dfTrain.loc[:,feat_names].values

    npTrainLabel = dfTrain.loc[:,args.labelname].values.copy()
    npTrainLabel[npTrainLabel != 1.0]
    npTrainLabel[npTrainLabel >  1.0] = 1
    npTrainLabel[npTrainLabel <  1.0] = 0

    if args.scaler:
        scaler = getMinMax(npTrainFeat)
        npTrainFeatScaled = scaler.transform(npTrainFeat)
    else:
        npTrainFeatScaled = npTrainFeat
    if args.sw:
        cls.fit(npTrainFeatScaled, npTrainLabel, sample_weight=dfTrain["sample_weight"].values)
    else:
        cls.fit(npTrainFeatScaled, npTrainLabel)
    joblib.dump(cls, file_model, compress = 3)
    #joblib.dump(scaler, os.path.join(root, 'data', 'models',scalerName), compress = 3)
    dFeatImps = dict(zip( feat_names, cls.feature_importances_))
    with open(file_ipt, 'w') as fipt:
        for each in sorted(dFeatImps.iteritems(), key = lambda a: a[1], reverse=True):
            print >> fipt, each[0], ",", each[1]
Ejemplo n.º 8
0
def main(args):
    build.work2(10, "sp500Top50", sig_macd)
    dfTa = base.get_merged("sig_macd", yeod.get_sp500Top50())
    dfTa1 = dfTa[(dfTa["date"] < "2010-01-01")]
    #dfTa = dfTa[(dfTa["date"] >= "2010-01-01") & (dfTa["date"]<="2010-12-31")]
    #dfTa = dfTa[(dfTa["date"]>"2010-12-31")]
    dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)] > 0]
    print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0 / len(dfTa2),
    print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0,
    print len(dfTa1[dfTa1["label5"] > 1.0]) * 1.0 / len(dfTa1)

    dfTa1 = dfTa[(dfTa["date"] > "2010-01-01")]
    dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)] > 0]
    print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0 / len(dfTa2),
    print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0,
    print len(dfTa1[dfTa1["label5"] > 1.0]) * 1.0 / len(dfTa1)
Ejemplo n.º 9
0
def main(args):
    build.work2(10, "sp500Top50", sig_macd)
    dfTa = base.get_merged("sig_macd", yeod.get_sp500Top50())
    dfTa1 = dfTa[(dfTa["date"] < "2010-01-01")]
    #dfTa = dfTa[(dfTa["date"] >= "2010-01-01") & (dfTa["date"]<="2010-12-31")]
    #dfTa = dfTa[(dfTa["date"]>"2010-12-31")]
    dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)]>0]
    print len(dfTa2[dfTa2["label5"]>1.0])*1.0/len(dfTa2),
    print len(dfTa2[dfTa2["label5"]>1.0])*1.0,
    print len(dfTa1[dfTa1["label5"]>1.0])*1.0/len(dfTa1)

    dfTa1 = dfTa[(dfTa["date"] > "2010-01-01")]
    dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)]>0]
    print len(dfTa2[dfTa2["label5"]>1.0])*1.0/len(dfTa2),
    print len(dfTa2[dfTa2["label5"]>1.0])*1.0,
    print len(dfTa1[dfTa1["label5"]>1.0])*1.0/len(dfTa1)
Ejemplo n.º 10
0
def work(setname, start, end, depth, thresh, scorename):
    """
    """
    phase1 = base.get_merged("base1", getattr(yeod, "get_%s" % setname)(), start, end)
    print phase1.shape
    phase1.reset_index(drop=True, inplace=True)
    phase1 = score.agn_rank_score(phase1)
    phase1 = score.agn_rank_score(phase1, interval=5, threshold=0.55)
    phase1 = score.agn_label_score(phase1, interval=5, threshold=1.0)
    meta = feat_select.flat_metas(phase1, depth, 100000, scorename)

    print meta[["fname", "c_p"]]
    meta = extract_meta(meta, thresh)
    meta.reset_index(drop=True, inplace=True)

    meta.to_pickle("./data/model/meta_base1_%s_%s_%s_%s_%d_100000.pkl" % (setname, scorename, start, end, depth))
    return meta
Ejemplo n.º 11
0
def main(args):
    exec "import main.pandas_talib.sig_%s as conf" % args.signame
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df2 = df[df["ta_sig_%s" % args.signame]>0]
    print ana(df,df2)
    print len(df), len(df2), len(df2[df2["label5"]>1]) *1.0/len(df2) , len(df[df["label5"]>1]) *1.0/len(df)
    dfT1 = df[df["date"] <= '2009-12-31'] 
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0]
    print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1)

    dfT1 = df[df["date"] > '2009-12-31'] 
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0]
    print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1)

    dfT1 = df[(df["date"] > '2001-12-31') & (df['date']<='2009-12-31') ] 
    dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0]
    print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1)
Ejemplo n.º 12
0
def work(setname, start, end, depth, thresh, scorename):
    """
    """
    phase1 = base.get_merged("base1",
                             getattr(yeod, "get_%s" % setname)(), start, end)
    print phase1.shape
    phase1.reset_index(drop=True, inplace=True)
    phase1 = score.agn_rank_score(phase1)
    phase1 = score.agn_rank_score(phase1, interval=5, threshold=0.55)
    phase1 = score.agn_label_score(phase1, interval=5, threshold=1.0)
    meta = feat_select.flat_metas(phase1, depth, 100000, scorename)

    print meta[["fname", "c_p"]]
    meta = extract_meta(meta, thresh)
    meta.reset_index(drop=True, inplace=True)

    meta.to_pickle("./data/model/meta_base1_%s_%s_%s_%s_%d_100000.pkl" %
                   (setname, scorename, start, end, depth))
    return meta
Ejemplo n.º 13
0
def main(argv):
    clsName = argv[0]
    taName = argv[1]
    labelName = argv[2]
    start = argv[3]
    end = argv[4]
    top = int(argv[5])
    ta_father = os.path.join(root, 'data', 'ta_batch', taName )
    for d in sorted(os.listdir(ta_father)):
        if d == None or not os.path.isdir(os.path.join(ta_father,d)):
            continue
        cls = joblib.load(os.path.join(root, 'data', 'models_batch', clsName,d, "model.pkl"))
        ta = base.get_merged(os.path.join(ta_father, d))
        out_file = os.path.join(root, 'data', 'select_batch', clsName + "-" + taName + "-"  + labelName
                + "-" + start + "-" + end + "-" + str(top), d)
        if not os.path.exists(out_file):
            os.makedirs(out_file)
        res = one_work(cls, ta, labelName,start, end, top)
        res.to_csv(out_file + "/select.csv")
        print out_file
Ejemplo n.º 14
0
def one_work(clsName, taName, labelName, start, end, top):
    df = base.get_merged(base.dir_ta(taName))
    df = get_range(df, start, end)
    m = joblib.load(os.path.join(root, 'data', 'models',"model_" + clsName + ".pkl"))
    if isscaler:
        s = joblib.load(os.path.join(root, 'data', 'models',"scaler_" + clsName + ".pkl"))
    feat_names = base.get_feat_names(df)
    npFeat = df.loc[:,feat_names].values
    res = ""
    if isscaler :
        npFeat = s.transform(npFeat)
    topscore = None
    l = []
    for i, npPred in enumerate(m.staged_predict_proba(npFeat)):
        df.loc[:,"pred"] = npPred[:,1]
        dacc =  accu(df, labelName, top)
        acc = 0.0
        if dacc["pos"] > 0:
            acc = (dacc["trueInPos"]*1.0 / dacc["pos"])
        print i, acc
        l.append([i, acc])
    return pd.DataFrame(np.asarray(l), columns=["idx", "acc"])
Ejemplo n.º 15
0
def main(argv):
    clsName = argv[0]
    taName = argv[1]
    labelName = argv[2]
    start = argv[3]
    end = argv[4]
    top = int(argv[5])
    ta_father = os.path.join(root, 'data', 'ta_batch', taName)
    for d in sorted(os.listdir(ta_father)):
        if d == None or not os.path.isdir(os.path.join(ta_father, d)):
            continue
        cls = joblib.load(
            os.path.join(root, 'data', 'models_batch', clsName, d,
                         "model.pkl"))
        ta = base.get_merged(os.path.join(ta_father, d))
        out_file = os.path.join(
            root, 'data', 'select_batch', clsName + "-" + taName + "-" +
            labelName + "-" + start + "-" + end + "-" + str(top), d)
        if not os.path.exists(out_file):
            os.makedirs(out_file)
        res = one_work(cls, ta, labelName, start, end, top)
        res.to_csv(out_file + "/select.csv")
        print out_file
Ejemplo n.º 16
0
def load_feat(taname, setname, start="", end=""):
    """Load the merged TA feature frame for `setname` over [start, end],
    dropping rows whose 5-day label is exactly 1.0 (flat outcome).
    """
    symbols = getattr(yeod, "get_%s" % setname)()
    merged = base.get_merged(taname, symbols, start, end)
    return merged[merged.label5 != 1.0]
Ejemplo n.º 17
0
def load_feat(taname, setname, label, start="", end=""):
    """Load the merged TA feature frame for `setname` over [start, end].

    `label` is accepted for signature compatibility with callers but is not
    used by this variant.
    """
    symbols = getattr(yeod, "get_%s" % setname)()
    return base.get_merged(taname, symbols, start, end)