def main(args): exec "import main.pandas_talib.sig_%s as conf" % args.signame build.work2(20, 'sp500Top50', args.signame) df = base.get_merged(conf.__name__, yeod.get_sp500Top50()) df2 = df[df["ta_sig_%s" % args.signame] > 0] print ana(df, df2) print len(df), len(df2), len(df2[df2["label5"] > 1]) * 1.0 / len(df2), len( df[df["label5"] > 1]) * 1.0 / len(df) dfT1 = df[df["date"] <= '2009-12-31'] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0] print len(dfT1), len(dfT2), len( dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len( dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1) dfT1 = df[df["date"] > '2009-12-31'] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0] print len(dfT1), len(dfT2), len( dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len( dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1) dfT1 = df[(df["date"] > '2001-12-31') & (df['date'] <= '2009-12-31')] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame] > 0] print len(dfT1), len(dfT2), len( dfT2[dfT2["label5"] > 1]) * 1.0 / len(dfT2), len( dfT1[dfT1["label5"] > 1]) * 1.0 / len(dfT1)
def one_work(cls, taName, label, date_range, th): df = base.get_merged(base.dir_ta(taName)) df = get_range(df, date_range[0], date_range[1]) m = joblib.load(os.path.join(root, 'data', 'models',"model_" + cls + ".pkl")) s = joblib.load(os.path.join(root, 'data', 'models',"scaler_" + cls + ".pkl")) feat_names = base.get_feat_names(df) npFeat = df.loc[:,feat_names].values #npPred = cls.predict_proba(npFeat)[:,1] #prent npPred res = "" for i, npPred in enumerate(m.staged_predict_proba(s.transform(npFeat))): #if i % 1 != 0: # continue re = "%s\t%s\t%s\t%s\t%s\t%f\t" % (cls, taName, label, date_range[0], date_range[1],th) df["pred"] = npPred[:,1] dacc = accu(df, label, th) re += "%d\t%d\t%d\t" % (i, dacc["trueInPos"], dacc["pos"]) if dacc["pos"] > 0: re += "%f" % (dacc["trueInPos"]*1.0 / dacc["pos"]) else : re += "0.0" re += "\n" print re res += re return re
def main(args): exec "import main.pandas_talib.sig_%s as conf" % args.signame build.work2(20, 'sp500Top50', args.signame) df = base.get_merged(conf.__name__, yeod.get_sp500Top50()) df.to_csv("ta.csv") tree = DecisionTreeClassifier() feat_names = base.get_feat_names(df) dfTrain = df[(df.date >= '1970-01-01') & (df.date <= '2009-12-31')] npTrainFeat = dfTrain.loc[:, feat_names].values.copy() npTrainLabel = dfTrain.loc[:, "label5"].values.copy() npTrainLabel[npTrainLabel > 1.0] = 1 npTrainLabel[npTrainLabel < 1.0] = 0 tree.fit(npTrainFeat, npTrainLabel) joblib.dump(tree, "tree.pkl", compress=3) dfTest = df[(df.date >= '2010-01-01') & (df.date <= '2099-12-31')] npTestFeat = dfTest.loc[:, feat_names].values.copy() npPred = tree.predict_proba(npTestFeat) dfTest.loc[:, "pred"] = npPred[:, 1] print dfTest['pred'].head() dfPos = dfTest[dfTest['pred'] > 0.55] print 1.0 * len(dfPos[dfPos['label5'] > 1]) / len(dfPos) print 1.0 * len(dfTest[dfTest['label5'] > 1]) / len(dfTest)
def main(args): exec "import main.pandas_talib.sig_%s as conf" % args.signame build.work2(20, 'sp500Top50', args.signame) df = base.get_merged(conf.__name__, yeod.get_sp500Top50()) df.to_csv("ta.csv") tree = DecisionTreeClassifier() feat_names = base.get_feat_names(df) dfTrain = df[(df.date>='1970-01-01') & (df.date <='2009-12-31')] npTrainFeat = dfTrain.loc[:,feat_names].values.copy() npTrainLabel = dfTrain.loc[:,"label5"].values.copy() npTrainLabel[npTrainLabel > 1.0] = 1 npTrainLabel[npTrainLabel < 1.0] = 0 tree.fit(npTrainFeat, npTrainLabel) joblib.dump(tree, "tree.pkl", compress = 3) dfTest = df[(df.date>='2010-01-01') & (df.date <='2099-12-31')] npTestFeat = dfTest.loc[:, feat_names].values.copy() npPred = tree.predict_proba(npTestFeat) dfTest.loc[:,"pred"] = npPred[:,1] print dfTest['pred'].head() dfPos = dfTest[ dfTest['pred'] > 0.55 ] print 1.0 * len(dfPos[dfPos['label5']>1]) / len(dfPos) print 1.0 * len(dfTest[dfTest['label5']>1]) / len(dfTest)
def main(args): lsym = getattr(yeod, "get_%s" % args.setname)() dfTa = base.get_merged(args.taname, lsym) if dfTa is None: print "can not merge " % args.setname sys.exit(1) dfTa = base.get_range(dfTa, args.start, args.end) print dfTa.shape #if args.filter: # dfTa = filter_trend(dfTa) #print dfTa.shape cls = joblib.load(os.path.join(base.dir_model(), args.model)) feat_names = base.get_feat_names(dfTa) npFeat = dfTa.loc[:, feat_names].values if isscaler: scaler = get_scaler(clsName) npFeatScaler = scaler.transform(npFeat) else: npFeatScaler = npFeat #for i, npPred in enumerate(cls.staged_predict_proba(npFeatScaler)): # if i == args.stage: # break npPred = cls.predict_proba(npFeat) dfTa["pred"] = npPred[:, 1] dfTa = dfTa.sort_values(['pred'], ascending=False) freport, fpred = base.file_paper(args) dfTa.to_csv(fpred) ana.main([fpred, args.top, args.thresh, freport, args.level]) print freport
def one_work(cls, taName, label, date_range, th): df = base.get_merged(base.dir_ta(taName)) df = get_range(df, date_range[0], date_range[1]) m = joblib.load( os.path.join(root, 'data', 'models', "model_" + cls + ".pkl")) s = joblib.load( os.path.join(root, 'data', 'models', "scaler_" + cls + ".pkl")) feat_names = base.get_feat_names(df) npFeat = df.loc[:, feat_names].values #npPred = cls.predict_proba(npFeat)[:,1] #prent npPred res = "" for i, npPred in enumerate(m.staged_predict_proba(s.transform(npFeat))): #if i % 1 != 0: # continue re = "%s\t%s\t%s\t%s\t%s\t%f\t" % (cls, taName, label, date_range[0], date_range[1], th) df["pred"] = npPred[:, 1] dacc = accu(df, label, th) re += "%d\t%d\t%d\t" % (i, dacc["trueInPos"], dacc["pos"]) if dacc["pos"] > 0: re += "%f" % (dacc["trueInPos"] * 1.0 / dacc["pos"]) else: re += "0.0" re += "\n" print re res += re return re
def main(args): cls = params_set.d_model[args.clsname] file_model, file_ipt = base.file_model(args) if os.path.isfile(file_model): print "%s already exists!" % file_model return dfTa = base.get_merged(args.taname, getattr(yeod, "get_%s" % args.setname)(),args.start,args.end) if dfTa is None: return None dfTrain = dfTa # build_trains(dfTa, args.start, args.end) if args.sample: print "sampling ..." sample = len(dfTrain)/sample rows = random.sample(range(len(dfTrain)), sample) print len(rows) dfTrain = dfTrain.reset_index(drop=True) dfTrain = dfTrain.ix[rows] if args.repeat: print "repeat ..." toAppends = [] for i in range(1,3): dfTmp = dfTrain[dfTrain.label5>=1+i/20.0] toAppends.append(dfTmp) print dfTrain.shape dfTrain = dfTrain.append(toAppends) print dfTrain.shape if args.sw: dfTrain = getattr(sw, "sw_%s" % args.sw)(dfTrain) feat_names = base.get_feat_names(dfTrain) npTrainFeat = dfTrain.loc[:,feat_names].values npTrainLabel = dfTrain.loc[:,args.labelname].values.copy() npTrainLabel[npTrainLabel != 1.0] npTrainLabel[npTrainLabel > 1.0] = 1 npTrainLabel[npTrainLabel < 1.0] = 0 if args.scaler: scaler = getMinMax(npTrainFeat) npTrainFeatScaled = scaler.transform(npTrainFeat) else: npTrainFeatScaled = npTrainFeat if args.sw: cls.fit(npTrainFeatScaled, npTrainLabel, sample_weight=dfTrain["sample_weight"].values) else: cls.fit(npTrainFeatScaled, npTrainLabel) joblib.dump(cls, file_model, compress = 3) #joblib.dump(scaler, os.path.join(root, 'data', 'models',scalerName), compress = 3) dFeatImps = dict(zip( feat_names, cls.feature_importances_)) with open(file_ipt, 'w') as fipt: for each in sorted(dFeatImps.iteritems(), key = lambda a: a[1], reverse=True): print >> fipt, each[0], ",", each[1]
def main(args): build.work2(10, "sp500Top50", sig_macd) dfTa = base.get_merged("sig_macd", yeod.get_sp500Top50()) dfTa1 = dfTa[(dfTa["date"] < "2010-01-01")] #dfTa = dfTa[(dfTa["date"] >= "2010-01-01") & (dfTa["date"]<="2010-12-31")] #dfTa = dfTa[(dfTa["date"]>"2010-12-31")] dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)] > 0] print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0 / len(dfTa2), print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0, print len(dfTa1[dfTa1["label5"] > 1.0]) * 1.0 / len(dfTa1) dfTa1 = dfTa[(dfTa["date"] > "2010-01-01")] dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)] > 0] print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0 / len(dfTa2), print len(dfTa2[dfTa2["label5"] > 1.0]) * 1.0, print len(dfTa1[dfTa1["label5"] > 1.0]) * 1.0 / len(dfTa1)
def main(args): build.work2(10, "sp500Top50", sig_macd) dfTa = base.get_merged("sig_macd", yeod.get_sp500Top50()) dfTa1 = dfTa[(dfTa["date"] < "2010-01-01")] #dfTa = dfTa[(dfTa["date"] >= "2010-01-01") & (dfTa["date"]<="2010-12-31")] #dfTa = dfTa[(dfTa["date"]>"2010-12-31")] dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)]>0] print len(dfTa2[dfTa2["label5"]>1.0])*1.0/len(dfTa2), print len(dfTa2[dfTa2["label5"]>1.0])*1.0, print len(dfTa1[dfTa1["label5"]>1.0])*1.0/len(dfTa1) dfTa1 = dfTa[(dfTa["date"] > "2010-01-01")] dfTa2 = dfTa1[dfTa1["ta_macd_signal_%d_%d" % (fast, slow)]>0] print len(dfTa2[dfTa2["label5"]>1.0])*1.0/len(dfTa2), print len(dfTa2[dfTa2["label5"]>1.0])*1.0, print len(dfTa1[dfTa1["label5"]>1.0])*1.0/len(dfTa1)
def work(setname, start, end, depth, thresh, scorename): """ """ phase1 = base.get_merged("base1", getattr(yeod, "get_%s" % setname)(), start, end) print phase1.shape phase1.reset_index(drop=True, inplace=True) phase1 = score.agn_rank_score(phase1) phase1 = score.agn_rank_score(phase1, interval=5, threshold=0.55) phase1 = score.agn_label_score(phase1, interval=5, threshold=1.0) meta = feat_select.flat_metas(phase1, depth, 100000, scorename) print meta[["fname", "c_p"]] meta = extract_meta(meta, thresh) meta.reset_index(drop=True, inplace=True) meta.to_pickle("./data/model/meta_base1_%s_%s_%s_%s_%d_100000.pkl" % (setname, scorename, start, end, depth)) return meta
def main(args): exec "import main.pandas_talib.sig_%s as conf" % args.signame build.work2(20, 'sp500Top50', args.signame) df = base.get_merged(conf.__name__, yeod.get_sp500Top50()) df2 = df[df["ta_sig_%s" % args.signame]>0] print ana(df,df2) print len(df), len(df2), len(df2[df2["label5"]>1]) *1.0/len(df2) , len(df[df["label5"]>1]) *1.0/len(df) dfT1 = df[df["date"] <= '2009-12-31'] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0] print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1) dfT1 = df[df["date"] > '2009-12-31'] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0] print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1) dfT1 = df[(df["date"] > '2001-12-31') & (df['date']<='2009-12-31') ] dfT2 = dfT1[dfT1["ta_sig_%s" % args.signame]>0] print len(dfT1), len(dfT2), len(dfT2[dfT2["label5"]>1]) *1.0/len(dfT2) , len(dfT1[dfT1["label5"]>1]) *1.0/len(dfT1)
def main(argv): clsName = argv[0] taName = argv[1] labelName = argv[2] start = argv[3] end = argv[4] top = int(argv[5]) ta_father = os.path.join(root, 'data', 'ta_batch', taName ) for d in sorted(os.listdir(ta_father)): if d == None or not os.path.isdir(os.path.join(ta_father,d)): continue cls = joblib.load(os.path.join(root, 'data', 'models_batch', clsName,d, "model.pkl")) ta = base.get_merged(os.path.join(ta_father, d)) out_file = os.path.join(root, 'data', 'select_batch', clsName + "-" + taName + "-" + labelName + "-" + start + "-" + end + "-" + str(top), d) if not os.path.exists(out_file): os.makedirs(out_file) res = one_work(cls, ta, labelName,start, end, top) res.to_csv(out_file + "/select.csv") print out_file
def one_work(clsName, taName, labelName, start, end, top): df = base.get_merged(base.dir_ta(taName)) df = get_range(df, start, end) m = joblib.load(os.path.join(root, 'data', 'models',"model_" + clsName + ".pkl")) if isscaler: s = joblib.load(os.path.join(root, 'data', 'models',"scaler_" + clsName + ".pkl")) feat_names = base.get_feat_names(df) npFeat = df.loc[:,feat_names].values res = "" if isscaler : npFeat = s.transform(npFeat) topscore = None l = [] for i, npPred in enumerate(m.staged_predict_proba(npFeat)): df.loc[:,"pred"] = npPred[:,1] dacc = accu(df, labelName, top) acc = 0.0 if dacc["pos"] > 0: acc = (dacc["trueInPos"]*1.0 / dacc["pos"]) print i, acc l.append([i, acc]) return pd.DataFrame(np.asarray(l), columns=["idx", "acc"])
def main(argv): clsName = argv[0] taName = argv[1] labelName = argv[2] start = argv[3] end = argv[4] top = int(argv[5]) ta_father = os.path.join(root, 'data', 'ta_batch', taName) for d in sorted(os.listdir(ta_father)): if d == None or not os.path.isdir(os.path.join(ta_father, d)): continue cls = joblib.load( os.path.join(root, 'data', 'models_batch', clsName, d, "model.pkl")) ta = base.get_merged(os.path.join(ta_father, d)) out_file = os.path.join( root, 'data', 'select_batch', clsName + "-" + taName + "-" + labelName + "-" + start + "-" + end + "-" + str(top), d) if not os.path.exists(out_file): os.makedirs(out_file) res = one_work(cls, ta, labelName, start, end, top) res.to_csv(out_file + "/select.csv") print out_file
def load_feat(taname, setname, start="", end=""):
    """Load the merged feature frame for a symbol set.

    Rows whose label5 equals exactly 1.0 (no move) are dropped.
    """
    symbols = getattr(yeod, "get_%s" % setname)()
    frame = base.get_merged(taname, symbols, start, end)
    return frame[frame.label5 != 1.0]
def load_feat(taname, setname, label, start="", end=""):
    """Load the merged feature frame for a symbol set.

    The *label* argument is accepted for signature compatibility but
    is not used by this loader.
    """
    symbols = getattr(yeod, "get_%s" % setname)()
    return base.get_merged(taname, symbols, start, end)