class PerformanceMonitoring(object):
    """Collect per-fold classification results and write them out as JSON.

    The trackers that are used depend on ``conf.families_supervision``:
    when it is truthy only the multiclass indicators and errors are kept;
    otherwise the binary indicators and errors are kept together with a
    confusion matrix and a ROC tracker.
    """

    def __init__(self, num_folds, conf):
        """Build the trackers that match the supervision mode of ``conf``."""
        self.conf = conf
        if not self.conf.families_supervision:
            # Binary mode: indicators, errors, confusion matrix and ROC.
            self.perf_indicators = BinaryPerfIndicators(num_folds,
                                                        conf.probabilistModel())
            self.errors = BinaryErrors(conf)
            self.confusion_matrix = ConfusionMatrix()
            self.roc = ROC(num_folds, conf)
        else:
            # Families (multiclass) mode: indicators and errors only.
            self.perf_indicators = MulticlassPerfIndicators(num_folds)
            self.errors = MulticlassErrors()

    def addFold(self, fold, true_labels, instances_ids, predicted_proba,
                predicted_scores, predicted_labels):
        """Forward the results of one fold to every active tracker."""
        if self.conf.families_supervision:
            self.perf_indicators.addFold(fold, true_labels, predicted_labels)
            self.errors.addFold(true_labels, instances_ids, predicted_labels)
            return
        self.perf_indicators.addFold(fold, true_labels, predicted_proba,
                                     predicted_scores, predicted_labels)
        self.errors.addFold(true_labels, instances_ids, predicted_labels,
                            predicted_proba, predicted_scores)
        self.confusion_matrix.addFold(true_labels, predicted_labels)
        self.roc.addFold(fold, true_labels, predicted_proba, predicted_scores)

    def finalComputations(self):
        """Delegate the final computations to the performance indicators."""
        self.perf_indicators.finalComputations()

    def _dumpJson(self, directory, filename, component):
        """Write ``component`` to ``directory + filename`` via its ``toJson``."""
        # Plain concatenation: ``directory`` must already end with a separator.
        with open(directory + filename, 'w') as f:
            component.toJson(f)

    def display(self, directory):
        """Write every tracked result into ``directory``.

        The confusion matrix and the ROC output are only produced when
        ``conf.families_supervision`` is falsy.
        """
        self._dumpJson(directory, 'perf_indicators.json', self.perf_indicators)
        self._dumpJson(directory, 'errors.json', self.errors)
        if self.conf.families_supervision:
            return
        self._dumpJson(directory, 'confusion_matrix.json',
                       self.confusion_matrix)
        self.roc.display(directory)
def __init__(self, num_folds, conf):
    """Create the result trackers selected by ``conf.families_supervision``.

    A truthy flag selects the multiclass indicators and errors; otherwise
    the binary indicators and errors are created together with a confusion
    matrix and a ROC tracker.
    """
    self.conf = conf
    if self.conf.families_supervision:
        self.perf_indicators = MulticlassPerfIndicators(num_folds)
        self.errors = MulticlassErrors()
        return
    # Binary supervision from here on.
    uses_probabilities = conf.probabilistModel()
    self.perf_indicators = BinaryPerfIndicators(num_folds, uses_probabilities)
    self.errors = BinaryErrors(conf)
    self.confusion_matrix = ConfusionMatrix()
    self.roc = ROC(num_folds, conf)
def performance(weights, matrix, target, roc_type):
    """Score each row of ``matrix`` against ``weights`` and report the ROC.

    Args:
        weights: weight vector passed to ``np.vdot`` with every row.
        matrix: iterable of feature rows.
        target: iterable aligned with ``matrix``; element ``[0]`` of each
            entry is used as the actual label.
        roc_type: forwarded unchanged to ``ROC``.

    Side effects:
        Prints the maximum of ``roc.accuracy`` and calls ``roc.plot()``.
    """
    # zip() stops at the shorter input, exactly like the original paired loop.
    pairs = list(zip(matrix, target))
    scores = [np.vdot(row, weights) for row, _ in pairs]
    actual_label = [val[0] for _, val in pairs]

    roc = ROC(scores, actual_label, roc_type)
    roc.compute()
    # Single-argument call form: valid and identical on Python 2 and 3
    # (the bare ``print x`` statement is a SyntaxError on Python 3).
    print(max(roc.accuracy))
    roc.plot()
def sell(open_p, close_p, low_p, high_p, volume):
    """Return one 0/1 sell flag per entry of ``close_p``.

    The first flag is always 0. Every later position ``i`` is flagged 1
    when ``ROC(close_p)[i] <= 0`` and 0 otherwise. Only ``close_p`` is
    read; the other price/volume arguments are accepted but unused.
    """
    momentum = ROC(close_p)  # presumably a rate-of-change series -- confirm
    flags = [0]
    flags.extend(1 if momentum[idx] <= 0 else 0
                 for idx in range(1, len(close_p)))
    return flags
def buy(open_p, close_p, low_p, high_p, volume):
    """Return one 0/1 buy flag per entry of ``close_p``.

    The first flag is always 0. Position ``i`` is flagged 1 only when
    ``ROC(close_p)[i] > 0`` and ``PSAR(close_p)[i] < close_p[i]``.
    Only ``close_p`` is read; the other arguments are accepted but unused.
    """
    stop_levels = PSAR(close_p)  # presumably parabolic SAR values -- confirm
    momentum = ROC(close_p)

    def is_buy(idx):
        # Short-circuits: the PSAR comparison only runs when momentum is > 0.
        return momentum[idx] > 0 and stop_levels[idx] < close_p[idx]

    flags = [0]
    for idx in range(1, len(close_p)):
        flags.append(1 if is_buy(idx) else 0)
    return flags
def gbdt(train, target, test, n):
    """Train a gradient-boosting regressor through ``ROC`` and write predictions.

    Args:
        train: training features handed to ``ROC``.
        target: training targets handed to ``ROC``.
        test: data passed to ``model.predict`` and on to ``writeDatas``.
        n: only used in the progress message, which reports ``-1 * n``.

    Side effects:
        Prints a progress line and calls ``writeDatas`` with the predictions
        and an output tag of ``"bn"`` followed by the ``ks`` value that
        ``ROC`` returned.
    """
    from sklearn.ensemble import GradientBoostingRegressor
    from ROC import ROC

    # NOTE(review): loss='ls' was renamed 'squared_error' in scikit-learn 1.0
    # and removed in 1.2 -- confirm the pinned version before upgrading.
    clf = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1,
                                    subsample=0.4, max_depth=5,
                                    random_state=0, loss='ls')

    # One pre-formatted argument reproduces the Python 2 statement
    # ``print "delete ", -1*n, " feature"`` byte-for-byte and is also valid
    # on Python 3, where the statement form is a SyntaxError.
    print("delete  %s  feature" % (-1 * n,))

    (model, ks) = ROC(clf, train, target)
    result = model.predict(test)
    writeDatas(result, test, "bn{}".format(ks))
def gbdt_a(n_estimators=300, rate=0.1, max_depth=5, rand_state=0,
           name='train_data_5'):
    """Load the dataset ``name``, fit a gradient-boosting regressor, write predictions.

    Args:
        n_estimators: forwarded to ``GradientBoostingRegressor``.
        rate: forwarded as ``learning_rate``.
        max_depth: forwarded to ``GradientBoostingRegressor``.
        rand_state: forwarded as ``random_state``.
        name: dataset identifier passed to ``getDatas``.

    Side effects:
        Prints the dataset name and ``train.shape``, logs the parameters,
        and calls ``writeDatas`` with the predictions and the ``ks`` value
        returned by ``ROC`` as the output tag.
    """
    from sklearn.ensemble import GradientBoostingRegressor
    from ROC import ROC

    train, target, test = getDatas(name)
    # Single-argument print calls give the same output on Python 2 and 3;
    # the original statement form is a SyntaxError on Python 3.
    print("data : %s" % (name,))
    print(train.shape)

    # NOTE(review): loss='ls' was renamed 'squared_error' in scikit-learn 1.0
    # and removed in 1.2 -- confirm the pinned version before upgrading.
    clf = GradientBoostingRegressor(n_estimators=n_estimators,
                                    learning_rate=rate,
                                    max_depth=max_depth,
                                    random_state=rand_state,
                                    loss='ls')

    logger.info("Datas name: %s", name)
    logger.info("n_estimators= %s rate= %s max_depth= %s rand_state= %s",
                n_estimators, rate, max_depth, rand_state)

    (model, ks) = ROC(clf, train, target)
    result = model.predict(test)
    writeDatas(result, test, "{}".format(ks))
def CoppockCurve(close, shortRocPeriod=11, longRocPeriod=14, period=10):
    """Return ``EMA`` over the element-wise sum of two ``ROC`` series.

    ``ROC(close, shortRocPeriod)`` and ``ROC(close, longRocPeriod)`` are
    added pairwise (stopping at the shorter series) and the sums are
    smoothed with ``EMA(..., period)``.
    """
    roc_short = ROC(close, shortRocPeriod)
    roc_long = ROC(close, longRocPeriod)
    summed = []
    for fast, slow in zip(roc_short, roc_long):
        summed.append(fast + slow)
    return EMA(summed, period)
makeDirIfNeeded(basefolderOutput + '/ROC') #ROC extraText = [] extraText.append(extraTextFormat("Efficiency: " + samples[0] + " MC")) extraText.append(extraTextFormat("FR : " + samples[1] + " MC")) extraText.append(extraTextFormat('p_{T}^{#tau} > 20 GeV, |#eta_{#tau}| < 2.3')) ROC_graphs = [] for n, name in enumerate(tau_id_algos): if (args.category == 'Iso'): ylabel = "jet -> #tau FR" elif (args.category == 'LepDiscr' and n < 2): ylabel = "#mu -> #tau FR" elif (args.category == 'LepDiscr' and n > 1): ylabel = "e -> #tau FR" else: ylabel = "Fake Rate" tmproc = ROC(name[0]) print name[0] tmproc.load_efficiency( 'roc_efficiency_' + name[0], basefolderInput + '/Efficiency/' + args.effMethod + '/' + samples[0] + '/roc_efficiency_' + name[0] + '.root') tmproc.load_fakerate( 'roc_fakerate_' + name[0], basefolderInput + '/FakeRate/' + args.frMethod + '/' + samples[1] + '/roc_fakerate_' + name[0] + '.root') ROC_graphs.append(tmproc.return_graph()) #if args.category == 'LepDiscr': plotROCfromgraph(ROC_graphs[n], "Efficiency (%)", ylabel, (name[0]), basefolderOutput+"/ROC/ROC_"+name[0], False, True, extraText) discr = [x[0] for x in tau_id_algos] if not args.category == 'LepDiscr':
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Use the SimHei font for sans-serif text and turn off the Unicode minus
# sign (presumably so CJK labels and negative ticks render -- confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
font = FontProperties(fname='/usr/share/fonts/truetype/arphic/ukai.ttc', size=14)

# All of these are pandas DataFrames.
train, target, test = getUserBankDatas()

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

clf = GradientBoostingRegressor(n_estimators=20, learning_rate=0.1, max_depth=3, random_state=0, loss='ls')  # .fit(train, target)
from ROC import ROC

# ROC() receives the unfitted estimator plus the training data; its return
# value replaces ``clf`` and is the object used for prediction below.
clf = ROC(clf, train, target)
result = clf.predict(test)
#print result
#writeDatas(result,test,"00")
# -*- coding: utf-8 -*- """ Created on Thu Oct 27 14:18:10 2013 @author: abraha84 """ from ROC import ROC import random TrueLabel = [] PredictedLabel = [] # open file to read for i in range(0, 100): TrueLabel.append(random.uniform(0, 1)) PredictedLabel.append(random.uniform(-0.2, 1)) for i in range(0, 100): TrueLabel.append(random.uniform(-1, 0)) PredictedLabel.append(random.uniform(-1, 0.2)) ROC_Obj = ROC(TrueLabel, PredictedLabel, 'Log1.csv') print("ROC AUC: %s" % (str(ROC_Obj.auc()), )) ROC_Obj.plot(True, True, PlotFileName='Plots/ROCPlot.png')
# ts plot results_ts_plot(df, slab_proba, wet_proba) # feature importances label_list = ['gbc: slab', 'gbc: wet'] color_list = ['b', 'g'] feat_sort_l = feature_importances(feat_list, label_list, color_list) # accuracy, precision, recall metrics: labels = ['slab', 'wet', 'combined'] colors = ['b', 'g', 'purple'] fpr_tpr_l = [] for y_true, y_hat, y_proba, name in zip(y_true_l, y_hat_l, proba_l, labels): print(name) roc = ROC() roc.fit(y_true, y_hat) print(f'acc: {roc.accuracy}') print(f'prec: {roc.precision}') print(f'rec: {roc.recall}') # roc TPR, FPR fpr, tpr = roc.calc_roc(y_true, y_proba) fpr_tpr_l.append((fpr, tpr)) # apr plot precision, recall, thresholds = precision_recall_curve(y_true.values, y_proba, pos_label=1) thresholds = np.append(thresholds, 1) plt.plot(thresholds, recall, 'b--', label='recall')
('ElectronDiscrMVA', ['VLoose', 'Loose', 'Medium', 'Tight', 'VTight']), ('ElectronDiscrdeeptau', [ 'VVVLoose', 'VVLoose', 'VLoose', 'Loose', 'Medium', 'Tight', 'VTight', 'VVTight' ]) ] #Change getTauLepDiscr() accordingly #Whatever needs to be saved at the end pt_bins = np.linspace(20, 120, 11) eta_bins = np.linspace(-2.4, 2.4, 25) roc = [] ptHist = [] etaHist = [] for tau_id in tau_id_algos: roc.append(ROC('roc_efficiency_' + tau_id[0], tau_id[1])) ptHist.append(efficiency('pt_efficiency_' + tau_id[0], pt_bins, tau_id[1])) etaHist.append( efficiency('eta_efficiency_' + tau_id[0], eta_bins, tau_id[1])) if args.isTest: eventrange = xrange(5000) else: eventrange = sample.getEventRange(int(args.subJob)) for entry in eventrange: Chain.GetEntry(entry) for lepton in xrange(Chain._nL): if not objSel.isGoodBaseTau(Chain, lepton): continue if Chain._tauGenStatus[lepton] != 5: continue
#Whatever needs to be saved at the end pt_bins = np.linspace(20, 120, 11) eta_bins = np.linspace(-2.4, 2.4, 25) basefolder = '/storage_mnt/storage/user/lwezenbe/private/PhD/Results/TauStudy/Efficiency/Histos/All/' makeDirIfNeeded(basefolder) makeDirIfNeeded(basefolder + sample.output) makeDirIfNeeded(basefolder + sample.output + '/' + args.method) basefolder = basefolder + sample.output + '/' + args.method roc = [] ptHist = [] etaHist = [] for tau_id in tau_id_algos: roc.append(ROC('roc_fakerate_' + tau_id[0], tau_id[1])) ptHist.append(efficiency('pt_fakerate_' + tau_id[0], pt_bins, tau_id[1])) etaHist.append(efficiency('eta_fakerate_' + tau_id[0], eta_bins, tau_id[1])) if args.isTest: eventRange = range(5000) else: eventRange = sample.getEventRange(int(args.subJob)) #event loop for entry in eventRange: Chain.GetEntry(entry) CalcFakeRate(Chain, sample, args)
def adTree():
    """Run an AdaBoost-over-decision-tree regressor through ``ROC`` and predict.

    Reads the module-level ``train``, ``target`` and ``test``. The
    prediction is computed but neither returned nor stored outside the
    function.
    """
    base_tree = DecisionTreeRegressor(max_depth=4)
    booster = AdaBoostRegressor(base_tree, n_estimators=300)
    fitted = ROC(booster, train, target)
    result = fitted.predict(test)  # discarded: the function returns None
from GetData import getDatas
from WriteDatas import writeDatas

# All of these are pandas DataFrames.
train, target, test = getDatas('bill_browser_user_data')

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from ROC import ROC


def adTree():
    """Run an AdaBoost regressor through ``ROC`` and predict on ``test``.

    Uses the module-level ``train``, ``target`` and ``test``; the prediction
    is assigned to a local and not returned.
    """
    clf = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=300)
    clf = ROC(clf, train, target)
    result = clf.predict(test)


# Random-forest variant, run at import time; ROC()'s return value replaces ``rf``.
rf = RandomForestRegressor(max_depth=4, random_state=2, n_estimators=100)
rf = ROC(rf, train, target)
# Output the overdue-repayment probability for the test-set users;
# predict_proba returns two probabilities, take the one for class '1'.
#print result
#writeDatas(result,test,"700")