コード例 #1
0
class PerformanceMonitoring(object):
    """Gather per-fold evaluation results and write them out as JSON files.

    The set of collectors depends on ``conf.families_supervision``:

    * truthy: ``MulticlassPerfIndicators`` and ``MulticlassErrors`` only;
    * falsy: ``BinaryPerfIndicators`` and ``BinaryErrors`` plus a
      ``ConfusionMatrix`` and a ``ROC``.
    """

    def __init__(self, num_folds, conf):
        """Store ``conf`` and build the collectors for its supervision mode."""
        self.conf = conf
        if not self.conf.families_supervision:
            self.perf_indicators = BinaryPerfIndicators(num_folds, conf.probabilistModel())
            self.errors = BinaryErrors(conf)
            self.confusion_matrix = ConfusionMatrix()
            self.roc = ROC(num_folds, conf)
            return
        self.perf_indicators = MulticlassPerfIndicators(num_folds)
        self.errors = MulticlassErrors()

    def addFold(self, fold, true_labels, instances_ids, predicted_proba, predicted_scores, predicted_labels):
        """Forward the results of one fold to every active collector.

        With families supervision only the labels (and instance ids for the
        errors collector) are forwarded; otherwise probabilities and scores are
        passed along as well, and the confusion matrix and ROC are updated too.
        """
        if self.conf.families_supervision:
            self.perf_indicators.addFold(fold, true_labels, predicted_labels)
            self.errors.addFold(true_labels, instances_ids, predicted_labels)
            return
        self.perf_indicators.addFold(fold, true_labels, predicted_proba,
                                     predicted_scores, predicted_labels)
        self.errors.addFold(true_labels, instances_ids, predicted_labels,
                            predicted_proba, predicted_scores)
        self.confusion_matrix.addFold(true_labels, predicted_labels)
        self.roc.addFold(fold, true_labels, predicted_proba, predicted_scores)

    def finalComputations(self):
        """Run the final computations of the performance indicators."""
        self.perf_indicators.finalComputations()

    def display(self, directory):
        """Write the collected results below ``directory``.

        ``directory`` is concatenated directly with each file name, so it has
        to end with a path separator. ``perf_indicators.json`` and
        ``errors.json`` are always written; without families supervision
        ``confusion_matrix.json`` is written as well and the ROC object is
        asked to display itself into the same directory.
        """
        always_written = (
            ('perf_indicators.json', self.perf_indicators),
            ('errors.json', self.errors),
        )
        for filename, collector in always_written:
            with open(directory + filename, 'w') as out:
                collector.toJson(out)
        if self.conf.families_supervision:
            return
        with open(directory + 'confusion_matrix.json', 'w') as out:
            self.confusion_matrix.toJson(out)
        self.roc.display(directory)
コード例 #2
0
 def __init__(self, num_folds, conf):
     """Store ``conf`` and build the collectors for its supervision mode.

     With ``conf.families_supervision`` truthy only the multiclass
     performance indicators and errors are created; otherwise the binary
     variants are created together with a confusion matrix and a ROC.
     """
     self.conf = conf
     if not self.conf.families_supervision:
         self.perf_indicators = BinaryPerfIndicators(num_folds, conf.probabilistModel())
         self.errors = BinaryErrors(conf)
         self.confusion_matrix = ConfusionMatrix()
         self.roc = ROC(num_folds, conf)
         return
     self.perf_indicators = MulticlassPerfIndicators(num_folds)
     self.errors = MulticlassErrors()
コード例 #3
0
def performance(weights, matrix, target, roc_type):
    """Score each row of ``matrix`` against ``weights`` and report ROC results.

    The score of a row is ``np.vdot(row, weights)``; its label is the first
    element of the matching ``target`` entry. Scores and labels are passed to
    ``ROC`` together with ``roc_type``. After ``compute()`` the largest value
    in ``roc.accuracy`` is printed and ``roc.plot()`` is called.

    Nothing is returned.
    """
    scores = []
    actual_label = []
    for row, val in zip(matrix, target):
        scores.append(np.vdot(row, weights))
        actual_label.append(val[0])

    roc = ROC(scores, actual_label, roc_type)
    roc.compute()
    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is not).
    print(max(roc.accuracy))
    roc.plot()
コード例 #4
0
def sell(open_p, close_p, low_p, high_p, volume):
    """Return a list of 0/1 sell flags derived from ``ROC(close_p)``.

    The list starts with 0. For every index ``i`` from 1 up to
    ``len(close_p) - 1`` it holds 1 when ``ROC(close_p)[i] <= 0`` and 0
    otherwise. Only ``close_p`` is read; the other parameters are unused.
    """
    rate_of_change = ROC(close_p)
    flags = [1 if rate_of_change[i] <= 0 else 0
             for i in range(1, len(close_p))]
    return [0] + flags
コード例 #5
0
def buy(open_p, close_p, low_p, high_p, volume):
    """Return a list of 0/1 buy flags derived from ``PSAR`` and ``ROC``.

    The list starts with 0. For every index ``i`` from 1 up to
    ``len(close_p) - 1`` it holds 1 when ``ROC(close_p)[i] > 0`` and
    ``PSAR(close_p)[i] < close_p[i]``, and 0 otherwise. Only ``close_p`` is
    read; the other parameters are unused.
    """
    stop_and_reverse = PSAR(close_p)
    rate_of_change = ROC(close_p)
    flags = [
        1 if rate_of_change[i] > 0 and stop_and_reverse[i] < close_p[i] else 0
        for i in range(1, len(close_p))
    ]
    return [0] + flags
コード例 #6
0
ファイル: GBDT.py プロジェクト: hadoop73/sklearn
def gbdt(train,target,test,n):
    """Build a gradient-boosting regressor, run it through ``ROC`` and write predictions.

    The regressor is created unfitted and handed to ``ROC`` together with
    ``train`` and ``target``; ``ROC`` returns a ``(model, ks)`` pair. The
    returned model predicts on ``test`` and the result is passed to
    ``writeDatas`` with the label ``"bn<ks>"``.

    ``n`` is only used in the progress message. Nothing is returned.
    """
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import GradientBoostingRegressor

    clf = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1,subsample=0.4,
                max_depth=5, random_state=0, loss='ls')  # .fit(train, target)

    from ROC import ROC,ROC2
    # One pre-formatted string reproduces what the Python 2 statement
    # `print "delete ",-1*n," feature"` emitted (items joined by single
    # spaces) while also being valid Python 3.
    print("delete  %s  feature" % (-1 * n,))
    (model,ks) = ROC(clf,train,target)

    #(model, ks) = ROC2(clf, train, target)

    result = model.predict(test)
    writeDatas(result, test, "bn{}".format(ks))
コード例 #7
0
ファイル: GBDT.py プロジェクト: hadoop73/sklearn
def gbdt_a(n_estimators=300,rate=0.1,max_depth=5,rand_state=0,name='train_data_5'):
    """Load the data set ``name``, run a gradient-boosting regressor through ``ROC`` and write predictions.

    ``getDatas(name)`` supplies ``train``, ``target`` and ``test``. The
    regressor is configured from the keyword arguments (``rate`` is passed as
    ``learning_rate`` and ``rand_state`` as ``random_state``) and handed
    unfitted to ``ROC``, which returns a ``(model, ks)`` pair. The returned
    model predicts on ``test`` and the result is passed to ``writeDatas``
    labelled with ``ks``.

    The data set name and the hyper-parameters are printed/logged. Nothing is
    returned.
    """
    train,target,test = getDatas(name)
    # Pre-formatted / single-argument prints give the same text as the former
    # Python 2 print statements and are also valid Python 3.
    print("data : %s" % (name,))
    print(train.shape)
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import GradientBoostingRegressor

    clf = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=rate,
                max_depth=max_depth, random_state=rand_state, loss='ls')  # .fit(train, target)

    from ROC import ROC,ROC2,ROC3
    logger.info("Datas name: %s",name)
    logger.info("n_estimators= %s rate= %s max_depth= %s rand_state= %s",
                n_estimators,rate,max_depth,rand_state)

    (model, ks) = ROC(clf, train, target)
    result = model.predict(test)
    writeDatas(result, test, "{}".format(ks))
コード例 #8
0
def CoppockCurve(close, shortRocPeriod=11, longRocPeriod=14, period=10):
    """Return ``EMA`` over ``period`` of the element-wise sum of two ``ROC`` series.

    The two series are ``ROC(close, shortRocPeriod)`` and
    ``ROC(close, longRocPeriod)``; they are paired with ``zip``, so the sum
    has the length of the shorter one.
    """
    fast_roc = ROC(close, shortRocPeriod)
    slow_roc = ROC(close, longRocPeriod)
    combined = [fast + slow for fast, slow in zip(fast_roc, slow_roc)]
    return EMA(combined, period)
コード例 #9
0
ファイル: plots.py プロジェクト: lwezenbe/TauStudy
makeDirIfNeeded(basefolderOutput + '/ROC')

# ROC
# Extra annotation lines, each passed through extraTextFormat.
extraText = [
    extraTextFormat("Efficiency: " + samples[0] + " MC"),
    extraTextFormat("FR : " + samples[1] + " MC"),
    extraTextFormat('p_{T}^{#tau} > 20 GeV, |#eta_{#tau}| < 2.3'),
]

# One graph per entry of tau_id_algos, in the same order.
ROC_graphs = []
for n, name in enumerate(tau_id_algos):
    # Y-axis label depends on the category and, for 'LepDiscr', on the position
    # of the algorithm in tau_id_algos (first two entries vs. the rest).
    if args.category == 'Iso':
        ylabel = "jet -> #tau FR"
    elif args.category == 'LepDiscr' and n < 2:
        ylabel = "#mu -> #tau FR"
    elif args.category == 'LepDiscr' and n > 1:
        ylabel = "e -> #tau FR"
    else:
        ylabel = "Fake Rate"

    tmproc = ROC(name[0])
    # Single-argument parenthesised print: same output on Python 2, and valid
    # syntax on Python 3.
    print(name[0])
    # Efficiency comes from the first sample, fake rate from the second.
    tmproc.load_efficiency(
        'roc_efficiency_' + name[0],
        basefolderInput + '/Efficiency/' + args.effMethod + '/' + samples[0] +
        '/roc_efficiency_' + name[0] + '.root')
    tmproc.load_fakerate(
        'roc_fakerate_' + name[0],
        basefolderInput + '/FakeRate/' + args.frMethod + '/' + samples[1] +
        '/roc_fakerate_' + name[0] + '.root')

    ROC_graphs.append(tmproc.return_graph())
    #if args.category == 'LepDiscr': plotROCfromgraph(ROC_graphs[n], "Efficiency (%)", ylabel, (name[0]), basefolderOutput+"/ROC/ROC_"+name[0], False, True, extraText)

# First element of every tau_id_algos entry.
discr = [x[0] for x in tau_id_algos]
if not args.category == 'LepDiscr':
コード例 #10
0
ファイル: GBDT_user_bank.py プロジェクト: hadoop73/sklearn
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Matplotlib font configuration: use 'SimHei' as the sans-serif font and turn
# off 'axes.unicode_minus' (presumably so CJK labels and minus signs render
# with that font -- confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Font object loaded from a fixed system path at size 14; not used in the
# lines visible here.
font = FontProperties(fname='/usr/share/fonts/truetype/arphic/ukai.ttc',
                      size=14)

#  All of them are pandas DataFrames (original author's note)

train, target, test = getUserBankDatas()

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

# The regressor is created unfitted; it is handed to ROC below.
clf = GradientBoostingRegressor(n_estimators=20,
                                learning_rate=0.1,
                                max_depth=3,
                                random_state=0,
                                loss='ls')  # .fit(train, target)

from ROC import ROC

# clf is rebound to whatever ROC returns; .predict is called on it next, so
# ROC presumably returns a fitted estimator -- confirm against the ROC module.
clf = ROC(clf, train, target)

result = clf.predict(test)

#print result

#writeDatas(result,test,"00")
コード例 #11
0
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 27 14:18:10 2013

@author: abraha84
"""

from ROC import ROC
import random

# Parallel lists, filled below and passed to ROC as its first two arguments.
TrueLabel = []
PredictedLabel = []

# Synthetic data (no file is read here): 100 pairs with the true value drawn
# by random.uniform(0, 1) and the predicted value by random.uniform(-0.2, 1).
for i in range(0, 100):
    TrueLabel.append(random.uniform(0, 1))
    PredictedLabel.append(random.uniform(-0.2, 1))

# Another 100 pairs with the true value drawn by random.uniform(-1, 0) and the
# predicted value by random.uniform(-1, 0.2).
for i in range(0, 100):
    TrueLabel.append(random.uniform(-1, 0))
    PredictedLabel.append(random.uniform(-1, 0.2))
# 'Log1.csv' is presumably a log file name -- confirm against the ROC class.
ROC_Obj = ROC(TrueLabel, PredictedLabel, 'Log1.csv')
print("ROC AUC: %s" % (str(ROC_Obj.auc()), ))
# Meaning of the two positional True flags is defined by ROC.plot -- TODO confirm.
ROC_Obj.plot(True, True, PlotFileName='Plots/ROCPlot.png')
コード例 #12
0
    # ts plot
    results_ts_plot(df, slab_proba, wet_proba)

    # feature importances
    label_list = ['gbc: slab', 'gbc: wet']
    color_list = ['b', 'g']
    feat_sort_l = feature_importances(feat_list, label_list, color_list)

    # accuracy, precision, recall metrics:
    labels = ['slab', 'wet', 'combined']
    colors = ['b', 'g', 'purple']
    fpr_tpr_l = []
    for y_true, y_hat, y_proba, name in zip(y_true_l, y_hat_l, proba_l,
                                            labels):
        print(name)
        roc = ROC()
        roc.fit(y_true, y_hat)
        print(f'acc: {roc.accuracy}')
        print(f'prec: {roc.precision}')
        print(f'rec: {roc.recall}')

        # roc TPR, FPR
        fpr, tpr = roc.calc_roc(y_true, y_proba)
        fpr_tpr_l.append((fpr, tpr))

        # apr plot
        precision, recall, thresholds = precision_recall_curve(y_true.values,
                                                               y_proba,
                                                               pos_label=1)
        thresholds = np.append(thresholds, 1)
        plt.plot(thresholds, recall, 'b--', label='recall')
コード例 #13
0
ファイル: LightLepDiscrEff.py プロジェクト: lwezenbe/TauStudy
    ('ElectronDiscrMVA', ['VLoose', 'Loose', 'Medium', 'Tight', 'VTight']),
    ('ElectronDiscrdeeptau', [
        'VVVLoose', 'VVLoose', 'VLoose', 'Loose', 'Medium', 'Tight', 'VTight',
        'VVTight'
    ])
]  #Change getTauLepDiscr() accordingly

# Whatever needs to be saved at the end
pt_bins = np.linspace(20, 120, 11)
eta_bins = np.linspace(-2.4, 2.4, 25)

# One ROC object and one pt / eta efficiency object per entry of
# tau_id_algos, kept in three parallel lists.
roc, ptHist, etaHist = [], [], []
for tau_id in tau_id_algos:
    roc.append(ROC('roc_efficiency_' + tau_id[0], tau_id[1]))
    ptHist.append(
        efficiency('pt_efficiency_' + tau_id[0], pt_bins, tau_id[1]))
    etaHist.append(
        efficiency('eta_efficiency_' + tau_id[0], eta_bins, tau_id[1]))

# Test runs use the first 5000 entries; otherwise the sample supplies the
# range for the requested sub-job.
eventrange = (xrange(5000) if args.isTest
              else sample.getEventRange(int(args.subJob)))

for entry in eventrange:
    Chain.GetEntry(entry)

    for lepton in xrange(Chain._nL):
        if not objSel.isGoodBaseTau(Chain, lepton): continue
        if Chain._tauGenStatus[lepton] != 5: continue
コード例 #14
0
# Whatever needs to be saved at the end
pt_bins = np.linspace(20, 120, 11)
eta_bins = np.linspace(-2.4, 2.4, 25)

# makeDirIfNeeded is called on each successively longer path; basefolder ends
# up as <root> + sample.output + '/' + args.method.
basefolder = '/storage_mnt/storage/user/lwezenbe/private/PhD/Results/TauStudy/Efficiency/Histos/All/'
makeDirIfNeeded(basefolder)
basefolder = basefolder + sample.output
makeDirIfNeeded(basefolder)
basefolder = basefolder + '/' + args.method
makeDirIfNeeded(basefolder)

# One ROC object and one pt / eta efficiency object per entry of
# tau_id_algos, kept in three parallel lists.
roc, ptHist, etaHist = [], [], []
for tau_id in tau_id_algos:
    roc.append(ROC('roc_fakerate_' + tau_id[0], tau_id[1]))
    ptHist.append(
        efficiency('pt_fakerate_' + tau_id[0], pt_bins, tau_id[1]))
    etaHist.append(
        efficiency('eta_fakerate_' + tau_id[0], eta_bins, tau_id[1]))

# Test runs use the first 5000 entries; otherwise the sample supplies the
# range for the requested sub-job.
eventRange = (range(5000) if args.isTest
              else sample.getEventRange(int(args.subJob)))

# event loop
for entry in eventRange:
    Chain.GetEntry(entry)

    CalcFakeRate(Chain, sample, args)
コード例 #15
0
ファイル: DecisionTree.py プロジェクト: hadoop73/sklearn
def adTree():
    """Run an AdaBoost regressor through ``ROC`` and predict on ``test``.

    The regressor uses 300 estimators on top of a depth-4 decision tree. It is
    handed to ``ROC`` together with the module-level ``train`` and ``target``,
    and ``predict(test)`` is called on whatever ``ROC`` returns.

    Returns:
        The predictions for ``test``. They used to be computed and then
        discarded (the function returned None), so callers that ignore the
        return value are unaffected.
    """
    clf = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                            n_estimators=300)
    clf = ROC(clf, train, target)
    return clf.predict(test)
コード例 #16
0
ファイル: DecisionTree.py プロジェクト: hadoop73/sklearn
from GetData import getDatas
from WriteDatas import writeDatas

#  All of them are pandas DataFrames (original author's note)

train, target, test = getDatas('bill_browser_user_data')

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor

from ROC import ROC


def adTree():
    """Run an AdaBoost regressor through ``ROC`` and predict on ``test``.

    The regressor uses 300 estimators on top of a depth-4 decision tree. It is
    handed to ``ROC`` together with the module-level ``train`` and ``target``,
    and ``predict(test)`` is called on whatever ``ROC`` returns.

    Returns:
        The predictions for ``test``. They used to be computed and then
        discarded (the function returned None), so callers that ignore the
        return value are unaffected.
    """
    clf = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                            n_estimators=300)
    clf = ROC(clf, train, target)
    return clf.predict(test)


# Random-forest regressor (100 trees, depth 4, fixed seed), created unfitted.
rf = RandomForestRegressor(max_depth=4, random_state=2, n_estimators=100)

# rf is rebound to whatever ROC returns for this regressor and the training data.
rf = ROC(rf, train, target)

# Output the overdue-repayment probability of the test-set users; predict_proba
# outputs two probabilities, take the probability of '1'.
# NOTE(review): no predict_proba call is visible here and rf is built as a
# regressor -- confirm whether this note still applies.

#print result

#writeDatas(result,test,"700")