Esempio n. 1
0
def templateROC(c='c002812'):
    """Build an embeddable Bokeh ROC-curve plot from a saved ROC table.

    Reads ``DATA + 'roc_curve_' + c + '_small.csv'`` and draws its ``tpr``
    column against its ``fpt`` column, plus a dashed diagonal from (0, 0)
    to (1, 1). The hover tool reports FPR, TPR and the ``threshold``
    column for the point under the mouse.

    :param c: identifier inserted into the CSV file name.
    :return: the ``(script, div)`` pair produced by ``components``.
    """
    roc_table = pd.read_csv(DATA + 'roc_curve_' + c + '_small.csv')

    # NOTE: the '_small' CSV appears to have been produced offline by taking
    # a 1/20 test split of the full ROC table and sorting it -- TODO confirm.
    # The file spells the false-positive-rate column 'fpt'; it is exposed to
    # the plot under the key 'fpr'.
    roc_source = ColumnDataSource(data={
        'tpr': roc_table['tpr'],
        'fpr': roc_table['fpt'],
        'thre': roc_table['threshold'],
    })

    fig = Figure(tools="pan,wheel_zoom,reset,hover,save",
                 height=300,
                 width=300,
                 toolbar_location="above")

    # ROC curve itself, then the diagonal reference line.
    fig.line('fpr', 'tpr', source=roc_source, line_width=5)
    fig.line([0, 1], [0, 1], line_dash='dashed', line_alpha=0.6)

    fig.xaxis.axis_label = 'False Positive Rate'
    fig.yaxis.axis_label = 'True Positive Rate'

    # The 'hover' entry in the tools string created this HoverTool.
    hover_tool = fig.select_one(HoverTool)
    hover_tool.point_policy = "follow_mouse"
    hover_tool.tooltips = [
        ("FPR", "@fpr{1.11}"),
        ("TPR", "@tpr{1.11}"),
        ("THRESHOLD", "@thre{1.11}"),
    ]

    embed_script, embed_div = components(fig)
    return embed_script, embed_div
Esempio n. 2
0
# Second plot: a 1000 x 10 strip whose only tool is the hover tool.
p2 = Figure(
    title='hover over color',
    tools='hover',
    x_range=(0, 1000), y_range=(0, 10),
    plot_width=600, plot_height=150,
)

# One 1-wide, 10-tall rectangle per row of ``crsource``, positioned by its
# 'x'/'y' columns and coloured by its 'crcolor' column.
color_range1 = p2.rect(
    x='x', y='y',
    width=1, height=10,
    color='crcolor',
    source=crsource,
)

# Configure the hover tool to show the colour hex code and a sample swatch
# (Bokeh ``$color`` tooltip field), plus the 'RGBs' column of the source.
p2.select_one(HoverTool).tooltips = [
    ('color', '$color[hex, rgb, swatch]:crcolor'),
    ('RGB levels', '@RGBs'),
]

# theme everything for a cleaner look
curdoc().theme = Theme(json=yaml.load("""
attrs:
    Plot:
        toolbar_location: null
    Grid:
        grid_line_color: null
    Axis:
        axis_line_color: null
        major_label_text_color: null
        major_tick_line_color: null
        minor_tick_line_color: null
Esempio n. 3
0
# Every rectangle sits at the same height: y = 5 for each entry of ``crx``.
cry = [5] * len(crx)

# Produce the spectrum: colours plus their matching RGB descriptions.
crcolor, crRGBs = generate_color_range(1000, brightness)

# Data source backing the colour strip; its columns are what the hover tool
# displays.
crsource = ColumnDataSource(data={
    'x': crx,
    'y': cry,
    'crcolor': crcolor,
    'RGBs': crRGBs,
})

# Second plot: a 1000 x 10 strip whose only tool is the hover tool.
p2 = Figure(
    title='hover over color',
    tools='hover',
    x_range=(0, 1000),
    y_range=(0, 10),
    plot_width=600,
    plot_height=150,
)

# One 1-wide, 10-tall rectangle per row of the source, coloured by 'crcolor'.
color_range1 = p2.rect(
    x='x',
    y='y',
    width=1,
    height=10,
    color='crcolor',
    source=crsource,
)

# Configure the hover tool to show the colour hex code and a sample swatch
# (Bokeh ``$color`` tooltip field), plus the 'RGBs' column of the source.
p2.select_one(HoverTool).tooltips = [('color',
                                      '$color[hex, rgb, swatch]:crcolor'),
                                     ('RGB levels', '@RGBs')]

# theme everything for a cleaner look
curdoc().theme = Theme(json=yaml.load("""
attrs:
    Plot:
        toolbar_location: null
    Grid:
        grid_line_color: null
    Axis:
        axis_line_color: null
        major_label_text_color: null
        major_tick_line_color: null
Esempio n. 4
0
def templateRateCorrelation(state):
    """Build an embeddable Bokeh scatter of loan rate against a chosen feature.

    Reads ``DATA + 'accepted_less_col_small_' + state + '.csv'`` and plots
    the ``rate`` column (divided by 100, shown as a percentage) on the y axis.
    The x axis starts on the ``amnt`` column; a ``Select`` widget lets the
    user switch it to any entry of ``DEFAULT_X`` through a ``CustomJS``
    callback that copies the chosen column into the source's ``'x'`` column.

    :param state: identifier inserted into the CSV file name.
    :return: the ``(script, div)`` pair produced by ``components`` for the
        widget-plus-plot layout.
    """
    # Selectable x-axis features; each is also a column key of the source.
    DEFAULT_X = ['Amount Requested', 'Annual Income', 'Debt To Income Ratio']

    loans = pd.read_csv(DATA + 'accepted_less_col_small_' + state + '.csv', header=0)

    amnt = loans['amnt']
    income = loans['income']
    dti = loans['dti']
    rate = loans['rate'] / 100

    source = ColumnDataSource(
        data={'x': amnt,
              'y': rate,
              'Amount Requested': amnt,
              'Annual Income': income,
              'Debt To Income Ratio': dti,
              'Rate_per_100': rate * 100}
    )

    # JavaScript run in the browser when the select changes: point 'x' at the
    # column named by the widget's value, then notify the source.
    # NOTE(review): uses the legacy BokehJS accessors (`.get(...)`,
    # `.trigger('change')`), matching the legacy HBox/VBox API used below.
    codex = """
            var data = source.get('data');
            data['x'] = data[cb_obj.get('value')];//
            // var r = data[cb_obj.get('value')];
            // var {var} = data[cb_obj.get('value')];
            // //window.alert( "{var} " + cb_obj.get('value') + {var}  );
            // for (i = 0; i < r.length; i++) {{
            //     {var}[i] = r[i] ;
            //     data['{var}'][i] = r[i];
            // }}
            source.trigger('change');
        """

    callbackx = CustomJS(args=dict(source=source), code=codex)

    TOOLS = "pan,wheel_zoom,reset,hover,save"
    plot = Figure(title=None, height=400, width=600, tools=TOOLS)

    # Scatter the points as circles bound to the shared data source.
    plot.circle(x="x", y="y", line_color="#0062cc", line_width=6, line_alpha=0.6, source=source)
    plot.yaxis.axis_label = 'Loan Rate'
    plot.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")

    # The initial value must be one of the options and must match the column
    # initially bound to 'x' (amnt == 'Amount Requested'). The previous
    # value "Amount" was not a valid option.
    xaxis_select = Select(title="Label X axis:", value=DEFAULT_X[0],
                          options=DEFAULT_X, callback=callbackx)

    hover = plot.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Rate", "@Rate_per_100{1.11}%"),
        ("Amount Requested", "@{Amount Requested}{1.11}"),
        ("Annual Income", "@{Annual Income}{1.11}"),
        ("Debt To Income Ratio", "@{Debt To Income Ratio}{1.11}%")
    ]

    # Lay the widget out to the left of the plot.
    controls = VBox(xaxis_select)
    layout = HBox(controls, plot, width=800)

    script_corr, div_corr = components(layout)

    return script_corr, div_corr
Esempio n. 5
0
def outputGridSearchLogisticRegression(path=DATA_LOCAL +
                                       'accepted_refused_ds.csv',
                                       thre=0.40):
    """Refit the best grid-search logistic regression and report its quality.

    Steps, in order:

    1. Load grid-search results from ``DATA_LOCAL + 'dict_recall.json'`` and
       pick the ``C`` value with the highest ``mean_test_score``.
    2. Load ``DATA_LOCAL + 'accepted_refused_ds_trans.csv'``, split it 70/30
       (``random_state=101``) and fit a balanced ``LogisticRegression``.
    3. Write the fitted coefficients to
       ``DATA_LOCAL + 'LogisticRegressionCoef_c<C>.csv'``.
    4. Build the ROC curve (matplotlib figures plus a Bokeh plot passed to
       ``show``) and print confusion matrices and f1 / recall / accuracy /
       precision at ``thre`` and at 0.5, followed by the AUC.

    :param path: currently unused; the pre-transformed CSV is read instead.
        Kept for backward compatibility.
    :param thre: probability cutoff compared against 0.5 in the reports.
    :return: None; results are printed, plotted and written to disk.
    """
    # Context manager so the JSON file handle is always released.
    with open(DATA_LOCAL + 'dict_recall.json', 'r') as data_file:
        data = json.load(data_file)

    params = data['param_C']['data']
    acc = data['mean_test_score']
    best_param = params[acc.index(max(acc))]
    print('Best param: ' + str(best_param))

    # Score-vs-C curve; the figure is closed immediately without being shown.
    plt.semilogx(params, acc)
    plt.close()

    print('Data transformation')
    my_data = pd.read_csv(DATA_LOCAL + 'accepted_refused_ds_trans.csv',
                          header=0)
    print(my_data.head())

    str_a = 'acc'
    str_r = 'ref'

    def to_label(value):
        # 1 -> 'acc', 0 -> 'ref'
        return str(value).replace('1', str_a).replace('0', str_r)

    def to_int(label):
        # 'acc' -> 1, 'ref' -> 0
        return int(label.replace(str_a, '1').replace(str_r, '0'))

    y = my_data['loan'].map(to_label)
    X = my_data.drop('loan', axis=1)
    X_train, X_test, y_train, y_test_lbl = train_test_split(X,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=101)
    # Keep both encodings of the test target instead of converting the same
    # variable back and forth: ints for the metrics, strings for the
    # confusion matrix.
    y_test_int = y_test_lbl.map(to_int)

    print('Built and fit the model')
    lr = LogisticRegression(class_weight='balanced',
                            random_state=101,
                            C=best_param)
    lr.fit(X=X_train, y=y_train.map(to_int))
    y_pred = lr.predict_proba(X=X_test)
    print(lr.get_params())

    # Take feature names from X itself so they stay aligned with the
    # coefficients wherever the 'loan' column sits in the CSV.
    coef_val = pd.DataFrame(data={'coef': X.columns.values,
                                  'val': lr.coef_[0]})
    coef_val.to_csv(DATA_LOCAL + 'LogisticRegressionCoef_c' + str(best_param) +
                    '.csv',
                    index=False)

    print('Built ROC curve')
    # String labels are used for the confusion matrix so that accepted
    # ('acc') takes position 0 and refused ('ref') position 1, giving the
    # layout   TP FN
    #          FP TN
    # Probability of class 1 (accepted) for every test row.
    y_acc = [p[1] for p in y_pred]
    fpr, tpr, threshold = roc_curve(y_true=y_test_int, y_score=y_acc)

    roc = pd.DataFrame({
        'fpr': fpr,
        'tpr': tpr,
        '1-fpr': 1 - fpr,
        'tf': tpr - (1 - fpr),
        'thresholds': threshold
    })
    # Row where tpr is closest to 1 - fpr. Positional lookup (.iloc) replaces
    # the removed .ix indexer; the default index makes the two equivalent.
    closest = roc['tf'].abs().values.argsort()[:1]
    print(roc.iloc[closest])

    # NOTE: the two matplotlib figures below are built but neither shown nor
    # saved by this function.
    # Plot tpr vs 1-fpr
    fig, ax = plt.subplots()
    plt.plot(roc['tpr'])
    plt.plot(roc['1-fpr'], color='red')
    plt.xlabel('1-False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    ax.set_xticklabels([])

    roc_auc = auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr,
             tpr,
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.8f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")

    # Interactive version of the ROC curve, displayed via Bokeh's show().
    source = ColumnDataSource(data=dict(tpr=tpr, fpr=fpr, thre=threshold))
    TOOLS = "pan,wheel_zoom,reset,hover,save"
    p = Figure(tools=TOOLS)
    p.line('fpr', 'tpr', source=source, line_width=4)
    p.line([0, 1], [0, 1], line_dash='dashed', line_alpha=0.6)
    p.yaxis.axis_label = 'True Positive Rate'
    p.xaxis.axis_label = 'False Positive Rate'

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [("FPR", "@fpr{1.11}"), ("TPR", "@tpr{1.11}"),
                      ("THRESHOLD", "@thre{1.11}")]

    show(p)

    # Confusion matrices on string labels.
    y_pred_05_lbl = [str_a if a > 0.5 else str_r for a in y_acc]
    y_pred_thre_lbl = [str_a if a > thre else str_r for a in y_acc]

    print('Confusion matrix threshold = ' + str(thre))
    print(confusion_matrix(y_true=y_test_lbl, y_pred=y_pred_thre_lbl))

    print('Confusion matrix threshold = 0.5')
    print(confusion_matrix(y_true=y_test_lbl, y_pred=y_pred_05_lbl))

    # Scalar metrics on integer labels, reported at both cutoffs.
    y_pred_05 = [1 if a > 0.5 else 0 for a in y_acc]
    y_pred_thre = [1 if a > thre else 0 for a in y_acc]

    scorers = (('f1', f1_score),
               ('recall', recall_score),
               ('accuracy', accuracy_score),
               ('precision', precision_score))
    for score_name, scorer in scorers:
        print('Score ' + score_name + ' (thres = ' + str(thre) + ') = ' + str(
            scorer(y_true=y_test_int, y_pred=y_pred_thre)))
        print('Score ' + score_name + ' (thres = 0.5) = ' + str(
            scorer(y_true=y_test_int, y_pred=y_pred_05)))

    print('Score AUC = ' + str(roc_auc))