def templateROC(c='c002812'):
    """Build the interactive ROC-curve plot and return its embed pieces.

    Reads ``DATA + 'roc_curve_' + c + '_small.csv'`` and draws the true
    positive rate against the false positive rate, together with a dashed
    diagonal reference line.  Hovering shows FPR, TPR and THRESHOLD.

    Args:
        c: Identifier inserted into the CSV file name.

    Returns:
        The ``(script, div)`` pair produced by ``components`` for the plot.
    """
    csv_path = DATA + 'roc_curve_' + c + '_small.csv'
    roc_table = pd.read_csv(csv_path)

    # Recipe that was used (offline) to produce the "_small" CSV, kept for
    # reference only:
    #   data_train, data_test = train_test_split(data, test_size=1.0/20,
    #                                            random_state=101)
    #   data_test = data_test.sort_values(by=['tpr', 'fpt', 'threshold'])
    #   data_test.to_csv(DATA + 'roc_curve_' + c + '_small.csv', index=False)

    # NOTE: the false-positive-rate column is spelled 'fpt' in the CSV.
    roc_source = ColumnDataSource(data={
        'tpr': roc_table['tpr'],
        'fpr': roc_table['fpt'],
        'thre': roc_table['threshold'],
    })

    figure_tools = "pan,wheel_zoom,reset,hover,save"
    p = Figure(tools=figure_tools, height=300, width=300,
               toolbar_location="above")

    # ROC curve first, then the dashed diagonal reference line.
    p.line('fpr', 'tpr', source=roc_source, line_width=5)
    p.line([0, 1], [0, 1], line_dash='dashed', line_alpha=0.6)

    p.xaxis.axis_label = 'False Positive Rate'
    p.yaxis.axis_label = 'True Positive Rate'
    # p.background_fill_color = LIGHT_GREEN
    # p.border_fill_color = LIGHT_GREEN

    hover_tool = p.select_one(HoverTool)
    hover_tool.point_policy = "follow_mouse"
    hover_tool.tooltips = [
        ("FPR", "@fpr{1.11}"),
        ("TPR", "@tpr{1.11}"),
        ("THRESHOLD", "@thre{1.11}"),
    ]

    script, div = components(p)
    return script, div
p2 = Figure(x_range=(0, 1000), y_range=(0, 10), plot_width=600, plot_height=150, tools='hover', title='hover over color') color_range1 = p2.rect(x='x', y='y', width=1, height=10, color='crcolor', source=crsource) # set up hover tool to show color hex code and sample swatch p2.select_one(HoverTool).tooltips = [('color', '$color[hex, rgb, swatch]:crcolor'), ('RGB levels', '@RGBs')] # theme everything for a cleaner look curdoc().theme = Theme(json=yaml.load(""" attrs: Plot: toolbar_location: null Grid: grid_line_color: null Axis: axis_line_color: null major_label_text_color: null major_tick_line_color: null minor_tick_line_color: null
cry = [ 5 for i in range(len(crx)) ] crcolor, crRGBs = generate_color_range(1000,brightness) # produce spectrum # make data source object to allow information to be displayed by hover tool crsource = ColumnDataSource(data=dict(x=crx, y=cry, crcolor=crcolor, RGBs=crRGBs)) # create second plot p2 = Figure(x_range=(0,1000), y_range=(0,10), plot_width=600, plot_height=150, tools='hover', title='hover over color') color_range1 = p2.rect(x='x', y='y', width=1, height=10, color='crcolor', source=crsource) # set up hover tool to show color hex code and sample swatch p2.select_one(HoverTool).tooltips = [ ('color', '$color[hex, rgb, swatch]:crcolor'), ('RGB levels', '@RGBs') ] # theme everything for a cleaner look curdoc().theme = Theme(json=yaml.load(""" attrs: Plot: toolbar_location: null Grid: grid_line_color: null Axis: axis_line_color: null major_label_text_color: null major_tick_line_color: null
def templateRateCorrelation(state):
    """Build the loan-rate scatter plot with a selectable x variable.

    Reads ``DATA + 'accepted_less_col_small_' + state + '.csv'`` and plots
    the loan rate (column ``rate`` divided by 100, shown as a percentage)
    against one of three columns chosen through a drop-down: amount
    requested (``amnt``), annual income (``income``) or debt-to-income
    ratio (``dti``).  The plot starts on the amount requested.

    Args:
        state: String inserted into the CSV file name.

    Returns:
        The ``(script, div)`` pair produced by ``components`` for the
        widget-plus-plot layout.
    """
    # These labels double as the data-source column names that the JS
    # callback copies into 'x', so they must match the keys used below.
    DEFAULT_X = ['Amount Requested', 'Annual Income', 'Debt To Income Ratio']

    dati = pd.read_csv(DATA + 'accepted_less_col_small_' + state + '.csv',
                       header=0)
    amnt = dati['amnt']
    income = dati['income']
    dti = dati['dti']
    rate = dati['rate'] / 100

    source = ColumnDataSource(
        data={'x': amnt,
              'y': rate,
              'Amount Requested': amnt,
              'Annual Income': income,
              'Debt To Income Ratio': dti,
              'Rate_per_100': rate * 100}
    )

    # Browser-side callback: point 'x' at the column named by the selected
    # option and notify the data source.  The line breaks matter: each
    # '//' comment must end before source.trigger('change').
    codex = """
            var data = source.get('data');
            data['x'] = data[cb_obj.get('value')];//
            // var r = data[cb_obj.get('value')];
            // var {var} = data[cb_obj.get('value')];
            // //window.alert( "{var} " + cb_obj.get('value') + {var} );
            // for (i = 0; i < r.length; i++) {{
            //     {var}[i] = r[i] ;
            //     data['{var}'][i] = r[i];
            // }}
            source.trigger('change');
        """
    callbackx = CustomJS(args=dict(source=source), code=codex)

    TOOLS = "pan,wheel_zoom,reset,hover,save"
    plot = Figure(title=None, height=400, width=600, tools=TOOLS)

    # Draw the points and connect them to the data source.
    plot.circle(x="x", y="y", line_color="#0062cc", line_width=6,
                line_alpha=0.6, source=source)
    plot.yaxis.axis_label = 'Loan Rate'
    plot.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")

    # The initial value must be one of the options and must describe what
    # is plotted at start-up ('x' is the amount requested).  The previous
    # value "Amount" was not a valid option.
    xaxis_select = Select(title="Label X axis:", value=DEFAULT_X[0],
                          options=DEFAULT_X, callback=callbackx)

    hover = plot.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Rate", "@Rate_per_100{1.11}%"),
        ("Amount Requested", "@{Amount Requested}{1.11}"),
        ("Annual Income", "@{Annual Income}{1.11}"),
        ("Debt To Income Ratio", "@{Debt To Income Ratio}{1.11}%")
    ]

    # Lay the widget out next to the plot.
    controls = VBox(xaxis_select)
    layout = HBox(controls, plot, width=800)
    # show(layout)

    script_corr, div_corr = components(layout)
    return script_corr, div_corr
def outputGridSearchLogisticRegression(path=DATA_LOCAL + 'accepted_refused_ds.csv'):
    """Refit the best grid-search logistic regression and report on it.

    Steps, in order:

    1. Load the saved grid-search results from
       ``DATA_LOCAL + 'dict_recall.json'`` and pick the ``C`` value with
       the highest ``mean_test_score``.
    2. Load ``DATA_LOCAL + 'accepted_refused_ds_trans.csv'``, split it
       70/30 (``random_state=101``) and fit a class-balanced
       ``LogisticRegression`` with that ``C``.
    3. Write the fitted coefficients to
       ``DATA_LOCAL + 'LogisticRegressionCoef_c<C>.csv'``.
    4. Compute the ROC curve and AUC, draw two matplotlib figures (never
       shown) and display a Bokeh ROC plot with ``show``.
    5. Print confusion matrices and f1 / recall / accuracy / precision at
       thresholds 0.40 and 0.5, then the AUC.

    Args:
        path: Currently unused.  It was the input of the commented-out
            ``dataTransformation(path)`` call; the already transformed CSV
            is read instead.

    Returns:
        None.  Results are printed, plotted and written to disk.
    """
    str_a = 'acc'
    str_r = 'ref'

    def to_label(value):
        # 1 -> 'acc', 0 -> 'ref'
        return str(value).replace('1', str_a).replace('0', str_r)

    def to_int(label):
        # 'acc' -> 1, 'ref' -> 0
        return int(label.replace(str_a, '1').replace(str_r, '0'))

    # --- 1. best C from the stored grid search -------------------------
    # Read inside a context manager so the file handle is always closed.
    with open(DATA_LOCAL + 'dict_recall.json', 'r') as data_file:
        data = json.load(data_file)
    # pprint(data)
    params = data['param_C']['data']
    acc = data['mean_test_score']
    best_param = params[acc.index(max(acc))]
    print('Best param: ' + str(best_param))
    # Best C recorded per scoring function in earlier runs:
    # log loss: 0.0281176869797
    # f1: 0.0281176869797
    # roc_auc: 0.0001
    # recall: 0.0001
    plt.semilogx(data['param_C']['data'], data['mean_test_score'])
    # plt.show()
    plt.close()

    # --- 2. data, split, fit -------------------------------------------
    print('Data transformation')
    # my_data = dataTransformation(path)
    my_data = pd.read_csv(DATA_LOCAL + 'accepted_refused_ds_trans.csv',
                          header=0)
    print(my_data.head())
    y = my_data['loan'].map(to_label)
    X = my_data.drop('loan', axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=101)
    # print sum(y_test)
    # print len(y_test) - sum(y_test)

    print('Built and fit the model')
    lr = LogisticRegression(class_weight='balanced', random_state=101,
                            C=best_param)
    lr.fit(X=X_train, y=y_train.map(to_int))
    y_pred = lr.predict_proba(X=X_test)
    coef = lr.coef_
    print(lr.get_params())

    # --- 3. coefficients -----------------------------------------------
    # Take the names from the feature matrix itself so they line up with
    # lr.coef_ wherever the 'loan' column sits in the CSV.
    name_coef = X.columns.values
    coef_val = pd.DataFrame(data={'coef': name_coef, 'val': coef[0]})
    coef_val.to_csv(
        DATA_LOCAL + 'LogisticRegressionCoef_c' + str(best_param) + '.csv',
        index=False)

    # --- 4. ROC curve ---------------------------------------------------
    print('Built ROC curve')
    # Original author's note (translated): "I want value 0 for ACCEPTED
    # and 1 for REFUSED, so that the confusion matrix reads
    #     TP FN
    #     FP TN".
    # NOTE(review): the confusion matrices below use the string labels,
    # presumably so that 'acc' sorts before 'ref' -- confirm.
    y_acc = [p[1] for p in y_pred]  # probability of class 1
    y_test_int = y_test.map(to_int)
    fpr, tpr, threshold = roc_curve(y_true=y_test_int, y_score=y_acc)
    # data_roc = pd.DataFrame({'tpr': tpr, 'fpt': fpr, 'threshold': threshold})
    # data_roc.to_csv(DATA_LOCAL + 'roc_curve_c00001.csv', index=False)

    idx = np.arange(len(tpr))
    roc = pd.DataFrame({
        'fpr': pd.Series(fpr, index=idx),
        'tpr': pd.Series(tpr, index=idx),
        '1-fpr': pd.Series(1 - fpr, index=idx),
        'tf': pd.Series(tpr - (1 - fpr), index=idx),
        'thresholds': pd.Series(threshold, index=idx)
    })
    # Row where tpr is closest to 1 - fpr.  Positional .iloc replaces the
    # deprecated .ix; positions equal labels because the index is 0..n-1.
    closest = (roc.tf - 0.0).abs().argsort()[:1].values
    print(roc.iloc[closest])

    # Plot tpr vs 1-fpr
    fig, ax = plt.subplots()
    plt.plot(roc['tpr'])
    plt.plot(roc['1-fpr'], color='red')
    plt.xlabel('1-False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    ax.set_xticklabels([])
    # plt.show()

    roc_auc = auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange', lw=lw,
             label='ROC curve (area = %0.8f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    # plt.show()

    # The threshold is fixed by hand; the data-driven alternative was:
    # thre = float(roc.iloc[closest]['thresholds'])
    thre = 0.40

    source = ColumnDataSource(data=dict(tpr=tpr, fpr=fpr, thre=threshold))
    TOOLS = "pan,wheel_zoom,reset,hover,save"
    p = Figure(tools=TOOLS)
    p.line('fpr', 'tpr', source=source, line_width=4)
    p.line([0, 1], [0, 1], line_dash='dashed', line_alpha=0.6)
    p.yaxis.axis_label = 'True Positive Rate'
    p.xaxis.axis_label = 'False Positive Rate'
    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [("FPR", "@fpr{1.11}"),
                      ("TPR", "@tpr{1.11}"),
                      ("THRESHOLD", "@thre{1.11}")]
    show(p)

    # --- 5. confusion matrices and scores ------------------------------
    # y_test still holds the 'acc' / 'ref' string labels here.
    y_lbl_05 = [str_a if a > 0.5 else str_r for a in y_acc]
    y_lbl_thre = [str_a if a > thre else str_r for a in y_acc]
    print('Confusion matrix threshold = ' + str(thre))
    print(confusion_matrix(y_true=y_test, y_pred=y_lbl_thre))
    print('Confusion matrix threshold = 0.5')
    print(confusion_matrix(y_true=y_test, y_pred=y_lbl_05))

    y_pred_05 = [1 if a > 0.5 else 0 for a in y_acc]
    y_pred_thre = [1 if a > thre else 0 for a in y_acc]
    metrics = (
        ('f1', f1_score),
        ('recall', recall_score),
        ('accuracy', accuracy_score),
        ('precision', precision_score),
    )
    for metric_name, metric in metrics:
        print('Score ' + metric_name + ' (thres = ' + str(thre) + ') = ' + str(
            metric(y_true=y_test_int, y_pred=y_pred_thre)))
        print('Score ' + metric_name + ' (thres = 0.5) = ' + str(
            metric(y_true=y_test_int, y_pred=y_pred_05)))
    print('Score AUC = ' + str(roc_auc))