import numpy as np
import networkx as nx

# NOTE: this smoke test also relies on the package's own helpers
# (roc_comparison, compute_lift_curve, compute_measure, network_roc_comparison
# and the plot_* functions); import them from the corresponding modules of the
# package (exact paths not shown here).


def test():
    ## Main functions to be used
    def sim_prediction_bin(n, real, noise):
        # Noisy prediction of binary labels, rescaled to [0, 1]
        noise = noise*(np.random.random(n)-.5)*2
        pred = real+noise
        pred = (pred-pred.min())/(pred.max()-pred.min())
        return pred

    def sim_prediction_cont(n, real, noise):
        # Noisy prediction of continuous values
        noise = noise*np.random.random(n)
        pred = real+noise
        return pred

    ## Parameters
    n, m_preds, noise = 1000, 10, .6
    real_cat = np.random.randint(0, 2, n)
    real_cont = np.random.random(n)
    pred_cat = sim_prediction_bin(n, real_cat, noise)
    preds_cat = [sim_prediction_bin(n, real_cat, noise) for i in range(m_preds)]
    pred_cont = sim_prediction_cont(n, real_cont, noise)
    preds_cont = [sim_prediction_cont(n, real_cont, noise)
                  for i in range(m_preds)]
    tags = ['prediction '+str(i) for i in range(m_preds)]

    x, y = np.random.random(100), np.random.random(100)
    G_x = nx.from_numpy_matrix(x.reshape((10, 10)))
    # Binarized version of the same weights, used as the 'true' network
    G_y = nx.from_numpy_matrix(x.reshape((10, 10)) > 0.5)
    Gs_x = [G_x for i in range(10)]

    ## Testing sorting measures
    ###########################
    fpr, tpr, _ = roc_comparison(real_cat, pred_cat)
    rocs = [roc_comparison(real_cat, preds_cat[i]) for i in range(m_preds)]
    fprs = [rocs[i][0] for i in range(m_preds)]
    tprs = [rocs[i][1] for i in range(m_preds)]
    # Dummy AUC values, only used to exercise the plotting functions
    measures = np.random.random(len(rocs))

    compute_lift_curve(real_cat, pred_cat, 10)
    lift = compute_lift_curve(real_cont, pred_cont, 10)[1]
    lifts = [compute_lift_curve(real_cat, preds_cat[i], 10)[1]
             for i in range(m_preds)]

    ## Testing plotting
    ###################
    fig = plot_roc_curves(fprs, tprs, measures, tags)
    fig = plot_roc_curve(fpr, tpr, measures[0])
    fig = plot_lift_curves(lifts, tags)
    fig = plot_lift_curve(lift)

    ## Testing main computing functions
    ###################################
    measures, fig = compute_measure(real_cat, pred_cat, metric="roc_curve",
                                    create_plot=True, tags=['0'])
    measures = compute_measure(real_cat, pred_cat, metric="roc_curve",
                               create_plot=False, tags=['0'])
    measures, fig = compute_measure(real_cat, preds_cat, metric="roc_curve",
                                    create_plot=True, tags=tags)
    measures = compute_measure(real_cat, preds_cat, metric="roc_curve",
                               create_plot=False, tags=tags)

    measures, fig = compute_measure(real_cat, pred_cat, metric="lift10",
                                    create_plot=True, tags=['0'])
    measures = compute_measure(real_cat, pred_cat, metric="lift10",
                               create_plot=False, tags=['0'])
    measures, fig = compute_measure(real_cat, preds_cat, metric="lift10",
                                    create_plot=True, tags=tags)
    measures = compute_measure(real_cat, preds_cat, metric="lift10",
                               create_plot=False, tags=tags)

    ## Testing network ROC comparison
    #################################
    names = ['network inferred '+str(i) for i in range(10)]
    measure, fig = network_roc_comparison(G_x, G_y)
    measure, fig = network_roc_comparison(G_x, G_y, names)
    measure, fig = network_roc_comparison(Gs_x, G_y, ['network inferred 0'])
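# Sketch for reference only: judging from the commented-out code kept inside
# compute_measure below, roc_comparison appears to behave like a thin wrapper
# around sklearn's roc_curve and auc. The name roc_comparison_sketch is
# hypothetical and not part of the package.
from sklearn.metrics import roc_curve, auc


def roc_comparison_sketch(real, pred):
    # ROC curve plus its AUC as the numerical measure
    fpr, tpr, thresholds = roc_curve(real, pred)
    measure = auc(fpr, tpr)
    return fpr, tpr, measure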
def compute_measure(real, pred, metric="roc_curve", create_plot=True,
                    tags=None):
    """Compute a given measure of fit between the real labels (real) and the
    predicted labels (pred).

    Parameters
    ----------
    real: array_like, shape (N,)
        the correct (real) labels.
    pred: array_like, shape (N,), or list of arrays
        the predicted labels, or a list of predictions to compare.
    metric: str, optional
        metric used to assess how good the prediction is. The available
        metrics are 'roc_curve' and 'lift10'.
    create_plot: bool, optional
        whether to also create the plot associated with the metric.
    tags: list
        the tags assigned to the predictions we are going to test.

    Returns
    -------
    measures: list of float
        the measures of fit between the real and the predicted labels, one
        value per prediction.
    fig: matplotlib figure
        the plot related to the measure (only returned if create_plot is
        True).

    Examples
    --------
    >>> real = np.random.randint(0, 2, 50)
    >>> pred = np.random.rand(50)
    >>> measure, fig = compute_measure(real, pred, "roc_curve")
    >>> measure
    [0.6]
    >>> measure, fig = compute_measure(real, pred, "lift10")
    >>> measure
    [1.7]
    >>> measure = compute_measure(real, pred, "lift10", False)

    See also
    --------
    sklearn.metrics.roc_curve, compute_lift_curve,
    pythonUtils.TesterResults.plotting_testerresults.plot_roc_curve,
    pythonUtils.TesterResults.plotting_testerresults.plot_lift_curve

    """
    multiple = type(pred) == list

    # ROC measure
    if metric == 'roc_curve':
        if not multiple:
            # Compute the ROC curve and its numerical measure
            # (previously done directly with sklearn's roc_curve and auc)
            fpr, tpr, measure = roc_comparison(real, pred)
            fprs, tprs, measures = [fpr], [tpr], [measure]
        else:
            assert(len(pred) == len(tags))
            fprs, tprs, measures = [], [], []
            for i in range(len(pred)):
                # Compute the ROC curve and its numerical measure
                fpr, tpr, measure = roc_comparison(real, pred[i])
                # Appending
                fprs.append(fpr)
                tprs.append(tpr)
                measures.append(measure)
            assert(len(measures) == len(pred))
        # Plot
        if create_plot:
            fig = plot_roc_curves(fprs, tprs, measures, tags)
        else:
            return measures

    # Lift10 measure
    elif metric == 'lift10':
        if not multiple:
            # Compute the lift curve over 10 buckets (deciles)
            _, lift10, _ = compute_lift_curve(real, pred, 10)
            # Numerical measure: lift of the first decile over the last one
            measures = [lift10[0]/lift10[-1]]
            lift10 = [lift10]
        else:
            assert(len(pred) == len(tags))
            lift10, measures = [], []
            for i in range(len(pred)):
                # Compute the lift curve over 10 buckets (deciles)
                _, lift10_i, _ = compute_lift_curve(real, pred[i], 10)
                # Numerical measure: lift of the first decile over the last one
                measure = lift10_i[0]/lift10_i[-1]
                # Appending
                lift10.append(lift10_i)
                measures.append(measure)
        # Plot
        if create_plot:
            fig = plot_lift_curves(lift10, tags)
        else:
            return measures

    return measures, fig
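# Minimal usage sketch, assuming numpy is available as ``np``; it exercises
# compute_measure with a list of predictions and explicit tags, the
# multiple-prediction case not covered by the docstring examples above.
# The variable names here are illustrative only.
if __name__ == "__main__":
    real = np.random.randint(0, 2, 200)
    preds = [np.random.rand(200) for _ in range(3)]
    tags = ['model ' + str(i) for i in range(3)]
    # One AUC per prediction, plus a single figure overlaying the ROC curves
    aucs, fig = compute_measure(real, preds, metric="roc_curve",
                                create_plot=True, tags=tags)
    print(aucs)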