예제 #1
0
def test():
    """Smoke-test the measure, plotting and network-comparison helpers.

    Random labels and noisy predictions are simulated, then every helper is
    called once per supported call shape (one prediction / list of
    predictions, with and without plot creation). Nothing is asserted about
    the results: the test only checks that the calls run without raising.
    """
    ## Main functions to be used
    def sim_prediction_bin(n, real, noise):
        # Symmetric uniform noise in [-noise, noise) added to the labels,
        # then min-max rescaled so the scores lie in [0, 1].
        noise = noise*(np.random.random(n)-.5)*2
        pred = real+noise
        pred = (pred-pred.min())/(pred.max()-pred.min())
        return pred

    def sim_prediction_cont(n, real, noise):
        # One-sided uniform noise in [0, noise) added to the real values.
        # Uses the `real` argument (not the enclosing `real_cat`) so that the
        # continuous prediction is actually derived from the continuous data.
        noise = noise*np.random.random(n)
        pred = real+noise
        return pred

    ## Parameters
    n, m_preds, noise = 1000, 10, .6

    real_cat = np.random.randint(0, 2, n)
    real_cont = np.random.random(n)

    pred_cat = sim_prediction_bin(n, real_cat, noise)
    preds_cat = [sim_prediction_bin(n, real_cat, noise)
                 for _ in range(m_preds)]
    pred_cont = sim_prediction_cont(n, real_cont, noise)
    preds_cont = [sim_prediction_cont(n, real_cont, noise)
                  for _ in range(m_preds)]
    tags = ['prediction '+str(i) for i in range(m_preds)]

    x, y = np.random.random(100), np.random.random(100)
    # NOTE(review): nx.from_numpy_matrix was removed in NetworkX 3.0
    # (from_numpy_array is its replacement) -- confirm the supported version.
    G_x = nx.from_numpy_matrix(x.reshape((10, 10)))
    # NOTE(review): G_y is built by thresholding `x`, and `y` is never used;
    # confirm whether the reference network was meant to come from `y`.
    G_y = nx.from_numpy_matrix(x.reshape((10, 10)) > 0.5)
    Gs_x = [G_x for _ in range(10)]

    ## Testing sorting measures
    ###########################
    fpr, tpr, _ = roc_comparison(real_cat, pred_cat)
    rocs = [roc_comparison(real_cat, pred_i) for pred_i in preds_cat]
    fprs = [roc[0] for roc in rocs]
    tprs = [roc[1] for roc in rocs]
    # Random placeholders: the plotting functions only need one value per
    # curve.
    measures = np.random.random(len(rocs))
    compute_lift_curve(real_cat, pred_cat, 10)
    lift = compute_lift_curve(real_cont, pred_cont, 10)[1]
    lifts = [compute_lift_curve(real_cat, pred_i, 10)[1]
             for pred_i in preds_cat]

    ## Testing plotting
    ###################
    fig = plot_roc_curves(fprs, tprs, measures, tags)
    fig = plot_roc_curve(fpr, tpr, measures[0])
    fig = plot_lift_curves(lifts, tags)
    fig = plot_lift_curve(lift)

    ## Testing main computing functions
    ###################################
    measures, fig = compute_measure(real_cat, pred_cat, metric="roc_curve",
                                    create_plot=True, tags=['0'])
    measures = compute_measure(real_cat, pred_cat, metric="roc_curve",
                               create_plot=False, tags=['0'])
    measures, fig = compute_measure(real_cat, preds_cat, metric="roc_curve",
                                    create_plot=True, tags=tags)
    measures = compute_measure(real_cat, preds_cat, metric="roc_curve",
                               create_plot=False, tags=tags)
    measures, fig = compute_measure(real_cat, pred_cat, metric="lift10",
                                    create_plot=True, tags=['0'])
    measures = compute_measure(real_cat, pred_cat, metric="lift10",
                               create_plot=False, tags=['0'])
    measures, fig = compute_measure(real_cat, preds_cat, metric="lift10",
                                    create_plot=True, tags=tags)
    # create_plot=False here so that the multi-prediction, no-plot lift10
    # path is exercised too (it mirrors the three pairs of calls above).
    measures = compute_measure(real_cat, preds_cat, metric="lift10",
                               create_plot=False, tags=tags)

    ########
    names = ['network inferred '+str(i) for i in range(10)]
    measure, fig = network_roc_comparison(G_x, G_y)
    # NOTE(review): a single graph is paired with ten names below, and ten
    # graphs with a single name -- confirm this mismatch is intentional.
    measure, fig = network_roc_comparison(G_x, G_y, names)
    measure, fig = network_roc_comparison(Gs_x, G_y, ['network inferred 0'])
예제 #2
0
def compute_measure(real, pred, metric="roc_curve", create_plot=True,
                    tags=None):
    """Compute a measure of fit between real labels and predicted labels.

    A single prediction and a list of predictions are both supported. The
    numerical measures are always returned as a list with one entry per
    prediction, so both cases have the same return shape.

    Parameters
    ----------
    real: array_like, shape (N,)
        labels of correct real values.
    pred: array_like, shape (N,), or list of arrays.
        predicted labels. A ``list`` is interpreted as several predictions
        to be evaluated against the same ``real``; anything else is treated
        as one prediction.
    metric: str, optional
        metric used to check how good is the prediction.
        There are available 'roc_curve', and 'lift10'.
    create_plot: boolean, optional
        if True the associated plot is created and returned together with
        the measures; if False only the measures are returned.
    tags: list, optional
        the tags assigned for the possible predictions we are going to test.
        They are forwarded unchanged to the plotting function. When given
        together with a list of predictions, it must have one tag per
        prediction.

    Returns
    -------
    measures: list
        one numerical measure per prediction. For 'roc_curve' it is the third
        value returned by ``roc_comparison``; for 'lift10' it is the ratio
        between the first and the last value of the lift curve computed by
        ``compute_lift_curve`` with 10 as its third argument.
    fig : matplotlib figure
        the plot related to the measure. Only returned if ``create_plot`` is
        True.

    Raises
    ------
    ValueError
        if ``metric`` is not one of the available ones, or if the number of
        tags does not match the number of predictions.

    Examples
    --------
    The values shown are illustrative; they depend on the random data.

    >>> real = np.random.randint(0,2,50)
    >>> pred = np.random.rand(50)
    >>> measure, fig = compute_measure(real, pred, "roc_curve")
    >>> measure
    [0.6]
    >>> measure, fig = compute_measure(real, pred, "lift10")
    >>> measure
    [1.7]
    >>> measure = compute_measure(real, pred, "lift10", False)

    See also
    --------
    sklearn.metrics.roc_curve, compute_lift_curve,
    pythonUtils.TesterResults.plotting_testerresults.plot_roc_curve ,
    pythonUtils.TesterResults.plotting_testerresults.plot_lift_curve

    """
    # Fail early with a clear message instead of reaching the final return
    # with unbound names.
    if metric not in ('roc_curve', 'lift10'):
        raise ValueError("Unknown metric %r: use 'roc_curve' or 'lift10'."
                         % (metric,))

    # Only a list means "several predictions"; a single array is wrapped so
    # that both cases share the same code path.
    multiple = isinstance(pred, list)
    preds = pred if multiple else [pred]

    # tags is optional: only check its length when it was actually given.
    if multiple and tags is not None and len(tags) != len(preds):
        raise ValueError("Expected one tag per prediction: got %d tags for "
                         "%d predictions." % (len(tags), len(preds)))

    # ROC measure
    if metric == 'roc_curve':
        fprs, tprs, measures = [], [], []
        for pred_i in preds:
            fpr, tpr, measure = roc_comparison(real, pred_i)
            fprs.append(fpr)
            tprs.append(tpr)
            measures.append(measure)
        if not create_plot:
            return measures
        fig = plot_roc_curves(fprs, tprs, measures, tags)

    # LIFT 10 MEASURE
    else:
        lifts, measures = [], []
        for pred_i in preds:
            _, lift_i, _ = compute_lift_curve(real, pred_i, 10)
            lifts.append(lift_i)
            # The ratio is taken on the curve itself, before it is stored in
            # the list of curves, so it compares first and last values.
            measures.append(lift_i[0]/lift_i[-1])
        if not create_plot:
            return measures
        fig = plot_lift_curves(lifts, tags)

    return measures, fig