예제 #1
0
파일: approach.py 프로젝트: LeyliG/ds4se
 def Compute_precision_recall_gain(self):
     """Score and plot the Precision-Recall-Gain (PRG) curve for this instance.

     The curve is built from ``self.y_test`` and ``self.y_score``, its area
     (AUPRG) is computed and logged, and the curve is drawn with
     ``prg.plot_prg``. Nothing is returned.

     Original author's note: one might choose PRG if there is little
     interest in identifying false negatives.
     """
     curve = prg.create_prg_curve(self.y_test, self.y_score)
     area = prg.calc_auprg(curve)
     prg.plot_prg(curve)

     logging.info('auprg:  %.3f' % area)
     logging.info("compute_precision_recall_gain Complete")
예제 #2
0
 def Compute_precision_recall_gain(self):
     """Score and plot one Precision-Recall-Gain (PRG) curve per similarity metric.

     For every entry of ``self.sim_list`` the scores stored at the same
     position in ``self.y_score`` are paired with ``self.y_test`` to build
     a PRG curve; its area (AUPRG) is logged and the curve is drawn with
     ``prg.plot_prg``. Nothing is returned.

     Original author's note: one might choose PRG if there is little
     interest in identifying false negatives.
     """
     for position, metric in enumerate(self.sim_list):
         curve = prg.create_prg_curve(self.y_test, self.y_score[position])
         area = prg.calc_auprg(curve)
         prg.plot_prg(curve)

         logging.info('auprg:  %.3f' % area)
         logging.info("compute_precision_recall_gain Complete: " + str(metric))
예제 #3
0
def do_prg(scores, true_labels, file_name='', directory='', plot=False):
    """Compute the area under the Precision-Recall-Gain curve, optionally plotting it.

    Parameters
    ----------
    scores
        Predicted scores, passed to ``prg.create_prg_curve`` as the scores.
    true_labels
        Ground-truth labels, passed to ``prg.create_prg_curve`` as the labels.
    file_name, directory
        Only used when ``plot`` is true. The figure is saved to the plain
        string concatenation ``directory + file_name + "prg.png"``; no path
        separator is inserted, so ``directory`` must supply its own.
    plot
        When true, draw the curve, title it with the AUPRG and save it.

    Returns
    -------
    The value returned by ``prg.calc_auprg`` for the curve.
    """
    curve = prg.create_prg_curve(true_labels, scores)
    area = prg.calc_auprg(curve)

    # Nothing else to do unless a figure was requested.
    if not plot:
        return area

    prg.plot_prg(curve)
    plt.title('Precision-Recall-Gain curve: AUC=%0.4f' % (area))
    target = directory + file_name + "prg.png"
    plt.savefig(target)
    return area
예제 #4
0
    def Compute_precision_recall_gain(self,
                                      vecType=VectorizationType.word2vec,
                                      sim=SimilarityMetric.SCM_sim):
        """Build a Plotly figure of the Precision-Recall-Gain (PRG) curve.

        Labels and scores are obtained from ``self.vecTypeVerificationSim``
        for the requested vectorization type and similarity metric. The
        figure receives four traces, added in this order:

        1. cyan line: curve points from one position before the index
           returned by ``np.argmax`` over the ``in_unit_square`` flags
           through the last point;
        2. blue line: points flagged ``is_crossing`` or ``in_unit_square``;
        3. markers: points flagged ``in_unit_square`` and not ``is_crossing``;
        4. red dashed line: the hull returned by ``prg.convex_hull`` over
           the points whose gains are not NaN.

        The area under the curve (AUPRG) is logged, not drawn.

        Original author's note: one might choose PRG if there is little
        interest in identifying false negatives.

        Parameters
        ----------
        vecType
            Vectorization type forwarded to ``self.vecTypeVerificationSim``.
        sim
            Similarity metric forwarded to ``self.vecTypeVerificationSim``;
            its ``str()`` form also appears in the figure title and the log.

        Returns
        -------
        The ``go.Figure`` holding the traces and layout described above.
        """
        y_test, y_score = self.vecTypeVerificationSim(vecType=vecType, sim=sim)

        fig = go.Figure(layout_yaxis_range=[-0.05, 1.02],
                        layout_xaxis_range=[-0.05, 1.02])
        prg_curve = prg.create_prg_curve(y_test, y_score)
        in_unit_square = prg_curve['in_unit_square']
        is_crossing = prg_curve['is_crossing']
        pg = prg_curve['precision_gain']
        rg = prg_curve['recall_gain']

        # Cyan line: start one point before the argmax position so the
        # segment leading into that point is drawn as well.
        indices = np.arange(np.argmax(in_unit_square) - 1,
                            len(in_unit_square))
        fig.add_trace(
            go.Scatter(x=rg[indices],
                       y=pg[indices],
                       line=dict(color="cyan", width=2, dash="solid")))

        # Blue line: crossing points plus points inside the unit square.
        indices = np.logical_or(is_crossing, in_unit_square)
        fig.add_trace(
            go.Scatter(x=rg[indices],
                       y=pg[indices],
                       line=dict(color="blue", width=2, dash="solid")))

        # Markers: in-square points that are not crossing points.
        # np.logical_not replaces the former ``True - is_crossing``: NumPy
        # rejects ``-`` on boolean arrays, while logical_not handles both
        # bool and numeric flags.
        # NOTE(review): assumes is_crossing holds only 0/1 (or bool) values,
        # for which both forms agree -- confirm against prg.
        indices = np.logical_and(in_unit_square, np.logical_not(is_crossing))
        fig.add_trace(go.Scatter(x=rg[indices], y=pg[indices], mode='markers'))

        # Hull over the points where both gains are defined (not NaN).
        valid_points = np.logical_and(~np.isnan(rg), ~np.isnan(pg))
        upper_hull = prg.convex_hull(zip(rg[valid_points], pg[valid_points]))
        rg_hull, pg_hull = zip(*upper_hull)
        fig.add_trace(
            go.Scatter(x=rg_hull,
                       y=pg_hull,
                       mode="lines",
                       line=dict(color="red", width=2, dash="dash")))
        auprg = prg.calc_auprg(prg_curve)

        # Lazy %-args: same emitted text, formatted only if the record is used.
        logging.info('auprg:  %.3f', auprg)
        logging.info("compute_precision_recall_gain Complete: %s", sim)

        fig.update_layout(title=self.sys + "-[" + str(sim) + "]",
                          height=600,
                          width=600,
                          xaxis_title='Recall Gain',
                          xaxis=dict(tickmode='linear', tick0=0, dtick=0.25),
                          yaxis_title='Precision Gain',
                          yaxis=dict(tickmode='linear', tick0=0, dtick=0.25))
        # Tie the y scale to the x scale so both axes use the same unit length.
        fig.update_yaxes(
            scaleanchor="x",
            scaleratio=1,
        )

        return fig
예제 #5
0
    def get_prg_and_auc_df(self, error_df):
        """Return the PRG curve and its area as two pandas DataFrames.

        Parameters
        ----------
        error_df
            Object exposing ``True_values`` (used as labels) and
            ``Reconstruction_error`` (used as scores) attributes.

        Returns
        -------
        tuple
            ``(curve_frame, auc_frame)``: the first is built from the
            dictionary returned by ``prg.create_prg_curve``; the second has
            a single ``'AUC'`` column holding the ``prg.calc_auprg`` value
            at index 0.
        """
        curve = prg.create_prg_curve(error_df.True_values,
                                     error_df.Reconstruction_error)
        curve_frame = pd.DataFrame.from_dict(curve)

        auc_value = prg.calc_auprg(curve)
        # One-row series (index [0]) so the area becomes a one-row frame.
        auc_series = pd.Series(auc_value, index=np.arange(1))
        auc_frame = pd.DataFrame({'AUC': auc_series})

        return curve_frame, auc_frame
def auprgc_score(y_true, scores):
    """Return the Area Under the Precision-Recall-Gain Curve (AUPRG).

    The curve is built with ``prg.create_prg_curve`` and its area is
    computed with ``prg.calc_auprg``.

    Note: this implementation is restricted to the binary classification task.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True binary labels.

    scores : array, shape = [n_samples]
        Estimated probabilities or decision function.

    Returns
    -------
    The value returned by ``prg.calc_auprg`` for the curve.

    Examples
    --------
    NOTE(review): each ``np.arange`` call below yields one element fewer
    than the matching ``y_true`` (e.g. ``np.arange(10, 1, -1)`` has 9
    values for 10 labels), so these examples and their expected outputs
    look inconsistent -- verify before relying on them as doctests.

    >>> from prg import prg
    >>> import numpy as np
    >>> y_true = np.array([1, 1, 1, 1, 0, 1, 0, 1, 0, 0], dtype='int')
    >>> scores = np.arange(10, 1, -1)
    >>> auprgc_score(y_true, scores)
    0.683125
    >>> y_true = np.array([1, 1, 0, 0], dtype='int')
    >>> scores = np.arange(4, 1, -1)
    >>> auprgc_score(y_true, scores)
    1.0
    >>> y_true = np.array([0, 0, 1, 1], dtype='int')
    >>> scores = np.arange(4, 1, -1)
    >>> auprgc_score(y_true, scores)
    0
    >>> y_true = np.array([0, 1, 0], dtype='int')
    >>> scores = np.arange(3, 1, -1)
    >>> auprgc_score(y_true, scores)
    0.0
    """
    return prg.calc_auprg(prg.create_prg_curve(y_true, scores))
예제 #7
0
def auPRG(labels, predictions):
    """Return the area under the Precision-Recall-Gain curve.

    The curve is built from ``labels`` and ``predictions`` with
    ``create_prg_curve`` and its area is computed with ``calc_auprg``.
    """
    curve = create_prg_curve(labels, predictions)
    return calc_auprg(curve)
def auprg_score(y_true, y_score):
    """Return the area under the Precision-Recall-Gain curve.

    Both inputs are first converted with ``np.array`` and then handed to
    ``prg.create_prg_curve``; the area comes from ``prg.calc_auprg``.
    """
    labels = np.array(y_true)
    predictions = np.array(y_score)
    return prg.calc_auprg(prg.create_prg_curve(labels, predictions))
예제 #9
0
def auPRG(labels, predictions):
    """Return the area under the Precision-Recall-Gain curve.

    The curve is built from ``labels`` and ``predictions`` with
    ``create_prg_curve`` and its area is computed with ``calc_auprg``.
    """
    curve = create_prg_curve(labels, predictions)
    return calc_auprg(curve)
예제 #10
0
def area_under_prg(labels, scores):
    """Return the area under the Precision-Recall-Gain curve.

    The curve is built from ``labels`` and ``scores`` with
    ``prg.create_prg_curve`` and its area is computed with
    ``prg.calc_auprg``.
    """
    return prg.calc_auprg(prg.create_prg_curve(labels, scores))