Ejemplo n.º 1
0
def h5d_auc_table(h5d_fl1, h5d_fl2, kfolds, params_od, mix, is_ttbl, strata, trec):
    """Pair every evaluated parameter combination with its PR-curve AUC value(s).

    The parameter grid built from `params_od` is walked in both its 'list' and 'path'
    forms. Only the combinations whose first parameter is greater than the second one are
    evaluated. For each of them the predictions are fetched, the Precision-Recall curve is
    built with mx.pr_curve(), interpolated with mx.reclev11_max() and finally reduced to
    an AUC value with mx.auc().

    Arguments
    ---------
        h5d_fl1, h5d_fl2: Results sources forwarded to get_predictions_mix() when `mix`
            is true; only `h5d_fl1` is forwarded to get_predictions() otherwise.
            (Presumably open HDF5 files -- confirm against the callers.)
        kfolds: Forwarded unchanged to the predictions getter.
        params_od: Parameters grid definition given to param_comb.ParamGridIter().
        mix: When true get_predictions_mix() is used (it additionally receives the third
            parameter of the combination), otherwise get_predictions().
        is_ttbl: Forwarded as `binary` to the predictions getter and as `is_truth_tbl`
            to mx.pr_curve(). When true a single AUC is calculated per combination. When
            false one AUC is calculated per unique value found in the expected Y.
        strata: Forwarded unchanged to the predictions getter.
        trec: Forwarded unchanged to mx.reclev11_max().

    Output
    ------
        The np.vstack() of one row per evaluated combination: the parameter values
        followed by the respective AUC value(s).

    NOTE: np.vstack() is called directly on the collected rows, i.e. the case where no
    combination has been evaluated is not handled here.

    """

    # Beginning AUC-Params table building.
    res_lst = list()

    #  Loading data in a convenient form.
    for params_lst, params_path in zip(
            param_comb.ParamGridIter(params_od, 'list'),
            param_comb.ParamGridIter(params_od, 'path')):

        # Only the combinations where the first parameter exceeds the second are kept.
        if not (params_lst[0] > params_lst[1]):
            continue

        if mix:
            pred_scores, expd_y, _ = get_predictions_mix(
                h5d_fl1, h5d_fl2, kfolds, params_path, params_lst[2],
                genre_tag=None, binary=is_ttbl, strata=strata
            )
        else:
            pred_scores, expd_y, _ = get_predictions(
                h5d_fl1, kfolds, params_path, genre_tag=None, binary=is_ttbl, strata=strata
            )

        if is_ttbl:
            # The expected Y is used as it is, giving a single AUC.
            truth_vectors = (expd_y,)
            warn_tail = "AUC is for these params has set to 0.0"
        else:
            # Converting expected Y to binary format, once per unique genre tag.
            truth_vectors = (
                np.where((expd_y == gnr), 1, 0) for gnr in np.unique(expd_y)
            )
            warn_tail = "AUC is for these params has setted to 0.0"

        # Defining list for AUC values storage. For this loop.
        auc_values = list()

        for truth in truth_vectors:

            # NOTE:Option is_truth_tbl is critical to be selected correctly depending...
            # ...on the input.
            prec, recl, _ = mx.pr_curve(
                truth, pred_scores, full_curve=True, is_truth_tbl=is_ttbl
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

            # NOTE: mx.auc() is used for both cases. The per-genre case was calling a...
            # ...bare auc(), where any failure was silently turned into a 0.0 AUC.
            try:
                auc_values.append(mx.auc(recl, prec))
            except Exception:
                # Single pre-formatted string: same output with the print statement...
                # ...and with the print function.
                print("Warning: %s %s" % (params_path, warn_tail))
                auc_values.append(0.0)

        # Extending parameters list with AUC(s).
        params_lst.extend(auc_values)

        # Appending the parameters list together with their respective AUC(s).
        res_lst.append(params_lst)

    # Stacking and returning the data collected in a 2D array. Last column(s) contain the...
    # ...AUC for every evaluated parameters values combination.
    return np.vstack(res_lst)
Ejemplo n.º 2
0
        # the highest values*.
        # *(it assumes the highest values normally come first in order)
        #y = smooth_linear(y[::-1])

        # Inverting the y (i.e. Precision) axis values after they have been smoothed out.
        #y = y[::-1]

        # OR

        #y, x = smooth_linear(y[::-1], x[::-1]); y, x = y[::-1], x[::-1]

        #y, x = reclev11_averaging(y, x)

        #y, x = reclev11_nearest(y, x)

        y, x = reclev11_max(y1, x1)

        # plt.locator_params(nbins=4)
        ax1.plot(
            x, y,
            color[i] + line_type[i] + symbol[i], linewidth=1,
            markeredgewidth=1,
            #label="KI04 - 3Words"
            #"(" + str(i) + ") Feat " + str(params_lst[2]) + \
            #" - " + str(params_lst[3])
        )

        #ax1.plot(
        #    x1, y1,
        #    color[i] + line_type[i] + symbol[i], linewidth=1,
        #    markeredgewidth=1,
Ejemplo n.º 3
0
def params_prauc_tables(h5d_fl1, h5d_fl2, curvetype, kfolds, params_od, mix, strata, trec):
    """Pair every evaluated parameter combination with the AUC of its PR curve.

    The parameter grid built from `params_od` is walked in both its 'list' and 'path'
    forms. Only the combinations whose first parameter is greater than the second one are
    evaluated. For each of them a Precision-Recall curve is built according to
    `curvetype`, interpolated with mx.reclev11_max() and reduced to a single AUC value
    with mx.auc().

    Arguments
    ---------
        h5d_fl1, h5d_fl2: Results sources forwarded to the results getters. `h5d_fl2` is
            only used when `mix` is true. (Presumably open HDF5 files -- confirm against
            the callers.)
        curvetype: One of the following.
            'multiclass'       -- mx.pr_curve() with is_truth_tbl=True.
            'multiclass_macro' -- mx.pr_curve_macro().
            'onevsall'         -- one mx.pr_curve() per unique expected tag; the
                                  interpolated curves are averaged over the tags.
        kfolds: Forwarded unchanged to the results getters.
        params_od: Parameters grid definition given to param_comb.ParamGridIter().
        mix: Selects the *_multimeasure_res() getters instead of the plain ones.
        strata: Forwarded unchanged to the multi-class results getters.
        trec: Forwarded unchanged to mx.reclev11_max().

    Output
    ------
        The np.vstack() of one row per evaluated combination: the parameter values
        followed by the AUC value.

    Raises
    ------
        Exception: When `curvetype` is none of the values above. It is raised while
            processing the first evaluated combination.

    NOTE: np.vstack() is called directly on the collected rows, i.e. the case where no
    combination has been evaluated is not handled here.

    """
    # Selecting whether the results should be returned in binary (i.e. Truth-Table)...
    # ...or multi-class value form.
    binary = (curvetype == 'multiclass')

    # Beginning AUC-Params table building.
    res_lst = list()

    #  Loading data in a convenient form.
    for params_lst, params_path in zip(
            param_comb.ParamGridIter(params_od, 'list'),
            param_comb.ParamGridIter(params_od, 'path')):

        # Only the combinations where the first parameter exceeds the second are kept.
        if not (params_lst[0] > params_lst[1]):
            continue

        if mix:
            pred_scores, expd_y, pred_y = rfse_multiclass_multimeasure_res(
                h5d_fl1, h5d_fl2, kfolds, params_path, binary=binary, strata=strata
            )
        else:
            pred_scores, expd_y, pred_y = multiclass_res(
                h5d_fl1, kfolds, params_path, binary=binary, strata=strata
            )

        # NOTE: Cross-checking and replacing the class-tags of the experiment with virtual...
        # ...class tags referring to the index of the np.unique(expd_y) vector in order...
        # ...to ease the calculations of the curves. The arrays are modified in place.
        tags2idx_ref = np.unique(expd_y)
        i_fix = 1 if tags2idx_ref[0] > 0 else 0
        for i, tg in enumerate(tags2idx_ref):
            expd_y[np.where(expd_y == tg)] = i + i_fix
            pred_y[np.where(pred_y == tg)] = i + i_fix

        # Selecting the case and calculating the precision recall curves.
        if curvetype == 'multiclass':

            # NOTE: Option 'is_truth_tbl' is critical to be selected correctly depending...
            # ...on the input.
            prec, recl, _ = mx.pr_curve(
                expd_y, pred_scores, full_curve=True, is_truth_tbl=True
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'multiclass_macro':

            prec, recl, _ = mx.pr_curve_macro(
                expd_y, pred_y, pred_scores, full_curve=True,
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'onevsall':

            # Precision and Recall scores lists of the PR curve per genre.
            prec_lst = list()
            recl_lst = list()

            # Calculating the PR curve per genre tag. The tags are the virtual ones...
            # ...assigned above.
            for gnr in np.unique(expd_y):

                # Separate names are used so the multi-class results are not overwritten.
                if mix:
                    gnr_scores, gnr_expd_y, _ = onevsall_multimeasure_res(
                        h5d_fl1, h5d_fl2, gnr, kfolds, params_path
                    )
                else:
                    gnr_scores, gnr_expd_y, _ = onevsall_res(
                        h5d_fl1, gnr, kfolds, params_path
                    )

                # NOTE: Option 'is_truth_tbl' is critical to be selected correctly...
                # ...depending on the input.
                prec_val, recl_val, _ = mx.pr_curve(
                    gnr_expd_y, gnr_scores, full_curve=True, is_truth_tbl=False
                )

                # Interpolated at 11-Recall-Levels.
                prec_val, recl_val = mx.reclev11_max(prec_val, recl_val, trec=trec)

                # Keeping Precision and Recall scores of the PR curve per genre. The...
                # ...per-genre values are stored (not the not-yet-defined averaged ones).
                prec_lst.append(prec_val)
                recl_lst.append(recl_val)

            # Calculating the PR Averaged Macro Curves values for 1-vs-All case.
            prec = np.mean(np.vstack(prec_lst), axis=0)
            recl = np.mean(np.vstack(recl_lst), axis=0)

        else:
            raise Exception('Invalide curvetype argument value.')

        # Saving the AUC value and extending parameters list with AUC(s).
        try:
            params_lst.extend([mx.auc(recl, prec)])
        except Exception:
            # Single pre-formatted string: same output with the print statement and...
            # ...with the print function.
            print(
                "Warning: %s PR AUC is for these params has set to 0.0" % (params_path,)
            )
            params_lst.extend([0.0])

        # Appending the parameters list together with their respective AUC(s).
        res_lst.append(params_lst)

    # Stacking and returning the data collected in a 2D array. Last column contain the AUC for...
    # ...every evaluated parameters values combination.
    return np.vstack(res_lst)
Ejemplo n.º 4
0
        h5d_fl1.close()
        h5d_fl2.close()
    else:
        h5d_fl1.close()

    # Creating the Actual PRC.
    # y, x, t = pr_curve(expd_y, pred_scores, full_curve=True, is_truth_tbl=True)

    # Creating the Actual MACRO PRC.
    y, x, t = pr_curve_macro(
        expd_y, pred_y, pred_scores, full_curve=True
    )

    # Getting the max 11 Recall Levels in TREC way.
    # if i == 0:
    y, x = reclev11_max(y, x, trec=False)

    # Selecting array indices with non-zero cells.
    non_zero_idx = np.where(y > 0)

    # # # Do the Plotting
    linestyle = {
        "color": plt_dsp_attr[i][0],
        "linestyle": plt_dsp_attr[i][1],
        "marker": plt_dsp_attr[i][2],
        "linewidth": 2,
        "markeredgewidth": 2,
        'markeredgecolor': 'white',
    }

    ax.plot(x[non_zero_idx], y[non_zero_idx], **linestyle)