def h5d_auc_table(h5d_fl1, h5d_fl2, kfolds, params_od, mix, is_ttbl, strata, trec):
    """Pair every evaluated parameter combination with its PR-curve AUC value(s).

    The parameter grid described by `params_od` is walked with
    `param_comb.ParamGridIter`. For every combination whose first value is greater
    than its second one, the predictions are loaded, a Precision-Recall curve is built
    with `mx.pr_curve`, interpolated with `mx.reclev11_max` and integrated with `mx.auc`.

    Arguments
    ---------
        h5d_fl1: First results file handle, forwarded to the prediction loaders.
        h5d_fl2: Second results file handle. Only used when `mix` is true.
        kfolds: Forwarded as is to the prediction loaders.
        params_od: Parameters grid definition, forwarded to `param_comb.ParamGridIter`.
        mix: When true the predictions are loaded with `get_predictions_mix` (both files
            plus the third value of the parameters combination), otherwise with
            `get_predictions` (first file only).
        is_ttbl: Forwarded as `binary` to the prediction loaders and as `is_truth_tbl` to
            `mx.pr_curve`. When true a single AUC is calculated per combination. When
            false one AUC is calculated per unique value found in the expected Y, using
            a binary (1 for the value, 0 otherwise) version of the expected Y.
        strata: Forwarded as is to the prediction loaders.
        trec: Forwarded as is to `mx.reclev11_max`.

    Output
    ------
        The result of `np.vstack` over the collected rows. Every row holds the values of
        a parameters combination followed by its AUC value(s). Note that `np.vstack` is
        given an empty list when no combination is evaluated.

    """

    # Beginning AUC-Params table building.
    res_lst = list()

    # The same grid is walked twice in lock-step, i.e. once as list of values and once
    # as path.
    for params_lst, params_path in zip(
            param_comb.ParamGridIter(params_od, 'list'),
            param_comb.ParamGridIter(params_od, 'path')):

        # Combinations where the first value is not greater than the second one are not
        # evaluated and they do not get a row in the table.
        # NOTE(review): the row building is assumed to belong to this condition, since
        # rows without AUC column(s) could not be stacked with the evaluated ones.
        if not params_lst[0] > params_lst[1]:
            continue

        if mix:
            pred_scores, expd_y, _ = get_predictions_mix(
                h5d_fl1, h5d_fl2, kfolds, params_path, params_lst[2],
                genre_tag=None, binary=is_ttbl, strata=strata
            )
        else:
            pred_scores, expd_y, _ = get_predictions(
                h5d_fl1, kfolds, params_path,
                genre_tag=None, binary=is_ttbl, strata=strata
            )

        # Defining list for AUC values storage. For this combination.
        auc_values = list()

        if is_ttbl:
            auc_values.append(
                _h5d_interp_pr_auc(
                    expd_y, pred_scores, is_ttbl, trec, params_path,
                    "AUC is for these params has set to 0.0"
                )
            )
        else:
            # Calculating AUC per unique genre tag.
            for gnr in np.unique(expd_y):

                # Converting expected Y to binary format.
                expd_y_bin = np.where((expd_y == gnr), 1, 0)

                auc_values.append(
                    _h5d_interp_pr_auc(
                        expd_y_bin, pred_scores, is_ttbl, trec, params_path,
                        "AUC is for these params has setted to 0.0"
                    )
                )

        # Extending parameters list with AUC(s).
        params_lst.extend(auc_values)

        # Appending the parameters list together with their respective AUC(s).
        res_lst.append(params_lst)

    # Stacking and returning the data collected in a 2D array. Last column(s) contain the
    # AUC for every evaluated parameters values combination.
    return np.vstack(res_lst)


def _h5d_interp_pr_auc(expd_y, pred_scores, is_ttbl, trec, params_path, warn_tail):
    """Return the AUC of the interpolated PR curve, or 0.0 when `mx.auc` fails."""

    # NOTE: Option is_truth_tbl is critical to be selected correctly depending on the
    # input.
    prec, recl, _ = mx.pr_curve(
        expd_y, pred_scores, full_curve=True, is_truth_tbl=is_ttbl
    )

    # Interpolated at 11-Recall-Levels.
    prec, recl = mx.reclev11_max(prec, recl, trec=trec)

    # Best effort: a curve that cannot be integrated counts as AUC 0.0. Only regular
    # errors are absorbed, so Ctrl-C and interpreter exit still propagate.
    try:
        return mx.auc(recl, prec)
    except Exception:
        # Single-argument print call, i.e. same output under Python 2 and Python 3.
        print("Warning: %s %s" % (params_path, warn_tail))
        return 0.0
# the highest values*. # *( it suppose the higest values to be normally fist in order ) #y = smooth_linear(y[::-1]) # Inverting the y (i.e. Precition) axis values after has been smoothed out. #y = y[::-1] # OR #y, x = smooth_linear(y[::-1], x[::-1]); y, x = y[::-1], x[::-1] #y, x = reclev11_averaging(y, x) #y, x = reclev11_nearest(y, x) y, x = reclev11_max(y1, x1) # plt.locator_params(nbins=4) ax1.plot( x, y, color[i] + line_type[i] + symbol[i], linewidth=1, markeredgewidth=1, #label="KI04 - 3Words" #"(" + str(i) + ") Feat " + str(params_lst[2]) + \ #" - " + str(params_lst[3]) ) #ax1.plot( # x1, y1, # color[i] + line_type[i] + symbol[i], linewidth=1, # markeredgewidth=1,
def params_prauc_tables(h5d_fl1, h5d_fl2, curvetype, kfolds, params_od, mix, strata, trec):
    """Pair every evaluated parameter combination with the AUC of its PR curve.

    The parameter grid described by `params_od` is walked with
    `param_comb.ParamGridIter`. For every combination whose first value is greater
    than its second one, a Precision-Recall curve is built according to `curvetype`,
    interpolated with `mx.reclev11_max` and integrated with `mx.auc`.

    Arguments
    ---------
        h5d_fl1: First results file handle, forwarded to the results loaders.
        h5d_fl2: Second results file handle. Only used when `mix` is true.
        curvetype: One of the following strings.
            'multiclass': `mx.pr_curve` over the results loaded in binary form.
            'multiclass_macro': `mx.pr_curve_macro` over the multi-class results.
            'onevsall': one `mx.pr_curve` per unique expected tag, where the results
                are re-loaded per tag and the interpolated curves are averaged.
        kfolds: Forwarded as is to the results loaders.
        params_od: Parameters grid definition, forwarded to `param_comb.ParamGridIter`.
        mix: When true the two-file loaders (`rfse_multiclass_multimeasure_res`,
            `onevsall_multimeasure_res`) are used, otherwise the single-file ones
            (`multiclass_res`, `onevsall_res`).
        strata: Forwarded as is to the multi-class results loaders.
        trec: Forwarded as is to `mx.reclev11_max`.

    Output
    ------
        The result of `np.vstack` over the collected rows. Every row holds the values of
        a parameters combination followed by its AUC. Note that `np.vstack` is given an
        empty list when no combination is evaluated.

    Raises
    ------
        ValueError: When `curvetype` is not one of the values above. It is raised only
            when a parameters combination is actually evaluated.

    """

    # Selecting whether the results should be returned in binary (i.e. Truth-Table)
    # or multi-class value form.
    binary = (curvetype == 'multiclass')

    # Beginning AUC-Params table building.
    res_lst = list()

    # The same grid is walked twice in lock-step, i.e. once as list of values and once
    # as path.
    for params_lst, params_path in zip(
            param_comb.ParamGridIter(params_od, 'list'),
            param_comb.ParamGridIter(params_od, 'path')):

        # Combinations where the first value is not greater than the second one are not
        # evaluated and they do not get a row in the table.
        if not params_lst[0] > params_lst[1]:
            continue

        if mix:
            pred_scores, expd_y, pred_y = rfse_multiclass_multimeasure_res(
                h5d_fl1, h5d_fl2, kfolds, params_path, binary=binary, strata=strata
            )
        else:
            pred_scores, expd_y, pred_y = multiclass_res(
                h5d_fl1, kfolds, params_path, binary=binary, strata=strata
            )

        # NOTE: Cross-checking and replacing the class-tags of the experiment with
        # virtual class tags referring to the index of the np.unique(expd_y) vector, in
        # order to ease the calculations of the curves. The virtual tags start from 1
        # when the smallest expected tag is greater than 0.
        tags2idx_ref = np.unique(expd_y)
        i_fix = 1 if tags2idx_ref[0] > 0 else 0
        for i, tg in enumerate(tags2idx_ref):
            expd_y[np.where(expd_y == tg)] = i + i_fix
            pred_y[np.where(pred_y == tg)] = i + i_fix

        # Selecting the case and calculating the precision recall curves.
        if curvetype == 'multiclass':

            # NOTE: Option 'is_truth_tbl' is critical to be selected correctly depending
            # on the input.
            prec, recl, _ = mx.pr_curve(
                expd_y, pred_scores, full_curve=True, is_truth_tbl=True
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'multiclass_macro':

            prec, recl, _ = mx.pr_curve_macro(
                expd_y, pred_y, pred_scores, full_curve=True,
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'onevsall':

            # Precision and Recall scores lists of the PR curve per genre.
            prec_lst = list()
            recl_lst = list()

            # Calculating the curve per unique genre tag.
            # NOTE(review): the tags iterated here are the virtual (re-indexed) ones, not
            # the original class tags. Confirm the one-vs-all loaders expect that.
            for gnr in np.unique(expd_y):

                if mix:
                    gnr_scores, gnr_expd_y, _ = onevsall_multimeasure_res(
                        h5d_fl1, h5d_fl2, gnr, kfolds, params_path
                    )
                else:
                    gnr_scores, gnr_expd_y, _ = onevsall_res(
                        h5d_fl1, gnr, kfolds, params_path
                    )

                # NOTE: Option 'is_truth_tbl' is critical to be selected correctly
                # depending on the input.
                prec_val, recl_val, _ = mx.pr_curve(
                    gnr_expd_y, gnr_scores, full_curve=True, is_truth_tbl=False
                )

                # Interpolated at 11-Recall-Levels.
                prec_val, recl_val = mx.reclev11_max(prec_val, recl_val, trec=trec)

                # Keeping this genre's own curve. The averaged curve names (prec, recl)
                # are not assigned until the loop is over, so they must not be used here.
                prec_lst.append(prec_val)
                recl_lst.append(recl_val)

            # Calculating the PR Averaged Macro Curves values for 1-vs-All case.
            prec = np.mean(np.vstack(prec_lst), axis=0)
            recl = np.mean(np.vstack(recl_lst), axis=0)

        else:
            raise ValueError('Invalide curvetype argument value.')

        # Best effort: a curve that cannot be integrated counts as AUC 0.0. Only regular
        # errors are absorbed, so Ctrl-C and interpreter exit still propagate.
        try:
            auc_val = mx.auc(recl, prec)
        except Exception:
            # Single-argument print call, i.e. same output under Python 2 and Python 3.
            print(
                "Warning: %s PR AUC is for these params has set to 0.0" % (params_path,)
            )
            auc_val = 0.0

        # Saving the AUC value by extending the parameters list with it.
        params_lst.append(auc_val)

        # Appending the parameters list together with its respective AUC.
        res_lst.append(params_lst)

    # Stacking and returning the data collected in a 2D array. Last column contains the
    # AUC for every evaluated parameters values combination.
    return np.vstack(res_lst)
h5d_fl1.close() h5d_fl2.close() else: h5d_fl1.close() # Creating the Actual PRC. # y, x, t = pr_curve(expd_y, pred_scores, full_curve=True, is_truth_tbl=True) # Creating the Actual MACRO PRC. y, x, t = pr_curve_macro( expd_y, pred_y, pred_scores, full_curve=True ) # Getting the max 11 Recall Leves in TREC way. # if i == 0: y, x = reclev11_max(y, x, trec=False) # Selecting array indices with non-zero cells. non_zero_idx = np.where(y > 0) # # # Do the Plotting linestyle = { "color": plt_dsp_attr[i][0], "linestyle": plt_dsp_attr[i][1], "marker": plt_dsp_attr[i][2], "linewidth": 2, "markeredgewidth": 2, 'markeredgecolor': 'white', } ax.plot(x[non_zero_idx], y[non_zero_idx], **linestyle)