def params_prauc_tables(h5d_fl1, h5d_fl2, curvetype, kfolds, params_od, mix, strata, trec):
    """Pair every evaluated parameter combination with the AUC of its PR curve.

    For each parameter combination produced by ``param_comb.ParamGridIter`` the
    stored results are loaded, a Precision-Recall curve is computed according to
    ``curvetype``, the curve is interpolated with ``mx.reclev11_max`` and its
    area is appended to the parameter values.

    Args:
        h5d_fl1: First results file, handed to the result-loading functions
            (presumably an open HDF5/PyTables handle -- confirm with the caller).
        h5d_fl2: Second results file. Only used when ``mix`` is true.
        curvetype: One of ``'multiclass'``, ``'multiclass_macro'`` or
            ``'onevsall'``.
        kfolds: Forwarded unchanged to the result-loading functions.
        params_od: Parameter grid, iterated both in ``'list'`` and ``'path'``
            form through ``param_comb.ParamGridIter``.
        mix: When true the "multimeasure" loaders (which read both files) are
            used, otherwise the single-file loaders.
        strata: Forwarded unchanged to the multi-class result loaders.
        trec: Forwarded as ``trec`` to ``mx.reclev11_max``.

    Returns:
        A 2D array (``np.vstack`` of the per-combination rows) where each row
        holds the parameter values followed by the AUC in the last column.

    Raises:
        Exception: If ``curvetype`` is not one of the supported values.
        ValueError: Raised by ``np.vstack`` when no combination was evaluated.
    """

    # The 'multiclass' curve is computed from a truth table, therefore the results are...
    # ...requested in binary form only for that case.
    binary = curvetype == 'multiclass'

    # Rows of the final table: parameter values + AUC.
    res_lst = list()

    for params_lst, params_path in zip(
            param_comb.ParamGridIter(params_od, 'list'),
            param_comb.ParamGridIter(params_od, 'path')):

        # Only combinations whose first parameter is greater than the second are evaluated.
        # NOTE(review): combinations failing this check are assumed to be skipped entirely...
        # ...(nothing is appended for them) -- confirm against the intended loop nesting.
        if not params_lst[0] > params_lst[1]:
            continue

        # Loading the stored results for this parameter combination.
        if mix:
            pred_scores, expd_y, pred_y = rfse_multiclass_multimeasure_res(
                h5d_fl1, h5d_fl2, kfolds, params_path, binary=binary, strata=strata
            )
        else:
            pred_scores, expd_y, pred_y = multiclass_res(
                h5d_fl1, kfolds, params_path, binary=binary, strata=strata
            )

        # NOTE: Cross-checking and replacing the class-tags of the experiment with virtual...
        # ...class tags referring to the index of the np.unique(expd_y) vector in order...
        # ...to ease the calculations of the curves. When the smallest tag is above zero...
        # ...the virtual tags start from 1 instead of 0.
        tags2idx_ref = np.unique(expd_y)
        i_fix = 1 if tags2idx_ref[0] > 0 else 0

        for i, tg in enumerate(tags2idx_ref):
            expd_y[np.where(expd_y == tg)] = i + i_fix
            pred_y[np.where(pred_y == tg)] = i + i_fix

        # Selecting the case and calculating the precision recall curves.
        if curvetype == 'multiclass':

            # NOTE: Option 'is_truth_tbl' is critical to be selected correctly depending...
            # ...on the input.
            prec, recl, _ = mx.pr_curve(
                expd_y, pred_scores, full_curve=True, is_truth_tbl=True
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'multiclass_macro':

            prec, recl, _ = mx.pr_curve_macro(
                expd_y, pred_y, pred_scores, full_curve=True,
            )

            # Interpolated at 11-Recall-Levels.
            prec, recl = mx.reclev11_max(prec, recl, trec=trec)

        elif curvetype == 'onevsall':

            # Finding unique genres.
            gnr_tgs = np.unique(expd_y)

            # Precision and Recall scores of the interpolated PR curve, one entry per genre.
            prec_lst = list()
            recl_lst = list()

            for gnr in gnr_tgs:

                if mix:
                    gnr_scores, gnr_expd_y, _ = onevsall_multimeasure_res(
                        h5d_fl1, h5d_fl2, gnr, kfolds, params_path
                    )
                else:
                    gnr_scores, gnr_expd_y, _ = onevsall_res(
                        h5d_fl1, gnr, kfolds, params_path
                    )

                # NOTE: Option 'is_truth_tbl' is critical to be selected correctly depending...
                # ...on the input.
                prec_val, recl_val, _ = mx.pr_curve(
                    gnr_expd_y, gnr_scores, full_curve=True, is_truth_tbl=False
                )

                # Interpolated at 11-Recall-Levels.
                prec_val, recl_val = mx.reclev11_max(prec_val, recl_val, trec=trec)

                # Keeping the curve of THIS genre. The per-genre values must be stored here,...
                # ...not the averaged 'prec'/'recl' which are only defined after the loop.
                prec_lst.append(prec_val)
                recl_lst.append(recl_val)

            # Calculating the PR Averaged Macro Curves values for 1-vs-All case.
            prec = np.mean(np.vstack(prec_lst), axis=0)
            recl = np.mean(np.vstack(recl_lst), axis=0)

        else:
            raise Exception('Invalide curvetype argument value.')

        # Calculating the AUC. A failure is reported and the AUC falls back to 0.0 so that...
        # ...a single bad combination does not abort the whole table.
        try:
            auc_val = mx.auc(recl, prec)
        except Exception:
            print("Warning: %s PR AUC is for these params has set to 0.0" % (params_path,))
            auc_val = 0.0

        # Extending the parameters list with the AUC and keeping it as a table row.
        params_lst.append(auc_val)
        res_lst.append(params_lst)

    # Stacking and returning the data collected in a 2D array. Last column contains the AUC...
    # ...for every evaluated parameters combination.
    return np.vstack(res_lst)
def PRConf_table(h5d_fl1, h5d_fl2, kfolds, params_path, mix, strata, prereccon=0):
    """Build the Precision/Recall table and/or the contingency table of an experiment.

    The stored multi-class results for ``params_path`` are loaded, a contingency
    table is computed with ``mx.seq_contingency_table`` and, depending on
    ``prereccon``, per-class precision and recall are derived from it.

    Args:
        h5d_fl1: First results file, handed to the result-loading functions
            (presumably an open HDF5/PyTables handle -- confirm with the caller).
        h5d_fl2: Second results file. Only used when ``mix`` is true.
        kfolds: Forwarded unchanged to the result-loading functions.
        params_path: Parameters path selecting the stored results to load.
        mix: When true ``rfse_multiclass_multimeasure_res`` (both files) is
            used, otherwise ``multiclass_res`` (first file only).
        strata: Forwarded unchanged to the result-loading functions.
        prereccon: Selects what is returned:
            ``0`` -> ``(pr_tbl, conf_mtrx)``, ``1`` -> ``pr_tbl`` only,
            ``2`` -> ``conf_mtrx`` only.

    Returns:
        ``pr_tbl`` is the list ``[precisions, recalls]``. ``conf_mtrx`` is the
        contingency table with one extra last row holding the column sums
        (the extra row is added only for modes 0 and 2).

    Raises:
        Exception: If ``prereccon`` is not 0, 1 or 2, or if the zero bin of the
            per-class sample counts is inconsistent with the smallest expected
            class tag.
    """

    # Rejecting an invalid returning mode before any data is loaded.
    if prereccon not in (0, 1, 2):
        raise Exception("Returning mode 'prereccon' variable invalid value. Valid values {0,1,2}.")

    # Loading the stored results in multi-class (non-binary) form.
    if mix:
        rfse_data = rfse_multiclass_multimeasure_res(
            h5d_fl1, h5d_fl2, kfolds, params_path, binary=False, strata=strata
        )
    else:
        rfse_data = multiclass_res(
            h5d_fl1, kfolds, params_path, binary=False, strata=strata
        )

    # 2nd element contains the expected y values, 3rd element the predicted y values.
    exp_y = rfse_data[1]
    pred_y = rfse_data[2]

    # Getting the expected classes.
    exp_cls_tags_set = np.unique(exp_y)

    # Calculating contingency table.
    conf_mtrx = mx.seq_contingency_table(
        exp_y, pred_y, exp_cls_tags_set=exp_cls_tags_set, arr_type=np.int32
    )

    pr_tbl = None

    if prereccon in (0, 1):

        # Getting the number of samples per class. Zero tag is included.
        # The builtin int is used as dtype: the 'np.int' alias no longer exists in NumPy.
        smpls_per_cls = np.bincount(np.array(exp_y, dtype=int))

        zero_bin_cnt = smpls_per_cls[0]
        min_cls_tag = exp_cls_tags_set[0]

        if zero_bin_cnt == 0 and min_cls_tag > 0:
            # Class tags start above zero: dropping the (empty) zero bin.
            smpls_per_cls = smpls_per_cls[1:]
        elif not (zero_bin_cnt > 0 and min_cls_tag == 0):
            # Anything other than "zero tag present and counted" is inconsistent.
            raise Exception("Samples count in zero bin is different to the expected class tag cnt!")

        diag = np.diag(conf_mtrx)

        # Calculating Precision per class, using the row sums of the contingency table as...
        # ...denominators.
        # NOTE(review): classes with a zero denominator are dropped by the filters below, so...
        # ...list positions may stop matching class indices -- confirm this is intended.
        precisions = [
            dg / float(pred_docs)
            for dg, pred_docs in zip(diag, np.sum(conf_mtrx, axis=1))
            if pred_docs > 0
        ]

        # Calculating Recall per class, using the expected samples count per class.
        recalls = [
            dg / float(splpc)
            for dg, splpc in zip(diag, smpls_per_cls)
            if splpc > 0
        ]

        pr_tbl = [precisions, recalls]

    if prereccon in (0, 2):
        # Appending the column sums as an extra last row of the contingency table.
        col_sums = conf_mtrx.sum(axis=0)
        conf_mtrx = np.vstack((conf_mtrx, col_sums))

    if prereccon == 0:
        return (pr_tbl, conf_mtrx)

    if prereccon == 1:
        return pr_tbl

    # prereccon == 2, every other value was rejected at the top.
    return conf_mtrx
elif comb_val[3] == 'Cosine' or comb_val[3] == '': h5d_fl1 = tb.open_file(h5d_fl + '.h5', 'r') else: raise Exception("Option: " + comb_val[3] + " is not valid for Measure Option") # Getting the predictions if comb_val[3] == 'Comb': # Building the parapmeters path params_path = plist2ppath(comb_val[4], ensbl=comb_val[0]) pred_scores, expd_y, pred_y = rfse_multiclass_multimeasure_res( # h5d_fl1, h5d_fl2, kfolds, params_path, comb_val[4][2], # genre_tag=None, binary=True, strata=None h5d_fl1, h5d_fl2, kfolds, params_path, binary=False, strata=None # binary=False <- for Micro ) else: # Building the parapmeters path params_path = plist2ppath(comb_val[4], ensbl=comb_val[0]) pred_scores, expd_y, pred_y = multiclass_res( h5d_fl1, kfolds, params_path, binary=False, strata=None ) # Closing the h5d files. if comb_val[3] == 'Comb': h5d_fl1.close()