def calc_pairs(self, labels, data):
     """
     Compute the scaled U statistic of every operation for every label pair.

     Parameters:
     -----------
     labels : ndarray
         1-D array holding one label per row of data.
     data : ndarray
         Data array; every row is a timeseries, every column an operation.
     Returns:
     --------
     ranks : ndarray
         First value returned by fistat.u_stat_all_label divided by the
         second one, the latter expanded with a new trailing axis.

     """
     # Only the first two return values of the helper are needed here.
     u_stat_result = fistat.u_stat_all_label(data, labels=labels)
     u_values, norm_factors = u_stat_result[0:2]
     # A trailing axis on the factors lets each one scale a whole row.
     scaled_u_values = u_values / norm_factors[:, np.newaxis]
     return scaled_u_values
 def calc_pairs(self, labels, data):
     """
     Return the normalised U statistic per label pairing and operation.

     Parameters:
     -----------
     labels : ndarray
         1-D array with the label belonging to each row of data.
     data : ndarray
         Array of data; rows correspond to timeseries, columns to operations.
     Returns:
     --------
     ranks : ndarray
         The U statistics from fistat.u_stat_all_label divided by the
         normalisation values it returns alongside them.

     """
     # Keep just the first two items the helper hands back.
     stats_and_norm = fistat.u_stat_all_label(data, labels=labels)[0:2]
     raw_ranks, norm = stats_and_norm
     # Turn the 1-D factors into a column so the division broadcasts per row.
     norm_column = norm[:, np.newaxis]
     return raw_ranks / norm_column
    
if COMPUTE_COMPLETE_DATA:
    # Gather the data of all .mat files through the fap helper; the two output
    # paths are handed over as-is and an unmasked result is requested.
    data_all, op_id_good = fap.cat_data_from_matfile_root(
        mat_file_paths,
        count_op_id_min,
        is_from_old_matlab=IS_FROM_OLD_MATLAB,
        data_all_good_op_path=data_all_good_op_path,
        op_id_good_path=op_id_good_path,
        is_return_masked=False)

# -- Create masked array from data_all    
# data_all = np.ma.masked_invalid(data_all)

# ---------------------------------------------------------------------------------
# -- Calculate U_statistics for the problems
# ---------------------------------------------------------------------------------   
if CALCULATE_U_STATS:

    # -- skip problems with already calculated U-stats
    if CALCULATE_ONLY_NEW_U_STATS:
        task_names = tstat.filter_calculated(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat')
        # One .mat path per remaining task name, in the same order.
        file_paths = [
            mat_file_root + "HCTSA_{0:s}_N_70_100_reduced.mat".format(s)
            for s in task_names
        ]

    # -- calculate U-stats for all problems
    else:
        file_paths = mat_file_paths
        _, task_names = tstat.get_calculated_names(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat')

    # Pass the branch-selected file_paths rather than mat_file_paths: the
    # variable was computed above but never used, so in the "only new" mode the
    # filtered task_names were combined with the unfiltered path list.
    # NOTE(review): assumes the tstat helpers pair paths with task_names /
    # u_stat_file_paths by position -- confirm against their implementation.
    u_stat_file_paths = tstat.calculate_ustat_mult_tasks(
        file_paths,
        task_names,
        ustat_data_out_folder,
        is_from_old_matlab=IS_FROM_OLD_MATLAB)
    if CALCULATE_U_STATS_ALL_CLASSES_AVG:
        # Same path list as above so it lines up with u_stat_file_paths.
        all_classes_avg = tstat.calculate_ustat_avg_mult_task(
            file_paths,
            u_stat_file_paths,
            all_classes_avg_out_path,
            is_from_old_matlab=IS_FROM_OLD_MATLAB)


  
  
# ---------------------------------------------------------------------------------
 def calc_pairs(self, labels, data):
     """
     Scale the U statistics returned by fistat.u_stat_all_label.

     Parameters:
     -----------
     labels : ndarray
         Labels passed on to fistat.u_stat_all_label.
     data : ndarray
         Data passed on to fistat.u_stat_all_label.
     Returns:
     --------
     ranks : ndarray
         First return value of the helper divided by the second one, which
         is given a new trailing axis before the division.

     """
     # Unpack only the first two values of the helper's result.
     u_values, scale = fistat.u_stat_all_label(data, labels=labels)[0:2]
     scale_column = scale[:, np.newaxis]
     return u_values / scale_column
# -- Calculate the average min (for each label pair separately) score for every problem
# The recomputation branch is switched off by the literal False below; the
# array stored at avg_min_u_score_path is loaded instead.
if False:
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    avg_min_u_score = np.full(problem_paths.shape[0], np.nan)
    ustat_paths = np.array(glob.glob(intermediate_data_root + "/*_ustat.npy"))
    reg_ex = re.compile("../data/(.*)_ustat.npy")
    ustat_names = np.array([reg_ex.match(ustat_path).group(1) for ustat_path in ustat_paths])
    # -- sort ustat paths to match the problem_paths
    ustat_sort_ind = hlp.ismember(problem_names, ustat_names)
    ustat_paths = ustat_paths[ustat_sort_ind]
    ustat_names = ustat_names[ustat_sort_ind]

    for i, (ustat_path, mat_file_path) in enumerate(zip(ustat_paths, problem_paths)):
        ustat = np.load(ustat_path)
        # -- calculate the scaling factor for every label pairing of the current classification problem
        # NOTE(review): is_from_old_matlab receives the string "True", not a
        # boolean -- confirm this is what u_stat_norm_factor expects.
        u_scale = testst.u_stat_norm_factor(mat_file_path, is_from_old_matlab="True")
        # Parenthesised print is valid on Python 2 and Python 3 alike.
        print(ustat_path)
        # Row-wise minimum of the U statistics, scaled, then averaged.
        avg_min_u_score[i] = (np.min(ustat, axis=1) / u_scale).mean()
    np.save(avg_min_u_score_path, avg_min_u_score)
else:
    avg_min_u_score = np.load(avg_min_u_score_path)
# -- average minimum (for each class pair) U-score for top features
ax_measures10.plot(x_loc, avg_min_u_score[porblem_sort_ind], marker="o", label="avg. min. U-score all")
ax_measures10.legend(loc=2, fontsize="small", labelspacing=0.1)
ax_measures10.set_ylabel("u-score")

# ax_measures1.plot(x_loc,(~all_classes_avg_masked_sort.mask).sum(axis=1))
# plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array.png')
# plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array_z_column.png')

plt.show()
# Example #6
# 0
# The recomputation branch is switched off by the literal False below; the
# array stored at avg_min_u_score_path is loaded instead.
if False:
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    avg_min_u_score = np.full(problem_paths.shape[0], np.nan)
    ustat_paths = np.array(glob.glob(intermediate_data_root + '/*_ustat.npy'))
    reg_ex = re.compile('../data/(.*)_ustat.npy')
    ustat_names = np.array(
        [reg_ex.match(ustat_path).group(1) for ustat_path in ustat_paths])
    # -- sort ustat paths to match the problem_paths
    ustat_sort_ind = hlp.ismember(problem_names, ustat_names)
    ustat_paths = ustat_paths[ustat_sort_ind]
    ustat_names = ustat_names[ustat_sort_ind]

    for i, (ustat_path,
            mat_file_path) in enumerate(zip(ustat_paths, problem_paths)):
        ustat = np.load(ustat_path)
        # -- calculate the scaling factor for every label pairing of the current classification problem
        # NOTE(review): is_from_old_matlab receives the string 'True', not a
        # boolean -- confirm this is what u_stat_norm_factor expects.
        u_scale = testst.u_stat_norm_factor(mat_file_path,
                                            is_from_old_matlab='True')
        # Parenthesised print is valid on Python 2 and Python 3 alike.
        print(ustat_path)
        # Row-wise minimum of the U statistics, scaled, then averaged.
        avg_min_u_score[i] = (np.min(ustat, axis=1) / u_scale).mean()
    np.save(avg_min_u_score_path, avg_min_u_score)
else:
    avg_min_u_score = np.load(avg_min_u_score_path)
# -- average minimum (for each class pair) U-score for top features
ax_measures10.plot(x_loc,
                   avg_min_u_score[porblem_sort_ind],
                   marker='o',
                   label='avg. min. U-score all')
ax_measures10.legend(loc=2, fontsize='small', labelspacing=.1)
ax_measures10.set_ylabel('u-score')

# ax_measures1.plot(x_loc,(~all_classes_avg_masked_sort.mask).sum(axis=1))
#plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array.png')
        data_all_good_op_path=data_all_good_op_path,
        op_id_good_path=op_id_good_path,
        is_return_masked=False)

# -- Create masked array from data_all
# data_all = np.ma.masked_invalid(data_all)

# ---------------------------------------------------------------------------------
# -- Calculate U_statistics for the problems
# ---------------------------------------------------------------------------------
if CALCULATE_U_STATS:

    # -- skip problems with already calculated U-stats
    if CALCULATE_ONLY_NEW_U_STATS:
        task_names = tstat.filter_calculated(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat')
        file_paths = [
            mat_file_root + "HCTSA_{0:s}_N_70_100_reduced.mat".format(s)
            for s in task_names
        ]

    # -- calculate U-stats for all problems
    else:
        file_paths = mat_file_paths
        _, task_names = tstat.get_calculated_names(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat')

    u_stat_file_paths = tstat.calculate_ustat_mult_tasks(
        mat_file_paths,