def calc_abs_corr(self):
    """
    Compute the correlation-based matrix for every column of
    self.ops_base_perf_vals.

    The matrix returned by idtop.calc_perform_corr_mat is stored in
    self.similarity_array; the index vector returned with it is used to
    reorder self.good_perf_op_ids into self.similarity_array_op_ids.
    """
    perf_vals = self.ops_base_perf_vals
    # -- max_feat is the column count, so no column is dropped
    n_feat = perf_vals.shape[1]
    # -- norm=None: the best performing features have been picked already,
    # -- potentially using normalisation, so none is applied here
    corr_mat, perf_order, _ = idtop.calc_perform_corr_mat(
        perf_vals, norm=None, max_feat=n_feat)
    self.similarity_array = corr_mat
    # -- keep the op ids aligned with the ordering used for the matrix
    self.similarity_array_op_ids = self.good_perf_op_ids[perf_order]
def calc_abs_corr(self):
    """
    Compute the correlation-based matrix for every column of
    self.ops_base_perf_vals.

    The matrix returned by idtop.calc_perform_corr_mat is stored in
    self.similarity_array; the index vector returned with it is used to
    reorder self.good_perf_op_ids into self.similarity_array_op_ids.
    """
    perf_vals = self.ops_base_perf_vals
    # -- max_feat is the column count, so no column is dropped
    n_feat = perf_vals.shape[1]
    # -- norm=None: the best performing features have been picked already,
    # -- potentially using normalisation, so none is applied here
    corr_mat, perf_order, _ = idtop.calc_perform_corr_mat(
        perf_vals, norm=None, max_feat=n_feat)
    self.similarity_array = corr_mat
    # -- keep the op ids aligned with the ordering used for the matrix
    self.similarity_array_op_ids = self.good_perf_op_ids[perf_order]
# -- Read the stored arrays from disk
all_classes_avg = np.load(all_classes_avg_out_path)
op_id_good = np.load(op_id_good_path)

# -- Keep only the columns listed in op_id_good and mask the invalid (NaN/inf) entries
all_classes_avg_good = np.ma.masked_invalid(all_classes_avg[:, op_id_good])

# -- Read the 'Operations' entry from a reference HCTSA .mat file containing all op_ids
import modules.misc.PK_matlab_IO as mIO
op_ref_HCTSA_path = '/home/philip/work/OperationImportanceProject/results/done/HCTSA_Beef.mat'
(op,) = mIO.read_from_mat_file(
    op_ref_HCTSA_path,
    ['Operations'],
    is_from_old_matlab=True,
)

# -- number of top features to consider
max_feat = 50

# -- Correlation computation on the z-scored data
(abs_corr_array,
 sort_good_ind,
 all_classes_avg_good_norm) = idtop.calc_perform_corr_mat(
    all_classes_avg_good,
    norm='z-score',
    max_feat=max_feat,
)

# -- op ids reordered by sort_good_ind (first entry = best performance)
op_id_by_perf = op_id_good[sort_good_ind]
# -- column indices of the max_feat leading features
top_feat_ind = sort_good_ind[:max_feat]

# -- Store the ordered op ids
np.save(op_id_order_path, op_id_by_perf)
# -- Store the permutation vector that orders the array of good operations
np.save(sort_good_ind_path, sort_good_ind)

# -- Look up the names of the leading features
names = hlp.ind_map_subset(op['id'], op['name'], op_id_by_perf[:max_feat])

# -- Measures to be plotted
# -- per leading feature: count of entries that are not masked
top_feat_vals = all_classes_avg_good[:, top_feat_ind]
problems_succ = (~top_feat_vals.mask).sum(axis=0)
# -- per leading feature: mean of the normalised values along axis 0
u_stat_mean = all_classes_avg_good_norm[:, top_feat_ind].mean(axis=0)
# -- Output locations for intermediate results
problem_names_path = intermediate_data_root+'problem_names.npy'
measures_problems_path = intermediate_data_root+'measure_problems.npy'

# -- Load the data
all_classes_avg = np.load(all_classes_avg_out_path)
op_id_order = np.load(op_id_order_path)
op_id_good = np.load(op_id_good_path)

# -- number of top features to consider
max_feat = 50
max_corr_dist = 0.2

# -- Disabled variant (commented out in the original):
# # -- mask all nan values and take top 200 features
# all_classes_avg_top = np.ma.masked_invalid(all_classes_avg[:,op_id_order[:100]])
# # -- calculate the z-score of the u stat array
# all_classes_avg_top = ((all_classes_avg_top.T - np.ma.mean(all_classes_avg_top,axis=1)) / np.ma.std(all_classes_avg_top,axis=1)).T
# abs_corr_array = np.abs(np.ma.corrcoef(all_classes_avg_top, rowvar=0))

# -- calculate the correlation array with respect to performance and mask nan.
# -- NOTE: all_classes_avg[:, op_id_good] is intentionally indexed afresh for
# -- each use below, so the helper never shares an array with later code.
abs_corr_array, sort_good_ind, all_classes_avg_good_norm = idtop.calc_perform_corr_mat(
    all_classes_avg[:, op_id_good], norm='z-score', max_feat=max_feat)
# -- columns of the max_feat leading features, with invalid entries masked
all_classes_avg_top = np.ma.masked_invalid(
    all_classes_avg[:, op_id_good][:, sort_good_ind[:max_feat]])

# -- calculate the linkage for the correlation (first element of the returned sequence)
corr_linkage = idtop.calc_linkage(abs_corr_array)[0]

# -- extract operation names --- ------------------------------------------
# -- load a reference HCTSA_loc.mat containing all op_ids
op_ref_HCTSA_path = '/home/philip/work/OperationImportanceProject/results/done/HCTSA_Beef.mat'
op, = mIO.read_from_mat_file(op_ref_HCTSA_path, ['Operations'], is_from_old_matlab=True)

# -- ids of the max_feat leading operations; reused for the name lookup
top_id = op_id_good[sort_good_ind][:max_feat]
names = hlp.ind_map_subset(op['id'], op['name'], top_id)

# -- extract problem names --- ------------------------------------------
# -- raw string: '\/' is not a valid escape in a normal string literal; the
# -- compiled pattern is unchanged.
# -- NOTE(review): the '.' before 'mat' is unescaped and matches any character.
reg_ex = re.compile(r'.*\/HCTSA_(.*)_N_70_100_reduced.mat')