dist_matrix = sp.spatial.distance.squareform( sp.spatial.distance.pdist(sim_data_obj.pi.dot(sim_data_obj.MU), 'cosine')) if len(dist_matrix[dist_matrix > 0]): min_dist = np.min(dist_matrix[dist_matrix > 0]) max_dist = np.max(dist_matrix[dist_matrix > 0]) avg_dist = np.mean(dist_matrix[dist_matrix > 0]) else: min_dist, max_dist, avg_dist = np.nan, np.nan, np.nan avg_major_cn = np.mean(sim_data_obj.C_tumor_tot - sim_data_obj.C_tumor_minor) avg_tot_cn = np.mean(sim_data_obj.C_tumor_tot) actual_perc_diploid = sum((sim_data_obj.C_tumor_tot == 2) & ( sim_data_obj.C_tumor_minor == 1)) / sim_data_obj.N MU = get_MU() subMU = get_MU(cancer_type=cancer_type) id_list = list() metrics_list = list() mu_mat = {'all': MU, 'cancertype': subMU} for setting in ('all', 'cancertype', 'prefit'): mixture_file = pd.read_csv( '{}/tracksig/tracksig_mixtures_{}.csv'.format( folder_path, setting), sep=',') try: changepoint_file = pd.read_csv( '{}/tracksig/tracksig_changepoints_{}.txt'.format( folder_path, setting), header=None,
def _param_from_folder(path, key, cast=int):
    """Return the value written as ``<key><value>`` in a '-'-separated path.

    The value is the text that follows ``key`` up to the next '-' (or the end
    of the path), converted with ``cast``.  Occurrences of ``key`` are tried
    from left to right and the first one whose value converts cleanly wins, so
    an unrelated earlier match (e.g. 'pi' inside a parent directory name) no
    longer aborts the script.  Whenever the first occurrence converts, the
    result is the same as ``cast(path.split(key)[1].split('-')[0])``.

    Raises:
        ValueError: if ``key`` is absent or no occurrence is followed by a
            value that ``cast`` accepts.
    """
    # Every piece after the first one is the text following one occurrence
    # of ``key``.
    for tail in path.split(key)[1:]:
        token = tail.split('-')[0]
        try:
            return cast(token)
        except ValueError:
            # Not a parameter value; try the next occurrence of the key.
            continue
    raise ValueError(
        'no {!r} parameter found in folder path {!r}'.format(key, path))


# Simulation folder to analyse, given as the first command-line argument.
folder_path = sys.argv[1]
MIXTURE_THRESHOLD = 0.05
# Example value kept from the original script for manual runs.
# NOTE(review): this example lacks the 'pi', 'phi', 'depth' and 'percdip'
# keys parsed below, so it presumably predates the current folder naming
# scheme -- confirm or update.
# folder_path = '20190623_simulations_clonesig_cn_cancer_type/type2-perc_diploid100-nb_clones4-nb_mut100'

# Simulation settings are encoded in the folder name as '<key><value>' tokens
# separated by '-'.
nb_pi = _param_from_folder(folder_path, 'pi')
nb_phi = _param_from_folder(folder_path, 'phi')
depth = _param_from_folder(folder_path, 'depth')
nb_mut = _param_from_folder(folder_path, 'nb_mut')
perc_diploid = _param_from_folder(folder_path, 'percdip', cast=float)
nb_clones = 2
cancer_type = None

MU = get_MU()
data_df = pd.read_csv('{}/input_t.tsv'.format(folder_path), sep='\t')
with open('{}/purity.txt'.format(folder_path), 'r') as f:
    purity = float(f.read())

# get metrics from simulated data
# NOTE: unpickling executes arbitrary code; only run this on simulation
# folders produced by a trusted pipeline.
with open('{}/sim_data'.format(folder_path), 'rb') as sim_pickle_file:
    sim_pickle = pickle.Unpickler(sim_pickle_file)
    sim_data_obj = sim_pickle.load()

# Pairwise cosine distances between the rows of pi . MU, as a square matrix.
dist_matrix = sp.spatial.distance.squareform(
    sp.spatial.distance.pdist(sim_data_obj.pi.dot(sim_data_obj.MU), 'cosine'))
# Keep only strictly positive distances (this drops the zero diagonal and any
# identical pairs); computed once and reused for all three statistics.
positive_dists = dist_matrix[dist_matrix > 0]
if positive_dists.size:
    min_dist = np.min(positive_dists)
    max_dist = np.max(positive_dists)
    avg_dist = np.mean(positive_dists)