def get_mutation_fixation_trajectories(population):
    """Build per-timepoint mutation trajectories for one population.

    Parameters
    ----------
    population : str
        Population identifier passed through to the timecourse parsers.

    Returns
    -------
    (times, Ms, fixed_Ms) : tuple of numpy arrays
        times    -- the timepoint grid (taken from the first mutation's record),
        Ms       -- summed interpolated allele frequencies across mutations,
        fixed_Ms -- count of mutations that have fixed by each timepoint.
    """
    mutations, depth_tuple = parse_annotated_timecourse(population)
    population_avg_depth_times, population_avg_depths, clone_avg_depth_times, clone_avg_depths = depth_tuple
    state_times, state_trajectories = parse_well_mixed_state_timecourse(population)

    # BUG FIX: the 18-field mutation tuple unpacked below places `times` at
    # index 12 (index 9 is `cutoff_idx`).  run_analyses() uses [12] with the
    # same record layout; [9] here was a stale index from the old 15-field format.
    times = mutations[0][12]
    # Float accumulators on the shared timepoint grid.
    Ms = numpy.zeros_like(times) * 1.0
    fixed_Ms = numpy.zeros_like(times) * 1.0

    for mutation_idx in range(0, len(mutations)):
        location, gene_name, allele, var_type, codon, position_in_codon, AAs_count, test_statistic, pvalue, cutoff_idx, depth_fold_change, depth_change_pvalue, times, alts, depths, clone_times, clone_alts, clone_depths = mutations[mutation_idx]

        state_Ls = state_trajectories[mutation_idx]

        # Drop timepoints flagged as unreliable (depth changes, etc.).
        good_idxs, filtered_alts, filtered_depths = timecourse_utils.mask_timepoints(times, alts, depths, var_type, cutoff_idx, depth_fold_change, depth_change_pvalue)

        freqs = timecourse_utils.estimate_frequencies(filtered_alts, filtered_depths)

        masked_times = times[good_idxs]
        masked_freqs = freqs[good_idxs]
        masked_state_Ls = state_Ls[good_idxs]

        t0, tf, transit_time = timecourse_utils.calculate_appearance_fixation_time_from_hmm(masked_times, masked_freqs, masked_state_Ls)
        # The HMM returns all-None when no appearance/fixation event is found;
        # use identity comparison with None (PEP 8) instead of `== None` chains.
        if (t0 is None) and (tf is None) and (transit_time is None):
            continue

        interpolating_function = timecourse_utils.create_interpolation_function(masked_times, masked_freqs, tmax=100000)

        fs = interpolating_function(times)
        # Interpolation can dip slightly below zero between points; clamp.
        fs[fs < 0] = 0

        # Record this mutation's contribution on the shared time grid.
        Ms += fs
        if masked_state_Ls[-1] in well_mixed_fixed_states:
            # Count the mutation as fixed from its fixation time tf onward.
            fixed_Ms += (times >= tf)

    return times, Ms, fixed_Ms
# NOTE(review): this run of statements has no enclosing `def` in view and
# references names defined nowhere in this file (`transit_times`,
# `haplotype_trajectories`, `population`, `mutations`).  It unpacks the older
# 15-field mutation tuple, so it appears to be a dead leftover duplicate of the
# loop in get_mutation_fixation_trajectories -- TODO confirm and delete.
times = mutations[0][10]
Ms = numpy.zeros_like(times) * 1.0
fixed_Ms = numpy.zeros_like(times) * 1.0
transit_times[population] = []

# BUG FIX: `xrange` is Python 2 only and raises NameError on Python 3; the rest
# of this file uses `range`.
for mutation_idx in range(0, len(mutations)):
    location, gene_name, allele, var_type, test_statistic, pvalue, cutoff_idx, depth_fold_change, depth_change_pvalue, times, alts, depths, clone_times, clone_alts, clone_depths = mutations[mutation_idx]

    Ls = haplotype_trajectories[mutation_idx]
    state_Ls = state_trajectories[mutation_idx]

    # Drop unreliable timepoints before estimating frequencies.
    good_idxs, filtered_alts, filtered_depths = timecourse_utils.mask_timepoints(times, alts, depths, var_type, cutoff_idx, depth_fold_change, depth_change_pvalue)

    freqs = timecourse_utils.estimate_frequencies(filtered_alts, filtered_depths)

    masked_times = times[good_idxs]
    masked_freqs = freqs[good_idxs]
    masked_state_Ls = state_Ls[good_idxs]

    t0, tf, transit_time = timecourse_utils.calculate_appearance_fixation_time_from_hmm(masked_times, masked_freqs, masked_state_Ls)
    transit_times[population].append(transit_time)

    interpolating_function = timecourse_utils.create_interpolation_function(masked_times, masked_freqs, tmax=100000)

    fs = interpolating_function(times)
    # Clamp interpolation undershoot below zero.
    fs[fs < 0] = 0

    # Record
def run_analyses():
    """Compute pairwise trajectory correlations and frequency-change statistics.

    For every treatment ('0', '1', '2'), taxon, and replicate population:
    parses the annotated timecourse, restricts each mutation to its
    polymorphic segment (HMM state == 3), and for every pair of mutations with
    >= 3 shared polymorphic timepoints records the squared Pearson correlation
    of their per-step frequency changes.  Also records |delta f| and f_{t+1}/f_t
    ratios for all nonzero consecutive frequency pairs.

    Side effects: writes the nested result dict
    {treatment: {taxon: {'r2', 'ratio_f', 'abs_delta_f'}}} to
    <pt.get_path()>/data/mutation_dynamics.pickle, and logs progress to stderr.
    """
    r2s_obs_dict = {}

    for treatment in ['0', '1', '2']:
        r2s_obs_dict[treatment] = {}
        for taxon in taxa:
            r2s_all = []
            ratio_f_all = []
            abs_delta_f_all = []

            for replicate in replicates:
                population = treatment + taxon + replicate
                sys.stderr.write("Processing %s...\n" % population)

                mutations, depth_tuple = parse_file.parse_annotated_timecourse(population)
                population_avg_depth_times, population_avg_depths, clone_avg_depth_times, clone_avg_depths = depth_tuple
                state_times, state_trajectories = parse_file.parse_well_mixed_state_timecourse(population)

                times = mutations[0][12]
                Ms = np.zeros_like(times) * 1.0
                fixed_Ms = np.zeros_like(times) * 1.0

                for mutation_idx_i in range(0, len(mutations)):
                    location_i, gene_name_i, allele_i, var_type_i, codon_i, position_in_codon_i, AAs_count_i, test_statistic_i, pvalue_i, cutoff_idx_i, depth_fold_change_i, depth_change_pvalue_i, times_i, alts_i, depths_i, clone_times_i, clone_alts_i, clone_depths_i = mutations[mutation_idx_i]

                    state_Ls_i = state_trajectories[mutation_idx_i]

                    good_idx_i, filtered_alts_i, filtered_depths_i = timecourse_utils.mask_timepoints(times_i, alts_i, depths_i, var_type_i, cutoff_idx_i, depth_fold_change_i, depth_change_pvalue_i)

                    freqs_i = timecourse_utils.estimate_frequencies(filtered_alts_i, filtered_depths_i)

                    # BUG FIX: originally masked the outer `times` (from
                    # mutations[0][12]) with this mutation's mask; use the
                    # per-mutation times_i, matching the sibling function
                    # get_mutation_fixation_trajectories.  (Equivalent when all
                    # mutations share one time vector -- TODO confirm.)
                    masked_times_i = times_i[good_idx_i]
                    masked_freqs_i = freqs_i[good_idx_i]
                    masked_state_Ls_i = state_Ls_i[good_idx_i]

                    # State 3 marks the polymorphic segment of the trajectory.
                    P_idx_i = np.where(masked_state_Ls_i == 3)[0]
                    if len(P_idx_i) < min_trajectory_length:
                        continue
                    first_P_i = P_idx_i[0]
                    last_P_i = P_idx_i[-1]

                    masked_freqs_P_i = masked_freqs_i[first_P_i:last_P_i + 1]
                    masked_times_P_i = masked_times_i[first_P_i:last_P_i + 1]

                    # delta_f = f_{t+1} - f_t, labelled by the earlier timepoint.
                    delta_masked_freqs_P_i = masked_freqs_P_i[1:] - masked_freqs_P_i[:-1]
                    delta_masked_times_P_i = masked_times_P_i[:-1]

                    # |delta f| and ratio statistics over consecutive nonzero frequencies.
                    for freqs_i_k, freqs_i_l in zip(freqs_i[1:], freqs_i[:-1]):
                        if (freqs_i_k == 0) or (freqs_i_l == 0):
                            continue
                        abs_delta_f_all.append(np.absolute(freqs_i_k - freqs_i_l))
                        ratio_f_all.append(freqs_i_k / freqs_i_l)

                    for mutation_idx_j in range(mutation_idx_i + 1, len(mutations)):
                        location_j, gene_name_j, allele_j, var_type_j, codon_j, position_in_codon_j, AAs_count_j, test_statistic_j, pvalue_j, cutoff_jdx_j, depth_fold_change_j, depth_change_pvalue_j, times_j, alts_j, depths_j, clone_times_j, clone_alts_j, clone_depths_j = mutations[mutation_idx_j]

                        state_Ls_j = state_trajectories[mutation_idx_j]

                        good_idx_j, filtered_alts_j, filtered_depths_j = timecourse_utils.mask_timepoints(times_j, alts_j, depths_j, var_type_j, cutoff_jdx_j, depth_fold_change_j, depth_change_pvalue_j)

                        freqs_j = timecourse_utils.estimate_frequencies(filtered_alts_j, filtered_depths_j)

                        # BUG FIX: same outer-`times` masking issue as above.
                        masked_times_j = times_j[good_idx_j]
                        masked_freqs_j = freqs_j[good_idx_j]
                        masked_state_Ls_j = state_Ls_j[good_idx_j]

                        P_jdx_j = np.where(masked_state_Ls_j == 3)[0]
                        if len(P_jdx_j) < min_trajectory_length:
                            continue
                        first_P_j = P_jdx_j[0]
                        last_P_j = P_jdx_j[-1]

                        masked_freqs_P_j = masked_freqs_j[first_P_j:last_P_j + 1]
                        masked_times_P_j = masked_times_j[first_P_j:last_P_j + 1]

                        # delta_f = f_{t+1} - f_t
                        delta_masked_freqs_P_j = masked_freqs_P_j[1:] - masked_freqs_P_j[:-1]
                        delta_masked_times_P_j = masked_times_P_j[:-1]

                        # Correlate frequency changes only at shared timepoints.
                        intersect_times = np.intersect1d(delta_masked_times_P_i, delta_masked_times_P_j)
                        if len(intersect_times) >= 3:
                            intersect_idx_i = [np.where(delta_masked_times_P_i == intersect_time)[0][0] for intersect_time in intersect_times]
                            intersect_delta_i = delta_masked_freqs_P_i[intersect_idx_i]

                            intersect_idx_j = [np.where(delta_masked_times_P_j == intersect_time)[0][0] for intersect_time in intersect_times]
                            intersect_delta_j = delta_masked_freqs_P_j[intersect_idx_j]

                            # BUG FIX: the diagnostic printed len(intersect_delta_j)
                            # twice, so a length mismatch could never be seen.
                            if len(intersect_delta_i) != len(intersect_delta_j):
                                print(len(intersect_delta_i), len(intersect_delta_j))

                            r2 = stats.pearsonr(intersect_delta_i, intersect_delta_j)[0] ** 2
                            r2s_all.append(r2)

            r2s_all = np.asarray(r2s_all)
            ratio_f_all = np.asarray(ratio_f_all)
            abs_delta_f_all = np.asarray(abs_delta_f_all)

            r2s_obs_dict[treatment][taxon] = {}
            r2s_obs_dict[treatment][taxon]['r2'] = r2s_all
            r2s_obs_dict[treatment][taxon]['ratio_f'] = ratio_f_all
            r2s_obs_dict[treatment][taxon]['abs_delta_f'] = abs_delta_f_all

    with open(pt.get_path() + '/data/mutation_dynamics.pickle', 'wb') as handle:
        pickle.dump(r2s_obs_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)