def run_analyses():
    """Correlate allele-frequency dynamics between pairs of mutations.

    For each treatment ('0', '1', '2') and each taxon, iterates over all
    replicate populations and:

      * loads the annotated mutation timecourse and the well-mixed state
        trajectories for the population,
      * masks unreliable timepoints and estimates allele frequencies for
        each mutation,
      * restricts each mutation to its polymorphic window (state code 3),
        skipping trajectories with fewer than ``min_trajectory_length``
        polymorphic timepoints,
      * accumulates |f_{t+1} - f_t| and f_{t+1} / f_t over successive
        nonzero frequencies of each surviving mutation, and
      * for every pair of surviving mutations sharing at least three
        polymorphic timepoints, records the squared Pearson correlation
        of their frequency increments.

    Results are stored as
    ``r2s_obs_dict[treatment][taxon] = {'r2', 'ratio_f', 'abs_delta_f'}``
    (numpy arrays) and pickled to
    ``pt.get_path() + '/data/mutation_dynamics.pickle'``.

    Depends on module-level names: ``taxa``, ``replicates``,
    ``min_trajectory_length``, ``parse_file``, ``timecourse_utils``,
    ``pt``, ``np``, ``stats``, ``sys``, ``pickle``.
    """
    r2s_obs_dict = {}
    for treatment in ['0', '1', '2']:
        r2s_obs_dict[treatment] = {}
        for taxon in taxa:
            r2s_all = []
            ratio_f_all = []
            abs_delta_f_all = []
            for replicate in replicates:

                population = treatment + taxon + replicate
                sys.stderr.write("Processing %s...\n" % population)

                mutations, depth_tuple = parse_file.parse_annotated_timecourse(
                    population)
                population_avg_depth_times, population_avg_depths, clone_avg_depth_times, clone_avg_depths = depth_tuple
                state_times, state_trajectories = parse_file.parse_well_mixed_state_timecourse(
                    population)

                # Guard against populations with no annotated mutations;
                # the original indexed mutations[0] unconditionally.
                if len(mutations) == 0:
                    continue

                # All mutations in a population appear to share the same
                # sampling times, so they are taken from the first record
                # (field 12) — TODO confirm against parse_file.
                times = mutations[0][12]

                for mutation_idx_i in range(0, len(mutations)):

                    location_i, gene_name_i, allele_i, var_type_i, codon_i, position_in_codon_i, AAs_count_i, test_statistic_i, pvalue_i, cutoff_idx_i, depth_fold_change_i, depth_change_pvalue_i, times_i, alts_i, depths_i, clone_times_i, clone_alts_i, clone_depths_i = mutations[
                        mutation_idx_i]

                    state_Ls_i = state_trajectories[mutation_idx_i]
                    good_idx_i, filtered_alts_i, filtered_depths_i = timecourse_utils.mask_timepoints(
                        times_i, alts_i, depths_i, var_type_i, cutoff_idx_i,
                        depth_fold_change_i, depth_change_pvalue_i)
                    freqs_i = timecourse_utils.estimate_frequencies(
                        filtered_alts_i, filtered_depths_i)

                    masked_times_i = times[good_idx_i]
                    masked_freqs_i = freqs_i[good_idx_i]
                    masked_state_Ls_i = state_Ls_i[good_idx_i]

                    # State code 3 marks polymorphic timepoints; require a
                    # minimum-length polymorphic window.
                    P_idx_i = np.where(masked_state_Ls_i == 3)[0]
                    if len(P_idx_i) < min_trajectory_length:
                        continue
                    first_P_i = P_idx_i[0]
                    last_P_i = P_idx_i[-1]

                    masked_freqs_P_i = masked_freqs_i[first_P_i:last_P_i + 1]
                    masked_times_P_i = masked_times_i[first_P_i:last_P_i + 1]

                    # delta_f = f_{t+1} - f_t, labelled by the earlier
                    # timepoint t.
                    delta_masked_freqs_P_i = masked_freqs_P_i[
                        1:] - masked_freqs_P_i[:-1]
                    delta_masked_times_P_i = masked_times_P_i[:-1]

                    # Accumulate |delta f| and frequency ratios over
                    # successive pairs of nonzero (unmasked) frequencies.
                    for freqs_i_k, freqs_i_l in zip(freqs_i[1:], freqs_i[:-1]):
                        if (freqs_i_k == 0) or (freqs_i_l == 0):
                            continue
                        abs_delta_f_all.append(
                            np.absolute(freqs_i_k - freqs_i_l))
                        ratio_f_all.append(freqs_i_k / freqs_i_l)

                    for mutation_idx_j in range(mutation_idx_i + 1,
                                                len(mutations)):

                        location_j, gene_name_j, allele_j, var_type_j, codon_j, position_in_codon_j, AAs_count_j, test_statistic_j, pvalue_j, cutoff_jdx_j, depth_fold_change_j, depth_change_pvalue_j, times_j, alts_j, depths_j, clone_times_j, clone_alts_j, clone_depths_j = mutations[
                            mutation_idx_j]

                        state_Ls_j = state_trajectories[mutation_idx_j]
                        good_idx_j, filtered_alts_j, filtered_depths_j = timecourse_utils.mask_timepoints(
                            times_j, alts_j, depths_j, var_type_j,
                            cutoff_jdx_j, depth_fold_change_j,
                            depth_change_pvalue_j)
                        freqs_j = timecourse_utils.estimate_frequencies(
                            filtered_alts_j, filtered_depths_j)

                        masked_times_j = times[good_idx_j]
                        masked_freqs_j = freqs_j[good_idx_j]
                        masked_state_Ls_j = state_Ls_j[good_idx_j]

                        P_jdx_j = np.where(masked_state_Ls_j == 3)[0]
                        if len(P_jdx_j) < min_trajectory_length:
                            continue
                        first_P_j = P_jdx_j[0]
                        last_P_j = P_jdx_j[-1]

                        masked_freqs_P_j = masked_freqs_j[first_P_j:last_P_j +
                                                          1]
                        masked_times_P_j = masked_times_j[first_P_j:last_P_j +
                                                          1]

                        delta_masked_freqs_P_j = masked_freqs_P_j[
                            1:] - masked_freqs_P_j[:-1]
                        delta_masked_times_P_j = masked_times_P_j[:-1]

                        # Timepoints where both mutations have a defined
                        # polymorphic frequency increment.
                        intersect_times = np.intersect1d(
                            delta_masked_times_P_i, delta_masked_times_P_j)

                        if len(intersect_times) >= 3:

                            intersect_idx_i = [
                                np.where(delta_masked_times_P_i ==
                                         intersect_time)[0][0]
                                for intersect_time in intersect_times
                            ]
                            intersect_delta_i = delta_masked_freqs_P_i[
                                intersect_idx_i]

                            intersect_idx_j = [
                                np.where(delta_masked_times_P_j ==
                                         intersect_time)[0][0]
                                for intersect_time in intersect_times
                            ]
                            intersect_delta_j = delta_masked_freqs_P_j[
                                intersect_idx_j]

                            # Sanity check: the matched increment vectors
                            # must agree in length.  BUGFIX: the original
                            # printed len(intersect_delta_j) twice.
                            if len(intersect_delta_i) != len(
                                    intersect_delta_j):
                                print(len(intersect_delta_i),
                                      len(intersect_delta_j))

                            r2 = stats.pearsonr(intersect_delta_i,
                                                intersect_delta_j)[0]**2
                            r2s_all.append(r2)

            r2s_all = np.asarray(r2s_all)
            ratio_f_all = np.asarray(ratio_f_all)
            abs_delta_f_all = np.asarray(abs_delta_f_all)

            r2s_obs_dict[treatment][taxon] = {}
            r2s_obs_dict[treatment][taxon]['r2'] = r2s_all
            r2s_obs_dict[treatment][taxon]['ratio_f'] = ratio_f_all
            r2s_obs_dict[treatment][taxon]['abs_delta_f'] = abs_delta_f_all

    with open(pt.get_path() + '/data/mutation_dynamics.pickle',
              'wb') as handle:
        pickle.dump(r2s_obs_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Example #2
from scipy.special import gammaln as loggamma
from math import log, exp

# Collect population-averaged sequencing depths (coverage >= 5x) across
# all populations, keeping a separate pool for the non-mutator lines.
populations = parse_file.all_lines

coverages = []
nonmutator_coverages = []

for population in populations:

    sys.stderr.write("Processing %s...\t" %
                     parse_file.get_pretty_name(population))

    # Only the population-averaged depths from the depth tuple are needed
    # here; the mutation records themselves are unused.
    _, depth_tuple = parse_file.parse_annotated_timecourse(population)
    avg_depths = depth_tuple[1]

    well_covered = avg_depths[avg_depths >= 5]
    coverages.extend(well_covered)

    # Non-mutator lines additionally contribute to the restricted pool.
    if population in parse_file.complete_nonmutator_lines:
        nonmutator_coverages.extend(well_covered)

    sys.stderr.write("Done!\n")

coverages = numpy.array(coverages)
nonmutator_coverages = numpy.array(nonmutator_coverages)

print "All populations: n=%d mean=%g median=%g\n" % (
    len(coverages), numpy.mean(coverages), numpy.median(coverages))
import sys
import pylab
import numpy
import bz2
import parse_file

# Target population and the mutation locations to plot come from the
# command line: argv[1] is the population, the rest are locations.
population = sys.argv[1]
desired_locations = set(long(item) for item in sys.argv[2:])

# Three wide, short panels for the trajectory plots.
for fig_idx in (1, 2, 3):
    pylab.figure(fig_idx, figsize=(17, 2))

# Load every mutation record, including those that failed filtering.
mutations, depth_tuple = parse_file.parse_annotated_timecourse(
    population, only_passed=False)

population_avg_depth_times, population_avg_depths, clone_avg_depth_times, clone_avg_depths = depth_tuple

for mutation_idx in xrange(0, len(mutations)):

    location, gene_name, allele, var_type, test_statistic, pvalue, cutoff_idx, depth_fold_change, depth_change_pvalue, times, alts, depths, clone_times, clone_alts, clone_depths = mutations[
        mutation_idx]

    if location in desired_locations:

        depth_ratios = depths * 1.0 / population_avg_depths

        times = times[population_avg_depths > 5]
        alts = alts[population_avg_depths > 5]
        depth_ratios = depth_ratios[population_avg_depths > 5]
        depths = depths[population_avg_depths > 5]