Ejemplo n.º 1
0
    # For every taxon: compute one ratio per replicate population of the
    # current treatment, scatter the replicate values, overlay their mean with
    # an error bar, and collect the values in dnds_treatment.
    # NOTE(review): treatment, replicates, taxa, ax, jitter_shift, Lsyn, Lnon,
    # non_appeared, syn_appeared and dnds_treatment are defined outside this
    # excerpt — confirm their types against the enclosing code.
    for taxon_idx, taxon in enumerate(taxa):

        # Population keys are the concatenation treatment + taxon + replicate.
        populations_plot = [
            treatment + taxon + replicate for replicate in replicates
        ]

        # non_appeared / syn_appeared, rescaled by Lsyn / Lnon (presumably
        # nonsynonymous vs. synonymous counts and site numbers — TODO confirm).
        # The boolean (syn_appeared == 0) adds 1 to the denominator exactly
        # when the count is zero, which avoids a division by zero.
        taxon_treatment_dnds_appeared = [
            non_appeared[population] / (syn_appeared[population] +
                                        (syn_appeared[population] == 0)) *
            Lsyn / Lnon for population in populations_plot
        ]

        # One point per replicate; all replicates of a taxon share the same x
        # position (treatment index plus a per-taxon jitter offset).
        ax.scatter([int(treatment) + jitter_shift[taxon_idx]] *
                   len(taxon_treatment_dnds_appeared),
                   taxon_treatment_dnds_appeared,
                   marker=pt.plot_species_marker(taxon),
                   linewidth=2,
                   facecolors=pt.get_scatter_facecolor(taxon, treatment),
                   edgecolors=pt.get_colors(treatment),
                   s=120,
                   zorder=2)
        # Mean across replicates; the bar half-width is 2 * std / sqrt(n),
        # where numpy.std uses its default ddof=0.
        ax.errorbar(int(treatment) + jitter_shift[taxon_idx],
                    numpy.mean(taxon_treatment_dnds_appeared),
                    yerr=2 * numpy.std(taxon_treatment_dnds_appeared) /
                    numpy.sqrt(len(taxon_treatment_dnds_appeared)),
                    linestyle='-',
                    c='k',
                    marker=pt.plot_species_marker(taxon),
                    lw=2.5)
        dnds_treatment.append(taxon_treatment_dnds_appeared)
Ejemplo n.º 2
0
            # Upper-bound accumulator; delta_l_list and delta_025 are
            # presumably initialised just above this excerpt — TODO confirm.
            delta_975 = []

            # Collect the 'G_mean', 'G_025' and 'G_975' entries stored for this
            # taxon/treatment at every fmax cutoff.
            for fmax_cutoff in fmax_cutoffs:
                delta_l_list.append(
                    G_dict_all[taxon][treatment][fmax_cutoff]['G_mean'])
                delta_025.append(
                    G_dict_all[taxon][treatment][fmax_cutoff]['G_025'])
                delta_975.append(
                    G_dict_all[taxon][treatment][fmax_cutoff]['G_975'])

            # Arrays are needed for the element-wise subtractions below.
            delta_l_list = np.asarray(delta_l_list)
            delta_025 = np.asarray(delta_025)
            delta_975 = np.asarray(delta_975)

            # Asymmetric error bars: distance from G_mean down to G_025 and up
            # to G_975. The white marker sits underneath (zorder=1) the
            # coloured marker drawn by the scatter call that follows.
            ax.errorbar(fmax_cutoffs, delta_l_list, yerr = [ delta_l_list-delta_025,  delta_975-delta_l_list] , \
                    fmt = 'o', alpha = 1, barsabove = True, marker = pt.plot_species_marker(taxon), \
                    mfc = 'white', mec = 'white', lw=3, c = 'k', zorder=1, ms=17)

            # Coloured marker on top of the error bar (zorder=2).
            ax.scatter(fmax_cutoffs, delta_l_list, marker=pt.plot_species_marker(taxon), s = 150, \
                linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2)

# now do divergence

significant_multiplicity_dict = {}

for taxon in pt.taxa:
    significant_multiplicity_dict[taxon] = {}
    for treatment_idx, treatment in enumerate(pt.treatments):

        significant_multiplicity_taxon_path = pt.get_path(
        ) + '/data/timecourse_final/parallel_genes_%s.txt' % (treatment +
Ejemplo n.º 3
0
def plot_mutation_trajectory_taxon(taxon):
    """Plot mutation-trajectory summaries for one taxon and save them as a PDF.

    The figure has one column per transfer treatment and five rows: the
    mutation trajectory M(t), the fold change M(t)/M(t-1), the fixed-mutation
    trajectory, the median mutation frequency and the number of mutations at
    each time point.  Populations listed in ``pt.populations_to_ignore`` are
    skipped.  The result is written to
    ``<pt.get_path()>/figs/rate_<taxon>.pdf``.

    Args:
        taxon: Taxon code used to build population names
            (``treatment + taxon + replicate``).  Taxon ``'J'`` is plotted
            for treatments ``'0'`` and ``'2'`` only; every other taxon uses
            ``pt.treatments``.
    """
    if taxon == 'J':
        treatments = ['0', '2']
        sub_plot_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
        sub_plot_count_step = 2
        dim = (6, 15)
    else:
        treatments = pt.treatments
        sub_plot_labels = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o'
        ]
        sub_plot_count_step = 3
        dim = (10, 15)

    sys.stderr.write("Loading mutation data...\n")

    # Each dict maps population name -> (times, values).
    mutation_trajectories = {}
    fixed_mutation_trajectories = {}
    delta_mutation_trajectories = {}
    median_trajectories = {}
    n_muts_trajectories = {}

    for treatment in treatments:
        for replicate in pt.replicates:

            population = treatment + taxon + replicate
            if population in pt.populations_to_ignore:
                continue

            sys.stderr.write("Processing %s...\t" % population)

            times, Ms, fixed_Ms = parse_file.get_mutation_fixation_trajectories(
                population)
            times_, medians_log10, num_muts = parse_file.get_mutation_fixation_trajectories_median_freq_and_mut_number(
                population)

            # The parser hands back a bare float instead of an array in some
            # cases; substitute an all-zero trajectory so it can be plotted.
            if isinstance(fixed_Ms, float):
                fixed_Ms = np.asarray([0] * len(times))

            fixed_mutation_trajectories[population] = (times, fixed_Ms)
            # M(t) and its step-to-step ratio are stored on a log10 scale and
            # back-transformed when plotted.
            mutation_trajectories[population] = (times, np.log10(Ms))
            delta_mutation_trajectories[population] = (
                times[1:], np.log10(Ms[1:] / Ms[:-1]))

            median_trajectories[population] = (times_, medians_log10)
            n_muts_trajectories[population] = (times_, num_muts)

            sys.stderr.write("analyzed %d mutations!\n" % len(Ms))

    fig = plt.figure(figsize=dim)
    grid_shape = (5, len(treatments))

    for column_count, treatment in enumerate(treatments):

        column_axes = [
            plt.subplot2grid(grid_shape, (row, column_count), colspan=1)
            for row in range(5)
        ]
        (ax_t_vs_M, ax_t_vs_delta_M, ax_t_vs_F, ax_t_vs_median_freq,
         ax_t_vs_number_muts) = column_axes

        # Panel letters run left-to-right, then top-to-bottom.
        for row, panel_ax in enumerate(column_axes):
            panel_ax.text(-0.1,
                          1.07,
                          sub_plot_labels[column_count +
                                          sub_plot_count_step * row],
                          fontsize=14,
                          fontweight='bold',
                          ha='center',
                          va='center',
                          transform=panel_ax.transAxes)

        treatment_color = pt.get_colors(treatment)
        taxon_marker = pt.plot_species_marker(taxon)
        taxon_fillstyle = pt.plot_species_fillstyle(taxon)

        # Shared look of the per-replicate curves.  The line style is given as
        # a keyword rather than an 'o-' format string so the marker is defined
        # only once (matplotlib warns when fmt and ``marker=`` both set it).
        replicate_style = dict(linestyle='-',
                               color=treatment_color,
                               marker=taxon_marker,
                               fillstyle=taxon_fillstyle,
                               alpha=1,
                               markersize=7,
                               linewidth=3,
                               markeredgewidth=1.5,
                               zorder=1)

        treatment_taxon_populations = []
        all_medians = []
        all_numbers = []

        for replicate in pt.replicates:

            population = treatment + taxon + replicate
            if population in pt.populations_to_ignore:
                continue

            Mts, Ms = mutation_trajectories[population]
            fixed_Mts, fixed_Ms = fixed_mutation_trajectories[population]
            deltaMts, deltaMs = delta_mutation_trajectories[population]
            median_ts, median_log10 = median_trajectories[population]
            n_muts_ts, n_muts = n_muts_trajectories[population]

            # Back-transform the log10 values so the log axes show raw units.
            ax_t_vs_M.plot(Mts, 10**Ms, **replicate_style)
            ax_t_vs_delta_M.plot(deltaMts,
                                 10**deltaMs,
                                 color=treatment_color,
                                 marker=taxon_marker,
                                 fillstyle=taxon_fillstyle)
            ax_t_vs_F.plot(fixed_Mts, fixed_Ms, **replicate_style)
            ax_t_vs_median_freq.plot(median_ts, 10**median_log10,
                                     **replicate_style)
            ax_t_vs_number_muts.plot(n_muts_ts, n_muts, **replicate_style)

            all_medians.extend(median_log10.tolist())
            all_numbers.extend(n_muts.tolist())
            treatment_taxon_populations.append(population)

        # Axis scales and tick sizes do not depend on the replicate, so they
        # are configured once per column.
        for log_ax in (ax_t_vs_M, ax_t_vs_delta_M, ax_t_vs_median_freq,
                       ax_t_vs_number_muts):
            log_ax.set_yscale('log', base=10)
        ax_t_vs_M.tick_params(axis='x', labelsize=8)
        ax_t_vs_median_freq.yaxis.set_tick_params(labelsize=8)
        ax_t_vs_number_muts.tick_params(axis='y', labelsize=8)

        # The data-dependent steps are skipped when there is nothing to
        # summarise (e.g. every replicate of this treatment is ignored).
        if all_medians:
            ax_t_vs_median_freq.set_ylim(
                [10**(min(all_medians)) * 0.8, 10**(max(all_medians)) * 1.2])
        if all_numbers:
            ax_t_vs_number_muts.set_ylim(
                [min(all_numbers) * 0.8,
                 max(all_numbers) * 1.2])

        # Reference line at a ratio of 1 (no change between time points).
        ax_t_vs_delta_M.axhline(y=1, c='grey', linestyle=':', lw=3, zorder=1)

        if treatment_taxon_populations:
            avg_Mts, avg_Ms = timecourse_utils.average_trajectories([
                mutation_trajectories[population]
                for population in treatment_taxon_populations
            ])
            avg_deltaMts, avg_deltaMs = timecourse_utils.average_trajectories([
                delta_mutation_trajectories[population]
                for population in treatment_taxon_populations
            ])

            average_style = dict(color='k',
                                 marker=" ",
                                 alpha=1,
                                 linewidth=4,
                                 zorder=2)
            ax_t_vs_M.plot(avg_Mts, 10**avg_Ms, '--', **average_style)
            ax_t_vs_delta_M.plot(avg_deltaMts, 10**avg_deltaMs, '--',
                                 **average_style)

        # Keep the ratio panels of these taxa on the same y axes across columns.
        if taxon == 'C':
            ax_t_vs_delta_M.set_ylim([0.2, 42])
        elif taxon == 'D':
            ax_t_vs_delta_M.set_ylim([0.2, 20])

        ax_t_vs_M.set_title(str(10**int(treatment)) + '-day transfers',
                            fontsize=17)

        # Legend and y-axis labels are drawn on the left-most column only.
        if column_count == 0:
            legend_elements = [
                Line2D([0], [0],
                       ls='--',
                       color='k',
                       lw=1.5,
                       label=r'$\overline{M}(t)$')
            ]
            ax_t_vs_M.legend(handles=legend_elements,
                             loc='lower right',
                             fontsize=8)

            ax_t_vs_M.set_ylabel('Mutations, ' + r'$M(t)$', fontsize=15)
            ax_t_vs_F.set_ylabel('Fixed mutations', fontsize=15)
            ax_t_vs_delta_M.set_ylabel('Change in mutations,\n' +
                                       r'$M(t)/M(t-1)$',
                                       fontsize=15)
            ax_t_vs_median_freq.set_ylabel(
                'Median mutation freq.\nat time $t$', fontsize=15)
            ax_t_vs_number_muts.set_ylabel('Number of mutations\nat time $t$',
                                           fontsize=15)

    fig.text(0.53, 0.05, 'Days, ' + r'$t$', ha='center', fontsize=28)
    fig.suptitle(pt.latex_genus_dict[taxon], fontsize=30)
    fig_name = pt.get_path() + '/figs/rate_%s.pdf' % taxon
    fig.savefig(fig_name,
                format='pdf',
                bbox_inches="tight",
                pad_inches=0.4,
                dpi=600)
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
Ejemplo n.º 4
0
                                                                  subsamples)]
        G_subsample_975 = G_subsample_dict[treatment + taxon][int(0.975 *
                                                                  subsamples)]

        #xerr1 = [ [z_lclb_mpd_null_mean - lclb_mpd_025, z_lcpl_mpd_null_mean - lcpl_mpd_025, z_hclb_mpd_null_mean - hclb_mpd_025, z_hcpl_mpd_null_mean - hcpl_mpd_025 ] ,
        #        [lclb_mpd_975 - z_lclb_mpd_null_mean, lcpl_mpd_975 - z_lcpl_mpd_null_mean, hclb_mpd_975 - z_hclb_mpd_null_mean, hcpl_mpd_975 -z_hcpl_mpd_null_mean ]]

        # Asymmetric error bar around G_subsample_mean, reaching down to
        # G_subsample_025 and up to G_subsample_975. The white square is drawn
        # underneath (zorder=1) the coloured square added by the next call.
        plt.errorbar(int(treatment) + taxon_xaxis_dict[taxon], G_subsample_mean, yerr = [ [G_subsample_mean-G_subsample_025], [ G_subsample_975-G_subsample_mean]], \
                fmt = 'o', alpha = 1, barsabove = True, marker = 's', \
                mfc = 'white', mec = 'white', lw=3.5, c = 'k', zorder=1, ms=17)

        # Square marker: the subsample mean, coloured by taxon and treatment.
        plt.scatter(int(treatment) + taxon_xaxis_dict[taxon], G_subsample_mean, marker='s', s = 250, \
            linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2)


        # Taxon-specific marker: the value stored in G_all_mutations_dict for
        # this treatment + taxon, at the same x position.
        plt.scatter(int(treatment) + taxon_xaxis_dict[taxon], G_all_mutations_dict[treatment+taxon], marker=pt.plot_species_marker(taxon), s = 250, \
            linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2)

# Axis labelling for the current figure.
plt.xlabel("Transfer time (days)", fontsize=20)

# x positions 0, 1, 2 are labelled '1', '10', '100'.
plt.xticks((0, 1, 2), ('1', '10', '100'), fontsize=14)
# NOTE(review): plt.rc changes the rcParams default, which normally applies
# to artists created afterwards — confirm it has the intended effect on the
# y tick labels of the figure that is already drawn.
plt.rc('ytick', labelsize=12)

plt.ylim([1.2, 6.2])

# The two adjacent string literals are concatenated by the parser.
plt.ylabel("Net increase in log-likelihood, " r'$\Delta \ell$', fontsize=20)

legend_elements = [
    Line2D([0], [0],
           color='none',
           marker='o',
def plot_within_taxon_paralleliism(taxon, slope_null=1):

    fig = plt.figure(figsize=(12, 8))

    gene_data = parse_file.parse_gene_list(taxon)

    gene_names, gene_start_positions, gene_end_positions, promoter_start_positions, promoter_end_positions, gene_sequences, strands, genes, features, protein_ids = gene_data
    # to get the common gene names for each ID

    ax_multiplicity = plt.subplot2grid((2, 3), (0, 0), colspan=1)
    ax_mult_freq = plt.subplot2grid((2, 3), (0, 1), colspan=1)
    ax_venn = plt.subplot2grid((2, 3), (0, 2), colspan=1)

    ax_multiplicity.set_xscale('log', base=10)
    ax_multiplicity.set_yscale('log', base=10)
    ax_multiplicity.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_multiplicity.set_ylabel('Fraction mutations ' + r'$\geq m$',
                               fontsize=14)
    ax_multiplicity.text(-0.1,
                         1.07,
                         pt.sub_plot_labels[0],
                         fontsize=18,
                         fontweight='bold',
                         ha='center',
                         va='center',
                         transform=ax_multiplicity.transAxes)

    ax_multiplicity.set_ylim([0.001, 1.1])
    ax_multiplicity.set_xlim([0.07, 130])

    ax_mult_freq.set_xscale('log', base=10)
    ax_mult_freq.set_yscale('log', base=10)
    ax_mult_freq.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_mult_freq.set_ylabel('Mean maximum allele frequency, ' +
                            r'$\overline{f}_{max}$',
                            fontsize=11)
    ax_mult_freq.text(-0.1,
                      1.07,
                      pt.sub_plot_labels[1],
                      fontsize=18,
                      fontweight='bold',
                      ha='center',
                      va='center',
                      transform=ax_mult_freq.transAxes)

    ax_venn.axis('off')
    ax_venn.text(-0.1,
                 1.07,
                 pt.sub_plot_labels[2],
                 fontsize=18,
                 fontweight='bold',
                 ha='center',
                 va='center',
                 transform=ax_venn.transAxes)

    alpha_treatment_dict = {'0': 0.5, '1': 0.5, '2': 0.8}

    significant_multiplicity_dict = {}

    significant_multiplicity_values_dict = {}

    multiplicity_dict = {}

    g_score_p_label_dict = {}

    all_mults = []
    all_freqs = []

    treatments_in_taxon = []

    label_y_axes = [0.3, 0.2, 0.1]

    for treatment_idx, treatment in enumerate(pt.treatments):

        significan_multiplicity_taxon_path = pt.get_path(
        ) + '/data/timecourse_final/parallel_genes_%s.txt' % (treatment +
                                                              taxon)
        if os.path.exists(significan_multiplicity_taxon_path) == False:
            continue
        treatments_in_taxon.append(treatment)
        significan_multiplicity_taxon = open(
            significan_multiplicity_taxon_path, "r")

        significan_multiplicity_list = []
        for i, line in enumerate(significan_multiplicity_taxon):
            if i == 0:
                continue
            line = line.strip()
            items = line.split(",")
            significan_multiplicity_list.append(items[0])

            if items[0] not in significant_multiplicity_values_dict:
                significant_multiplicity_values_dict[items[0]] = {}
                significant_multiplicity_values_dict[
                    items[0]][treatment] = float(items[-2])
            else:
                significant_multiplicity_values_dict[
                    items[0]][treatment] = float(items[-2])

        significant_multiplicity_dict[treatment] = significan_multiplicity_list

        populations = [
            treatment + taxon + replicate for replicate in pt.replicates
        ]

        # Load convergence matrix
        convergence_matrix = parse_file.parse_convergence_matrix(
            pt.get_path() + '/data/timecourse_final/' +
            ("%s_convergence_matrix.txt" % (treatment + taxon)))
        gene_parallelism_statistics = mutation_spectrum_utils.calculate_parallelism_statistics(
            convergence_matrix, populations, Lmin=100)
        #print(gene_parallelism_statistics)
        G, pvalue = mutation_spectrum_utils.calculate_total_parallelism(
            gene_parallelism_statistics)

        sys.stdout.write("Total parallelism for %s = %g (p=%g)\n" %
                         (treatment + taxon, G, pvalue))

        predictors = []
        responses = []

        gene_hits = []
        gene_predictors = []
        mean_gene_freqs = []

        Ls = []

        ax_mult_freqs_x = []
        ax_mult_freqs_y = []

        for gene_name in convergence_matrix.keys():

            convergence_matrix[gene_name][
                'length'] < 50 and convergence_matrix[gene_name]['length']

            Ls.append(convergence_matrix[gene_name]['length'])
            m = gene_parallelism_statistics[gene_name]['multiplicity']

            if gene_name not in multiplicity_dict:
                multiplicity_dict[gene_name] = {}
                multiplicity_dict[gene_name][treatment] = m
            else:
                multiplicity_dict[gene_name][treatment] = m

            n = 0
            nfixed = 0
            freqs = []
            nf_max = 0

            for population in populations:
                for t, L, f, f_max in convergence_matrix[gene_name][
                        'mutations'][population]:
                    fixed_weight = timecourse_utils.calculate_fixed_weight(
                        L, f)

                    predictors.append(m)
                    responses.append(fixed_weight)

                    n += 1
                    nfixed += fixed_weight

                    # get freqs for regression
                    #if L == parse_file.POLYMORPHIC:
                    #freqs.append(f_max)
                    nf_max += timecourse_utils.calculate_fixed_weight(L, f_max)

            if n > 0.5:
                gene_hits.append(n)
                gene_predictors.append(m)
                #mean_gene_freqs.append(np.mean(freqs))

                if nf_max > 0:
                    ax_mult_freqs_x.append(m)
                    ax_mult_freqs_y.append(nf_max / n)

        Ls = np.asarray(Ls)
        ntot = len(predictors)
        mavg = ntot * 1.0 / len(Ls)

        predictors, responses = (np.array(x) for x in zip(
            *sorted(zip(predictors, responses), key=lambda pair: (pair[0]))))

        gene_hits, gene_predictors = (np.array(x) for x in zip(*sorted(
            zip(gene_hits, gene_predictors), key=lambda pair: (pair[0]))))

        rescaled_predictors = np.exp(np.fabs(np.log(predictors / mavg)))

        null_survival_function = mutation_spectrum_utils.NullMultiplicitySurvivalFunction.from_parallelism_statistics(
            gene_parallelism_statistics)

        # default base is 10
        theory_ms = np.logspace(-2, 2, 100)
        theory_survivals = null_survival_function(theory_ms)
        theory_survivals /= theory_survivals[0]

        sys.stderr.write("Done!\n")

        ax_multiplicity.plot(theory_ms,
                             theory_survivals,
                             lw=3,
                             color=pt.get_colors(treatment),
                             alpha=0.8,
                             ls=':',
                             zorder=1)

        ax_multiplicity.plot(
            predictors, (len(predictors) - np.arange(0, len(predictors))) *
            1.0 / len(predictors),
            lw=3,
            color=pt.get_colors(treatment),
            alpha=0.8,
            ls='--',
            label=str(int(10**int(treatment))) + '-day',
            drawstyle='steps',
            zorder=2)

        #ax_multiplicity.text(0.2, 0.3, g_score_p_label_dict['0'], fontsize=25, fontweight='bold', ha='center', va='center', transform=ax_multiplicity.transAxes)
        #ax_multiplicity.text(0.2, 0.2, g_score_p_label_dict['1'], fontsize=25, fontweight='bold', ha='center', va='center', transform=ax_multiplicity.transAxes)
        #ax_multiplicity.text(0.2, 0.1, g_score_p_label_dict['2'], fontsize=25, fontweight='bold', ha='center', va='center', transform=ax_multiplicity.transAxes)

        if pvalue < 0.001:
            pretty_pvalue = r'$\ll 0.001$'
        else:
            pretty_pvalue = '=' + str(round(pvalue, 4))

        g_score_p_label = r'$\Delta \ell_{{{}}}=$'.format(
            str(10**int(treatment))) + str(round(
                G, 3)) + ', ' + r'$P$' + pretty_pvalue

        text_color = pt.lighten_color(pt.get_colors(treatment), amount=1.3)

        ax_multiplicity.text(0.26,
                             label_y_axes[treatment_idx],
                             g_score_p_label,
                             fontsize=7,
                             ha='center',
                             va='center',
                             color='k',
                             transform=ax_multiplicity.transAxes)

        ax_mult_freq.scatter(ax_mult_freqs_x,
                             ax_mult_freqs_y,
                             color=pt.get_colors(treatment),
                             edgecolors=pt.get_colors(treatment),
                             marker=pt.plot_species_marker(taxon),
                             alpha=alpha_treatment_dict[treatment])

        all_mults.extend(ax_mult_freqs_x)
        all_freqs.extend(ax_mult_freqs_y)

        #slope, intercept, r_value, p_value, std_err = stats.linregress(np.log10(ax_mult_freqs_x), np.log10(ax_mult_freqs_y))
        #print(slope, p_value)

    # make treatment pairs
    treatments_in_taxon.sort(key=float)

    for i in range(0, len(treatments_in_taxon)):

        for j in range(i + 1, len(treatments_in_taxon)):

            ax_mult_i_j = plt.subplot2grid((2, 3), (1, i + j - 1), colspan=1)
            ax_mult_i_j.set_xscale('log', base=10)
            ax_mult_i_j.set_yscale('log', base=10)
            ax_mult_i_j.set_xlabel(str(10**int(treatments_in_taxon[i])) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            ax_mult_i_j.set_ylabel(str(10**int(treatments_in_taxon[j])) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            ax_mult_i_j.plot([0.05, 200], [0.05, 200],
                             lw=3,
                             c='grey',
                             ls='--',
                             zorder=1)
            ax_mult_i_j.set_xlim([0.05, 200])
            ax_mult_i_j.set_ylim([0.05, 200])

            ax_mult_i_j.text(-0.1,
                             1.07,
                             pt.sub_plot_labels[2 + i + j],
                             fontsize=18,
                             fontweight='bold',
                             ha='center',
                             va='center',
                             transform=ax_mult_i_j.transAxes)

            multiplicity_pair = [
                (multiplicity_dict[gene_name][treatments_in_taxon[i]],
                 multiplicity_dict[gene_name][treatments_in_taxon[j]])
                for gene_name in sorted(multiplicity_dict)
                if (multiplicity_dict[gene_name][treatments_in_taxon[i]] > 0)
                and (multiplicity_dict[gene_name][treatments_in_taxon[j]] > 0)
            ]
            significant_multiplicity_pair = [
                (significant_multiplicity_values_dict[gene_name][
                    treatments_in_taxon[i]],
                 significant_multiplicity_values_dict[gene_name][
                     treatments_in_taxon[j]])
                for gene_name in sorted(significant_multiplicity_values_dict)
                if (treatments_in_taxon[i] in
                    significant_multiplicity_values_dict[gene_name]) and (
                        treatments_in_taxon[j] in
                        significant_multiplicity_values_dict[gene_name])
            ]

            # get mean colors
            ccv = ColorConverter()

            color_1 = np.array(
                ccv.to_rgb(pt.get_colors(treatments_in_taxon[i])))
            color_2 = np.array(
                ccv.to_rgb(pt.get_colors(treatments_in_taxon[j])))

            mix_color = 0.7 * (color_1 + color_2)
            mix_color = np.min([mix_color, [1.0, 1.0, 1.0]], 0)

            if (treatments_in_taxon[i] == '0') and (treatments_in_taxon[j]
                                                    == '1'):
                #mix_color = pt.lighten_color(mix_color, amount=2.8)
                mix_color = 'gold'

            mult_i = [x[0] for x in multiplicity_pair]
            mult_j = [x[1] for x in multiplicity_pair]

            ax_mult_i_j.scatter(mult_i,
                                mult_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color,
                                edgecolors='none',
                                alpha=0.8,
                                s=90,
                                zorder=2)

            mult_significant_i = [x[0] for x in significant_multiplicity_pair]
            mult_significant_j = [x[1] for x in significant_multiplicity_pair]
            ax_mult_i_j.scatter(mult_significant_i,
                                mult_significant_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color,
                                edgecolors='k',
                                lw=1.5,
                                alpha=0.7,
                                s=90,
                                zorder=3)

            #slope_mult, intercept_mult, r_value_mult, p_value_mult, std_err_mult = stats.linregress(np.log10(mult_significant_i), np.log10(mult_significant_j))

            mult_ij = mult_significant_i + mult_significant_j + mult_i + mult_j

            ax_mult_i_j.set_xlim([min(mult_ij) * 0.5, max(mult_ij) * 1.5])
            ax_mult_i_j.set_ylim([min(mult_ij) * 0.5, max(mult_ij) * 1.5])

            # null slope of 1
            #ratio = (slope_mult - slope_null) / std_err_mult
            #p_value_mult_new_null = stats.t.sf(np.abs(ratio), len(mult_significant_j)-2)*2

            #if p_value_mult_new_null < 0.05:
            #    x_log10_fit_range =  np.linspace(np.log10(min(mult_i) * 0.5), np.log10(max(mult_i) * 1.5), 10000)

            #    y_fit_range = 10 ** (slope_mult*x_log10_fit_range + intercept_mult)
            #    ax_mult_i_j.plot(10**x_log10_fit_range, y_fit_range, c='k', lw=3, linestyle='--', zorder=4)

            #ax_mult_i_j.text(0.05, 0.9, r'$\beta_{1}=$'+str(round(slope_mult,3)), fontsize=12, transform=ax_mult_i_j.transAxes)
            #ax_mult_i_j.text(0.05, 0.82, r'$r^{2}=$'+str(round(r_value_mult**2,3)), fontsize=12, transform=ax_mult_i_j.transAxes)
            #ax_mult_i_j.text(0.05, 0.74, pt.get_p_value_latex(p_value_mult_new_null), fontsize=12, transform=ax_mult_i_j.transAxes)

    #if taxon == 'F':
    #    subset_tuple = (len( significant_multiplicity_dict['0']), \
    #                    len( significant_multiplicity_dict['1']), \
    #                    len(set(significant_multiplicity_dict['0']) & set(significant_multiplicity_dict['1'])))

    #    venn = venn2(subsets = subset_tuple, ax=ax_venn, set_labels=('', '', ''), set_colors=(pt.get_colors('0'), pt.get_colors('1')))
    #    c = venn2_circles(subsets=subset_tuple, ax=ax_venn, linestyle='dashed')

    # Per-treatment sets of significant genes, built once and reused for every
    # overlap below.
    sig_genes_0 = set(significant_multiplicity_dict['0'])
    sig_genes_1 = set(significant_multiplicity_dict['1'])
    sig_genes_2 = set(significant_multiplicity_dict['2'])

    # Seven sizes, in the order they are handed to venn3 / venn3_circles:
    # '0', '1', '0'&'1', '2', '0'&'2', '1'&'2', '0'&'1'&'2'.
    # The last entry is the three-way overlap. It previously intersected
    # treatment '1' with itself, which reduced it to the '1'&'2' pairwise
    # overlap and left treatment '0' out entirely.
    # NOTE(review): matplotlib-venn documents a 7-tuple `subsets` as the sizes
    # of the disjoint regions (Abc, aBc, ABc, abC, AbC, aBC, ABC); the values
    # here are whole-set and whole-overlap sizes -- confirm this is intended.
    subset_tuple = (len(significant_multiplicity_dict['0']),
                    len(significant_multiplicity_dict['1']),
                    len(sig_genes_0 & sig_genes_1),
                    len(significant_multiplicity_dict['2']),
                    len(sig_genes_0 & sig_genes_2),
                    len(sig_genes_1 & sig_genes_2),
                    len(sig_genes_0 & sig_genes_1 & sig_genes_2))

    # Three-set Venn diagram on ax_venn: unlabeled sets, one colour per
    # treatment, with dashed circle outlines drawn on top.
    treatment_colors = tuple(pt.get_colors(label) for label in ('0', '1', '2'))
    venn = venn3(subsets=subset_tuple,
                 ax=ax_venn,
                 set_labels=('', '', ''),
                 set_colors=treatment_colors)
    c = venn3_circles(subsets=subset_tuple, ax=ax_venn, linestyle='dashed')

    # Pad the multiplicity/frequency panel: half the smallest and 1.5x the
    # largest value on each axis.
    mult_limits = [min(all_mults) * 0.5, max(all_mults) * 1.5]
    ax_mult_freq.set_xlim(mult_limits)
    freq_limits = [min(all_freqs) * 0.5, max(all_freqs) * 1.5]
    ax_mult_freq.set_ylim(freq_limits)

    fig.suptitle(pt.latex_dict[taxon], fontsize=30)
    fig.subplots_adjust(wspace=0.3)

    # Write one JPEG per taxon under <project path>/figs, then close the
    # current figure.
    fig_name = pt.get_path() + "/figs/multiplicity_%s.jpg" % taxon
    save_options = dict(format='jpg',
                        bbox_inches="tight",
                        pad_inches=0.4,
                        dpi=600)
    fig.savefig(fig_name, **save_options)
    plt.close()
Ejemplo n.º 6
0
            # Nothing to plot for this combination; go to the next iteration.
            if len(mutations_list) == 0:
                continue

            # Convert to an array so the element-wise 10** below works.
            mutations_list = np.asarray(
                mutations_list)  #/ set_time_dict[taxon]
            #mutations_list = np.asarray([value[set_time][1] for key, value in mutation_trajectories.items() if (treatment+taxon in key) and (set_time in value.values())])
            # One x value per point: the integer treatment label, repeated.
            times_list = np.repeat(int(treatment), len(mutations_list))

            # Both coordinates are plotted as 10**value; x additionally gets
            # Gaussian jitter (standard deviation 0.1) so points sharing a
            # treatment do not sit exactly on top of each other.
            # NOTE(review): the 10** suggests the stored values are on a log10
            # scale -- confirm against where mutations_list is built.
            ax.scatter(
                (10**times_list) + np.random.randn(len(times_list)) * 0.1,
                10**mutations_list,
                s=140,
                linewidth=3,
                facecolors=pt.get_scatter_facecolor(taxon, treatment),
                edgecolors=pt.get_colors(treatment),
                marker=pt.plot_species_marker(taxon),
                alpha=0.8,
                zorder=3)
            # Keep the untransformed values for the axis limits below.
            times_all_list.extend(times_list)
            mutations_all_list.extend(mutations_list)

        # Axis limits: half the smallest and twice the largest 10**value.
        # NOTE(review): min()/max() raise ValueError on an empty sequence, so
        # this fails if every iteration above hit `continue` (assuming the
        # accumulators start empty -- their initialisation is not visible here).
        ax.set_ylim([(10**min(mutations_all_list)) * 0.5,
                     (10**max(mutations_all_list)) * 2])
        ax.set_xlim([(10**min(times_all_list)) * 0.5,
                     (10**max(times_all_list)) * 2])

        # From here on the accumulated values are used as an array.
        mutations_all_list = np.asarray(
            mutations_all_list)  #/ set_time_dict[taxon]

        if taxon == 'J':
            treatment_1 = [
Ejemplo n.º 7
0
    standardized_gene_overlap_treatment_pair = [standardized_gene_overlap[treatment_pair_set][taxon]['Z_jaccard'] for taxon in taxa_to_test]

    #standardized_gene_overlap_treatment_pair = []

    #for taxon in taxa_to_test:
    #    standardized_gene_overlap_treatment_pair.append(standardized_gene_overlap[treatment_pair_set][taxon]['Z_jaccard'] )


    #print(standardized_gene_overlap_treatment_pair)
    #standardized_gene_overlap_treatment_pair = []


    # Draw one half-filled marker per taxon at consecutive x positions.
    for taxon_i_idx, taxon_i in enumerate(taxa_to_test):
        # Two-tone marker: with fillstyle='left' the main face colour comes
        # from the first treatment of the pair and the alternate face colour
        # from the second.
        marker_style = {
            'color': 'k',
            'marker': pt.plot_species_marker(taxon_i),
            'markerfacecoloralt': pt.get_colors(treatment_pair[1]),
            'markerfacecolor': pt.get_colors(treatment_pair[0]),
        }
        standardized_gene_overlap_i = standardized_gene_overlap_treatment_pair[taxon_i_idx]

        ax_divergence_gene.plot(
            ax_count_divergence_gene,
            standardized_gene_overlap_i,
            markersize=pt.plot_species_marker_size(taxon_i),
            linewidth=2,
            alpha=1,
            zorder=3,
            fillstyle='left',
            **marker_style)

        # Move the running x position on for the next marker.
        ax_count_divergence_gene += 1

Ejemplo n.º 8
0
            # Rows of the PCA table whose index label contains the
            # treatment+taxon code.
            population_mask = principalComponents_df.index.str.contains(
                treatment + taxon)
            PCs_ = principalComponents_df[population_mask]

            # Dotted reference lines through the origin and a grey dot at (0, 0).
            origin_line_style = dict(color='k', linestyle=':', alpha=0.8, zorder=1)
            ax_pca.axhline(y=0, **origin_line_style)
            ax_pca.axvline(x=0, **origin_line_style)
            ax_pca.scatter(0, 0, marker="o", edgecolors='none', c='darkgray',
                           s=120, zorder=2)

            # Scores on the first two components, coloured by treatment.
            pc1_values = PCs_.PC1.values
            pc2_values = PCs_.PC2.values
            treatment_color = pt.get_colors(treatment)
            ax_pca.scatter(pc1_values,
                           pc2_values,
                           c=treatment_color,
                           marker=pt.plot_species_marker(taxon),
                           s=70,
                           edgecolors=treatment_color,
                           linewidth=0.6,
                           alpha=0.8,
                           zorder=4)

            # Dashed ellipse around the same points (n_std=2), drawn between
            # the origin marker and the scatter in z-order.
            pt.confidence_ellipse(pc1_values,
                                  pc2_values,
                                  ax_pca,
                                  n_std=2,
                                  edgecolor=treatment_color,
                                  linestyle='--',
                                  lw=4,
                                  zorder=3)

            # dn/ds
            populations_plot = [
                treatment + taxon + replicate for replicate in replicates
                if treatment + taxon +
Ejemplo n.º 9
0
            # For each fmax cutoff, collect the values stored under 'G_mean',
            # 'G_025' and 'G_975' for this taxon and treatment.
            delta_l_list, delta_025, delta_975 = [], [], []
            for fmax_cutoff in fmax_cutoffs:
                cutoff_stats = G_dict_all[taxon][treatment][fmax_cutoff]
                delta_l_list.append(cutoff_stats['G_mean'])
                delta_025.append(cutoff_stats['G_025'])
                delta_975.append(cutoff_stats['G_975'])

            delta_l_list, delta_025, delta_975 = (
                np.asarray(values)
                for values in (delta_l_list, delta_025, delta_975))

            # Asymmetric error bars: distance from the mean down to 'G_025'
            # and up to 'G_975'. The error-bar markers are white so that only
            # the bars show beneath the scatter markers drawn next.
            lower_error = delta_l_list - delta_025
            upper_error = delta_975 - delta_l_list
            ax.errorbar(fmax_cutoffs,
                        delta_l_list,
                        yerr=[lower_error, upper_error],
                        fmt='o',
                        alpha=1,
                        barsabove=True,
                        marker=pt.plot_species_marker(taxon),
                        mfc='white',
                        mec='white',
                        lw=2,
                        c='k',
                        zorder=1,
                        ms=17)

            ax.scatter(fmax_cutoffs,
                       delta_l_list,
                       marker=pt.plot_species_marker(taxon),
                       s=150,
                       linewidth=3,
                       facecolors=pt.get_scatter_facecolor(taxon, treatment),
                       edgecolors=pt.get_colors(treatment),
                       alpha=1,
                       zorder=2)

            # Taxon 'P' gets a larger legend marker than the others.
            marker_size_legend = 16 if taxon == 'P' else 10

            # One legend entry per treatment label, using this taxon's marker.
            legend_elements = [
                Line2D([0], [0],
                       color='w',
                       markerfacecolor=pt.get_colors(treatment_label),
                       marker=pt.plot_species_marker(taxon),
                       markersize=marker_size_legend,
                       label=legend_label)
                for treatment_label, legend_label in (('0', '1-Day'),
                                                      ('1', '10-Days'))
            ]

            ax.legend(handles=legend_elements, loc='upper left')
Ejemplo n.º 10
0
        # Marker face colour: a blend of the two treatment colours, except
        # for the '0' vs '1' pair, which uses a fixed named colour.
        ccv = ColorConverter()

        color_1 = np.array(ccv.to_rgb(pt.get_colors(treatment_pair[0])))
        color_2 = np.array(ccv.to_rgb(pt.get_colors(treatment_pair[1])))

        if treatment_pair[0] == '0' and treatment_pair[1] == '1':
            mix_color = 'gold'
        else:
            # 0.7 * (sum of the two RGB triples), capped at 1.0 per channel.
            mix_color = np.minimum(0.7 * (color_1 + color_2), 1.0)

        # Error bar from new_CI_025 to new_CI_975 around the slope; its own
        # marker is white so only the bar shows behind the scatter marker.
        lower_error = [new_slope - new_CI_025]
        upper_error = [new_CI_975 - new_slope]
        plt.errorbar(ax_count,
                     new_slope,
                     yerr=[lower_error, upper_error],
                     fmt='o',
                     alpha=1,
                     barsabove=True,
                     marker=pt.plot_species_marker(taxon),
                     mfc='white',
                     mec='white',
                     lw=3,
                     c='k',
                     zorder=2,
                     ms=17)

        plt.scatter(ax_count,
                    new_slope,
                    marker=pt.plot_species_marker(taxon),
                    s=250,
                    linewidth=2,
                    facecolors=mix_color,
                    edgecolors='k',
                    alpha=1,
                    zorder=3)

        # Next x position.
        ax_count += 1

    plt.axvline(x=ax_count - 0.5,
                color='k',
                lw=2,
                linestyle=':',
                alpha=1,
                zorder=1)

    #plt.text(ax_count-2, 0.1, '%s-day vs. %s-day' %(str(10**int(treatment_pair[0])), str(10**int(treatment_pair[1]))),  fontsize=14)
        # Not used within the visible code; presumably filled further down.
        treatment_taxon_populations = []

        # Per-replicate trajectories, gathered for pooling after the loop.
        Mts_all_list = []
        Ms_all_list = []

        for replicate in replicates:

            # Population key: treatment, taxon and replicate labels joined.
            population = treatment + taxon + replicate

            # Each entry unpacks into two sequences, plotted as x and y below.
            # NOTE(review): presumably time points and log10-scaled mutation
            # values, given the 10** on the y argument -- confirm.
            Mts, Ms = mutation_trajectories[population]

            # One line with markers per replicate: y is drawn as 10**Ms,
            # coloured by treatment, with the marker shape and fill style
            # looked up for the taxon.
            ax_t_vs_M.plot(Mts,
                           10**Ms,
                           'o-',
                           color=pt.get_colors(treatment),
                           marker=pt.plot_species_marker(taxon),
                           fillstyle=pt.plot_species_fillstyle(taxon),
                           alpha=1,
                           markersize=7,
                           linewidth=3,
                           markeredgewidth=1.5,
                           zorder=1)

            Mts_all_list.append(Mts)
            Ms_all_list.append(Ms)

        # Pool all replicates into single flat arrays.
        Mts_all = np.concatenate(Mts_all_list)
        Ms_all = np.concatenate(Ms_all_list)

        # Shift the pooled x values so the smallest one becomes zero.
        Mts_shifted_all = Mts_all - min(Mts_all)
Ejemplo n.º 12
0
# Running panel index: selects the subplot label and the ANOVA results shown
# in each panel.
ax_count = 0

# Two rows of three taxa; each taxon gets the GridSpec cell matching its
# position in this nested list.
taxon_grid = [['B', 'C', 'D'], ['F', 'J', 'P']]

for taxon_list_idx, taxon_list in enumerate(taxon_grid):
    for taxon_idx, taxon in enumerate(taxon_list):
        ax = fig.add_subplot(gs[taxon_list_idx, taxon_idx])
        ax.set_title(pt.latex_genus_bold_dict[taxon],
                     fontsize=12,
                     fontweight='bold')

        dnds_samples = []
        for treatment in treatments:
            # Populations of this taxon and treatment that are not on the
            # ignore list.
            populations_plot = [
                treatment + taxon + replicate
                for replicate in replicates
                if treatment + taxon + replicate not in pt.populations_to_ignore
            ]
            # Nonsynonymous over synonymous value per population, rescaled by
            # Lsyn / Lnon for the taxon. The `== 0` term adds 1 to a zero
            # synonymous value so the division cannot be by zero.
            taxon_treatment_dnds_appeared = [
                non_appeared[population]
                / (syn_appeared[population] + (syn_appeared[population] == 0))
                * taxon_Lsyn_dict[taxon]
                / taxon_Lnon_dict[taxon]
                for population in populations_plot
            ]

            n_populations = len(taxon_treatment_dnds_appeared)
            # Treatments with fewer than two populations are neither plotted
            # nor added to dnds_samples.
            if n_populations < 2:
                continue

            ax.scatter([int(treatment)] * n_populations,
                       taxon_treatment_dnds_appeared,
                       marker=pt.plot_species_marker(taxon),
                       linewidth=2,
                       facecolors=pt.get_scatter_facecolor(taxon, treatment),
                       edgecolors=pt.get_colors(treatment),
                       s=100,
                       zorder=2,
                       alpha=0.8)

            # Mean with a bar of 2 * std / sqrt(n), only for more than two
            # populations.
            if n_populations > 2:
                ax.errorbar(int(treatment),
                            numpy.mean(taxon_treatment_dnds_appeared),
                            yerr=2 * numpy.std(taxon_treatment_dnds_appeared) /
                            numpy.sqrt(n_populations),
                            linestyle='-',
                            c='k',
                            marker=pt.plot_species_marker(taxon),
                            lw=2.5,
                            zorder=3)

            dnds_samples.append(taxon_treatment_dnds_appeared)

        # Panel label above the top-left corner, then the F statistic and the
        # corrected P value for this panel, rounded to three decimals.
        text_style = dict(ha='center', va='center', transform=ax.transAxes)
        ax.text(-0.1, 1.07, sub_plot_labels[ax_count],
                fontsize=12, fontweight='bold', **text_style)
        ax.text(0.7, 0.9, r'$F=$' + str(round(anova_F[ax_count], 3)),
                fontsize=10, **text_style)
        ax.text(0.7, 0.8, r'$P_{BH}=$' + str(round(pvals_corrected[ax_count], 3)),
                fontsize=10, **text_style)

        ax_count += 1

        if taxon == 'J':
            ax.set_xticks([0,2])
            ax.set_xticklabels( ['1','100'] )