def _read_parallel_genes(path):
    """Return ``(gene_name, value)`` pairs parsed from a parallel-genes file.

    The file is read as comma-separated text.  The first line is treated as a
    header and skipped; for every other non-blank line the first column is
    taken as the gene name and the second-to-last column is parsed as a float.
    """
    records = []
    with open(path, "r") as handle:
        next(handle, None)  # skip the header line
        for line in handle:
            line = line.strip()
            if not line:
                continue
            items = line.split(",")
            records.append((items[0], float(items[-2])))
    return records


def plot_within_taxon_paralleliism(taxon, slope_null=1):
    """Plot the within-taxon gene-multiplicity figure and save it as a JPG.

    The figure is a 2 x 3 grid:

    * top-left: observed multiplicity survival curve per treatment, together
      with the null curve from ``NullMultiplicitySurvivalFunction``;
    * top-middle: gene multiplicity against ``nf_max / n`` for every gene
      with at least one mutation and a positive summed ``f_max`` weight;
    * top-right: Venn diagram of the genes listed in each treatment's
      ``parallel_genes_<treatment><taxon>.txt`` file;
    * bottom row: one panel per pair of treatments comparing per-gene
      multiplicities, with the genes from the parallel-genes files outlined.

    Treatments whose parallel-genes file does not exist are skipped.  The
    figure is written to ``<pt.get_path()>/figs/multiplicity_<taxon>.jpg``.

    Args:
        taxon: Taxon identifier; it is concatenated with treatment and
            replicate identifiers to build population names and file names.
        slope_null: Unused by the current implementation; kept so existing
            callers that pass it keep working.
    """
    fig = plt.figure(figsize=(12, 8))

    ax_multiplicity = plt.subplot2grid((2, 3), (0, 0), colspan=1)
    ax_mult_freq = plt.subplot2grid((2, 3), (0, 1), colspan=1)
    ax_venn = plt.subplot2grid((2, 3), (0, 2), colspan=1)

    ax_multiplicity.set_xscale('log', base=10)
    ax_multiplicity.set_yscale('log', base=10)
    ax_multiplicity.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_multiplicity.set_ylabel('Fraction mutations ' + r'$\geq m$',
                               fontsize=14)
    ax_multiplicity.text(-0.1, 1.07, pt.sub_plot_labels[0], fontsize=18,
                         fontweight='bold', ha='center', va='center',
                         transform=ax_multiplicity.transAxes)
    ax_multiplicity.set_ylim([0.001, 1.1])
    ax_multiplicity.set_xlim([0.07, 130])

    ax_mult_freq.set_xscale('log', base=10)
    ax_mult_freq.set_yscale('log', base=10)
    ax_mult_freq.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_mult_freq.set_ylabel(
        'Mean maximum allele frequency, ' + r'$\overline{f}_{max}$',
        fontsize=11)
    ax_mult_freq.text(-0.1, 1.07, pt.sub_plot_labels[1], fontsize=18,
                      fontweight='bold', ha='center', va='center',
                      transform=ax_mult_freq.transAxes)

    ax_venn.axis('off')
    ax_venn.text(-0.1, 1.07, pt.sub_plot_labels[2], fontsize=18,
                 fontweight='bold', ha='center', va='center',
                 transform=ax_venn.transAxes)

    alpha_treatment_dict = {'0': 0.5, '1': 0.5, '2': 0.8}
    # treatment -> list of gene names from the parallel-genes file
    significant_multiplicity_dict = {}
    # gene name -> {treatment: value from the parallel-genes file}
    significant_multiplicity_values_dict = {}
    # gene name -> {treatment: multiplicity}
    multiplicity_dict = {}
    all_mults = []
    all_freqs = []
    treatments_in_taxon = []
    label_y_axes = [0.3, 0.2, 0.1]

    for treatment_idx, treatment in enumerate(pt.treatments):
        parallel_genes_path = pt.get_path(
        ) + '/data/timecourse_final/parallel_genes_%s.txt' % (treatment +
                                                              taxon)
        if not os.path.exists(parallel_genes_path):
            continue
        treatments_in_taxon.append(treatment)

        significant_genes = _read_parallel_genes(parallel_genes_path)
        for gene_name, value in significant_genes:
            significant_multiplicity_values_dict.setdefault(
                gene_name, {})[treatment] = value
        significant_multiplicity_dict[treatment] = [
            gene_name for gene_name, _ in significant_genes
        ]

        populations = [
            treatment + taxon + replicate for replicate in pt.replicates
        ]

        # Load convergence matrix
        convergence_matrix = parse_file.parse_convergence_matrix(
            pt.get_path() + '/data/timecourse_final/' +
            ("%s_convergence_matrix.txt" % (treatment + taxon)))
        gene_parallelism_statistics = mutation_spectrum_utils.calculate_parallelism_statistics(
            convergence_matrix, populations, Lmin=100)
        G, pvalue = mutation_spectrum_utils.calculate_total_parallelism(
            gene_parallelism_statistics)
        sys.stdout.write("Total parallelism for %s = %g (p=%g)\n" %
                         (treatment + taxon, G, pvalue))

        # One entry per mutation: the multiplicity of the gene it falls in.
        predictors = []
        ax_mult_freqs_x = []
        ax_mult_freqs_y = []

        for gene_name in convergence_matrix:
            m = gene_parallelism_statistics[gene_name]['multiplicity']
            multiplicity_dict.setdefault(gene_name, {})[treatment] = m

            n = 0
            nf_max = 0
            gene_mutations = convergence_matrix[gene_name]['mutations']
            for population in populations:
                for _, L, _, f_max in gene_mutations[population]:
                    predictors.append(m)
                    n += 1
                    nf_max += timecourse_utils.calculate_fixed_weight(
                        L, f_max)

            # nf_max only accumulates alongside n, so nf_max > 0 implies
            # that the gene has at least one mutation.
            if n > 0 and nf_max > 0:
                ax_mult_freqs_x.append(m)
                ax_mult_freqs_y.append(nf_max / n)

        predictors = np.sort(np.asarray(predictors))

        null_survival_function = mutation_spectrum_utils.NullMultiplicitySurvivalFunction.from_parallelism_statistics(
            gene_parallelism_statistics)
        # default base is 10
        theory_ms = np.logspace(-2, 2, 100)
        theory_survivals = null_survival_function(theory_ms)
        theory_survivals /= theory_survivals[0]
        sys.stderr.write("Done!\n")

        ax_multiplicity.plot(theory_ms, theory_survivals, lw=3,
                             color=pt.get_colors(treatment), alpha=0.8,
                             ls=':', zorder=1)

        n_mutations = len(predictors)
        if n_mutations > 0:
            # Fraction of mutations in genes with multiplicity >= m.
            observed_survival = (n_mutations - np.arange(
                0, n_mutations)) * 1.0 / n_mutations
            ax_multiplicity.plot(predictors, observed_survival, lw=3,
                                 color=pt.get_colors(treatment), alpha=0.8,
                                 ls='--',
                                 label=str(int(10**int(treatment))) + '-day',
                                 drawstyle='steps', zorder=2)

        if pvalue < 0.001:
            pretty_pvalue = r'$\ll 0.001$'
        else:
            pretty_pvalue = '=' + str(round(pvalue, 4))
        g_score_p_label = r'$\Delta \ell_{{{}}}=$'.format(
            str(10**int(treatment))) + str(round(
                G, 3)) + ', ' + r'$P$' + pretty_pvalue
        ax_multiplicity.text(0.26, label_y_axes[treatment_idx],
                             g_score_p_label, fontsize=7, ha='center',
                             va='center', color='k',
                             transform=ax_multiplicity.transAxes)

        ax_mult_freq.scatter(ax_mult_freqs_x, ax_mult_freqs_y,
                             color=pt.get_colors(treatment),
                             edgecolors=pt.get_colors(treatment),
                             marker=pt.plot_species_marker(taxon),
                             alpha=alpha_treatment_dict[treatment])
        all_mults.extend(ax_mult_freqs_x)
        all_freqs.extend(ax_mult_freqs_y)

    # make treatment pairs
    treatments_in_taxon.sort(key=float)
    ccv = ColorConverter()
    for i in range(0, len(treatments_in_taxon)):
        for j in range(i + 1, len(treatments_in_taxon)):
            treatment_i = treatments_in_taxon[i]
            treatment_j = treatments_in_taxon[j]

            ax_mult_i_j = plt.subplot2grid((2, 3), (1, i + j - 1), colspan=1)
            ax_mult_i_j.set_xscale('log', base=10)
            ax_mult_i_j.set_yscale('log', base=10)
            ax_mult_i_j.set_xlabel(str(10**int(treatment_i)) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            ax_mult_i_j.set_ylabel(str(10**int(treatment_j)) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            ax_mult_i_j.plot([0.05, 200], [0.05, 200], lw=3, c='grey',
                             ls='--', zorder=1)
            ax_mult_i_j.set_xlim([0.05, 200])
            ax_mult_i_j.set_ylim([0.05, 200])
            ax_mult_i_j.text(-0.1, 1.07, pt.sub_plot_labels[2 + i + j],
                             fontsize=18, fontweight='bold', ha='center',
                             va='center', transform=ax_mult_i_j.transAxes)

            # A gene missing from one treatment's convergence matrix is
            # treated like a gene with zero multiplicity there (excluded).
            multiplicity_pair = [
                (multiplicity_dict[gene_name][treatment_i],
                 multiplicity_dict[gene_name][treatment_j])
                for gene_name in sorted(multiplicity_dict)
                if multiplicity_dict[gene_name].get(treatment_i, 0) > 0
                and multiplicity_dict[gene_name].get(treatment_j, 0) > 0
            ]
            significant_multiplicity_pair = [
                (significant_multiplicity_values_dict[gene_name][treatment_i],
                 significant_multiplicity_values_dict[gene_name][treatment_j])
                for gene_name in sorted(significant_multiplicity_values_dict)
                if treatment_i in
                significant_multiplicity_values_dict[gene_name]
                and treatment_j in
                significant_multiplicity_values_dict[gene_name]
            ]

            # get mean colors
            color_1 = np.array(ccv.to_rgb(pt.get_colors(treatment_i)))
            color_2 = np.array(ccv.to_rgb(pt.get_colors(treatment_j)))
            mix_color = 0.7 * (color_1 + color_2)
            mix_color = np.min([mix_color, [1.0, 1.0, 1.0]], 0)
            if (treatment_i == '0') and (treatment_j == '1'):
                mix_color = 'gold'

            mult_i = [pair[0] for pair in multiplicity_pair]
            mult_j = [pair[1] for pair in multiplicity_pair]
            ax_mult_i_j.scatter(mult_i, mult_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color, edgecolors='none',
                                alpha=0.8, s=90, zorder=2)

            mult_significant_i = [
                pair[0] for pair in significant_multiplicity_pair
            ]
            mult_significant_j = [
                pair[1] for pair in significant_multiplicity_pair
            ]
            ax_mult_i_j.scatter(mult_significant_i, mult_significant_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color, edgecolors='k', lw=1.5,
                                alpha=0.7, s=90, zorder=3)

            mult_ij = mult_significant_i + mult_significant_j + mult_i + mult_j
            # With no points, keep the default [0.05, 200] limits set above.
            if mult_ij:
                ax_mult_i_j.set_xlim(
                    [min(mult_ij) * 0.5, max(mult_ij) * 1.5])
                ax_mult_i_j.set_ylim(
                    [min(mult_ij) * 0.5, max(mult_ij) * 1.5])

    # Pass the gene sets themselves so matplotlib-venn derives the exclusive
    # region sizes; a 7-tuple would be read as exclusive region sizes, not as
    # set and intersection sizes.  A treatment without a parallel-genes file
    # contributes an empty set.
    venn_sets = tuple(
        set(significant_multiplicity_dict.get(treatment, ()))
        for treatment in ('0', '1', '2'))
    venn3(subsets=venn_sets, ax=ax_venn, set_labels=('', '', ''),
          set_colors=(pt.get_colors('0'), pt.get_colors('1'),
                      pt.get_colors('2')))
    venn3_circles(subsets=venn_sets, ax=ax_venn, linestyle='dashed')

    if all_mults:
        ax_mult_freq.set_xlim([min(all_mults) * 0.5, max(all_mults) * 1.5])
        ax_mult_freq.set_ylim([min(all_freqs) * 0.5, max(all_freqs) * 1.5])

    fig.suptitle(pt.latex_dict[taxon], fontsize=30)
    fig.subplots_adjust(wspace=0.3)
    fig_name = pt.get_path() + "/figs/multiplicity_%s.jpg" % taxon
    fig.savefig(fig_name, format='jpg', bbox_inches="tight", pad_inches=0.4,
                dpi=600)
    plt.close()
G_dict_all[taxon][treatment][fmax_cutoff]['G_mean']) delta_025.append( G_dict_all[taxon][treatment][fmax_cutoff]['G_025']) delta_975.append( G_dict_all[taxon][treatment][fmax_cutoff]['G_975']) delta_l_list = np.asarray(delta_l_list) delta_025 = np.asarray(delta_025) delta_975 = np.asarray(delta_975) ax.errorbar(fmax_cutoffs, delta_l_list, yerr = [ delta_l_list-delta_025, delta_975-delta_l_list] , \ fmt = 'o', alpha = 1, barsabove = True, marker = pt.plot_species_marker(taxon), \ mfc = 'white', mec = 'white', lw=3, c = 'k', zorder=1, ms=17) ax.scatter(fmax_cutoffs, delta_l_list, marker=pt.plot_species_marker(taxon), s = 150, \ linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2) # now do divergence significant_multiplicity_dict = {} for taxon in pt.taxa: significant_multiplicity_dict[taxon] = {} for treatment_idx, treatment in enumerate(pt.treatments): significant_multiplicity_taxon_path = pt.get_path( ) + '/data/timecourse_final/parallel_genes_%s.txt' % (treatment + taxon) if os.path.exists(significant_multiplicity_taxon_path) == False: continue significant_multiplicity_taxon = open(
#marker_style = dict(color='k', marker='o', # markerfacecoloralt=color_1, # markerfacecolor=color_2 ) #ax_count_divergence_treatment_pair.append(ax_count_divergence) #treatment_pair_slopes.append(new_slope) ax_count_divergence+=1 #record_str = ",".join(['%s_%s' % treatment_pair_set, str(taxon), pt.tree_name_dict[taxon], str(slope), str(slope_std_error)]) #record_strs.append(record_str) marker_style = dict(color='k', marker='o', markerfacecoloralt=pt.get_colors(treatment_pair[1]), markerfacecolor=pt.get_colors(treatment_pair[0]) ) divergence_Z_mean = np.mean(divergence_Z_pearsons_pair) divergence_Z_se = np.std(divergence_Z_pearsons_pair) / np.sqrt(len(divergence_Z_pearsons_pair)) ax_divergence.errorbar(treatment_pair_idx, divergence_Z_mean, yerr =divergence_Z_se, \ fmt = 'o', alpha = 1, barsabove = True, marker = 'o', \ mfc = 'white', mec = 'white', lw=3.5, c = 'k', zorder=2, ms=17) ax_divergence.plot(treatment_pair_idx, np.mean(divergence_Z_pearsons_pair), markersize = 23, \ linewidth=2, alpha=1, zorder=3, fillstyle='left', **marker_style) #if treatment_pair_set == ('0','2'):
G_subsample_mean = np.mean(G_subsample_dict[treatment + taxon]) G_subsample_025 = G_subsample_dict[treatment + taxon][int(0.025 * subsamples)] G_subsample_975 = G_subsample_dict[treatment + taxon][int(0.975 * subsamples)] #xerr1 = [ [z_lclb_mpd_null_mean - lclb_mpd_025, z_lcpl_mpd_null_mean - lcpl_mpd_025, z_hclb_mpd_null_mean - hclb_mpd_025, z_hcpl_mpd_null_mean - hcpl_mpd_025 ] , # [lclb_mpd_975 - z_lclb_mpd_null_mean, lcpl_mpd_975 - z_lcpl_mpd_null_mean, hclb_mpd_975 - z_hclb_mpd_null_mean, hcpl_mpd_975 -z_hcpl_mpd_null_mean ]] plt.errorbar(int(treatment) + taxon_xaxis_dict[taxon], G_subsample_mean, yerr = [ [G_subsample_mean-G_subsample_025], [ G_subsample_975-G_subsample_mean]], \ fmt = 'o', alpha = 1, barsabove = True, marker = 's', \ mfc = 'white', mec = 'white', lw=3.5, c = 'k', zorder=1, ms=17) plt.scatter(int(treatment) + taxon_xaxis_dict[taxon], G_subsample_mean, marker='s', s = 250, \ linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2) plt.scatter(int(treatment) + taxon_xaxis_dict[taxon], G_all_mutations_dict[treatment+taxon], marker=pt.plot_species_marker(taxon), s = 250, \ linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2) plt.xlabel("Transfer time (days)", fontsize=20) plt.xticks((0, 1, 2), ('1', '10', '100'), fontsize=14) plt.rc('ytick', labelsize=12) plt.ylim([1.2, 6.2]) plt.ylabel("Net increase in log-likelihood, " r'$\Delta \ell$', fontsize=20) legend_elements = [
if len(mutations_list) == 0: continue mutations_list = np.asarray( mutations_list) #/ set_time_dict[taxon] #mutations_list = np.asarray([value[set_time][1] for key, value in mutation_trajectories.items() if (treatment+taxon in key) and (set_time in value.values())]) times_list = np.repeat(int(treatment), len(mutations_list)) ax.scatter( (10**times_list) + np.random.randn(len(times_list)) * 0.1, 10**mutations_list, s=140, linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), marker=pt.plot_species_marker(taxon), alpha=0.8, zorder=3) times_all_list.extend(times_list) mutations_all_list.extend(mutations_list) ax.set_ylim([(10**min(mutations_all_list)) * 0.5, (10**max(mutations_all_list)) * 2]) ax.set_xlim([(10**min(times_all_list)) * 0.5, (10**max(times_all_list)) * 2]) mutations_all_list = np.asarray( mutations_all_list) #/ set_time_dict[taxon] if taxon == 'J':
import statsmodels.stats.multitest as multitest np.random.seed(123456789) sub_plot_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'] all_subplot_counts = 0 n_permutations = 10000 #treatments=pt.treatments replicates = pt.replicates taxa = ['B', 'C', 'D', 'F', 'J', 'P'] #taxa = ['B'] legend_elements = [ Line2D([0], [0], color=pt.get_colors('0'), lw=2, label='1-day'), Line2D([0], [0], color=pt.get_colors('1'), lw=2, label='10-days'), Line2D([0], [0], color=pt.get_colors('2'), lw=2, label='100-day') ] nonsynonymous_types = set(['missense', 'nonsense']) synonymous_types = set(['synonymous']) non_appeared = {} non_fixed = {} syn_appeared = {} syn_fixed = {} targeted_Lsyn = {} targeted_Lnon = {}
PCs_ = principalComponents_df[ principalComponents_df.index.str.contains(treatment + taxon)] ax_pca.axhline(y=0, color='k', linestyle=':', alpha=0.8, zorder=1) ax_pca.axvline(x=0, color='k', linestyle=':', alpha=0.8, zorder=1) ax_pca.scatter(0, 0, marker="o", edgecolors='none', c='darkgray', s=120, zorder=2) ax_pca.scatter(PCs_.PC1.values, PCs_.PC2.values, \ c=pt.get_colors(treatment), marker=pt.plot_species_marker(taxon), s = 70, \ edgecolors=pt.get_colors(treatment), linewidth = 0.6, alpha = 0.8, zorder=4)#, edgecolors='none' pt.confidence_ellipse(PCs_.PC1.values, PCs_.PC2.values, ax_pca, n_std=2, edgecolor=pt.get_colors(treatment), linestyle='--', lw=4, zorder=3) # dn/ds populations_plot = [ treatment + taxon + replicate for replicate in replicates if treatment + taxon +
ax_count = 0 for taxon_list_idx, taxon_list in enumerate([['B','C','D'],['F','J','P']]): for taxon_idx, taxon in enumerate(taxon_list): ax = fig.add_subplot(gs[taxon_list_idx, taxon_idx]) ax.set_title(pt.latex_genus_bold_dict[taxon], fontsize=12, fontweight='bold') dnds_samples = [] for treatment in treatments: populations_plot = [ treatment+taxon+replicate for replicate in replicates if treatment+taxon+replicate not in pt.populations_to_ignore ] taxon_treatment_dnds_appeared = [non_appeared[population]/(syn_appeared[population]+(syn_appeared[population]==0))*taxon_Lsyn_dict[taxon]/taxon_Lnon_dict[taxon] for population in populations_plot] if len(taxon_treatment_dnds_appeared) < 2: continue ax.scatter( [int(treatment)] * len(taxon_treatment_dnds_appeared), taxon_treatment_dnds_appeared, marker=pt.plot_species_marker(taxon), linewidth=2, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), s=100, zorder=2, alpha=0.8) if len(taxon_treatment_dnds_appeared) > 2: ax.errorbar(int(treatment),numpy.mean(taxon_treatment_dnds_appeared), yerr= 2*numpy.std(taxon_treatment_dnds_appeared) / numpy.sqrt(len(taxon_treatment_dnds_appeared)), linestyle='-', c = 'k', marker=pt.plot_species_marker(taxon), lw = 2.5, zorder=3) #dnds_treatment.append(taxon_treatment_dnds_appeared) dnds_samples.append(taxon_treatment_dnds_appeared) ax.text(-0.1, 1.07, sub_plot_labels[ax_count], fontsize=12, fontweight='bold', ha='center', va='center', transform=ax.transAxes) ax.text(0.7, 0.9, r'$F=$'+ str(round( anova_F[ax_count],3) ), fontsize=10, ha='center', va='center', transform=ax.transAxes) ax.text(0.7, 0.8, r'$P_{BH}=$'+ str(round(pvals_corrected[ax_count], 3)) , fontsize=10, ha='center', va='center', transform=ax.transAxes) ax_count+=1 if taxon == 'J': ax.set_xticks([0,2]) ax.set_xticklabels( ['1','100'] )
gs = gridspec.GridSpec(nrows=2, ncols=1) fig = plt.figure(figsize = (10, 13)) ax_between_taxa = fig.add_subplot(gs[0, 0]) ax_between_treatments = fig.add_subplot(gs[1, 0]) ax_between_taxa.text(-0.1, 1.07, pt.sub_plot_labels[0], fontsize=12, fontweight='bold', ha='center', va='center', transform=ax_between_taxa.transAxes) ax_between_treatments.text(-0.1, 1.07, pt.sub_plot_labels[1], fontsize=12, fontweight='bold', ha='center', va='center', transform=ax_between_treatments.transAxes) for treatment_idx, treatment in enumerate(divergence_dict_between_taxa.keys()): Z_corr = divergence_dict_between_taxa[treatment]['Z_corr'] marker_style = dict(color=pt.get_colors(treatment), markerfacecoloralt='white', markerfacecolor=pt.get_colors(treatment), mew=3) ax_between_taxa.plot(treatment, Z_corr, markersize = 30, marker = 'o', \ linewidth=0.4, alpha=1, fillstyle='left', zorder=2 , **marker_style) ax_between_taxa.set_xlim([-0.5,2.5]) ax_between_taxa.set_ylim([-33,5]) ax_between_taxa.axhline( y=0, color='k', lw=3, linestyle=':', alpha = 1, zorder=1) ax_between_taxa.text(0.125, 0.91, 'Convergence', fontsize=15, fontweight='bold', ha='center', va='center', transform=ax_between_taxa.transAxes)
null_survival_function = mutation_spectrum_utils.NullMultiplicitySurvivalFunction.from_parallelism_statistics( gene_parallelism_statistics) # default base is 10 theory_ms = np.logspace(-2, 2, 100) theory_survivals = null_survival_function(theory_ms) theory_survivals /= theory_survivals[0] sys.stderr.write("Done!\n") # step function ax_multiplicity.plot( predictors, (len(predictors) - np.arange(0, len(predictors))) * 1.0 / len(predictors), lw=3, color=pt.get_colors(treatment), alpha=0.8, ls=pt.get_taxon_ls(taxon), label='Observed ' + multiplicity_label, drawstyle='steps', zorder=2) ax_multiplicity.plot(theory_ms, theory_survivals, lw=3, color='grey', alpha=0.8, ls=pt.get_taxon_ls(taxon), label='Null ' + multiplicity_label, zorder=1)
for fmax_cutoff in fmax_cutoffs: delta_l_list.append(G_dict_all[taxon][treatment][fmax_cutoff]['G_mean']) delta_025.append(G_dict_all[taxon][treatment][fmax_cutoff]['G_025']) delta_975.append(G_dict_all[taxon][treatment][fmax_cutoff]['G_975']) delta_l_list = np.asarray(delta_l_list) delta_025 = np.asarray(delta_025) delta_975 = np.asarray(delta_975) ax.errorbar(fmax_cutoffs, delta_l_list, yerr = [ delta_l_list-delta_025, delta_975-delta_l_list] , \ fmt = 'o', alpha = 1, barsabove = True, marker = pt.plot_species_marker(taxon), \ mfc = 'white', mec = 'white', lw=2, c = 'k', zorder=1, ms=17) ax.scatter(fmax_cutoffs, delta_l_list, marker=pt.plot_species_marker(taxon), s = 150, \ linewidth=3, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), alpha=1, zorder=2) if taxon == 'P': marker_size_legend=16 else: marker_size_legend=10 legend_elements = [Line2D([0], [0], color='w', markerfacecolor=pt.get_colors('0'), marker=pt.plot_species_marker(taxon), markersize=marker_size_legend, label='1-Day'), Line2D([0], [0], color='w', markerfacecolor=pt.get_colors('1'), marker=pt.plot_species_marker(taxon), markersize=marker_size_legend, label='10-Days')] ax.legend(handles=legend_elements, loc='upper left') fig.subplots_adjust(hspace=0.35,wspace=0.3) #hspace=0.3, wspace=0.5
if slope < null: new_CI_025 = new_slope - delta_CI_975 new_CI_975 = new_slope + delta_CI_025 else: new_CI_025 = new_slope - delta_CI_025 new_CI_975 = new_slope + delta_CI_975 return new_slope, new_CI_025, new_CI_975 new_slope, new_CI_025, new_CI_975 = flip_slope_and_CIs( slope, CI_025, CI_975) # get mean colors ccv = ColorConverter() color_1 = np.array(ccv.to_rgb(pt.get_colors(treatment_pair[0]))) color_2 = np.array(ccv.to_rgb(pt.get_colors(treatment_pair[1]))) mix_color = 0.7 * (color_1 + color_2) mix_color = np.min([mix_color, [1.0, 1.0, 1.0]], 0) if (treatment_pair[0] == '0') and (treatment_pair[1] == '1'): #mix_color = pt.lighten_color(mix_color, amount=2.8) mix_color = 'gold' plt.errorbar(ax_count, new_slope, yerr = [ [new_slope-new_CI_025], [new_CI_975-new_slope]], \ fmt = 'o', alpha = 1, barsabove = True, marker = pt.plot_species_marker(taxon), \ mfc = 'white', mec = 'white', lw=3, c = 'k', zorder=2, ms=17) plt.scatter(ax_count, new_slope, marker=pt.plot_species_marker(taxon), s = 250, \ linewidth=2, facecolors=mix_color, edgecolors='k', alpha=1, zorder=3)
treatment_taxon_populations = [] Mts_all_list = [] Ms_all_list = [] for replicate in replicates: population = treatment + taxon + replicate Mts, Ms = mutation_trajectories[population] ax_t_vs_M.plot(Mts, 10**Ms, 'o-', color=pt.get_colors(treatment), marker=pt.plot_species_marker(taxon), fillstyle=pt.plot_species_fillstyle(taxon), alpha=1, markersize=7, linewidth=3, markeredgewidth=1.5, zorder=1) Mts_all_list.append(Mts) Ms_all_list.append(Ms) Mts_all = np.concatenate(Mts_all_list) Ms_all = np.concatenate(Ms_all_list) Mts_shifted_all = Mts_all - min(Mts_all)
def plot_mutation_trajectory_taxon(taxon):
    """Plot per-population mutation trajectories for one taxon and save a PDF.

    One column is drawn per treatment and five rows per column:

    1. mutations over time, M(t), with the across-population average;
    2. the ratio M(t) / M(t-1), with the across-population average;
    3. fixed mutations over time;
    4. median mutation frequency over time;
    5. number of mutations over time.

    Populations listed in ``pt.populations_to_ignore`` are skipped.  A
    treatment with no remaining populations keeps its (empty) column.  The
    figure is written to ``<pt.get_path()>/figs/rate_<taxon>.pdf``.

    Args:
        taxon: Taxon identifier; taxon ``'J'`` is plotted with treatments
            ``'0'`` and ``'2'`` only, every other taxon with
            ``pt.treatments``.
    """
    if taxon == 'J':
        treatments = ['0', '2']
        sub_plot_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
        sub_plot_count_step = 2
        dim = (6, 15)
    else:
        treatments = pt.treatments
        sub_plot_labels = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o'
        ]
        sub_plot_count_step = 3
        dim = (10, 15)

    sys.stderr.write("Loading mutation data...\n")

    # population -> (times, values) for each quantity that is plotted
    mutation_trajectories = {}
    fixed_mutation_trajectories = {}
    delta_mutation_trajectories = {}
    median_trajectories = {}
    n_muts_trajectories = {}

    for treatment in treatments:
        for replicate in pt.replicates:
            population = treatment + taxon + replicate
            if population in pt.populations_to_ignore:
                continue

            sys.stderr.write("Processing %s...\t" % population)
            times, Ms, fixed_Ms = parse_file.get_mutation_fixation_trajectories(
                population)
            times_, medians_log10, num_muts = parse_file.get_mutation_fixation_trajectories_median_freq_and_mut_number(
                population)

            # A bare float in place of a trajectory is replaced by an
            # all-zero trajectory of matching length.
            if isinstance(fixed_Ms, float):
                fixed_Ms = np.asarray([0] * len(times))

            fixed_mutation_trajectories[population] = (times, fixed_Ms)
            # M(t) is stored in log10 and back-transformed when plotted.
            mutation_trajectories[population] = (times, np.log10(Ms))
            delta_mutation_trajectories[population] = (
                times[1:], np.log10(Ms[1:] / Ms[:-1]))
            median_trajectories[population] = (times_, medians_log10)
            n_muts_trajectories[population] = (times_, num_muts)

            sys.stderr.write("analyzed %d mutations!\n" % len(Ms))

    fig = plt.figure(figsize=dim)
    grid_shape = (5, len(treatments))

    for column_count, treatment in enumerate(treatments):
        ax_t_vs_M = plt.subplot2grid(grid_shape, (0, column_count), colspan=1)
        ax_t_vs_delta_M = plt.subplot2grid(grid_shape, (1, column_count),
                                           colspan=1)
        ax_t_vs_F = plt.subplot2grid(grid_shape, (2, column_count), colspan=1)
        ax_t_vs_median_freq = plt.subplot2grid(grid_shape, (3, column_count),
                                               colspan=1)
        ax_t_vs_number_muts = plt.subplot2grid(grid_shape, (4, column_count),
                                               colspan=1)

        # Panel letters run left to right within a row, then down the rows.
        row_axes = (ax_t_vs_M, ax_t_vs_delta_M, ax_t_vs_F,
                    ax_t_vs_median_freq, ax_t_vs_number_muts)
        for row, ax in enumerate(row_axes):
            ax.text(-0.1, 1.07,
                    sub_plot_labels[column_count + sub_plot_count_step * row],
                    fontsize=14, fontweight='bold', ha='center', va='center',
                    transform=ax.transAxes)

        ax_t_vs_M.set_title(str(10**int(treatment)) + '-day transfers',
                            fontsize=17)

        if column_count == 0:
            ax_t_vs_M.set_ylabel('Mutations, ' + r'$M(t)$', fontsize=15)
            ax_t_vs_F.set_ylabel('Fixed mutations', fontsize=15)
            ax_t_vs_delta_M.set_ylabel('Change in mutations,\n' +
                                       r'$M(t)/M(t-1)$', fontsize=15)
            ax_t_vs_median_freq.set_ylabel(
                'Median mutation freq.\nat time $t$', fontsize=15)
            ax_t_vs_number_muts.set_ylabel('Number of mutations\nat time $t$',
                                           fontsize=15)

        treatment_taxon_populations = [
            treatment + taxon + replicate for replicate in pt.replicates
            if treatment + taxon + replicate not in pt.populations_to_ignore
        ]
        if not treatment_taxon_populations:
            # Nothing to draw, and min()/max() below would fail on empty
            # lists; leave this column empty.
            continue

        marker_kwargs = dict(color=pt.get_colors(treatment),
                             marker=pt.plot_species_marker(taxon),
                             fillstyle=pt.plot_species_fillstyle(taxon))
        # The marker is given only by keyword; a marker in the format string
        # would be overridden by the keyword and trigger a matplotlib warning.
        line_kwargs = dict(marker_kwargs, alpha=1, markersize=7, linewidth=3,
                           markeredgewidth=1.5, zorder=1)

        all_medians = []
        all_numbers = []

        for population in treatment_taxon_populations:
            Mts, Ms = mutation_trajectories[population]
            fixed_Mts, fixed_Ms = fixed_mutation_trajectories[population]
            deltaMts, deltaMs = delta_mutation_trajectories[population]
            median_ts, median_values = median_trajectories[population]
            n_muts_ts, n_muts_values = n_muts_trajectories[population]

            # back transform to format plot axes
            ax_t_vs_M.plot(Mts, 10**Ms, '-', **line_kwargs)
            ax_t_vs_delta_M.plot(deltaMts, 10**deltaMs, **marker_kwargs)
            ax_t_vs_F.plot(fixed_Mts, fixed_Ms, '-', **line_kwargs)
            ax_t_vs_median_freq.plot(median_ts, 10**median_values, '-',
                                     **line_kwargs)
            ax_t_vs_number_muts.plot(n_muts_ts, n_muts_values, '-',
                                     **line_kwargs)

            all_medians.extend(median_values.tolist())
            all_numbers.extend(n_muts_values.tolist())

        ax_t_vs_M.set_yscale('log', base=10)
        ax_t_vs_M.tick_params(axis='x', labelsize=8)
        ax_t_vs_delta_M.set_yscale('log', base=10)
        ax_t_vs_median_freq.set_yscale('log', base=10)
        ax_t_vs_median_freq.yaxis.set_tick_params(labelsize=8)
        ax_t_vs_number_muts.set_yscale('log', base=10)
        ax_t_vs_number_muts.tick_params(axis='y', labelsize=8)

        # Medians are stored in log10, so back-transform before padding.
        ax_t_vs_median_freq.set_ylim(
            [10**(min(all_medians)) * 0.8, 10**(max(all_medians)) * 1.2])
        ax_t_vs_number_muts.set_ylim(
            [min(all_numbers) * 0.8, max(all_numbers) * 1.2])

        avg_Mts, avg_Ms = timecourse_utils.average_trajectories([
            mutation_trajectories[population]
            for population in treatment_taxon_populations
        ])
        avg_deltaMts, avg_deltaMs = timecourse_utils.average_trajectories([
            delta_mutation_trajectories[population]
            for population in treatment_taxon_populations
        ])

        ax_t_vs_delta_M.axhline(y=1, c='grey', linestyle=':', lw=3, zorder=1)
        ax_t_vs_M.plot(avg_Mts, 10**avg_Ms, '--', color='k', marker=" ",
                       alpha=1, linewidth=4, zorder=2)
        ax_t_vs_delta_M.plot(avg_deltaMts, 10**avg_deltaMs, '--', color='k',
                             marker=" ", alpha=1, linewidth=4, zorder=2)

        # keep them on the same y axes
        if taxon == 'C':
            ax_t_vs_delta_M.set_ylim([0.2, 42])
        elif taxon == 'D':
            ax_t_vs_delta_M.set_ylim([0.2, 20])

        if column_count == 0:
            legend_elements = [
                Line2D([0], [0], ls='--', color='k', lw=1.5,
                       label=r'$\overline{M}(t)$')
            ]
            ax_t_vs_M.legend(handles=legend_elements, loc='lower right',
                             fontsize=8)

    fig.text(0.53, 0.05, 'Days, ' + r'$t$', ha='center', fontsize=28)
    fig.suptitle(pt.latex_genus_dict[taxon], fontsize=30)
    fig_name = pt.get_path() + '/figs/rate_%s.pdf' % taxon
    fig.savefig(fig_name, format='pdf', bbox_inches="tight", pad_inches=0.4,
                dpi=600)
    plt.close()
height="100%", loc='lower right', bbox_to_anchor=(0.12, 0.07, 0.4, 0.38), bbox_transform=analysis_ax.transAxes) for treatment_idx, treatment in enumerate(pt.treatments): for taxon in pt.taxa: f_max_array_sort = np.sort( r2s_obs_dict[treatment][taxon][analysis]) cdf = 1 - np.arange(len(f_max_array_sort)) / float( len(f_max_array_sort)) analysis_ax.plot(f_max_array_sort, cdf, c=pt.get_colors(treatment), ls=pt.get_taxon_ls(taxon), lw=3, alpha=0.8) D_treatment = ks_dict[analysis][treatment]['D'] marker_style = dict(color=pt.get_colors(treatment), markerfacecoloralt='white', markerfacecolor=pt.get_colors(treatment)) ins_ks.plot(treatment_idx, D_treatment, markersize = 11, marker = 'o', \ linewidth=0.4, alpha=1, fillstyle='left', zorder=2 , **marker_style) if ks_dict[analysis][treatment]['p_value_bh'] < 0.05: ins_ks.text(treatment_idx,
treatment + taxon + replicate for replicate in replicates ] taxon_treatment_dnds_appeared = [ non_appeared[population] / (syn_appeared[population] + (syn_appeared[population] == 0)) * Lsyn / Lnon for population in populations_plot ] ax.scatter([int(treatment) + jitter_shift[taxon_idx]] * len(taxon_treatment_dnds_appeared), taxon_treatment_dnds_appeared, marker=pt.plot_species_marker(taxon), linewidth=2, facecolors=pt.get_scatter_facecolor(taxon, treatment), edgecolors=pt.get_colors(treatment), s=120, zorder=2) ax.errorbar(int(treatment) + jitter_shift[taxon_idx], numpy.mean(taxon_treatment_dnds_appeared), yerr=2 * numpy.std(taxon_treatment_dnds_appeared) / numpy.sqrt(len(taxon_treatment_dnds_appeared)), linestyle='-', c='k', marker=pt.plot_species_marker(taxon), lw=2.5) dnds_treatment.append(taxon_treatment_dnds_appeared) t, p = stats.ttest_ind(dnds_treatment[0], dnds_treatment[1], equal_var=False)
principalComponents_ = pca_.fit_transform(X) principalComponents_df = pd.DataFrame(principalComponents_, index=df.columns, columns=['PC1', 'PC2']) print(principalComponents_df) fig, ax = plt.subplots(figsize=(6, 6)) for treatment in treatments: PCs_treatment = principalComponents_df[ principalComponents_df.index.str.contains(treatment)] ax.scatter(PCs_treatment.PC1.values, PCs_treatment.PC2.values, \ c=pt.get_colors(treatment), marker = 'o', s = 70, \ edgecolors='#244162', linewidth = 0.6, alpha = 0.5, zorder=2)#, edgecolors='none' confidence_ellipse(PCs_treatment.PC1.values, PCs_treatment.PC2.values, ax, n_std=2, edgecolor=pt.get_colors(treatment), linestyle='--', lw=3) ax.set_xlabel('PC 1 (' + str(round(pca_.explained_variance_ratio_[0] * 100, 2)) + '%)', fontsize=13) ax.set_ylabel('PC 2 (' + str(round(pca_.explained_variance_ratio_[1] * 100, 2)) + '%)',