def homogeneity_plot(sample_set_names_list, plot_file_prefix):
    """Draw a grid of strandness bar charts and save it in several formats.

    The grid has one row per sample set and one column per feature type
    ("all", "pre_5'_UTR", "5'_UTR", "CDS", "3'_UTR").  Every panel shows two
    bar series taken from ``CollectionVCF.count_strandness(...)["all"]``
    (entry 0 drawn as 'C->T', entry 1 as 'G->A') over four x positions
    labelled 'None', '+', '-' and 'Both'.  Above each panel the chi-square
    p-value and the phi coefficient of the 2xN table built from the inner
    entries (first and last position dropped) of both series are printed.

    Side effects: sets the global matplotlib ``font.size`` to 6, reads the
    VCF files by relative file name, and writes
    ``plot_file_prefix`` + ".pdf" / ".svg" / ".eps" / ".png".

    :param sample_set_names_list: sample set names; each is used as a VCF
        file name prefix and, without its last three characters, as the row
        caption.
    :param plot_file_prefix: path prefix of the output image files.
    """
    rcParams.update({'font.size': 6})
    feature_type_list = ["all", "pre_5'_UTR", "5'_UTR", "CDS", "3'_UTR"]
    n_rows = len(sample_set_names_list)
    n_cols = len(feature_type_list)

    # 1.47 inch per column, 2 inch per row.
    plt.figure(1, dpi=600, figsize=(n_cols * 1.47, 2 * n_rows))

    # Bar geometry is identical in every panel.
    n_groups = 4
    points = np.arange(n_groups)
    bar_width = 0.35

    # Row letters are generated instead of being zipped against a fixed
    # ten-letter list, which silently dropped every sample after the tenth
    # while the grid was still sized for all of them.
    for row, sample in enumerate(sample_set_names_list):
        letter = _panel_row_label(row)
        for col, feature_type in enumerate(feature_type_list):
            vcf_file = _strandness_vcf_name(sample, feature_type)
            collection = CollectionVCF(from_file=True, vcf_file=vcf_file)
            # NOTE(review): the argument is presumably an output prefix for
            # count_strandness - confirm against CollectionVCF.
            sample_data = collection.count_strandness("%s_%s_variants" % (sample, feature_type))

            ax = plt.subplot(n_rows, n_cols, row * n_cols + col + 1)

            C_values = sample_data["all"][0]
            G_values = sample_data["all"][1]
            plt.bar(points, C_values, bar_width, color='b', label='C->T')
            plt.bar(points + bar_width, G_values, bar_width, color='g', label='G->A')

            # Only the inner positions ('+' and '-' on the x axis) enter the
            # contingency table.
            table = [C_values[1:-1], G_values[1:-1]]
            p_value = chi2_contingency(table)[1]
            phi = phi_coefficient_correlation(table)

            if row == n_rows - 1:
                # x axis caption only under the bottom row
                plt.xlabel('Strand')
            if col == 0:
                # y axis caption and bold sample caption only left of the
                # first column; the last three characters of the sample
                # name are cut off for the caption
                plt.ylabel('N of SNV')
                plt.text(-0.50, 0.5, sample[:-3], rotation=90, fontweight="bold",
                         transform=ax.transAxes, fontsize=10,
                         horizontalalignment='center',
                         verticalalignment='center')

            # Small p-values switch to scientific notation so they do not
            # print as 0.00.
            if p_value >= 0.01:
                title_text = r"$p=%.2f, \varphi=%.2f$" % (p_value, phi)
            else:
                title_text = r"$p=%.1e, \varphi=%.2f$" % (p_value, phi)
            plt.text(0.23, 1.1, title_text, rotation=0, transform=ax.transAxes,
                     fontsize=8, verticalalignment='center')

            # Panel id: row letter + 1-based column number, e.g. "B3".
            plt.title("%s%i" % (letter, col + 1), fontweight='bold', fontsize=11, loc="left")
            plt.xticks(points + bar_width, ('None', '+', '-', 'Both'))

            if col == n_cols - 1:
                # legend only in the last column
                plt.legend(prop={'size': 8})
            if row == 0:
                # feature type header only above the top row
                plt.text(0.5, 1.25, feature_type, rotation=0, fontweight="bold",
                         transform=ax.transAxes, fontsize=10,
                         horizontalalignment='center',
                         verticalalignment='center')

    plt.subplots_adjust(hspace=0.5, wspace=0.25, top=0.88, left=0.09, right=0.99)
    for extension in [".pdf", ".svg", ".eps", ".png"]:
        plt.savefig("%s%s" % (plot_file_prefix, extension))
    plt.close()


def _strandness_vcf_name(sample, feature_type):
    """Return the VCF file name used for ``sample`` and ``feature_type``."""
    if feature_type == "all":
        return "%s_good.vcf" % sample
    if feature_type == "pre_5'_UTR":
        return "%s_pre_UTR_variants_only_intergenic_l_300.vcf" % sample
    return "%s_%s_variants.vcf" % (sample, feature_type)


def _panel_row_label(row):
    """Return the label of a zero-based row: A..Z, then AA, AB, ..."""
    label = ""
    remaining = row + 1
    while remaining:
        remaining, offset = divmod(remaining - 1, 26)
        label = chr(ord("A") + offset) + label
    return label
if __name__ == "__main__":
    # Strandness bars for the "<sample>_good.vcf" variant set of every
    # sample, one panel per sample on a 2x2 grid.  This block only draws the
    # bars; it neither labels nor saves the figure.
    workdir = "/media/mahajrod/d9e6e5ee-1bf7-4dba-934e-3f898d9611c8/Data/LAN2xx/combined_vcf/clusters/all/all/"
    # Not used by the loop below; kept because it is a module-level name
    # that other parts of the script may rely on.
    letter_list_part1 = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
    os.chdir(workdir)

    sample_set_names_list = ["PmCDA1_3d",
                             "PmCDA1_sub1_3d",
                             "PmCDA1_6d",
                             "PmCDA1_sub1_6d"
                             ]
    rcParams.update({'font.size': 7})
    plt.figure(1, dpi=300, figsize=(6, 6))

    # Bar geometry is the same for every panel.
    n_groups = 4
    points = np.arange(n_groups)
    bar_width = 0.35

    # enumerate() advances the subplot slot for each sample; the original
    # counter stayed at 1, so all samples were drawn into the first panel.
    for index, sample in enumerate(sample_set_names_list, start=1):
        collection = CollectionVCF(from_file=True, vcf_file=sample + "_good.vcf")
        sample_data = collection.count_strandness(sample + "_good_strandness")

        plt.subplot(2, 2, index)
        C_values = sample_data["all"][0]
        G_values = sample_data["all"][1]
        plt.bar(points, C_values, bar_width, color='b', label='C->T')
        plt.bar(points + bar_width, G_values, bar_width, color='g', label='G->A')
if __name__ == "__main__":
    # Strandness bars for the
    # "<sample>_pre_UTR_variants_only_intergenic_l_300.vcf" variant set of
    # every sample, one panel per sample on a 2x2 grid.  This block only
    # draws the bars; it neither labels nor saves the figure.
    workdir = "/media/mahajrod/d9e6e5ee-1bf7-4dba-934e-3f898d9611c8/Data/LAN2xx/combined_vcf/clusters/all/pre_UTR_strandness/"
    # Not used by the loop below; kept at module level because the code
    # following this block zips the sample names against it.
    letter_list_part1 = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
    os.chdir(workdir)

    sample_set_names_list = ["PmCDA1_3d",
                             "PmCDA1_sub1_3d",
                             "PmCDA1_6d",
                             "PmCDA1_sub1_6d"
                             ]
    rcParams.update({'font.size': 7})
    plt.figure(1, dpi=300, figsize=(6, 6))

    # Bar geometry is the same for every panel.
    n_groups = 4
    points = np.arange(n_groups)
    bar_width = 0.35

    # enumerate() advances the subplot slot for each sample; the original
    # counter stayed at 1, so all samples were drawn into the first panel.
    for index, sample in enumerate(sample_set_names_list, start=1):
        collection = CollectionVCF(
            from_file=True,
            vcf_file=sample + "_pre_UTR_variants_only_intergenic_l_300.vcf")
        sample_data = collection.count_strandness(
            sample + "_pre_UTR_variants_only_intergenic_l_300_strandness")

        plt.subplot(2, 2, index)
        C_values = sample_data["all"][0]
        G_values = sample_data["all"][1]
        plt.bar(points, C_values, bar_width, color='b', label='C->T')
        plt.bar(points + bar_width, G_values, bar_width, color='g', label='G->A')
os.chdir(workdir) sample_set_names_list = [ "PmCDA1_3d", "PmCDA1_sub1_3d", "PmCDA1_6d", "PmCDA1_sub1_6d" ] rcParams.update({'font.size': 7}) feature_type_list = ["5'_UTR", "CDS", "3'_UTR"] for feature_type in feature_type_list: plt.figure(1, dpi=300, figsize=(6, 6)) index = 1 for sample, letter in zip(sample_set_names_list, letter_list_part1): collection = CollectionVCF(from_file=True, vcf_file="%s_%s_variants.vcf" % (sample, feature_type)) sample_data = collection.count_strandness("%s_%s_variants" % (sample, feature_type)) plt.subplot(2, 2, index) n_groups = 4 points = np.arange(n_groups) bar_width = 0.35 C_values = sample_data["all"][0] G_values = sample_data["all"][1] rects1 = plt.bar(points, C_values, bar_width, color='b', label='C->T') rects2 = plt.bar(points + bar_width,