def setUp(self) -> None:
    """Create the shared fixture: a hue-split dataset, its boxplot and an Annotator."""
    # noinspection DuplicatedCode
    points = [("a", 15), ("a", 16), ("b", 17), ("b", 18)]
    records = [
        {'x': x_value, 'y': y_value, 'color': color}
        for color in ('blue', 'red')
        for x_value, y_value in points
    ]
    # Records are keyed 1..8 as columns, then transposed so each record
    # becomes one row with the columns x / y / color.
    self.df = pd.DataFrame.from_dict(dict(enumerate(records, start=1))).T

    plotting = {"data": self.df, "x": "x", "y": "y", "hue": 'color'}
    self.ax = sns.boxplot(**plotting)

    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(self.ax, pairs=pairs, **plotting)
    # One p-value per pair above, in the same order.
    self.pvalues = [0.03, 0.04, 0.9]
def test_wrong_plotter_engine(self):
    """Requesting ``engine="plotly"`` must raise NotImplementedError mentioning "plotly"."""
    barplot_ax = sns.barplot(**self.plotting)
    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    with self.assertRaisesRegex(NotImplementedError, "plotly"):
        self.annotator = Annotator(
            barplot_ax, plot="barplot", engine="plotly", pairs=pairs,
            **self.plotting)
def test_dodge_false_raises(self):
    """Passing ``dodge=False`` to the Annotator must raise ValueError mentioning "dodge"."""
    undodged_ax = sns.barplot(dodge=False, **self.plotting)
    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    with self.assertRaisesRegex(ValueError, "dodge"):
        self.annotator = Annotator(
            undodged_ax, dodge=False, plot="barplot", pairs=pairs,
            **self.plotting)
def test_fixed_offset(self):
    """Annotate a barplot with ``use_fixed_offset=True``; the test passes if nothing raises."""
    barplot_ax = sns.barplot(**self.plotting)
    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(
        barplot_ax, plot="barplot", pairs=pairs, **self.plotting)
    self.annotator.configure(test="Mann-Whitney", use_fixed_offset=True)
    self.annotator.apply_and_annotate()
def boxplot_with_test(data, x, y, pairs):
    """Draw a boxplot of ``y`` grouped by ``x`` and annotate ``pairs`` with Mann-Whitney p-values.

    Parameters
    ----------
    data : table indexable as ``data[mask][column]``
        Source data (the code filters it with ``data[data[x] == level][y]``).
    x, y : column names
        Grouping column and value column.
    pairs : iterable of pairs of ``x`` levels
        Element 0 and element 1 of each pair are the two groups compared.
    """
    plotting_parameters = dict(data=data, x=x, y=y)

    # The p-values are computed here and handed to the annotator, rather
    # than letting the annotator run the test itself.
    pvalues = []
    for pair in pairs:
        first_group = data[data[x] == pair[0]][y]
        second_group = data[data[x] == pair[1]][y]
        pvalues.append(mannwhitneyu(first_group, second_group).pvalue)

    ax = sns.boxplot(**plotting_parameters)

    # Add annotations
    annotator = Annotator(ax, pairs, **plotting_parameters)
    annotator.set_pvalues(pvalues)
    annotator.annotate()
def test_orient_horizontal(self):
    """Annotate a horizontal, dodged stripplot; the test passes if nothing raises."""
    # Start from the shared parameters, swap the two axes and request a
    # horizontal, dodged plot.
    plotting = dict(self.plotting)
    plotting.update(orient='h', x='y', y='x', dodge=True)

    stripplot_ax = sns.stripplot(**plotting)
    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(
        stripplot_ax, plot="stripplot", pairs=pairs, **plotting)
    self.annotator.configure(test="Mann-Whitney")
    self.annotator.apply_and_annotate()
def setUp(self) -> None:
    """Create the shared fixture: dataset, boxplot and a non-verbose Annotator."""
    x_levels = ["a", "a", "b", "b"] * 2
    y_values = [15, 16, 17, 18] * 2
    colors = ["blue"] * 4 + ["red"] * 4
    rows = {
        key: {"x": x_level, "y": y_value, "color": color}
        for key, (x_level, y_value, color) in enumerate(
            zip(x_levels, y_values, colors), start=1)
    }
    # Rows are keyed 1..8 as columns, then transposed into one row each.
    self.df = pd.DataFrame.from_dict(rows).T

    plotting = dict(data=self.df, x="x", y="y", hue="color")
    self.ax = sns.boxplot(**plotting)

    pairs = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(self.ax, pairs=pairs, verbose=False,
                               **plotting)
    # One p-value per pair above, in the same order.
    self.pvalues = [0.03, 0.04, 0.9]
def test_plot_and_annotate_facets(self):
    """Drive ``Annotator.plot_and_annotate_facets`` through ``FacetGrid.map_dataframe``."""
    annotator = Annotator(None, self.simple_pairs)

    # The grid takes the "data" and "hue" entries (faceting on the hue
    # column); "hue_order" is dropped, and whatever is left in
    # ``params_df`` is forwarded as the per-facet plot parameters.
    facet_data = self.params_df.pop("data")
    facet_column = self.params_df.pop("hue")
    grid = sns.FacetGrid(facet_data, col=facet_column, height=10,
                         sharey=False)
    self.params_df.pop("hue_order")

    configuration = {'test': 'Mann-Whitney', 'text_format': 'simple'}
    grid.map_dataframe(
        annotator.plot_and_annotate_facets,
        plot="boxplot",
        plot_params=self.params_df,
        configuration=configuration,
        annotation_func='apply_test',
        ax_op_after=[['set_xlabel', ['Group'], None]],
        annotation_params={'num_comparisons': 'auto'})
def test_order_in_x(self):
    """Building an Annotator with ``order=[0, 1, 2]`` must raise ValueError naming `order`."""
    # The parentheses are a regex group, not literal characters: the
    # pattern matches the text between them anywhere in the message.
    expected_message = "(specified in `order`)"
    pairs = [(0, 2)]
    with self.assertRaisesRegex(ValueError, expected_message):
        self.annot = Annotator(self.ax, pairs, data=self.data,
                               order=[0, 1, 2])
def test_unmatched_x_in_box_pairs_without_hue(self):
    """The pair ``(0, 2)`` without hue must raise ValueError naming `pairs`."""
    # The parentheses are a regex group, not literal characters: the
    # pattern matches the text between them anywhere in the message.
    expected_message = "(specified in `pairs`)"
    pairs = [(0, 2)]
    with self.assertRaisesRegex(ValueError, expected_message):
        self.annot = Annotator(self.ax, pairs, data=self.data)
def test_init_barplot(self):
    """An Annotator can be constructed for a barplot; passes if nothing raises."""
    barplot_ax = sns.barplot(data=self.data)
    pairs = [(0, 1)]
    self.annot = Annotator(barplot_ax, pairs, plot="barplot",
                           data=self.data)
def test_init_df(self):
    """An Annotator can be constructed from the ``params_df`` plot parameters."""
    plot_kwargs = self.params_df
    self.ax = sns.boxplot(**plot_kwargs)
    self.annot = Annotator(self.ax, pairs=self.pairs_for_df,
                           **plot_kwargs)
def test_init_simple(self):
    """An Annotator can be constructed from the axes, one pair and ``data`` alone."""
    pairs = [(0, 1)]
    self.annot = Annotator(self.ax, pairs, data=self.data)
def test_comparisons_correction_by_name(self):
    """A comparisons correction given by name ("BH") is accepted end to end."""
    self.ax = sns.boxplot(ax=self.ax, data=self.data2)
    pairs = [("X", "Y")]
    annot = Annotator(self.ax, pairs=pairs, data=self.data2)
    settings = {"test": "Mann-Whitney", "comparisons_correction": "BH"}
    annot.configure(**settings)
    annot.apply_and_annotate()
# Keyword arguments shared by the box plot, the strip plot and the annotator.
plotting_parameters = dict(
    data=tmp,
    x='classe_mortalidade',
    y=ind,
    order=['A', 'B', 'C'],
)
#tmp[['classe_mortalidade', i]].groupby('classe_mortalidade').mean()

# One t-test p-value (scipy ``ttest_ind``) per requested pair of classes.
pvalues = []
for p in pairs:
    group_a = tmp.loc[tmp.classe_mortalidade == p[0], ind]
    group_b = tmp.loc[tmp.classe_mortalidade == p[1], ind]
    pvalues.append(stats.ttest_ind(group_a, group_b).pvalue)
# Only used by the disabled ``set_custom_annotations`` call below.
formatted_pvalues = [f"p={p:.2e}" for p in pvalues]

ax = get_log_ax()
box_ax = sns.boxplot(**plotting_parameters)
title_values = data[k][0].loc[
    ind, ['row m/z', 'row retention time']].tolist()
box_ax.set_title('mz:{:.2f} rt:{:.2f}'.format(*title_values))
sns.stripplot(**plotting_parameters)

annotator = Annotator(ax, pairs, **plotting_parameters)
annotator.set_pvalues(pvalues)
#annotator.set_custom_annotations(formatted_pvalues)
annotator.annotate()

#plt.savefig("plot1A.png", bbox_inches='tight')
pdf.savefig()
plt.close()
def main():
    """Plot per-virus transition rates by mutation type with statistical annotations.

    Reads the cached ``all_data.csv``, draws a log-scaled boxen plot of
    "Transition rate" per "Mutation" split by "Virus", annotates the pairs
    with Bonferroni-corrected Mann-Whitney tests and saves the figure into
    a date-stamped output directory.
    """
    date = datetime.date.today().strftime("%Y%m%d")

    # NOTE: the code that built all_data.csv was already disabled here. It
    # loaded the per-virus FITS mutation outputs with post_data_mutation,
    # tagged each with a "Virus" label, concatenated them, renamed
    # "allele0_1" to "Transition rate" and relabelled the mutations. Only
    # the cached CSV is read now.

    # Plots - local
    all_data = pd.read_csv(
        "/Users/odedkushnir/PhD_Projects/fitness/all_data.csv")
    output_dir = (
        "/Users/odedkushnir/PhD_Projects/fitness/{0}_fits_syn_plots".format(
            date))
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    plt.style.use('classic')
    sns.set_palette("Set2")
    mutation_order = ["C>U", "G>A", "U>C", "A>G"]
    virus_order = ["RVB14 #1", "RVB14 #2", "RVB14 #3", "CVB3", "OPV2", "PV1"]
    plot_kwargs = dict(x="Mutation", y="Transition rate", data=all_data,
                       order=mutation_order, hue="Virus",
                       hue_order=virus_order)

    g1 = sns.boxenplot(**plot_kwargs)
    g1.set_yscale("log")

    # Pair format: [((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]
    # Within each virus, A>G and then U>C are each compared with C>U and G>A.
    pairs = [
        ((tested, virus), (reference, virus))
        for tested in ("A>G", "U>C")
        for virus in virus_order
        for reference in ("C>U", "G>A")
    ]

    annotator = Annotator(g1, pairs, **plot_kwargs)
    annotator.configure(test='Mann-Whitney', text_format='star',
                        loc='outside', comparisons_correction="Bonferroni")
    annotator.apply_and_annotate()

    g1.set(xlabel="Type of mutation", ylabel="Mutation rate inferred")
    g1.set_ylim(10**-10, 10**-1)
    g1.legend(loc='center left', bbox_to_anchor=(1.05, 0.5),
              borderaxespad=0., fontsize=7)

    figure_path = output_dir + "/%s_mutation_rate.png" % date
    plt.savefig(figure_path, dpi=600, bbox_inches='tight')
    plt.close()
def test_unmatched_x_in_box_pairs_with_hue(self):
    """A pair using x value "c", absent from ``order``, must raise ValueError naming `pairs`."""
    # The parentheses are a regex group, not literal characters: the
    # pattern matches the text between them anywhere in the message.
    expected_message = "(specified in `pairs`)"
    pairs = [(("c", "blue"), ("b", "blue"))]
    plot_kwargs = dict(data=self.df, x="x", y="y", order=["a", "b"],
                       hue='color', hue_order=['red', 'blue'])
    with self.assertRaisesRegex(ValueError, expected_message):
        self.annot = Annotator(self.ax, pairs, **plot_kwargs)
def test_working_hue_orders(self):
    """An Annotator can be built with an explicit ``hue_order``; passes if nothing raises."""
    pairs = [(("a", "blue"), ("b", "blue"))]
    plot_kwargs = dict(data=self.df, x="x", y="y", order=["a", "b"],
                       hue='color', hue_order=['red', 'blue'])
    self.annot = Annotator(self.ax, pairs, **plot_kwargs)
def _adar_labels(frame, mutation, preference_column):
    """Return ``frame["Mutation"]`` with ``mutation`` rows relabelled by ADAR preference level."""
    labels = frame["Mutation"]
    is_mutation = frame["Mutation"] == mutation
    # A row has at most one preference level, so the order of application
    # is irrelevant; rows with any other preference value keep their label.
    for level in ("High", "Intermediate", "Low"):
        mask = is_mutation & (frame[preference_column] == level)
        labels = np.where(
            mask, "{0}\nADAR-like\n{1}".format(level, mutation), labels)
    return labels


def plots(input_dir, date, data_filter, virus, passage_order, transition_order, pairs, label_order, pairs_adar,
          filter_reads=None):
    """Draw and save the passage plots for one virus.

    Four figures and two text reports are written to
    ``input_dir + date + "_plots"``:

    * a point plot of transition mutations per passage,
    * a box plot of the same data annotated with Bonferroni-corrected Welch
      t-tests (annotator output redirected to ``sts.csv``),
    * the point plot and annotated box plot for synonymous mutations split
      by ADAR preference (annotator output in ``sts_adar.csv``).

    Parameters
    ----------
    input_dir : str
        Prefix of the output directory path.
    date : str
        Date stamp appended to ``input_dir`` to form the output directory.
    data_filter : pandas.DataFrame
        Variant table. Its "passage" column is rewritten in place (cast to
        str and prefixed with "p"), unless ``filter_reads`` is True, in
        which case a filtered copy is used from the row filter onwards.
    virus : str
        Used in the output file names.
    passage_order : list
        Order of the passages on the x axis.
    transition_order : list
        Hue order for the transition-mutation plots.
    pairs : list
        Pairs tested on the transition box plot.
    label_order : list
        Unused; kept for backward compatibility.
    pairs_adar : list
        Pairs tested on the ADAR-preference box plot.
    filter_reads : bool, optional
        When exactly ``True``, zero "no_variants" where "Prob" < 0.95 and
        keep only rows with "Read_count" > 10000.
    """
    output_dir = input_dir + date + "_plots"
    plus_minus = u"\u00B1"
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    if filter_reads is True:
        data_filter["no_variants"] = np.where(
            data_filter["Prob"] < 0.95, 0, data_filter["no_variants"])
        # Fix: the original assigned the whole filtered DataFrame to the
        # "Read_count" column; filter the rows instead.
        data_filter = data_filter[data_filter["Read_count"] > 10000].copy()

    data_filter["passage"] = data_filter["passage"].astype(str)
    data_filter["passage"] = np.where(
        data_filter["passage"] != "RNA\nControl",
        "p" + data_filter["passage"], data_filter["passage"])

    # x / y are passed by keyword, as in the other catplot call below;
    # seaborn >= 0.12 no longer accepts them positionally.
    g2 = sns.catplot(x="passage", y="frac_and_weight", data=data_filter,
                     hue="Mutation", order=passage_order,
                     palette=mutation_palette(4), kind="point", dodge=0.5,
                     hue_order=transition_order, join=False,
                     estimator=weighted_varaint, orient="v")
    g2.set_axis_labels("Passage",
                       "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10 ** -6, 10 ** -2))
    g2.savefig(
        output_dir + "/Transition_Mutations_point_plot_{0}".format(virus),
        dpi=300)
    plt.close()

    passage_g = sns.boxplot(x="passage", y="Frequency", data=data_filter,
                            hue="Mutation", order=passage_order,
                            palette=mutation_palette(4), dodge=True,
                            hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10 ** -6, 10 ** -1)
    passage_g.set(xlabel="Passage", ylabel="Variant Frequency")
    annot = Annotator(passage_g, pairs, x="passage", y="Frequency",
                      hue="Mutation", data=data_filter, order=passage_order,
                      hue_order=transition_order)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    # Whatever annotate() prints to stdout is captured in the report file.
    with open(output_dir + "/sts.csv", "w") as report, \
            contextlib.redirect_stdout(report):
        passage_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(
        output_dir + "/Transition_Mutations_box_stat_plot_{0}".format(virus),
        dpi=300)
    plt.close()

    # Explicit copy: columns are assigned below, which must not write
    # through a slice of ``data_filter``.
    data_filter_synonymous = data_filter.loc[
        data_filter.Type == "Synonymous"].copy()
    # A>G rows are relabelled in "Mutation" first; "Mutation_adar" then adds
    # the U>C labels on top of that already-relabelled column.
    data_filter_synonymous["Mutation"] = _adar_labels(
        data_filter_synonymous, "A>G", "5`_ADAR_Preference")
    data_filter_synonymous["Mutation_adar"] = _adar_labels(
        data_filter_synonymous, "U>C", "3`_ADAR_Preference")
    mutation_adar_order = ["High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
                           "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"]

    data_filter_synonymous["passage"] = data_filter_synonymous[
        "passage"].astype(str)
    catplot_adar = sns.catplot(x="passage", y="frac_and_weight",
                               data=data_filter_synonymous,
                               hue="Mutation_adar", order=passage_order,
                               palette=mutation_palette(4, adar=True),
                               kind="point", dodge=0.5,
                               hue_order=mutation_adar_order, join=False,
                               estimator=weighted_varaint, orient="v",
                               legend=True)
    catplot_adar.set_axis_labels(
        "Passage", "Variant Frequency {0} CI=95%".format(plus_minus))
    catplot_adar.set(yscale='log')
    catplot_adar.set(ylim=(10 ** -6, 10 ** -2))
    plt.savefig(
        output_dir + "/adar_pref_mutation_point_plot_{0}.png".format(virus),
        dpi=300)
    plt.close()

    adar_g = sns.boxplot(x="passage", y="Frequency",
                         data=data_filter_synonymous, hue="Mutation_adar",
                         order=passage_order,
                         palette=mutation_palette(4, adar=True), dodge=True,
                         hue_order=mutation_adar_order)
    adar_g.set_yscale('log')
    adar_g.set_ylim(10 ** -6, 10 ** -1)
    adar_g.set(xlabel="Passage", ylabel="Variant Frequency")
    annot = Annotator(adar_g, pairs_adar, x="passage", y="Frequency",
                      hue="Mutation_adar", data=data_filter_synonymous,
                      hue_order=mutation_adar_order, order=passage_order)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    with open(output_dir + "/sts_adar.csv", "w") as report, \
            contextlib.redirect_stdout(report):
        adar_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(
        output_dir + "/adar_pref_mutation_box_plot_{0}.png".format(virus),
        dpi=300)
    plt.close()
def main():
    """Produce the patient-sample variant-frequency figures.

    Loads the three pickled tables (``data_filter``, ``data_filter_ag``,
    ``data_filter_uc``) from the ``inosine_predict_context_freq0.01``
    directory and writes the point plots, the annotated box plot (with the
    annotator's printed output redirected to ``sts.csv``) and the context
    plots into a date-stamped output directory. Finally prints the
    per-group weighted A>G frequencies.
    """
    input_dir = "/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/patients/"
    prefix = "inosine_predict_context_freq0.01"
    date = datetime.today().strftime("%Y%m%d")
    output_dir = input_dir + "{0}_{1}".format(date, prefix)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = pd.read_pickle(input_dir + prefix + "/data_filter.pkl")
    data_filter_ag = pd.read_pickle(input_dir + prefix + "/data_filter_ag.pkl")
    data_filter_uc = pd.read_pickle(input_dir + prefix + "/data_filter_uc.pkl")
    data_filter["label"] = np.where(
        data_filter["label"] == "RNA Control\nPrimer ID", "RNA\nControl",
        data_filter["label"])

    # Plots
    label_order = [
        "RNA\nControl", "p3 Cell Culture\nControl", "Patient-1", "Patient-4",
        "Patient-5", "Patient-9", "Patient-16", "Patient-17", "Patient-20"
    ]
    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    type_order2 = ["Synonymous", "Non-Synonymous"]
    context_order_uc = ["UpA", "UpU", "UpG", "UpC"]
    type_order_ag = ["Synonymous", "Non-Synonymous", "NonCodingRegion"]
    adar_preference = ["High", "Intermediate", "Low"]
    plus_minus = u"\u00B1"
    frequency_label = "Variant Frequency {} CI=95%".format(plus_minus)

    # Within every sample, A>G is compared with each other transition:
    # all samples against G>A first, then U>C, then C>U.
    pairs = [
        ((label, "A>G"), (label, other))
        for other in ("G>A", "U>C", "C>U")
        for label in label_order
    ]

    g2 = sns.catplot(x="label", y="frac_and_weight", data=data_filter,
                     hue="Mutation", order=label_order,
                     palette=mutation_palette(4), kind="point", dodge=0.5,
                     hue_order=transition_order, join=False,
                     estimator=weighted_varaint, orient="v", legend=True)
    g2.set_axis_labels("", frequency_label)
    g2.set(yscale='log')
    g2.set(ylim=(10**-5, 10**-3))
    g2.set_xticklabels(fontsize=10, rotation=90)
    g2.savefig(output_dir + "/Transition_Mutations_point_plot", dpi=300)
    plt.close()

    data_filter["label"] = data_filter["label"].astype(str)
    data_filter["Frequency"] = data_filter["Frequency"].astype(float)
    passage_g = sns.boxplot(x="label", y="Frequency", data=data_filter,
                            hue="Mutation", order=label_order,
                            palette=mutation_palette(4), dodge=True,
                            hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10**-6, 10**-1)
    passage_g.set(xlabel="", ylabel="Variant Frequency")
    passage_g.set_xticklabels(labels=label_order, fontsize=10, rotation=90)
    annot = Annotator(passage_g, pairs, x="label", y="Frequency",
                      hue="Mutation", data=data_filter, order=label_order,
                      hue_order=transition_order)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    # Whatever annotate() prints to stdout is captured in the report file.
    with open(output_dir + "/sts.csv", "w") as report, \
            contextlib.redirect_stdout(report):
        passage_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(output_dir + "/Transition_Mutations_box_stat_plot_patients",
                dpi=300)
    plt.close()

    # A>G Prev Context
    # x / y are passed by keyword in every catplot call below, matching g2;
    # seaborn >= 0.12 no longer accepts them positionally.
    g5 = sns.catplot(x="label", y="frac_and_weight", data=data_filter_ag,
                     hue="ADAR_like", order=label_order,
                     palette=mutation_palette(2), kind="point", dodge=True,
                     hue_order=[True, False], estimator=weighted_varaint,
                     orient="v", col="Type", join=False,
                     col_order=type_order2)
    g5.set_axis_labels("", frequency_label)
    g5.set(yscale='log')
    g5.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g5.set_xticklabels(rotation=90)
    g5.savefig(output_dir + "/Context_point_plot", dpi=300)
    plt.close()

    mutation_ag = sns.catplot(x="label", y="frac_and_weight",
                              data=data_filter_ag, hue="5`_ADAR_Preference",
                              palette=mutation_palette(3, adar=True, ag=True),
                              kind="point", dodge=True,
                              estimator=weighted_varaint, order=label_order,
                              orient="v", col="Type", join=False,
                              col_order=type_order_ag,
                              hue_order=adar_preference)
    mutation_ag.set(yscale="log")
    mutation_ag.set(ylim=(1 * 10**-5, 1 * 10**-2))
    mutation_ag.set_xticklabels(rotation=90)
    mutation_ag.fig.suptitle("A>G ADAR_like Mutation in RV patients", y=0.99)
    plt.subplots_adjust(top=0.85)
    mutation_ag.set_axis_labels("", frequency_label)
    mutation_ag.savefig(
        output_dir + "/ag_ADAR_like_Mutation_col_patients.png", dpi=300)
    plt.close()

    g6 = sns.catplot(x="label", y="frac_and_weight", data=data_filter_ag,
                     hue="ADAR_like", order=label_order,
                     palette=mutation_palette(2), kind="point", dodge=True,
                     hue_order=[True, False], estimator=weighted_varaint,
                     orient="v", join=False)
    g6.set_axis_labels("", frequency_label)
    g6.set(yscale='log')
    g6.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g6.set_xticklabels(rotation=90)
    g6.savefig(output_dir + "/Context_point_all_mutations_type_plot", dpi=300)
    plt.close()

    g9 = sns.catplot(x="label", y="frac_and_weight", data=data_filter_uc,
                     hue="Next", order=label_order, palette="tab20",
                     hue_order=context_order_uc, estimator=weighted_varaint,
                     orient="v", dodge=True, kind="point", col="Type",
                     join=False, col_order=type_order2)
    g9.set_axis_labels("", frequency_label)
    g9.set(yscale='log')
    g9.set(ylim=(10**-5, 10**-2))
    g9.set_xticklabels(rotation=90)
    g9.savefig(output_dir + "/UC_Context_point_plot", dpi=300)
    plt.close()

    # Weighted frequency per (ADAR_like, label, Type) group.
    data_filter_ag_grouped = data_filter_ag.groupby(
        ["ADAR_like", "label", "Type"])["frac_and_weight"].agg(
            weighted_varaint)
    data_filter_ag_grouped = data_filter_ag_grouped.reset_index()
    data_filter_ag_grouped = data_filter_ag_grouped.rename(
        columns={"frac_and_weight": "Frequency"})
    data_filter_ag_grouped["Frequency"] = data_filter_ag_grouped[
        "Frequency"].astype(float)
    print(data_filter_ag_grouped.to_string())

    data_filter_ag_grouped_silent = data_filter_ag_grouped[
        data_filter_ag_grouped["Type"] == "Synonymous"]
    # NOTE(review): this literal has a stray U+05BF mark after "Culture" and
    # no "p3 " prefix, so it matches none of the labels in ``label_order``.
    # Left as written because the intended label is not certain - confirm.
    data_filter_ag_grouped_silent = data_filter_ag_grouped_silent[
        data_filter_ag_grouped_silent["label"] == "Cell Cultureֿ\nControl"]
def test_unmatched_hue_in_hue_order(self):
    """A pair using hue "blue", absent from ``hue_order``, must raise ValueError naming `hue_order`."""
    # The parentheses are a regex group, not literal characters: the
    # pattern matches the text between them anywhere in the message.
    expected_message = "(specified in `hue_order`)"
    pairs = [(("a", "blue"), ("b", "blue"))]
    plot_kwargs = dict(data=self.df, x="x", y="y", order=["a", "b"],
                       hue='color', hue_order=['red', 'yellow'])
    with self.assertRaisesRegex(ValueError, expected_message):
        self.annot = Annotator(self.ax, pairs, **plot_kwargs)
def test_not_implemented_plot(self):
    """An unknown ``plot`` name must raise NotImplementedError."""
    pairs = [(0, 1)]
    self.assertRaises(NotImplementedError, Annotator, self.ax, pairs,
                      data=self.data, plot="thatplot")
def test_init_df_inverted(self):
    """An Annotator can be constructed when the pairs are supplied in reverse order."""
    plot_kwargs = self.params_df
    reversed_pairs = self.pairs_for_df[::-1]
    self.ax = sns.boxplot(**plot_kwargs)
    self.annot = Annotator(self.ax, pairs=reversed_pairs, **plot_kwargs)
def test_valid_parameters_df_data_only(self):
    """Configure, test and annotate with only ``data`` given (no x / y); passes if nothing raises."""
    self.ax = sns.boxplot(ax=self.ax, data=self.data2)
    pairs = [("X", "Y")]
    annot = Annotator(self.ax, pairs=pairs, data=self.data2)
    # The call is chained on whatever configure() returns, as before.
    configured = annot.configure(test="Mann-Whitney")
    configured.apply_and_annotate()
def context_boxplot(context_loadings, metadict, included_factors=None, group_order=None, statistical_test='Mann-Whitney',
                    pval_correction='benjamini-hochberg', text_format='star', nrows=1, figsize=(12, 6), cmap='tab10',
                    title_size=14, axis_label_size=12, group_label_rotation=45, ylabel='Context Loadings',
                    dot_color='lightsalmon', dot_edge_color='brown', filename=None, verbose=False):
    '''
    Plots a boxplot to compare the loadings of context groups in each
    of the factors resulting from a tensor decomposition.

    Parameters
    ----------
    context_loadings : pandas.DataFrame
        Dataframe containing the loadings of each of the contexts
        from a tensor decomposition. Rows are contexts and columns
        are the factors obtained.

    metadict : dict
        A dictionary containing the groups where each of the contexts
        belong to. Keys corresponds to the indexes in `context_loadings`
        and values are the respective groups. For example:
        metadict={'Context 1' : 'Group 1', 'Context 2' : 'Group 1',
                  'Context 3' : 'Group 2', 'Context 4' : 'Group 2'}

    included_factors : list, default=None
        Factors to be included. Factor names must be the same as column
        elements in the context_loadings.

    group_order : list, default=None
        Order of the groups to plot the boxplots. Considering the
        example of the metadict, it could be:
        group_order=['Group 1', 'Group 2'] or
        group_order=['Group 2', 'Group 1']
        If None, the order that groups are found in `metadict`
        will be considered.

    statistical_test : str, default='Mann-Whitney'
        The statistical test to compare context groups within each factor.
        Options include:
        't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney',
        'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon', 'Kruskal'.
        If None, no test is run and no annotation is drawn.

    pval_correction : str, default='benjamini-hochberg'
        Multiple test correction method to reduce false positives.
        Options include:
        'bonferroni', 'bonf', 'Bonferroni', 'holm-bonferroni', 'HB',
        'Holm-Bonferroni', 'holm', 'benjamini-hochberg', 'BH', 'fdr_bh',
        'Benjamini-Hochberg', 'fdr_by', 'Benjamini-Yekutieli', 'BY', None

    text_format : str, default='star'
        Format to display the results of the statistical test.
        Options are:

        - 'star', to display P-values < 1e-4 as "****"; < 1e-3 as "***";
                  < 1e-2 as "**"; < 0.05 as "*", and < 1 as "ns".
        - 'simple', to display P-values < 1e-5 as "1e-5"; < 1e-4 as "1e-4";
                  < 1e-3 as "0.001"; < 1e-2 as "0.01"; and < 5e-2 as "0.05".

    nrows : int, default=1
        Number of rows to generate the subplots. It is capped at the
        number of factors plotted.

    figsize : tuple, default=(12, 6)
        Size of the figure (width*height), each in inches.

    cmap : str, default='tab10'
        Name of the color palette for coloring the major groups of contexts.

    title_size : int, default=14
        Font size of the title in each of the factor boxplots.

    axis_label_size : int, default=12
        Font size of the labels for X and Y axes.

    group_label_rotation : int, default=45
        Angle of rotation for the tick labels in the X axis.

    ylabel : str, default='Context Loadings'
        Label for the Y axis.

    dot_color : str, default='lightsalmon'
        A matplotlib color for the dots representing individual contexts
        in the boxplot. For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    dot_edge_color : str, default='brown'
        A matplotlib color for the edge of the dots in the boxplot.
        For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    filename : str, default=None
        Path to save the figure with the boxplots. If None, the figure is
        not saved.

    verbose : boolean, default=False
        Whether printing out the result of the pairwise statistical tests
        in each of the factors

    Returns
    -------
    fig : matplotlib.figure.Figure
        A matplotlib figure.

    axes : matplotlib.axes.Axes or array of Axes
        Matplotlib axes representing the subplots containing the boxplots.
        A single Axes when only one factor is plotted.

    Raises
    ------
    AssertionError
        If `group_order` is given but lacks a group present in `metadict`.
    '''
    from itertools import combinations

    if group_order is not None:
        assert len(set(group_order) & set(metadict.values())) == len(set(metadict.values())), \
            "All groups in `metadict` must be contained in `group_order`"
    else:
        # Keep the order in which groups first appear in `metadict`, as
        # documented; a set would give an arbitrary order.
        group_order = list(dict.fromkeys(metadict.values()))

    df = context_loadings.copy()

    if included_factors is None:
        factor_labels = list(df.columns)
    else:
        factor_labels = included_factors

    rank = len(factor_labels)
    df['Group'] = [metadict[idx] for idx in df.index]

    nrows = min([rank, nrows])
    ncols = int(np.ceil(rank / nrows))
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, sharey='none')

    # With a single factor, plt.subplots returns a bare Axes, not an array.
    if rank == 1:
        axs = np.array([axes])
    else:
        axs = axes.flatten()

    x = 'Group'
    order = group_order
    # The group pairs do not depend on the factor, so build them once.
    pairs = list(combinations(order, 2)) if statistical_test is not None else None

    for i, factor in enumerate(factor_labels):
        ax = axs[i]
        y = factor

        # Plot the boxes
        ax = sns.boxplot(x=x,
                         y=y,
                         data=df,
                         order=order,
                         whis=[0, 100],
                         width=.6,
                         palette=cmap,
                         boxprops=dict(alpha=.5),
                         ax=ax)

        # Plot the dots
        sns.stripplot(x=x,
                      y=y,
                      data=df,
                      size=6,
                      order=order,
                      color=dot_color,
                      edgecolor=dot_edge_color,
                      linewidth=0.6,
                      jitter=False,
                      ax=ax)

        if statistical_test is not None:
            # Add annotations about statistical test
            annotator = Annotator(ax=ax,
                                  pairs=pairs,
                                  data=df,
                                  x=x,
                                  y=y,
                                  order=order)
            annotator.configure(test=statistical_test,
                                text_format=text_format,
                                loc='inside',
                                comparisons_correction=pval_correction,
                                verbose=verbose)
            annotator.apply_and_annotate()

        ax.set_title(factor, fontsize=title_size)
        ax.set_xlabel('', fontsize=axis_label_size)
        # Only the first subplot of each row carries the Y label.
        if i % ncols == 0:
            ax.set_ylabel(ylabel, fontsize=axis_label_size)
        else:
            ax.set_ylabel(' ', fontsize=axis_label_size)

        ax.set_xticklabels(ax.get_xticklabels(),
                           rotation=group_label_rotation,
                           rotation_mode='anchor',
                           va='bottom',
                           ha='right')

    # Remove extra subplots
    for j in range(i + 1, axs.shape[0]):
        ax = axs[j]
        ax.axis(False)

    # A bare Axes (single factor) has no `shape`; there is nothing to align.
    if isinstance(axes, np.ndarray) and axes.shape[0] > 1:
        axes = axes.reshape(axes.shape[0], -1)
        fig.align_ylabels(axes[:, 0])

    plt.tight_layout(rect=[0, 0.03, 1, 0.99])
    if filename is not None:
        plt.savefig(filename, dpi=300,
                    bbox_inches='tight')
    return fig, axes
def _load_filtered_variants(pickle_path):
    """Load the variant table and apply the filters shared by all plots.

    Args:
        pickle_path: Path of the pickled DataFrame. The columns ``passage``,
            ``Prob``, ``no_variants`` and ``Read_count`` are read here.

    Returns:
        An independent DataFrame (safe to modify) restricted to rows whose
        ``Read_count`` is above 10000, with ``passage`` cast to ``int`` and
        ``no_variants`` zeroed wherever ``Prob`` is below 0.95.
    """
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this pipeline.
    variants = pd.read_pickle(pickle_path)
    variants["passage"] = variants["passage"].astype(int)
    variants["no_variants"] = np.where(variants["Prob"] < 0.95, 0,
                                       variants["no_variants"])
    # This used to assign the filtered frame to the "Read_count" column,
    # which never removed a row and is rejected by recent pandas versions.
    # A row filter is what the threshold expresses.
    return variants[variants["Read_count"] > 10000].copy()


def _transition_pairs(passages, reference="A>G", others=("G>A", "U>C", "C>U")):
    """Pair `reference` with each mutation in `others` within every passage."""
    return [((passage, reference), (passage, other))
            for other in others
            for passage in passages]


def _adar_pairs(passages, mutation):
    """Pair the High and Low ADAR-like classes of `mutation` per passage."""
    high = "High\nADAR-like\n" + mutation
    low = "Low\nADAR-like\n" + mutation
    return [((passage, high), (passage, low)) for passage in passages]


def _adar_preference_labels(frame):
    """Return the ``Mutation`` values relabelled by ADAR preference.

    A>G rows are labelled from ``5`_ADAR_Preference`` and U>C rows from
    ``3`_ADAR_Preference`` as ``"<level>\\nADAR-like\\n<mutation>"``; every
    other row keeps its original ``Mutation`` value.
    """
    mutations = frame["Mutation"]
    labels = mutations
    preference_columns = (("A>G", "5`_ADAR_Preference"),
                          ("U>C", "3`_ADAR_Preference"))
    for mutation, column in preference_columns:
        # The three levels are mutually exclusive, so their order is free.
        for level in ("High", "Intermediate", "Low"):
            selected = (mutations == mutation) & (frame[column] == level)
            labels = np.where(selected,
                              level + "\nADAR-like\n" + mutation, labels)
    return labels


def _annotate_and_log(ax, pairs, stats_path, **plot_params):
    """Run Welch t-tests on `pairs`, draw them on `ax`, log stdout to a file.

    Only the output printed while ``annotate()`` runs is captured in
    `stats_path`, exactly as before.
    """
    annot = Annotator(ax, pairs, **plot_params)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    with open(stats_path, "w") as stats_file, \
            contextlib.redirect_stdout(stats_file):
        annot.annotate()


def main(input_dir="/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/passages/",
         prefix="inosine_predict_context"):
    """Plot per-replica transition and ADAR-preference mutation frequencies.

    Reads ``<input_dir>/<prefix>/data_filter.pkl`` and, for replicas 1-3,
    writes into ``<input_dir>/<YYYYMMDD>_<prefix>``:

    * a point plot and an annotated box plot of the four transition types;
    * a point plot and an annotated box plot of synonymous A>G / U>C
      mutations split by ADAR preference;
    * ``sts<replica>.csv`` and ``sts_adar_<replica>.csv`` holding the text
      printed while the statistical annotations are drawn.

    Args:
        input_dir: Directory that contains the ``<prefix>`` data folder and
            receives the dated output folder.
        prefix: Name of the data folder; also the suffix of the output folder.

    NOTE(review): rows with ``Read_count`` <= 10000 are now really excluded
    (the previous statement did not filter anything), which changes the data
    behind every plot and test -- confirm this matches the intended analysis.
    """
    date = datetime.today().strftime("%Y%m%d")
    output_dir = os.path.join(input_dir, "{0}_{1}".format(date, prefix))
    try:
        os.mkdir(output_dir)
    except OSError:
        # Also reached when the folder already exists; the run continues.
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = _load_filtered_variants(
        os.path.join(input_dir, prefix, "data_filter.pkl"))

    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    mutation_adar_order = [
        "High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
        "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"
    ]
    plus_minus = u"\u00B1"

    for replica in [1, 2, 3]:
        # Work on a copy so column edits never touch the shared table.
        replica_data = data_filter.copy()
        replica_data["passage"] = "p" + replica_data["passage"].astype(str)
        if replica == 2:
            # Passage-0 rows are reassigned to replica 2 before selecting it.
            # The table is reused instead of re-reading the same pickle.
            replica_data["replica"] = np.where(
                replica_data["passage"] == "p0", 2, replica_data["replica"])
        replica_data = replica_data[replica_data["replica"] == replica].copy()
        replica_data["passage"] = np.where(
            replica_data["passage"] == "p0", "RNA\nControl",
            replica_data["passage"])

        if replica == 1:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p12"]
            # The original pair list omitted the RNA-control U>C comparison
            # for replica 1; kept as-is -- TODO confirm it is intentional.
            uc_passages = passage_order[1:]
        else:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p10", "p12"]
            uc_passages = passage_order
        pairs = _transition_pairs(passage_order)
        pairs_adar = (_adar_pairs(passage_order, "A>G") +
                      _adar_pairs(uc_passages, "U>C"))

        # Transition mutations: point plot.
        passage_g = sns.catplot(x="passage", y="frac_and_weight",
                                data=replica_data, hue="Mutation",
                                order=passage_order,
                                palette=mutation_palette(4), kind="point",
                                dodge=0.5, hue_order=transition_order,
                                join=False, estimator=weighted_varaint,
                                orient="v", legend=True)
        passage_g.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        passage_g.set(yscale='log', ylim=(10**-6, 10**-2))
        plt.savefig(
            os.path.join(
                output_dir,
                "Transition_Mutations_point_plot_RVB14_replica{0}".format(
                    replica)),
            dpi=300)
        plt.close()

        # Transition mutations: box plot with statistical annotations.
        passage_g1 = sns.boxplot(x="passage", y="Frequency",
                                 data=replica_data, hue="Mutation",
                                 order=passage_order,
                                 palette=mutation_palette(4), dodge=True,
                                 hue_order=transition_order)
        passage_g1.set_yscale('log')
        passage_g1.set_ylim(10**-6, 10**-2)
        passage_g1.set(xlabel="Passage", ylabel="Variant Frequency")
        _annotate_and_log(
            passage_g1, pairs,
            os.path.join(output_dir, "sts{0}.csv".format(replica)),
            x="passage", y="Frequency", hue="Mutation", data=replica_data,
            order=passage_order, hue_order=transition_order)
        plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            os.path.join(
                output_dir,
                "Transition_Mutations_box_stat_plot_RVB14_replica{0}".format(
                    replica)),
            dpi=300)
        plt.close()

        # ADAR preferences (synonymous mutations only).
        synonymous = replica_data.loc[
            replica_data.Type == "Synonymous"].copy()
        synonymous["Mutation_adar"] = _adar_preference_labels(synonymous)

        catplot_adar = sns.catplot(x="passage", y="frac_and_weight",
                                   data=synonymous, hue="Mutation_adar",
                                   order=passage_order,
                                   palette=mutation_palette(4, adar=True),
                                   kind="point", dodge=0.5,
                                   hue_order=mutation_adar_order, join=False,
                                   estimator=weighted_varaint, orient="v",
                                   legend=True)
        catplot_adar.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        catplot_adar.set(yscale='log')
        catplot_adar.set(ylim=(10**-6, 10**-2))
        plt.savefig(
            os.path.join(
                output_dir,
                "adar_pref_mutation_point_plot_RVB14_replica{0}.png".format(
                    replica)),
            dpi=300)
        plt.close()

        adar_g = sns.boxplot(x="passage", y="Frequency", data=synonymous,
                             hue="Mutation_adar", order=passage_order,
                             palette=mutation_palette(4, adar=True),
                             dodge=True, hue_order=mutation_adar_order)
        adar_g.set_yscale('log')
        adar_g.set_ylim(10**-6, 10**-1)
        adar_g.set(xlabel="Passage", ylabel="Variant Frequency")
        # `order` is passed so the annotator uses the same x ordering as the
        # box plot above; it was previously missing from this call only.
        _annotate_and_log(
            adar_g, pairs_adar,
            os.path.join(output_dir, "sts_adar_{0}.csv".format(replica)),
            x="passage", y="Frequency", hue="Mutation_adar", data=synonymous,
            order=passage_order, hue_order=mutation_adar_order)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            os.path.join(
                output_dir,
                "adar_pref_mutation_box_stat_plot_RVB14_replica{0}".format(
                    replica)),
            dpi=300)
        plt.close()