def setUp(self) -> None:
    """Build the shared frame, a hue boxplot, an annotator and raw p-values."""
    points = (("a", 15), ("a", 16), ("b", 17), ("b", 18))
    rows = [(x, y, color) for color in ("blue", "red") for x, y in points]
    # Keys 1..8 become the row index once the frame is transposed.
    records = {
        key: {'x': x, 'y': y, 'color': color}
        for key, (x, y, color) in enumerate(rows, start=1)
    }
    # noinspection DuplicatedCode
    self.df = pd.DataFrame.from_dict(records).T

    plot_kwargs = {"data": self.df, "x": "x", "y": "y", "hue": 'color'}
    self.ax = sns.boxplot(**plot_kwargs)

    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(self.ax, pairs=compared, **plot_kwargs)

    # One p-value per entry of ``compared``, in the same order.
    self.pvalues = [0.03, 0.04, 0.9]
def test_wrong_plotter_engine(self):
    """Requesting the "plotly" engine raises NotImplementedError naming it."""
    bar_ax = sns.barplot(**self.plotting)
    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    with self.assertRaisesRegex(NotImplementedError, "plotly"):
        self.annotator = Annotator(
            bar_ax,
            plot="barplot",
            engine="plotly",
            pairs=compared,
            **self.plotting)
def test_dodge_false_raises(self):
    """Building an annotator with ``dodge=False`` raises a "dodge" ValueError."""
    undodged_ax = sns.barplot(dodge=False, **self.plotting)
    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    with self.assertRaisesRegex(ValueError, "dodge"):
        self.annotator = Annotator(
            undodged_ax,
            dodge=False,
            plot="barplot",
            pairs=compared,
            **self.plotting)
def test_fixed_offset(self):
    """Smoke test: a barplot is annotated with ``use_fixed_offset=True``."""
    bar_ax = sns.barplot(**self.plotting)
    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(
        bar_ax, plot="barplot", pairs=compared, **self.plotting)
    self.annotator.configure(test="Mann-Whitney", use_fixed_offset=True)
    # No assertion: the test passes as long as nothing raises.
    self.annotator.apply_and_annotate()
def test_orient_horizontal(self):
    """Smoke test: a horizontal, dodged stripplot can be annotated."""
    # Start from the shared parameters and swap the axes for orient='h'.
    horizontal = dict(self.plotting)
    horizontal.update({'orient': 'h', 'x': 'y', 'y': 'x', 'dodge': True})

    strip_ax = sns.stripplot(**horizontal)
    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(
        strip_ax, plot="stripplot", pairs=compared, **horizontal)
    self.annotator.configure(test="Mann-Whitney")
    self.annotator.apply_and_annotate()
def setUp(self) -> None:
    """Build the shared frame, a hue boxplot, a quiet annotator and p-values."""
    points = (("a", 15), ("a", 16), ("b", 17), ("b", 18))
    rows = [(x, y, color) for color in ("blue", "red") for x, y in points]
    # Keys 1..8 become the row index once the frame is transposed.
    records = {
        key: {"x": x, "y": y, "color": color}
        for key, (x, y, color) in enumerate(rows, start=1)
    }
    self.df = pd.DataFrame.from_dict(records).T

    plot_kwargs = {"data": self.df, "x": "x", "y": "y", "hue": "color"}
    self.ax = sns.boxplot(**plot_kwargs)

    compared = [
        (("a", "blue"), ("a", "red")),
        (("b", "blue"), ("b", "red")),
        (("a", "blue"), ("b", "blue")),
    ]
    self.annotator = Annotator(
        self.ax, pairs=compared, verbose=False, **plot_kwargs)

    # One p-value per entry of ``compared``, in the same order.
    self.pvalues = [0.03, 0.04, 0.9]
def test_plot_and_annotate_without_test_name(self):
    """With ``show_test_name=False`` the first annotation starts with "p ="."""
    settings = {
        'test': 'Mann-Whitney',
        'text_format': 'full',
        'show_test_name': False,
    }
    # Only the annotator of the returned triple is inspected.
    _, _, annotator = Annotator.plot_and_annotate(
        plot="boxplot",
        pairs=self.pairs_for_df,
        plot_params=self.params_df,
        configuration=settings,
        annotation_func='apply_test',
        ax_op_after=[['set_xlabel', ['Group'], None]],
        annotation_params={'num_comparisons': 'auto'})

    first_text = annotator.get_annotations_text()[0]
    self.assertEqual("p =", first_text[:3])
def boxplot_with_test(data, x, y, pairs):
    """Draw a boxplot of ``y`` by ``x`` and annotate ``pairs`` with p-values.

    For every pair, a Mann-Whitney U test is run on the ``y`` values of the
    rows whose ``x`` equals each member of the pair; the resulting p-values
    are handed to an ``Annotator`` which draws them on the boxplot.

    Parameters
    ----------
    data : indexable by column name and boolean mask (e.g. a DataFrame)
    x, y : column names for the grouping variable and the measured value
    pairs : iterable of 2-item sequences of ``x`` levels to compare
    """
    def values_for(level):
        # ``y`` values of the rows belonging to one ``x`` level.
        return data[data[x] == level][y]

    # The tests are computed before anything is drawn.
    pvalues = []
    for pair in pairs:
        outcome = mannwhitneyu(values_for(pair[0]), values_for(pair[1]))
        pvalues.append(outcome.pvalue)

    plot_kwargs = {'data': data, 'x': x, 'y': y}
    ax = sns.boxplot(**plot_kwargs)

    # Add annotations
    annotator = Annotator(ax, pairs, **plot_kwargs)
    annotator.set_pvalues(pvalues)
    annotator.annotate()
def test_plot_and_annotate(self):
    """``plot_and_annotate`` yields the expected simple-format M.W.W. texts."""
    _, _, annotator = Annotator.plot_and_annotate(
        plot="boxplot",
        pairs=self.pairs_for_df,
        plot_params=self.params_df,
        configuration={'test': 'Mann-Whitney', 'text_format': 'simple'},
        annotation_func='apply_test',
        ax_op_after=[['set_xlabel', ['Group'], None]],
        annotation_params={'num_comparisons': 'auto'})

    # The expected p-values depend on the installed scipy version.
    if version.parse(scipy.__version__) < version.parse("1.7"):
        expected = ['M.W.W. p = 0.25', 'M.W.W. p = 0.67']
    else:
        expected = ['M.W.W. p = 0.33', 'M.W.W. p = 1.00']

    self.assertEqual(expected, annotator.get_annotations_text())
def test_plot_and_annotate_facets(self):
    """Smoke test: ``plot_and_annotate_facets`` can be mapped over a FacetGrid."""
    annotator = Annotator(None, self.simple_pairs)

    # The grid takes over "data" and "hue"; "hue_order" is dropped as well so
    # that only the remaining plot parameters are forwarded per facet.
    facet_data = self.params_df.pop("data")
    facet_column = self.params_df.pop("hue")
    grid = sns.FacetGrid(facet_data, col=facet_column, height=10,
                         sharey=False)
    self.params_df.pop("hue_order")

    grid.map_dataframe(
        annotator.plot_and_annotate_facets,
        plot="boxplot",
        plot_params=self.params_df,
        configuration={'test': 'Mann-Whitney', 'text_format': 'simple'},
        annotation_func='apply_test',
        ax_op_after=[['set_xlabel', ['Group'], None]],
        annotation_params={'num_comparisons': 'auto'})
def test_working_hue_orders(self):
    """An annotator is built when ``order``/``hue_order`` cover the pairs."""
    compared = [(("a", "blue"), ("b", "blue"))]
    self.annot = Annotator(
        self.ax,
        compared,
        data=self.df,
        x="x",
        y="y",
        order=["a", "b"],
        hue='color',
        hue_order=['red', 'blue'])
def test_order_in_x(self):
    """An ``order`` of [0, 1, 2] raises a ValueError mentioning `order`."""
    compared = [(0, 2)]
    requested_order = [0, 1, 2]
    with self.assertRaisesRegex(ValueError, "(specified in `order`)"):
        self.annot = Annotator(
            self.ax, compared, data=self.data, order=requested_order)
def test_unmatched_x_in_box_pairs_without_hue(self):
    """The pair (0, 2) raises a ValueError mentioning `pairs`."""
    compared = [(0, 2)]
    with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
        self.annot = Annotator(self.ax, compared, data=self.data)
def test_init_barplot(self):
    """An annotator can be built on a barplot of ``self.data``."""
    bar_ax = sns.barplot(data=self.data)
    compared = [(0, 1)]
    self.annot = Annotator(bar_ax, compared, plot="barplot", data=self.data)
def test_init_df(self):
    """Draw the boxplot for ``params_df`` and build an annotator on it."""
    plot_kwargs = self.params_df
    self.ax = sns.boxplot(**plot_kwargs)
    self.annot = Annotator(
        self.ax,
        pairs=self.pairs_for_df,
        **plot_kwargs)
class Test(unittest.TestCase):
    """Test that the annotations match the pvalues and format."""

    def setUp(self) -> None:
        """Build the shared frame, a hue boxplot, an annotator and p-values."""
        points = (("a", 15), ("a", 16), ("b", 17), ("b", 18))
        rows = [(x, y, color) for color in ("blue", "red") for x, y in points]
        # Keys 1..8 become the row index once the frame is transposed.
        records = {
            key: {'x': x, 'y': y, 'color': color}
            for key, (x, y, color) in enumerate(rows, start=1)
        }
        # noinspection DuplicatedCode
        self.df = pd.DataFrame.from_dict(records).T

        plot_kwargs = {"data": self.df, "x": "x", "y": "y", "hue": 'color'}
        self.ax = sns.boxplot(**plot_kwargs)

        compared = [
            (("a", "blue"), ("a", "red")),
            (("b", "blue"), ("b", "red")),
            (("a", "blue"), ("b", "blue")),
        ]
        self.annotator = Annotator(self.ax, pairs=compared, **plot_kwargs)
        self.pvalues = [0.03, 0.04, 0.9]

    def _configure_correction(self, method, **options):
        """Configure the annotator with ``ComparisonsCorrection(method)``."""
        correction = ComparisonsCorrection(method)
        self.annotator.configure(comparisons_correction=correction, **options)

    def test_ns_without_correction_star(self):
        """Without correction the star texts are "*", "*", "ns"."""
        results = self.annotator._get_results("auto", pvalues=self.pvalues)
        texts = [result.text for result in results]
        self.assertEqual(["*", "*", "ns"], texts)

    def test_signif_with_type1_correction_star(self):
        """With "BH" the first two stars carry an "(ns)" suffix."""
        self._configure_correction("BH")
        self.annotator.set_pvalues(self.pvalues)
        self.assertEqual(["* (ns)", "* (ns)", "ns"],
                         self.annotator.get_annotations_text())

    def test_signif_with_type1_correction_star_replace(self):
        """With "BH" and ``correction_format="replace"`` all texts are "ns"."""
        self._configure_correction("BH", correction_format="replace")
        self.annotator.set_pvalues(self.pvalues)
        self.assertEqual(["ns", "ns", "ns"],
                         self.annotator.get_annotations_text())

    def test_signif_with_type1_correction_star_incorrect_num_comparisons(self):
        """``num_comparisons=0`` raises a ValueError mentioning "positive"."""
        self._configure_correction("BH")
        with self.assertRaisesRegex(ValueError, "positive"):
            self.annotator.set_pvalues(self.pvalues, num_comparisons=0)

    def test_signif_with_type1_correction_star_abnormal_num_comparisons(self):
        """``num_comparisons=1`` emits a "Manually-specified" UserWarning."""
        self._configure_correction("BH")
        with self.assertWarnsRegex(UserWarning, "Manually-specified"):
            self.annotator.set_pvalues(self.pvalues, num_comparisons=1)

    def test_signif_with_type0_correction_star(self):
        """With "bonferroni" every star text is "ns"."""
        self._configure_correction("bonferroni")
        self.annotator.set_pvalues(self.pvalues)
        self.assertEqual(["ns", "ns", "ns"],
                         self.annotator.get_annotations_text())

    def test_signif_with_type1_correction_simple(self):
        """With "BH" and simple format the "(ns)" suffix is appended."""
        self._configure_correction(
            "BH", pvalue_format={'text_format': 'simple'})
        self.annotator.set_pvalues(self.pvalues)
        expected = ["p ≤ 0.05 (ns)", "p ≤ 0.05 (ns)", "p = 0.90"]
        self.assertEqual(expected, self.annotator.get_annotations_text())

    def test_signif_with_type0_correction_simple(self):
        """With "bonferroni" and simple format the texts are 0.09/0.12/1.00."""
        self._configure_correction(
            "bonferroni", pvalue_format={'text_format': 'simple'})
        self.annotator.set_pvalues(self.pvalues)
        expected = ["p = 0.09", "p = 0.12", "p = 1.00"]
        self.assertEqual(expected, self.annotator.get_annotations_text())

    def test_reapply_annotations(self):
        """Placeholder: no behaviour is exercised yet."""
        pass
class Test(unittest.TestCase):
    """Test that the annotations match the pvalues and format."""

    # Legend expected for the default (star) configuration.
    # NOTE(review): spacing inside these literals is copied verbatim from the
    # existing tests; confirm it against the legend's column alignment.
    _STAR_LEGEND = (
        "p-value annotation legend:\n"
        " ns: p <= 1.00e+00\n"
        " *: 1.00e-02 < p <= 5.00e-02\n"
        " **: 1.00e-03 < p <= 1.00e-02\n"
        " ***: 1.00e-04 < p <= 1.00e-03\n"
        " ****: p <= 1.00e-04\n\n")

    def setUp(self) -> None:
        """Build the shared frame, a hue boxplot, a quiet annotator, p-values."""
        points = (("a", 15), ("a", 16), ("b", 17), ("b", 18))
        rows = [(x, y, color) for color in ("blue", "red") for x, y in points]
        # Keys 1..8 become the row index once the frame is transposed.
        records = {
            key: {"x": x, "y": y, "color": color}
            for key, (x, y, color) in enumerate(rows, start=1)
        }
        self.df = pd.DataFrame.from_dict(records).T

        plot_kwargs = {"data": self.df, "x": "x", "y": "y", "hue": "color"}
        self.ax = sns.boxplot(**plot_kwargs)

        compared = [
            (("a", "blue"), ("a", "red")),
            (("b", "blue"), ("b", "red")),
            (("a", "blue"), ("b", "blue")),
        ]
        self.annotator = Annotator(
            self.ax, pairs=compared, verbose=False, **plot_kwargs)
        self.pvalues = [0.03, 0.04, 0.9]

    def _result_texts(self):
        """Return the texts of the results computed from ``self.pvalues``."""
        results = self.annotator._get_results("auto", pvalues=self.pvalues)
        return [result.text for result in results]

    def test_format_simple(self):
        """``pvalue_format={"text_format": "simple"}`` gives simple texts."""
        self.annotator.configure(pvalue_format={"text_format": "simple"})
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._result_texts())

    def test_format_simple_in_annotator(self):
        """``text_format="simple"`` passed directly gives the same texts."""
        self.annotator.configure(text_format="simple")
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._result_texts())

    def test_wrong_parameter(self):
        """An unknown ``pvalue_format`` key raises InvalidParametersError."""
        message = re.escape('parameter(s) "that"')
        with self.assertRaisesRegex(InvalidParametersError, message):
            self.annotator.configure(pvalue_format={"that": "whatever"})

    def test_format_string(self):
        """``pvalue_format_string`` is stored and used for the exact value."""
        self.annotator.configure(text_format="simple",
                                 pvalue_format_string="{:.3f}")
        stored = self.annotator.pvalue_format.pvalue_format_string
        self.assertEqual("{:.3f}", stored)
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.900"],
                         self._result_texts())

    def test_format_string_default(self):
        """Resetting ``pvalue_format_string`` to DEFAULT restores 2 decimals."""
        self.annotator.configure(text_format="simple",
                                 pvalue_format_string="{:.3f}")
        self.annotator.pvalue_format.config(pvalue_format_string=DEFAULT)
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._result_texts())

    @unittest.mock.patch('sys.stdout', new_callable=io.StringIO)
    def assert_print_pvalue(self, pvalue_format, expected_output, mock_stdout):
        """Assert what ``print_legend_if_used`` writes to stdout.

        ``mock_stdout`` is injected by the ``patch`` decorator; callers pass
        only ``pvalue_format`` and ``expected_output``.
        """
        pvalue_format.print_legend_if_used()
        printed = mock_stdout.getvalue()
        self.assertEqual(expected_output, printed)

    def test_print_pvalue_default(self):
        """A default ``PValueFormat`` prints the star legend."""
        self.assert_print_pvalue(PValueFormat(), self._STAR_LEGEND)

    def test_print_pvalue_star(self):
        """An explicit ``text_format="star"`` prints the same legend."""
        star_format = PValueFormat()
        star_format.config(text_format="star")
        self.assert_print_pvalue(star_format, self._STAR_LEGEND)

    def test_print_pvalue_other(self):
        """With ``text_format="simple"`` nothing is printed."""
        simple_format = PValueFormat()
        simple_format.config(text_format="simple")
        self.assert_print_pvalue(simple_format, "")

    def test_get_configuration(self):
        """A fresh ``PValueFormat`` reports the expected default settings."""
        expected = {
            'correction_format': '{star} ({suffix})',
            'fontsize': 'medium',
            'pvalue_format_string': '{:.3e}',
            'show_test_name': True,
            'simple_format_string': '{:.2f}',
            'text_format': 'star',
            'pvalue_thresholds': [
                [1e-4, "****"],
                [1e-3, "***"],
                [1e-2, "**"],
                [0.05, "*"],
                [1, "ns"],
            ],
        }
        self.assertDictEqual(PValueFormat().get_configuration(), expected)

    def test_config_pvalue_thresholds(self):
        """Custom thresholds are reflected in the printed legend."""
        custom_format = PValueFormat()
        custom_format.config(
            pvalue_thresholds=[[0.001, "<= 0.001"],
                               [0.05, "<= 0.05"],
                               [1, 'ns']])
        expected_legend = (
            "p-value annotation legend:\n"
            " ns: p <= 1.00e+00\n"
            " <= 0.05: 1.00e-03 < p <= 5.00e-02\n"
            "<= 0.001: p <= 1.00e-03\n\n")
        self.assert_print_pvalue(custom_format, expected_legend)
def test_empty_annotator_wo_new_plot_raises(self):
    """Configuring an empty annotator before ``new_plot`` raises RuntimeError."""
    blank = Annotator.get_empty_annotator()
    self.assertRaises(RuntimeError, blank.configure, test="Mann-Whitney")
def main():
    """Plot transition rates per mutation type and virus with significance marks.

    Reads the pre-computed table from a hard-coded local CSV, draws a
    log-scale boxenplot grouped by mutation (x) and virus (hue), annotates
    Bonferroni-corrected Mann-Whitney comparisons, and saves the figure into
    a date-stamped output directory.
    """
    date = datetime.date.today().strftime("%Y%m%d")

    # NOTE: the CSV loaded below was produced by a preprocessing step that
    # used to live here as commented-out code. It concatenated the
    # post_data_mutation() outputs of RVB14 replicas #1-#3, CVB3, OPV2 and PV1,
    # renamed "allele0_1" to "Transition rate" (cast to float), normalised the
    # mutation codes (e.g. "AG" -> "A>G"), dropped the ADAR-like and
    # Non-ADAR-like A>G rows and wrote the result to all_data.csv.

    # Plots - local
    all_data = pd.read_csv(
        "/Users/odedkushnir/PhD_Projects/fitness/all_data.csv")
    output_dir = "/Users/odedkushnir/PhD_Projects/fitness/{0}_fits_syn_plots".format(
        date)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    plt.style.use('classic')
    sns.set_palette("Set2")

    mutation_order = ["C>U", "G>A", "U>C", "A>G"]
    virus_order = ["RVB14 #1", "RVB14 #2", "RVB14 #3", "CVB3", "OPV2", "PV1"]
    # Keyword arguments shared by the plot and by the annotator.
    shared_kwargs = dict(x="Mutation", y="Transition rate", data=all_data,
                         order=mutation_order, hue="Virus",
                         hue_order=virus_order)

    g1 = sns.boxenplot(**shared_kwargs)
    g1.set_yscale("log")

    # Pair format: [((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]
    # Within each virus, A>G and then U>C are each compared with C>U and G>A.
    pairs = [
        ((source, virus), (target, virus))
        for source in ("A>G", "U>C")
        for virus in virus_order
        for target in ("C>U", "G>A")
    ]

    annotator = Annotator(g1, pairs, **shared_kwargs)
    annotator.configure(test='Mann-Whitney', text_format='star',
                        loc='outside', comparisons_correction="Bonferroni")
    annotator.apply_and_annotate()

    g1.set(xlabel="Type of mutation", ylabel="Mutation rate inferred")
    g1.set_ylim(10**-10, 10**-1)
    g1.legend(loc='center left', bbox_to_anchor=(1.05, 0.5),
              borderaxespad=0., fontsize=7)

    figure_path = output_dir + "/%s_mutation_rate.png" % date
    plt.savefig(figure_path, dpi=600, bbox_inches='tight')
    plt.close()
def test_init_df_inverted(self):
    """Build an annotator whose pairs are ``pairs_for_df`` in reverse order."""
    reversed_pairs = self.pairs_for_df[::-1]
    plot_kwargs = self.params_df
    self.ax = sns.boxplot(**plot_kwargs)
    self.annot = Annotator(self.ax, pairs=reversed_pairs, **plot_kwargs)
class TestAnnotator(unittest.TestCase):
    """Test validation of parameters"""

    def setUp(self):
        """Create list/DataFrame fixtures, a default boxplot and plot params."""
        self.data = [[1, 2, 3], [2, 5, 7]]
        self.data2 = pd.DataFrame([[1, 2], [2, 5], [3, 7]],
                                  columns=["X", "Y"])
        self.ax = sns.boxplot(data=self.data)

        points = (("a", 15), ("a", 16), ("b", 17), ("b", 18))
        rows = [(x, y, color) for color in ("blue", "red") for x, y in points]
        # Keys 1..8 become the row index once the frame is transposed.
        records = {
            key: {'x': x, 'y': y, 'color': color}
            for key, (x, y, color) in enumerate(rows, start=1)
        }
        self.df = pd.DataFrame.from_dict(records).T

        self.pairs_for_df = [(("a", "blue"), ("b", "blue")),
                             (("a", "blue"), ("a", "red"))]
        self.df.y = self.df.y.astype(float)
        self.params_df = {
            "data": self.df,
            "x": "x",
            "y": "y",
            "hue": "color",
            "order": ["a", "b"],
            "hue_order": ['red', 'blue'],
        }

    # -- helpers ------------------------------------------------------------

    def _annotate_df_with_hue(self, pairs, hue_order):
        """Build ``self.annot`` on ``self.df`` with explicit order/hue_order."""
        self.annot = Annotator(self.ax, pairs, data=self.df, x="x", y="y",
                               order=["a", "b"], hue='color',
                               hue_order=hue_order)

    @staticmethod
    def _expected_for_scipy(before_1_7, since_1_7):
        """Pick the expectation matching the installed scipy version."""
        if version.parse(scipy.__version__) < version.parse("1.7"):
            return before_1_7
        return since_1_7

    # -- construction -------------------------------------------------------

    def test_init_simple(self):
        """Build an annotator for the pair (0, 1) on the default boxplot."""
        self.annot = Annotator(self.ax, [(0, 1)], data=self.data)

    def test_init_df(self):
        """Draw the boxplot for ``params_df`` and build an annotator on it."""
        self.ax = sns.boxplot(**self.params_df)
        self.annot = Annotator(self.ax, pairs=self.pairs_for_df,
                               **self.params_df)

    def test_init_barplot(self):
        """An annotator can be built on a barplot of ``self.data``."""
        bar_ax = sns.barplot(data=self.data)
        self.annot = Annotator(bar_ax, [(0, 1)], plot="barplot",
                               data=self.data)

    def test_test_name_provided(self):
        """``apply_test`` without a configured test raises a "test" ValueError."""
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "test"):
            self.annot.apply_test()

    # -- validation of pairs / order / hue_order ------------------------------

    def test_unmatched_x_in_box_pairs_without_hue(self):
        """The pair (0, 2) raises a ValueError mentioning `pairs`."""
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self.annot = Annotator(self.ax, [(0, 2)], data=self.data)

    def test_order_in_x(self):
        """An ``order`` of [0, 1, 2] raises a ValueError mentioning `order`."""
        with self.assertRaisesRegex(ValueError, "(specified in `order`)"):
            self.annot = Annotator(self.ax, [(0, 2)], data=self.data,
                                   order=[0, 1, 2])

    def test_working_hue_orders(self):
        """An annotator is built when order/hue_order cover the pairs."""
        self._annotate_df_with_hue([(("a", "blue"), ("b", "blue"))],
                                   ['red', 'blue'])

    def test_unmatched_hue_in_hue_order(self):
        """A ``hue_order`` with 'yellow' raises a `hue_order` ValueError."""
        with self.assertRaisesRegex(ValueError, "(specified in `hue_order`)"):
            self._annotate_df_with_hue([(("a", "blue"), ("b", "blue"))],
                                       ['red', 'yellow'])

    def test_unmatched_hue_in_box_pairs(self):
        """A pair using hue 'yellow' raises a `pairs` ValueError."""
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self._annotate_df_with_hue([(("a", "yellow"), ("b", "blue"))],
                                       ['red', 'blue'])

    def test_unmatched_x_in_box_pairs_with_hue(self):
        """A pair using x value 'c' raises a `pairs` ValueError."""
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self._annotate_df_with_hue([(("c", "blue"), ("b", "blue"))],
                                       ['red', 'blue'])

    # -- validation of configuration ----------------------------------------

    def test_location(self):
        """An unknown ``loc`` raises a ValueError mentioning the argument."""
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "argument `loc`"):
            self.annot.configure(loc="somewhere")

    def test_unknown_parameter(self):
        """An unknown keyword raises InvalidParametersError naming it."""
        self.test_init_simple()
        message = re.escape("parameter(s) \"that\"")
        with self.assertRaisesRegex(InvalidParametersError, message):
            self.annot.configure(that="this")

    def test_format(self):
        """An unknown ``text_format`` raises a ValueError naming the argument."""
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "argument `text_format`"):
            self.annot.configure(pvalue_format={'text_format': 'that'})

    def test_apply_comparisons_correction(self):
        """``_apply_comparisons_correction([])`` returns None."""
        self.test_init_simple()
        outcome = self.annot._apply_comparisons_correction([])
        self.assertIsNone(outcome)

    def test_correct_num_custom_annotations(self):
        """Two custom texts for one pair raise a "same length" ValueError."""
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "same length"):
            self.annot.set_custom_annotations(["One", "Two"])

    def test_not_implemented_plot(self):
        """An unknown ``plot`` name raises NotImplementedError."""
        self.assertRaises(NotImplementedError, Annotator,
                          self.ax, [(0, 1)], data=self.data, plot="thatplot")

    def test_reconfigure_alpha(self):
        """Changing alpha alone warns; resetting restores alpha to 0.05."""
        self.test_init_simple()
        with self.assertWarnsRegex(UserWarning, "pvalue_thresholds"):
            self.annot.configure(alpha=0.1)
        self.annot.reset_configuration()
        self.assertEqual(0.05, self.annot.alpha)

    def test_reconfigure_alpha_with_thresholds(self):
        """Alpha plus DEFAULT thresholds configures; reset restores 0.05."""
        self.test_init_simple()
        self.annot.configure(alpha=0.1,
                             pvalue_format={"pvalue_thresholds": DEFAULT})
        self.annot.reset_configuration()
        self.assertEqual(0.05, self.annot.alpha)

    # -- annotation texts ---------------------------------------------------

    def test_get_annotation_text_undefined(self):
        """Before any test is applied the annotation texts are None."""
        self.test_init_simple()
        self.assertIsNone(self.annot.get_annotations_text())

    def test_get_annotation_text_calculated(self):
        """After applying Mann-Whitney the single annotation text is "ns"."""
        self.test_init_simple()
        self.annot.configure(test="Mann-Whitney", verbose=2)
        self.annot.apply_test()
        self.assertEqual(["ns"], self.annot.get_annotations_text())

    def test_get_annotation_text_in_input_order(self):
        """Texts come back in the order the pairs were given."""
        self.test_init_df()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_test()
        expected = self._expected_for_scipy(
            ['M.W.W. p = 0.25', 'M.W.W. p = 0.67'],
            ['M.W.W. p = 0.33', 'M.W.W. p = 1.00'])
        self.assertEqual(expected, self.annot.get_annotations_text())

    def test_init_df_inverted(self):
        """Build an annotator whose pairs are ``pairs_for_df`` reversed."""
        reversed_pairs = self.pairs_for_df[::-1]
        self.ax = sns.boxplot(**self.params_df)
        self.annot = Annotator(self.ax, pairs=reversed_pairs,
                               **self.params_df)

    def test_get_annotation_text_in_input_order_inverted(self):
        """With reversed pairs the texts come back reversed as well."""
        self.test_init_df_inverted()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_test()
        expected = self._expected_for_scipy(
            ['M.W.W. p = 0.67', 'M.W.W. p = 0.25'],
            ['M.W.W. p = 1.00', 'M.W.W. p = 0.33'])
        self.assertEqual(expected, self.annot.get_annotations_text())

    # -- re-use on a new plot -----------------------------------------------

    def test_apply_no_apply_warns(self):
        """Annotating a new plot without re-applying the new test warns."""
        self.test_init_df_inverted()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_and_annotate()

        self.ax = sns.boxplot(**self.params_df)
        self.annot.new_plot(self.ax, self.pairs_for_df, **self.params_df)
        self.annot.configure(test="Levene", text_format="simple")
        with self.assertWarns(UserWarning):
            self.annot.annotate()

    def test_apply_apply_no_warns(self):
        """Smoke test: re-applying a test on a new plot runs to completion."""
        self.test_init_df_inverted()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_and_annotate()

        self.ax = sns.boxplot(**self.params_df)
        self.annot.new_plot(self.ax, self.pairs_for_df, **self.params_df)
        self.annot.configure(test="Mann-Whitney-gt", text_format="simple")
        self.annot.apply_and_annotate()

    def test_valid_parameters_df_data_only(self):
        """Smoke test: a wide DataFrame passed only as ``data`` is annotated."""
        self.ax = sns.boxplot(ax=self.ax, data=self.data2)
        wide_annot = Annotator(self.ax, pairs=[("X", "Y")], data=self.data2)
        wide_annot.configure(test="Mann-Whitney").apply_and_annotate()

    def test_comparisons_correction_by_name(self):
        """Smoke test: ``comparisons_correction`` accepts the name "BH"."""
        self.ax = sns.boxplot(ax=self.ax, data=self.data2)
        wide_annot = Annotator(self.ax, pairs=[("X", "Y")], data=self.data2)
        wide_annot.configure(test="Mann-Whitney", comparisons_correction="BH")
        wide_annot.apply_and_annotate()

    def test_empty_annotator_wo_new_plot_raises(self):
        """Configuring an empty annotator before ``new_plot`` raises."""
        blank = Annotator.get_empty_annotator()
        self.assertRaises(RuntimeError, blank.configure, test="Mann-Whitney")

    def test_empty_annotator_then_new_plot_ok(self):
        """An empty annotator can be configured once ``new_plot`` was called."""
        blank = Annotator.get_empty_annotator()
        self.ax = sns.boxplot(ax=self.ax, data=self.data2)
        blank.new_plot(self.ax, pairs=[("X", "Y")], data=self.data2)
        blank.configure(test="Mann-Whitney")

    # -- _ensure_ax_operation_format ----------------------------------------

    def test_ensure_ax_operation_format_args_not_ok(self):
        """Positional args given as a plain string raise ValueError."""
        self.assertRaises(ValueError, _ensure_ax_operation_format,
                          ["func", "param", None])

    def test_ensure_ax_operation_format_op_not_ok(self):
        """An operation with only two items raises ValueError."""
        self.assertRaises(ValueError, _ensure_ax_operation_format,
                          ["func", ["param"]])

    def test_ensure_ax_operation_format_kwargs_not_ok(self):
        """Keyword args given as a set raise ValueError."""
        self.assertRaises(ValueError, _ensure_ax_operation_format,
                          ["func", ["param"], {"that"}])

    def test_ensure_ax_operation_format_func_not_ok(self):
        """A callable instead of a function name raises ValueError."""
        self.assertRaises(ValueError, _ensure_ax_operation_format,
                          [sum, ["param"], {"that": "this"}])
def test_not_implemented_plot(self):
    """An unknown ``plot`` name raises NotImplementedError."""
    self.assertRaises(NotImplementedError, Annotator,
                      self.ax, [(0, 1)], data=self.data, plot="thatplot")
def test_unmatched_hue_in_hue_order(self):
    """A ``hue_order`` containing 'yellow' raises a `hue_order` ValueError."""
    compared = [(("a", "blue"), ("b", "blue"))]
    annotator_kwargs = dict(data=self.df, x="x", y="y", order=["a", "b"],
                            hue='color', hue_order=['red', 'yellow'])
    with self.assertRaisesRegex(ValueError, "(specified in `hue_order`)"):
        self.annot = Annotator(self.ax, compared, **annotator_kwargs)
def test_empty_annotator_then_new_plot_ok(self):
    """An empty annotator can be configured once ``new_plot`` was called."""
    blank = Annotator.get_empty_annotator()
    self.ax = sns.boxplot(ax=self.ax, data=self.data2)
    compared = [("X", "Y")]
    blank.new_plot(self.ax, pairs=compared, data=self.data2)
    # No assertion: the test passes as long as configure() does not raise.
    blank.configure(test="Mann-Whitney")
def test_unmatched_x_in_box_pairs_with_hue(self):
    """A pair using x value 'c' raises a ValueError mentioning `pairs`."""
    compared = [(("c", "blue"), ("b", "blue"))]
    annotator_kwargs = dict(data=self.df, x="x", y="y", order=["a", "b"],
                            hue='color', hue_order=['red', 'blue'])
    with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
        self.annot = Annotator(self.ax, compared, **annotator_kwargs)
def test_init_simple(self):
    """Build an annotator for the pair (0, 1) on the default boxplot."""
    compared = [(0, 1)]
    self.annot = Annotator(self.ax, compared, data=self.data)
def test_valid_parameters_df_data_only(self):
    """Smoke test: a wide DataFrame passed only as ``data`` is annotated."""
    self.ax = sns.boxplot(ax=self.ax, data=self.data2)
    wide_annot = Annotator(self.ax, pairs=[("X", "Y")], data=self.data2)
    # configure() is chained, so its return value provides apply_and_annotate().
    configured = wide_annot.configure(test="Mann-Whitney")
    configured.apply_and_annotate()
def _adar_preference_labels(frame, mutation, preference_column):
    """Return ``frame["Mutation"]`` with ``mutation`` rows tagged by ADAR preference."""
    labels = frame["Mutation"]
    is_mutation = frame["Mutation"] == mutation
    # The three levels are mutually exclusive, so each pass only touches rows
    # that no other pass relabels.
    for level in ("High", "Intermediate", "Low"):
        matches = is_mutation & (frame[preference_column] == level)
        labels = np.where(
            matches, "{0}\nADAR-like\n{1}".format(level, mutation), labels)
    return labels


def _annotate_welch(ax, pairs, stats_path, **plot_params):
    """Annotate ``ax`` with Bonferroni-corrected Welch t-tests, logging to ``stats_path``."""
    annot = Annotator(ax, pairs, **plot_params)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    # Whatever annotate() prints is captured in the statistics file.
    with open(stats_path, "w") as stats_file, \
            contextlib.redirect_stdout(stats_file):
        annot.annotate()


def plots(input_dir, date, data_filter, virus, passage_order, transition_order, pairs, label_order, pairs_adar,
          filter_reads=None):
    """Save point plots and annotated boxplots of variant frequencies per passage.

    Four figures and two statistics files are written to
    ``input_dir + date + "_plots"``:

    * a point plot and an annotated boxplot of transition mutations;
    * a point plot and an annotated boxplot of synonymous mutations split by
      ADAR preference.

    Parameters
    ----------
    input_dir : str
        Prefix of the output directory; it is concatenated with ``date`` as
        is, so it is expected to end with a path separator.
    date : str
        Tag inserted in the output directory name.
    data_filter : pandas.DataFrame
        Variant table. The columns read here are "passage", "Mutation",
        "frac_and_weight", "Frequency", "Type", "5`_ADAR_Preference" and
        "3`_ADAR_Preference" (plus "Prob", "no_variants" and "Read_count"
        when ``filter_reads`` is True). The "passage" column of the frame is
        rewritten in place.
    virus : str
        Suffix used in the output file names.
    passage_order : list
        Order of the passages on the x axis.
    transition_order : list
        Hue order for the transition-mutation plots.
    pairs : list
        Pairs annotated on the transition boxplot.
    label_order : list
        Currently unused; kept for backward compatibility.
    pairs_adar : list
        Pairs annotated on the ADAR-preference boxplot.
    filter_reads : bool, optional
        When exactly ``True``, zero ``no_variants`` where ``Prob < 0.95`` and
        keep only rows with ``Read_count > 10000``.
    """
    output_dir = input_dir + date + "_plots"
    plus_minus = u"\u00B1"
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    if filter_reads is True:
        data_filter["no_variants"] = np.where(data_filter["Prob"] < 0.95, 0, data_filter["no_variants"])
        # Fix: the rows are filtered. The previous code assigned the whole
        # filtered frame to the "Read_count" column, which is not a valid
        # single-column assignment. The copy keeps later column writes off a
        # slice of the caller's frame.
        data_filter = data_filter[data_filter["Read_count"] > 10000].copy()

    # Passages are shown as "p<N>"; the RNA control keeps its own label.
    data_filter["passage"] = data_filter["passage"].astype(str)
    data_filter["passage"] = np.where(data_filter["passage"] != "RNA\nControl", "p" + data_filter["passage"],
                                      data_filter["passage"])

    # --- transition mutations: point plot -----------------------------------
    # x/y are passed by keyword, consistent with the second catplot below.
    g2 = sns.catplot(x="passage", y="frac_and_weight", data=data_filter, hue="Mutation", order=passage_order,
                     palette=mutation_palette(4), kind="point", dodge=0.5, hue_order=transition_order,
                     join=False, estimator=weighted_varaint, orient="v")
    g2.set_axis_labels("Passage", "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10 ** -6, 10 ** -2))
    g2.savefig(output_dir + "/Transition_Mutations_point_plot_{0}".format(virus), dpi=300)
    plt.close()

    # --- transition mutations: annotated boxplot ------------------------------
    passage_g = sns.boxplot(x="passage", y="Frequency", data=data_filter, hue="Mutation", order=passage_order,
                            palette=mutation_palette(4), dodge=True, hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10 ** -6, 10 ** -1)
    passage_g.set(xlabel="Passage", ylabel="Variant Frequency")
    _annotate_welch(passage_g, pairs, output_dir + "/sts.csv",
                    x="passage", y="Frequency", hue="Mutation", data=data_filter,
                    order=passage_order, hue_order=transition_order)
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(output_dir + "/Transition_Mutations_box_stat_plot_{0}".format(virus), dpi=300)
    plt.close()

    # --- synonymous mutations split by ADAR preference --------------------------
    # Work on an explicit copy so the new columns are not written to a slice.
    data_filter_synonymous = data_filter.loc[data_filter.Type == "Synonymous"].copy()
    # A>G is relabelled first; "Mutation_adar" then adds the U>C labels on top
    # of the already relabelled "Mutation" column.
    data_filter_synonymous["Mutation"] = _adar_preference_labels(
        data_filter_synonymous, "A>G", "5`_ADAR_Preference")
    data_filter_synonymous["Mutation_adar"] = _adar_preference_labels(
        data_filter_synonymous, "U>C", "3`_ADAR_Preference")
    mutation_adar_order = ["High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
                           "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"]

    data_filter_synonymous["passage"] = data_filter_synonymous["passage"].astype(str)
    catplot_adar = sns.catplot(x="passage", y="frac_and_weight", data=data_filter_synonymous, hue="Mutation_adar",
                               order=passage_order, palette=mutation_palette(4, adar=True), kind="point",
                               dodge=0.5, hue_order=mutation_adar_order, join=False, estimator=weighted_varaint,
                               orient="v", legend=True)
    catplot_adar.set_axis_labels("Passage", "Variant Frequency {0} CI=95%".format(plus_minus))
    catplot_adar.set(yscale='log')
    catplot_adar.set(ylim=(10 ** -6, 10 ** -2))
    plt.savefig(output_dir + "/adar_pref_mutation_point_plot_{0}.png".format(virus), dpi=300)
    plt.close()

    adar_g = sns.boxplot(x="passage", y="Frequency", data=data_filter_synonymous, hue="Mutation_adar",
                         order=passage_order, palette=mutation_palette(4, adar=True), dodge=True,
                         hue_order=mutation_adar_order)
    adar_g.set_yscale('log')
    adar_g.set_ylim(10 ** -6, 10 ** -1)
    adar_g.set(xlabel="Passage", ylabel="Variant Frequency")
    _annotate_welch(adar_g, pairs_adar, output_dir + "/sts_adar.csv",
                    x="passage", y="Frequency", hue="Mutation_adar", data=data_filter_synonymous,
                    hue_order=mutation_adar_order, order=passage_order)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(output_dir + "/adar_pref_mutation_box_plot_{0}.png".format(virus), dpi=300)
    plt.close()
def context_boxplot(context_loadings, metadict, included_factors=None, group_order=None,
                    statistical_test='Mann-Whitney', pval_correction='benjamini-hochberg',
                    text_format='star', nrows=1, figsize=(12, 6), cmap='tab10', title_size=14,
                    axis_label_size=12, group_label_rotation=45, ylabel='Context Loadings',
                    dot_color='lightsalmon', dot_edge_color='brown', filename=None, verbose=False):
    '''
    Plots a boxplot to compare the loadings of context groups in each of
    the factors resulting from a tensor decomposition.

    Parameters
    ----------
    context_loadings : pandas.DataFrame
        Dataframe containing the loadings of each of the contexts
        from a tensor decomposition. Rows are contexts and columns
        are the factors obtained.

    metadict : dict
        A dictionary containing the groups where each of the contexts
        belong to. Keys corresponds to the indexes in `context_loadings`
        and values are the respective groups. For example:
        metadict={'Context 1' : 'Group 1', 'Context 2' : 'Group 1',
                  'Context 3' : 'Group 2', 'Context 4' : 'Group 2'}

    included_factors : list, default=None
        Factors to be included. Factor names must be the same as column
        elements in the context_loadings. If None, all columns are used.

    group_order : list, default=None
        Order of the groups to plot the boxplots. Considering the
        example of the metadict, it could be:
        group_order=['Group 1', 'Group 2'] or
        group_order=['Group 2', 'Group 1']
        If None, the order in which groups are first found in `metadict`
        is used.

    statistical_test : str, default='Mann-Whitney'
        The statistical test to compare context groups within each factor.
        Options include:
        't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney',
        'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon', 'Kruskal'.
        If None, no statistical annotation is added.

    pval_correction : str, default='benjamini-hochberg'
        Multiple test correction method to reduce false positives.
        Options include:
        'bonferroni', 'bonf', 'Bonferroni', 'holm-bonferroni', 'HB',
        'Holm-Bonferroni', 'holm', 'benjamini-hochberg', 'BH', 'fdr_bh',
        'Benjamini-Hochberg', 'fdr_by', 'Benjamini-Yekutieli', 'BY', None

    text_format : str, default='star'
        Format to display the results of the statistical test.
        Options are:

        - 'star', to display P-values < 1e-4 as "****"; < 1e-3 as "***";
                  < 1e-2 as "**"; < 0.05 as "*", and < 1 as "ns".
        - 'simple', to display P-values < 1e-5 as "1e-5"; < 1e-4 as "1e-4";
                  < 1e-3 as "0.001"; < 1e-2 as "0.01"; and < 5e-2 as "0.05".

    nrows : int, default=1
        Number of rows to generate the subplots. It is capped at the
        number of plotted factors.

    figsize : tuple, default=(12, 6)
        Size of the figure (width*height), each in inches.

    cmap : str, default='tab10'
        Name of the color palette for coloring the major groups of contexts.

    title_size : int, default=14
        Font size of the title in each of the factor boxplots.

    axis_label_size : int, default=12
        Font size of the labels for X and Y axes.

    group_label_rotation : int, default=45
        Angle of rotation for the tick labels in the X axis.

    ylabel : str, default='Context Loadings'
        Label for the Y axis.

    dot_color : str, default='lightsalmon'
        A matplotlib color for the dots representing individual contexts
        in the boxplot. For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    dot_edge_color : str, default='brown'
        A matplotlib color for the edge of the dots in the boxplot.
        For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    filename : str, default=None
        Path to save the figure with the boxplots. If None, the figure is
        not saved.

    verbose : boolean, default=False
        Whether printing out the result of the pairwise statistical tests
        in each of the factors

    Returns
    -------
    fig : matplotlib.figure.Figure
        A matplotlib figure.

    axes : matplotlib.axes.Axes or array of Axes
        Matplotlib axes representing the subplots containing the boxplots.
        A single Axes is returned when only one factor is plotted. Otherwise
        an array is returned; when its first dimension is larger than one it
        is reshaped to two dimensions with that first dimension kept.

    Raises
    ------
    AssertionError
        If `group_order` is given but does not contain every group
        present in `metadict`.

    ValueError
        If there is no factor to plot.
    '''
    # Imported once here instead of on every iteration of the plotting loop.
    from itertools import combinations

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # default order is the documented one and is reproducible between runs.
    metadict_groups = list(dict.fromkeys(metadict.values()))
    if group_order is None:
        group_order = metadict_groups
    elif not set(metadict_groups) <= set(group_order):
        # Raised explicitly (rather than with `assert`) so that the check is
        # not stripped when Python runs with -O; the exception type is kept.
        raise AssertionError("All groups in `metadict` must be contained in `group_order`")

    df = context_loadings.copy()
    if included_factors is None:
        factor_labels = list(df.columns)
    else:
        factor_labels = included_factors
    rank = len(factor_labels)
    if rank == 0:
        # Without this guard the grid computation below divides by zero.
        raise ValueError("There are no factors to plot: `context_loadings` has no columns "
                         "or `included_factors` is empty")

    df['Group'] = [metadict[idx] for idx in df.index]

    nrows = min([rank, nrows])
    ncols = int(np.ceil(rank / nrows))
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, sharey='none')

    # plt.subplots returns a bare Axes for a 1x1 grid and an array otherwise.
    axes_is_array = isinstance(axes, np.ndarray)
    axs = axes.flatten() if axes_is_array else np.array([axes])

    # The compared pairs are the same for every factor, so build them once.
    pairs = list(combinations(group_order, 2)) if statistical_test is not None else None

    x = 'Group'
    for i, factor in enumerate(factor_labels):
        # Plot the boxes
        ax = sns.boxplot(x=x, y=factor, data=df, order=group_order, whis=[0, 100], width=.6,
                         palette=cmap, boxprops=dict(alpha=.5), ax=axs[i])

        # Plot the dots
        sns.stripplot(x=x, y=factor, data=df, size=6, order=group_order, color=dot_color,
                      edgecolor=dot_edge_color, linewidth=0.6, jitter=False, ax=ax)

        if statistical_test is not None:
            # Add annotations about statistical test
            annotator = Annotator(ax=ax, pairs=pairs, data=df, x=x, y=factor, order=group_order)
            annotator.configure(test=statistical_test, text_format=text_format, loc='inside',
                                comparisons_correction=pval_correction, verbose=verbose)
            annotator.apply_and_annotate()

        ax.set_title(factor, fontsize=title_size)
        ax.set_xlabel('', fontsize=axis_label_size)
        # Only the first subplot of each row shows the Y label.
        if i % ncols == 0:
            ax.set_ylabel(ylabel, fontsize=axis_label_size)
        else:
            ax.set_ylabel(' ', fontsize=axis_label_size)

        ax.set_xticklabels(ax.get_xticklabels(), rotation=group_label_rotation,
                           rotation_mode='anchor', va='bottom', ha='right')

    # Remove extra subplots
    for unused_ax in axs[rank:]:
        unused_ax.axis(False)

    # A bare Axes has no `shape`, so only arrays go through the reshape and
    # label alignment.
    if axes_is_array and axes.shape[0] > 1:
        axes = axes.reshape(axes.shape[0], -1)
        fig.align_ylabels(axes[:, 0])

    plt.tight_layout(rect=[0, 0.03, 1, 0.99])
    if filename is not None:
        plt.savefig(filename, dpi=300, bbox_inches='tight')
    return fig, axes
def test_comparisons_correction_by_name(self):
    """Annotate a boxplot while naming the comparisons correction as "BH".

    The test has no explicit assertion: it passes when configuring and
    applying the annotation completes without raising.
    """
    # Draw the boxplot of `self.data2` on `self.ax` and keep the returned axes.
    self.ax = sns.boxplot(data=self.data2, ax=self.ax)

    compared_pairs = [("X", "Y")]
    annotator = Annotator(self.ax, pairs=compared_pairs, data=self.data2)
    annotator.configure(test="Mann-Whitney", comparisons_correction="BH")
    annotator.apply_and_annotate()