def test_change_samples_order(self):
    """Check ``change_order`` on the test dataset.

    After reordering, ``samples`` must follow the new order while the
    comparison names, row counts and column counts asserted below stay the
    same. A sample list that is too short or contains an unknown name must
    raise.
    """
    test = pp.read_folder(path)
    expected_samples = ["Sample 4", "Sample 3", "Sample 2", "Sample 1"]
    # Pass a copy so the expectation can never alias what the object stores.
    test.change_order(list(expected_samples))
    self.assertEqual(test.samples, expected_samples)

    expected_comparisons = [
        'Sample 1_vs_Sample 2',
        'Sample 1_vs_Sample 3',
        'Sample 1_vs_Sample 4',
        'Sample 2_vs_Sample 3',
        'Sample 2_vs_Sample 4',
        'Sample 3_vs_Sample 4',
    ]
    self.assertEqual(test.comparisons, expected_comparisons)

    # Compute each significant table once and reuse it for both checks.
    significant_genes = test.Manipulate.significant(test, "gene")
    significant_isoforms = test.Manipulate.significant(test, "isoform")

    self.assertEqual(len(test.genes_detected), 5)
    self.assertEqual(len(significant_genes), 3)
    self.assertEqual(len(test.isoforms_detected), 28)
    self.assertEqual(len(significant_isoforms), 5)

    # All four tables are expected to have 24 columns; asserting them one by
    # one reports which table is off instead of a bare "False is not true".
    tables = (
        test.genes_detected,
        significant_genes,
        test.isoforms_detected,
        significant_isoforms,
    )
    for table in tables:
        self.assertEqual(len(table.columns), 24)

    with self.assertRaises(Exception):
        test.change_order(["Sample 4", "Sample 3", "Sample 2"])
    with self.assertRaises(Exception):
        test.change_order(["Sample 4", "Sample 3", "Sample 2", "Wrong"])
def drop(comp):
    """Check that dropping ``comp`` at read time equals dropping it afterwards.

    ``self`` is not a parameter here; it is taken from the surrounding scope.

    Args:
        comp: comparison name forwarded to ``read_folder(drop_comparison=...)``
            and to ``dropComparison``.
    """
    dropped_on_read = pp.read_folder(path, drop_comparison=comp)
    dropped_after = pp.read_folder(path)
    dropped_after.dropComparison(comp)
    # Compare the tables cell by cell. Reducing each frame with .all().all()
    # collapses it to a single boolean, so unrelated tables would compare equal.
    self.assertTrue(
        dropped_on_read.genes_significant.equals(dropped_after.genes_significant),
        "genes_significant differs between the two ways of dropping %r" % (comp,),
    )
def multidrop(comp):
    """Check that dropping a list at once equals dropping its items one by one.

    ``self`` is not a parameter here; it is taken from the surrounding scope.

    Args:
        comp: iterable of comparison names; passed whole to one object and
            element by element to the other.
    """
    dropped_at_once = pp.read_folder(path)
    dropped_one_by_one = pp.read_folder(path)
    dropped_at_once.dropComparison(comp)
    for single in comp:
        dropped_one_by_one.dropComparison(single)
    # Compare the tables cell by cell. Reducing each frame with .all().all()
    # collapses it to a single boolean, so unrelated tables would compare equal.
    self.assertTrue(
        dropped_at_once.genes_significant.equals(
            dropped_one_by_one.genes_significant
        ),
        "genes_significant differs between bulk and sequential drop of %r" % (comp,),
    )
def drop(comp):
    """Check that dropping ``comp`` at read time equals dropping it afterwards.

    Both the detected-gene and detected-isoform tables are compared.
    ``self`` is not a parameter here; it is taken from the surrounding scope.

    Args:
        comp: comparison name forwarded to ``read_folder(drop_comparison=...)``
            and to ``drop_comparison``.
    """
    dropped_on_read = pp.read_folder(path, drop_comparison=comp)
    dropped_after = pp.read_folder(path)
    dropped_after.drop_comparison(comp)
    # Compare the tables cell by cell. Reducing each frame with .all().all()
    # collapses it to a single boolean, so unrelated tables would compare equal.
    self.assertTrue(
        dropped_on_read.genes_detected.equals(dropped_after.genes_detected),
        "genes_detected differs between the two ways of dropping %r" % (comp,),
    )
    self.assertTrue(
        dropped_on_read.isoforms_detected.equals(dropped_after.isoforms_detected),
        "isoforms_detected differs between the two ways of dropping %r" % (comp,),
    )
def test_significant(self):
    """Check the row and column counts of the significant gene/isoform tables."""
    test = pp.read_folder(path)
    # Compute each table once and reuse it for the row and column checks.
    significant_genes = test.Manipulate.significant(test, "gene")
    significant_isoforms = test.Manipulate.significant(test, "isoform")

    self.assertEqual(len(significant_genes), 3)
    self.assertEqual(len(significant_isoforms), 5)
    # Separate assertEqual calls report which table has the wrong width.
    self.assertEqual(len(significant_genes.columns), 24)
    self.assertEqual(len(significant_isoforms.columns), 24)
def test_drop_comparison(self):
    """Check that comparisons can be dropped at read time or afterwards.

    The two routes must yield equal detected tables, dropping a list must
    equal dropping its items one by one, and an unknown comparison name must
    raise on both routes.
    """

    def assert_same_tables(first, second, label):
        # Compare the tables cell by cell. Reducing each frame with
        # .all().all() collapses it to a single boolean, so unrelated tables
        # would compare equal.
        self.assertTrue(
            first.genes_detected.equals(second.genes_detected),
            "genes_detected differs for %r" % (label,),
        )
        self.assertTrue(
            first.isoforms_detected.equals(second.isoforms_detected),
            "isoforms_detected differs for %r" % (label,),
        )

    def drop(comp):
        """Drop ``comp`` while reading and after reading, then compare."""
        dropped_on_read = pp.read_folder(path, drop_comparison=comp)
        dropped_after = pp.read_folder(path)
        dropped_after.drop_comparison(comp)
        assert_same_tables(dropped_on_read, dropped_after, comp)

    def multidrop(comp):
        """Drop the list ``comp`` at once and item by item, then compare."""
        dropped_at_once = pp.read_folder(path)
        dropped_one_by_one = pp.read_folder(path)
        dropped_at_once.drop_comparison(comp)
        for single in comp:
            dropped_one_by_one.drop_comparison(single)
        assert_same_tables(dropped_at_once, dropped_one_by_one, comp)

    single_comparisons = (
        "Sample 1_vs_Sample 2",
        "Sample 1_vs_Sample 3",
        "Sample 1_vs_Sample 4",
        "Sample 2_vs_Sample 3",
        "Sample 2_vs_Sample 4",
        "Sample 3_vs_Sample 4",
    )
    for comparison in single_comparisons:
        drop(comparison)

    multidrop(["Sample 1_vs_Sample 2", "Sample 1_vs_Sample 3"])
    multidrop(["Sample 1_vs_Sample 2", "Sample 3_vs_Sample 4"])
    multidrop([
        "Sample 1_vs_Sample 4",
        "Sample 2_vs_Sample 4",
        "Sample 2_vs_Sample 3",
    ])

    with self.assertRaises(Exception):
        pp.read_folder(path, drop_comparison="Wrong")
    test2 = pp.read_folder(path)
    with self.assertRaises(Exception):
        test2.drop_comparison("Wrong")
def test_selected_exist(self):
    """``selected_exist`` must raise before ``get_gene()`` and be truthy after it."""
    db = pp.read_folder(path)
    # Nothing has been selected yet: the bare call and the ``remove`` variant
    # are both expected to raise.
    for kwargs in ({}, {"remove": "Wrong"}):
        with self.assertRaises(Exception):
            db.selected_exist(**kwargs)
    db.get_gene()
    self.assertTrue(db.selected_exist())
    del db
def test_papillon_db(self):
    """Check samples, comparisons, table sizes and ``str()`` of a freshly read folder."""
    test = pp.read_folder(path)

    samples_test = ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']
    self.assertEqual(test.samples, samples_test)

    comparison_test = [
        'Sample 1_vs_Sample 2',
        'Sample 1_vs_Sample 3',
        'Sample 1_vs_Sample 4',
        'Sample 2_vs_Sample 3',
        'Sample 2_vs_Sample 4',
        'Sample 3_vs_Sample 4',
    ]
    self.assertEqual(test.comparisons, comparison_test)

    self.assertEqual(len(test.genes_detected), 5)
    self.assertEqual(len(test.isoforms_detected), 28)
    # Both detected tables are expected to have 24 columns.
    self.assertEqual(len(test.genes_detected.columns), 24)
    self.assertEqual(len(test.isoforms_detected.columns), 24)

    print_test = pp.read_folder(path)
    # Adjacent literals concatenate to exactly the expected summary text.
    printable = (
        "Samples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\n"
        "Comparison: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', "
        "'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', "
        "'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\n"
        "Genes Detected: 5\n"
        "Genes differential expressed: 3\n"
        "Isoform Detected: 28\n"
        "Isoform differential expressed: 5\n"
    )
    # assertEqual prints a line diff on failure, so no debug print is needed.
    self.assertEqual(str(print_test), printable)
def test_read_folder(self):
    """Check samples, comparisons, table sizes and ``str()`` of a freshly read folder."""
    test = pp.read_folder(path)

    samples_test = ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']
    self.assertEqual(test.samples, samples_test)

    comparison_test = [
        'Sample 1_vs_Sample 2',
        'Sample 1_vs_Sample 3',
        'Sample 1_vs_Sample 4',
        'Sample 2_vs_Sample 3',
        'Sample 2_vs_Sample 4',
        'Sample 3_vs_Sample 4',
    ]
    self.assertEqual(test.comparison, comparison_test)

    self.assertEqual(len(test.genes_detect), 5)
    self.assertEqual(len(test.genes_significant), 3)
    self.assertEqual(len(test.isoforms_detect), 28)
    self.assertEqual(len(test.isoforms_significant), 5)

    # All four tables are expected to have 18 columns; asserting them one by
    # one reports which table is off.
    tables = (
        test.genes_detect,
        test.genes_significant,
        test.isoforms_detect,
        test.isoforms_significant,
    )
    for table in tables:
        self.assertEqual(len(table.columns), 18)

    print_test = pp.read_folder(path)
    # Adjacent literals concatenate to exactly the expected summary text
    # (note: no trailing newline after the last sentence).
    printable = (
        "Samples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\n"
        "Comparison: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', "
        "'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', "
        "'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\n"
        "Genes Detected: 5\n"
        "Genes differential expressed: 3\n"
        "Isoform Detected: 28\n"
        "Isoform differential expressed: 5\n"
        "None of the genes is selected"
    )
    # assertEqual prints a line diff on failure, replacing the debug print.
    self.assertEqual(str(print_test), printable)
def test_different_read(self):
    """Exercise the readers on a missing folder, two folders and a file list.

    A missing folder must raise ``FileNotFoundError``; the other calls only
    need to complete without raising.
    """
    with self.assertRaises(FileNotFoundError):
        pp.read_folder("Not working")

    for folder in (path, path + "/galaxy"):
        pp.read_folder(folder)

    file_names = (
        "gene_exp.diff",
        "genes.fpkm_tracking",
        "isoform_exp.diff",
        "isoforms.fpkm_tracking",
    )
    pp.read_files([path + "/" + name for name in file_names])
def test_list_search(self):
    """Check ``search`` on isoform and gene selections.

    For each selection a non-matching pattern must give an empty result, a
    matching pattern the expected number of rows, and the result must keep
    the same number of columns as the selection searched.
    """
    test = pp.read_folder(path)
    cases = (
        (test.get_isoform, "00", 3),
        (test.get_gene, "il", 2),
    )
    for make_selection, pattern, expected_hits in cases:
        selection = make_selection()

        no_match = selection.search("sfd")
        self.assertEqual(len(no_match.df), 0)

        matched = selection.search(pattern)
        self.assertEqual(len(matched.df), expected_hits)
        self.assertEqual(len(selection.df.columns), len(matched.df.columns))
def heatmap_maker(z_score, type_sel):
    """Draw a clustered heatmap of the selected FPKM values and save it.

    The figure is written to ``test.path + "test.png"``.

    Args:
        z_score: forwarded unchanged to ``sns.clustermap(z_score=...)``.
        type_sel: ``"gene"`` or ``"isoform"``; chooses the selection to plot.

    Raises:
        ValueError: if ``type_sel`` is neither ``"gene"`` nor ``"isoform"``.
    """
    # Reject unsupported values up front; otherwise ``sub`` would stay unbound
    # and the failure would surface later as an opaque UnboundLocalError.
    if type_sel not in ("gene", "isoform"):
        raise ValueError(
            "type_sel must be 'gene' or 'isoform', got %r" % (type_sel,)
        )

    test = pp.read_folder(path)
    if type_sel == "gene":
        sub = test.get_gene()
    else:
        sub = test.get_isoform()

    df_heatmap = sub.onlyFPKM(return_as="gene name", remove_FPKM_name=True)
    df_heatmap = sub.plot._fusion_gene_id(df_heatmap, type_sel, change_index=True)
    im1 = sns.clustermap(
        df_heatmap,
        col_cluster=False,
        method="complete",
        cmap="seismic",
        z_score=z_score,
    )
    im1.savefig(str(test.path + "test.png"))
def plot_maker(type_sel, z_score):
    """Draw a per-sample FPKM plot of the selection and save it.

    The figure is written to ``test.path + "test_plot.png"``.

    Args:
        type_sel: ``"gene"`` or ``"isoform"``; chooses the selection to plot.
        z_score: when truthy, plot the values returned by
            ``sub.plot._z_score``; otherwise plot the FPKM values as read.

    Raises:
        ValueError: if ``type_sel`` is neither ``"gene"`` nor ``"isoform"``.
    """
    # Reject unsupported values up front; otherwise ``sub`` and ``hue`` would
    # stay unbound and fail later with an opaque UnboundLocalError.
    if type_sel not in ("gene", "isoform"):
        raise ValueError(
            "type_sel must be 'gene' or 'isoform', got %r" % (type_sel,)
        )

    test = pp.read_folder(path)
    if type_sel == "gene":
        sub = test.get_gene()
    else:
        sub = test.get_isoform()

    # Plain truthiness instead of ``== True`` / ``== False``: a value that is
    # neither (e.g. None) no longer leaves the table unbound.
    if z_score:
        fpkm = sub.onlyFPKM(return_as="df", remove_FPKM_name=True)
        normalised = sub.plot._z_score(fpkm)
        normalised["gene_short_name"] = sub.df["gene_short_name"]
        table = normalised.copy()
    else:
        table = sub.onlyFPKM(return_as="gene name", remove_FPKM_name=True)

    if type_sel == "gene":
        hue = "gene_short_name"
        table = sub.plot._fusion_gene_id(table, type_sel, change_index=False)
    else:
        hue = "gene/ID"
        table = sub.plot._fusion_gene_id(table, type_sel, change_index=True)
        # NOTE(review): assumes the original reset_index() belongs to the
        # isoform branch only (index moved back to a column for melt) - confirm.
        table = table.reset_index()

    df = pd.melt(table, id_vars=hue, var_name="Sample", value_name="FPKM")
    g = sns.factorplot(
        x="Sample",
        y="FPKM",
        hue=hue,
        data=df,
        ci=None,
        legend=True,
        size=10,
    )
    g.fig.suptitle(" Significant in AT LEAST one condition")
    g.savefig(str(test.path + "test_plot.png"))
def test_add(self):
    """Check ``+`` and ``sum()`` on isoform and gene selections.

    Combined selections must have the expected number of rows and the same
    number of columns as their operands. Adding a gene selection to an
    isoform selection, or selections built with different comparisons, must
    raise.
    """
    test = pp.read_folder(path)

    # --- isoform selections ---
    sub1 = test.get_isoform("IL6")
    sub2 = test.get_isoform("CD44")
    sub = sub1 + sub2
    self.assertEqual(len(sub.df), 3)
    self.assertEqual(len(sub.df.columns), len(sub1.df.columns))

    sub3 = test.get_isoform("CCL15")
    sub = sum([sub1, sub2, sub3])
    self.assertEqual(len(sub.df), 5)
    self.assertEqual(len(sub.df.columns), len(sub1.df.columns))

    # Adding IL6 again is expected to leave the row count unchanged.
    sub4 = test.get_isoform("IL6")
    sub = sub + sub4
    self.assertEqual(len(sub.df), 5)
    self.assertEqual(len(sub.df.columns), len(sub1.df.columns))

    # --- gene selections ---
    sub_g1 = test.get_gene("IL6")
    sub_g2 = test.get_gene("IL17RC")
    sub = sub_g1 + sub_g2
    self.assertEqual(len(sub.df), 2)
    self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))

    sub_g3 = test.get_gene("CCL15")
    sub = sum([sub_g1, sub_g2, sub_g3])
    self.assertEqual(len(sub.df), 3)
    self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))

    # Adding IL17RC again is expected to leave the row count unchanged.
    sub_g4 = test.get_gene("IL17RC")
    sub = sub + sub_g4
    self.assertEqual(len(sub.df), 3)
    # Compare against the gene operand, not the isoform selection ``sub1``.
    self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))

    # --- invalid additions ---
    with self.assertRaises(Exception):
        sub = sub1 + sub_g2

    # Build the operands outside the ``with`` block so that only the addition
    # itself can satisfy assertRaises.
    sub_g1 = test.get_gene("IL6")
    sub_g2 = test.get_gene(comparison="Sample 3_vs_Sample 4")
    with self.assertRaises(Exception):
        sub = sub_g1 + sub_g2
def test_sub_select(self):
    """Check ``select`` on isoform and gene selections.

    ``select`` is called with a single name, a one-item list, a list with an
    unknown name, a two-name list, and another selection object. Each result
    must have the expected number of rows and as many columns as the
    selection it was taken from.
    """
    test = pp.read_folder(path)

    def check_select(selection, wanted, expected_rows):
        picked = selection.select(wanted)
        self.assertEqual(len(picked.df), expected_rows)
        self.assertEqual(len(picked.df.columns), len(selection.df.columns))

    # (selection factory, rows expected when both IL6 and CCL15 are requested)
    cases = (
        (test.get_isoform, 3),
        (test.get_gene, 2),
    )
    for make_selection, both_rows in cases:
        sub = make_selection()
        check_select(sub, "IL6", 1)
        check_select(sub, ["IL6"], 1)
        check_select(sub, ["IL6", "wrong"], 1)
        check_select(sub, ["IL6", "CCL15"], both_rows)
        # In both cases the selection passed in is an isoform selection.
        check_select(sub, test.get_isoform(["IL6", "CCL15"]), both_rows)
def test_print(self):
    """Check the text returned by ``str()`` for a gene selection."""
    test = pp.read_folder(path)
    sub = test.get_gene()
    # Adjacent literals concatenate to exactly the expected summary text.
    printable2 = (
        "Type of selection: gene\n"
        "Number of gene selected: 3\n"
        "Samples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\n"
        "Comparison selected: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', "
        "'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', "
        "'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\n"
    )
    # assertEqual shows a line-by-line diff when the text does not match.
    self.assertEqual(str(sub), printable2)
def test_onlyFPKM(self):
    """Check ``onlyFPKM`` for every return type, with and without ``extra_df``.

    Covers the ``"df"``, ``"gene name"`` and ``"array"`` return types on the
    current isoform selection, the same three on a hand-built ``extra_df``,
    and ``remove_FPKM_name=True``. Finally checks that the selection itself
    still has its original size.
    """
    test = pp.read_folder(path)
    test.get_isoform()

    df = test.onlyFPKM("df")
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 5)
    self.assertEqual(len(df.columns), 4)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df.index[-1], "NM_032965.4-2")

    # ``.iloc`` is used for first/last lookups: plain ``series[0]`` on a
    # label-indexed Series relies on a deprecated positional fallback.
    df = test.onlyFPKM("gene name")
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 5)
    self.assertEqual(len(df.columns), 5)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
    self.assertEqual(df.index[-1], "NM_032965.4-2")
    self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

    df = test.onlyFPKM("array")
    self.assertIsInstance(df, numpy.ndarray)
    self.assertEqual(len(df), 5)
    self.assertEqual(list(df[1]), [0.0, 3.0, 0.0, 0.0])
    self.assertEqual(list(df[-1]), [0.0, 0.0, 0.0, 3.0])

    # making extra_df
    test.get_isoform()
    extra_df = test.selected.iloc[:4, 2:6].T.copy()
    # The transposed values are re-labelled with the original row and column
    # labels, so extra_df holds different numbers than the selection itself.
    extra_df = pd.DataFrame(
        data=extra_df.values,
        index=test.selected.index[:4],
        columns=test.selected.columns[2:6],
    )
    extra_df['gene_short_name'] = test.selected['gene_short_name'].iloc[:4]

    # testing extra_df
    df = test.onlyFPKM("df", extra_df=extra_df)
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)
    self.assertEqual(len(df.columns), 4)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df.index[-1], "NM_032965.4")

    df = test.onlyFPKM("gene name", extra_df=extra_df)
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)
    self.assertEqual(len(df.columns), 5)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
    self.assertEqual(df.index[-1], "NM_032965.4")
    self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

    df = test.onlyFPKM("array", extra_df=extra_df)
    self.assertIsInstance(df, numpy.ndarray)
    self.assertEqual(len(df), 4)
    self.assertEqual(list(df[0]), [0.0, 0.0, 4.0, 0.0])
    a = list(df[-1])
    b = [0.016800, 0.0, 0.0, 0.0]
    # NOTE(review): places=0 only requires the difference to round to zero,
    # so this is a very loose check - tighten if the exact value is stable.
    self.assertAlmostEqual(a[0], b[0], places=0)

    # testing remove_FPKM_name
    df = test.onlyFPKM("df", extra_df=extra_df, remove_FPKM_name=True)
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)
    self.assertEqual(len(df.columns), 4)
    self.assertEqual(list(df.columns), test.samples)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df.index[-1], "NM_032965.4")

    df = test.onlyFPKM("gene name", extra_df=extra_df, remove_FPKM_name=True)
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)
    self.assertEqual(len(df.columns), 5)
    self.assertEqual(list(df.columns[1:]), test.samples)
    self.assertEqual(df.index[0], "NM_000600.3")
    self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
    self.assertEqual(df.index[-1], "NM_032965.4")
    self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

    # Final
    test.get_isoform()
    self.assertEqual(len(test.selected), 5)
    self.assertEqual(len(test.selected.columns), 18)
# NOTE(review): ``sys`` and ``os`` are used below but not imported in the
# visible part of the file - presumably imported above; confirm.
import numpy
from scipy.stats import zscore
import unittest
import imagehash
from PIL import Image

# Append the parent of the current working directory to the import path
# before importing papillon.
sys.path.append(os.path.abspath(os.path.join('..')))
import papillon as pp

# Switch the working directory to the folder containing this file, so the
# relative data path below is resolved from there.
path_to_current_file = os.path.realpath(__file__)
current_directory = os.path.dirname(path_to_current_file)
os.chdir(current_directory)

# Folder with the test data, and a dataset read from it once at import time.
path="Test_files"
test=pp.read_folder(path)


class papillon_Test(unittest.TestCase):
    """Unit tests for the papillon readers and helper functions."""

    def test_different_read(self):
        """Read a missing folder, two existing folders and an explicit file list.

        The missing folder must raise ``FileNotFoundError``; the other calls
        only need to complete without raising.
        """
        with self.assertRaises(FileNotFoundError):
            pp.read_folder("Not working")
        pp.read_folder(path)
        pp.read_folder(path+"/galaxy")
        pp.read_files([path+"/gene_exp.diff",path+"/genes.fpkm_tracking",path+"/isoform_exp.diff",path+"/isoforms.fpkm_tracking"])
        # with self.assertWarns(DeprecationWarning):
        #     pp.read_db(path)

    def test_functions_FPKM(self):
        """``_FPKM`` is expected to add the ``_FPKM`` suffix or strip it if present."""
        self.assertEqual(pp._FPKM("ciao"),"ciao_FPKM")
        self.assertEqual(pp._FPKM("ciao_FPKM"),"ciao")