def test_change_samples_order(self):
        """Check ``change_order`` with a reversed sample list.

        After reordering, the sample list must match the requested order, and
        the comparison names, row counts and column counts must match the
        values asserted below.  A sample list that is too short or contains an
        unknown name must raise.
        """
        new_order = ["Sample 4", "Sample 3", "Sample 2", "Sample 1"]
        test = pp.read_folder(path)
        # Pass a copy so the comparison below cannot succeed merely because
        # the object kept a reference to the very same list.
        test.change_order(list(new_order))
        self.assertEqual(test.samples, new_order)

        comparison_test = [
            'Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3',
            'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3',
            'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4'
        ]
        self.assertEqual(test.comparisons, comparison_test)

        self.assertEqual(len(test.genes_detected), 5)
        self.assertEqual(len(test.Manipulate.significant(test, "gene")), 3)
        self.assertEqual(len(test.isoforms_detected), 28)
        self.assertEqual(len(test.Manipulate.significant(test, "isoform")), 5)

        # Collect the four column counts and compare them in one go so that a
        # failure shows every count instead of a bare "False is not true".
        column_counts = [
            len(test.genes_detected.columns),
            len(test.Manipulate.significant(test, "gene").columns),
            len(test.isoforms_detected.columns),
            len(test.Manipulate.significant(test, "isoform").columns),
        ]
        self.assertEqual(column_counts, [24, 24, 24, 24])

        with self.assertRaises(Exception):
            test.change_order(["Sample 4", "Sample 3", "Sample 2"])
        with self.assertRaises(Exception):
            test.change_order(["Sample 4", "Sample 3", "Sample 2", "Wrong"])

        # NOTE(review): the reloaded object is not used in the visible part
        # of this test; kept because the snippet may be truncated here.
        test = pp.read_folder(path)
Beispiel #2
0
 def drop(comp):
     """Check that dropping ``comp`` at read time equals dropping it afterwards.

     ``comp`` is forwarded unchanged to ``read_folder(drop_comparison=...)``
     and to ``dropComparison``.  ``self`` is not a parameter; it is expected
     to come from the enclosing scope.
     """
     dropped_on_read = pp.read_folder(path, drop_comparison=comp)
     dropped_after = pp.read_folder(path)
     dropped_after.dropComparison(comp)
     # Compare the frame contents directly: reducing each frame with
     # ``.all().all()`` collapses it to a single boolean, which would let
     # two different tables compare as equal.
     self.assertTrue(
         dropped_on_read.genes_significant.equals(
             dropped_after.genes_significant))
Beispiel #3
0
 def multidrop(comp):
     """Check that dropping a list at once equals dropping items one by one.

     ``comp`` is an iterable of comparison names: it is passed as a whole to
     one object and element by element to the other.  ``self`` is not a
     parameter; it is expected to come from the enclosing scope.
     """
     dropped_together = pp.read_folder(path)
     dropped_one_by_one = pp.read_folder(path)
     dropped_together.dropComparison(comp)
     for single in comp:
         dropped_one_by_one.dropComparison(single)
     # Compare the frame contents directly: reducing each frame with
     # ``.all().all()`` collapses it to a single boolean, which would let
     # two different tables compare as equal.
     self.assertTrue(
         dropped_together.genes_significant.equals(
             dropped_one_by_one.genes_significant))
 def drop(comp):
     """Check that dropping ``comp`` at read time equals dropping it afterwards.

     Both the detected-genes and the detected-isoforms tables are compared.
     ``self`` is not a parameter; it is expected to come from the enclosing
     scope.
     """
     dropped_on_read = pp.read_folder(path, drop_comparison=comp)
     dropped_after = pp.read_folder(path)
     dropped_after.drop_comparison(comp)
     # Compare the frame contents directly: reducing each frame with
     # ``.all().all()`` collapses it to a single boolean, which would let
     # two different tables compare as equal.
     self.assertTrue(
         dropped_on_read.genes_detected.equals(dropped_after.genes_detected))
     self.assertTrue(
         dropped_on_read.isoforms_detected.equals(
             dropped_after.isoforms_detected))
 def test_significant(self):
     """Check row and column counts of the significant gene/isoform tables."""
     test = pp.read_folder(path)
     self.assertEqual(len(test.Manipulate.significant(test, "gene")), 3)
     self.assertEqual(len(test.Manipulate.significant(test, "isoform")), 5)
     # Assert each column count separately so a failure reports the actual
     # number and which table it belongs to.
     self.assertEqual(
         len(test.Manipulate.significant(test, "gene").columns), 24)
     self.assertEqual(
         len(test.Manipulate.significant(test, "isoform").columns), 24)
    def test_drop_comparison(self):
        """Check dropping comparisons at read time and after reading.

        For every single comparison, passing ``drop_comparison`` to
        ``read_folder`` must give the same detected tables as calling
        ``drop_comparison`` on an already loaded object.  Dropping a list in
        one call must match dropping its elements one at a time.  An unknown
        comparison name must raise in both entry points.
        """
        compared_tables = ("genes_detected", "isoforms_detected")

        def assert_same_tables(left, right):
            # Compare the frame contents directly: reducing each frame with
            # ``.all().all()`` collapses it to a single boolean, which would
            # let two different tables compare as equal.
            for attr in compared_tables:
                self.assertTrue(
                    getattr(left, attr).equals(getattr(right, attr)),
                    msg=attr + " differs")

        def drop(comp):
            test2 = pp.read_folder(path, drop_comparison=comp)
            test3 = pp.read_folder(path)
            test3.drop_comparison(comp)
            assert_same_tables(test2, test3)

        def multidrop(comp):
            test2 = pp.read_folder(path)
            test3 = pp.read_folder(path)
            test2.drop_comparison(comp)
            for c in comp:
                test3.drop_comparison(c)
            assert_same_tables(test2, test3)

        single_comparisons = [
            "Sample 1_vs_Sample 2",
            "Sample 1_vs_Sample 3",
            "Sample 1_vs_Sample 4",
            "Sample 2_vs_Sample 3",
            "Sample 2_vs_Sample 4",
            "Sample 3_vs_Sample 4",
        ]
        for comp in single_comparisons:
            drop(comp)

        comparison_groups = [
            ["Sample 1_vs_Sample 2", "Sample 1_vs_Sample 3"],
            ["Sample 1_vs_Sample 2", "Sample 3_vs_Sample 4"],
            [
                "Sample 1_vs_Sample 4", "Sample 2_vs_Sample 4",
                "Sample 2_vs_Sample 3"
            ],
        ]
        for group in comparison_groups:
            multidrop(group)

        with self.assertRaises(Exception):
            pp.read_folder(path, drop_comparison="Wrong")

        test2 = pp.read_folder(path)
        with self.assertRaises(Exception):
            test2.drop_comparison("Wrong")
Beispiel #7
0
 def test_selected_exist(self):
     """Exercise ``selected_exist`` before and after ``get_gene`` is called."""
     db = pp.read_folder(path)
     # Before get_gene() has been called, both call forms are expected to
     # raise.
     with self.assertRaises(Exception):
         db.selected_exist()
     with self.assertRaises(Exception):
         db.selected_exist(remove="Wrong")
     db.get_gene()
     has_selection = db.selected_exist()
     self.assertTrue(has_selection)
     del db
 def test_papillon_db(self):
     """Check the object returned by ``read_folder`` for the test folder.

     Verifies the sample names, the comparison names, the row and column
     counts of the detected tables, and the exact text produced by ``str()``.
     """
     test = pp.read_folder(path)
     samples_test = ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']
     self.assertEqual(test.samples, samples_test)
     comparison_test = [
         'Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3',
         'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3',
         'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4'
     ]
     self.assertEqual(test.comparisons, comparison_test)
     self.assertEqual(len(test.genes_detected), 5)
     self.assertEqual(len(test.isoforms_detected), 28)
     # Separate assertions so a failure reports the offending column count.
     self.assertEqual(len(test.genes_detected.columns), 24)
     self.assertEqual(len(test.isoforms_detected.columns), 24)

     print_test = pp.read_folder(path)
     printable = "Samples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\nComparison: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', 'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', 'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\nGenes Detected: 5\nGenes differential expressed: 3\nIsoform Detected: 28\nIsoform differential expressed: 5\n"
     # assertEqual prints a diff of the two strings on failure, so no manual
     # debug print is needed.
     self.assertEqual(str(print_test), printable)
Beispiel #9
0
 def test_read_folder(self):
     """Check the object returned by ``read_folder`` for the test folder.

     Verifies the sample names, the comparison names, the row and column
     counts of the detected and significant tables, and the exact text
     produced by ``str()``.
     """
     test = pp.read_folder(path)
     samples_test = ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']
     self.assertEqual(test.samples, samples_test)
     comparison_test = ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3',
                        'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3',
                        'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']
     self.assertEqual(test.comparison, comparison_test)
     self.assertEqual(len(test.genes_detect), 5)
     self.assertEqual(len(test.genes_significant), 3)
     self.assertEqual(len(test.isoforms_detect), 28)
     self.assertEqual(len(test.isoforms_significant), 5)

     # Compare all four column counts at once so a failure shows each value.
     column_counts = [
         len(test.genes_detect.columns),
         len(test.genes_significant.columns),
         len(test.isoforms_detect.columns),
         len(test.isoforms_significant.columns),
     ]
     self.assertEqual(column_counts, [18, 18, 18, 18])

     print_test = pp.read_folder(path)
     printable = "Samples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\nComparison: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', 'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', 'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\nGenes Detected: 5\nGenes differential expressed: 3\nIsoform Detected: 28\nIsoform differential expressed: 5\nNone of the genes is selected"
     # assertEqual prints a diff of the two strings on failure, which
     # replaces the unconditional debug print in the test body.
     self.assertEqual(str(print_test), printable)
 def test_different_read(self):
     """Exercise the reading entry points with a bad and several good inputs.

     A non-existent folder must raise ``FileNotFoundError``; the test folder,
     its ``galaxy`` sub-folder and an explicit list of four files are then
     read without any further checks on the result.
     """
     self.assertRaises(FileNotFoundError, pp.read_folder, "Not working")

     pp.read_folder(path)

     galaxy_dir = path + "/galaxy"
     pp.read_folder(galaxy_dir)

     suffixes = (
         "/gene_exp.diff",
         "/genes.fpkm_tracking",
         "/isoform_exp.diff",
         "/isoforms.fpkm_tracking",
     )
     input_files = [path + suffix for suffix in suffixes]
     pp.read_files(input_files)
    def test_list_search(self):
        """Check ``search`` on an isoform selection and on a gene selection.

        For each selection, a query with no match must give an empty table,
        a matching query must give the expected number of rows, and the
        search result must keep the same number of columns as the selection.
        """
        test = pp.read_folder(path)
        # (selection factory, query expected to match, expected row count)
        cases = (
            (test.get_isoform, "00", 3),
            (test.get_gene, "il", 2),
        )
        for make_selection, matching_query, expected_rows in cases:
            sub = make_selection()

            no_match = sub.search("sfd")
            self.assertEqual(len(no_match.df), 0)

            matched = sub.search(matching_query)
            self.assertEqual(len(matched.df), expected_rows)
            self.assertEqual(len(sub.df.columns), len(matched.df.columns))
 def heatmap_maker(z_score, type_sel):
     """Render a clustered heatmap of the selection and save it as ``test.png``.

     ``type_sel`` chooses the gene (``"gene"``) or isoform (``"isoform"``)
     selection; any other value leaves the selection unassigned and the
     function fails on its first use.  ``z_score`` is forwarded unchanged to
     ``sns.clustermap``.  The image is written to ``test.path + "test.png"``.
     """
     test = pp.read_folder(path)
     if type_sel == "gene":
         sub = test.get_gene()
     elif type_sel == "isoform":
         sub = test.get_isoform()

     fpkm_table = sub.onlyFPKM(return_as="gene name", remove_FPKM_name=True)
     labelled_table = sub.plot._fusion_gene_id(
         fpkm_table, type_sel, change_index=True)

     clustermap_options = dict(
         col_cluster=False,
         method="complete",
         cmap="seismic",
         z_score=z_score,
     )
     grid = sns.clustermap(labelled_table, **clustermap_options)

     output_file = str(test.path + "test.png")
     grid.savefig(output_file)
        def plot_maker(type_sel, z_score):
            """Build a reference line plot of FPKM per sample and save it.

            ``type_sel`` is ``"gene"`` or ``"isoform"``; ``z_score`` is
            compared against ``True`` / ``False``.  Any other value for
            either argument leaves ``sub`` / ``df_`` / ``hue`` unassigned and
            the function fails on first use.  The figure is written to
            ``test.path + "test_plot.png"``.
            """
            test = pp.read_folder(path)
            if type_sel == "gene":
                sub = test.get_gene()
            elif type_sel == "isoform":
                sub = test.get_isoform()

            if z_score == True:
                # Transform the FPKM table with the plot helper, then
                # re-attach the gene names taken from the selection's
                # own table.
                df_ = sub.onlyFPKM(return_as="df", remove_FPKM_name=True)
                df_norm = sub.plot._z_score(df_)
                df_norm["gene_short_name"] = sub.df["gene_short_name"]
                df_ = df_norm.copy()
            elif z_score == False:
                df_ = sub.onlyFPKM(return_as="gene name",
                                   remove_FPKM_name=True)

            # Pick the column used to colour the lines: the gene name for
            # genes, the "gene/ID" column for isoforms.
            if type_sel == "gene":
                hue = "gene_short_name"
                df_ = sub.plot._fusion_gene_id(df_,
                                               type_sel,
                                               change_index=False)
            elif type_sel == "isoform":
                hue = "gene/ID"
                df_ = sub.plot._fusion_gene_id(df_,
                                               type_sel,
                                               change_index=True)
                # Turn the index back into a regular column; presumably this
                # is what makes "gene/ID" usable as id_vars below - verify.
                df_ = df_.reset_index()

            # Reshape to long format: one row per (hue value, Sample) pair
            # with the value stored in the "FPKM" column.
            df = pd.melt(df_,
                         id_vars=hue,
                         var_name="Sample",
                         value_name="FPKM")
            # NOTE(review): newer seaborn releases renamed `factorplot` to
            # `catplot` and its `size` argument to `height` - confirm which
            # seaborn version this test is pinned to.
            g = sns.factorplot(x="Sample",
                               y="FPKM",
                               hue=hue,
                               data=df,
                               ci=None,
                               legend=True,
                               size=10)
            g.fig.suptitle(" Significant in AT LEAST one condition")
            g.savefig(str(test.path + "test_plot.png"))
    def test_add(self):
        """Check ``+`` and ``sum`` on isoform and gene selections.

        Adding selections must give the row counts asserted below (adding a
        selection that is already included leaves the count unchanged) and
        must keep the column count of the operands.  Adding an isoform
        selection to a gene selection, or selections built with different
        ``comparison`` arguments, must raise.
        """
        test = pp.read_folder(path)
        # test isoform
        sub1 = test.get_isoform("IL6")
        sub2 = test.get_isoform("CD44")
        sub = sub1 + sub2
        self.assertEqual(len(sub.df), 3)
        self.assertEqual(len(sub.df.columns), len(sub1.df.columns))
        sub3 = test.get_isoform("CCL15")
        sub = sum([sub1, sub2, sub3])
        self.assertEqual(len(sub.df), 5)
        self.assertEqual(len(sub.df.columns), len(sub1.df.columns))
        sub4 = test.get_isoform("IL6")
        sub = sub + sub4
        self.assertEqual(len(sub.df), 5)
        self.assertEqual(len(sub.df.columns), len(sub1.df.columns))

        # test genes
        sub_g1 = test.get_gene("IL6")
        sub_g2 = test.get_gene("IL17RC")
        sub = sub_g1 + sub_g2
        self.assertEqual(len(sub.df), 2)
        self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))
        sub_g3 = test.get_gene("CCL15")
        sub = sum([sub_g1, sub_g2, sub_g3])
        self.assertEqual(len(sub.df), 3)
        self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))
        sub_g4 = test.get_gene("IL17RC")
        sub = sub + sub_g4
        self.assertEqual(len(sub.df), 3)
        # Compare against the gene operand, like the other gene checks above
        # (not against the isoform selection ``sub1``).
        self.assertEqual(len(sub.df.columns), len(sub_g1.df.columns))

        with self.assertRaises(Exception):
            sub = sub1 + sub_g2
        # NOTE(review): the two get_gene() calls sit inside the assertRaises
        # block, so an exception raised while building the operands would
        # also satisfy this check - confirm that is acceptable.
        with self.assertRaises(Exception):
            sub_g1 = test.get_gene("IL6")
            sub_g2 = test.get_gene(comparison="Sample 3_vs_Sample 4")
            sub = sub_g1 + sub_g2
    def test_sub_select(self):
        """Check ``select`` on an isoform selection and on a gene selection.

        Each query (a name, a list of names, a list containing an unknown
        name, or another selection object) must return the expected number
        of rows and keep the column count of the selection it was taken from.
        """
        test = pp.read_folder(path)

        def check(parent, query, expected_rows):
            # Run one select() call and verify its row and column counts.
            picked = parent.select(query)
            self.assertEqual(len(picked.df), expected_rows)
            self.assertEqual(len(picked.df.columns), len(parent.df.columns))

        isoforms = test.get_isoform()
        isoform_cases = (
            ("IL6", 1),
            (["IL6"], 1),
            (["IL6", "wrong"], 1),
            (["IL6", "CCL15"], 3),
        )
        for query, expected_rows in isoform_cases:
            check(isoforms, query, expected_rows)
        check(isoforms, test.get_isoform(["IL6", "CCL15"]), 3)

        genes = test.get_gene()
        gene_cases = (
            ("IL6", 1),
            (["IL6"], 1),
            (["IL6", "wrong"], 1),
            (["IL6", "CCL15"], 2),
        )
        for query, expected_rows in gene_cases:
            check(genes, query, expected_rows)
        # NOTE(review): the gene selection is filtered with an *isoform*
        # selection object here - confirm this is intended.
        check(genes, test.get_isoform(["IL6", "CCL15"]), 2)
 def test_print(self):
     """Check the exact text produced by ``str()`` on a gene selection."""
     test = pp.read_folder(path)
     sub = test.get_gene()
     printable2 = "Type of selection: gene\nNumber of gene selected: 3\nSamples: ['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4']\nComparison selected: ['Sample 1_vs_Sample 2', 'Sample 1_vs_Sample 3', 'Sample 1_vs_Sample 4', 'Sample 2_vs_Sample 3', 'Sample 2_vs_Sample 4', 'Sample 3_vs_Sample 4']\n"
     # assertEqual shows a diff of the two strings when they differ.
     self.assertEqual(str(sub), printable2)
Beispiel #17
0
    def test_onlyFPKM(self):
        """Check ``onlyFPKM`` for every return format.

        Covers the ``"df"``, ``"gene name"`` and ``"array"`` return formats
        on the current isoform selection, then again with an explicit
        ``extra_df``, then with ``remove_FPKM_name=True``.  Finally checks
        that the selection itself still has the expected shape.
        """
        test = pp.read_folder(path)
        test.get_isoform()

        df = test.onlyFPKM("df")
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 5)
        self.assertEqual(len(df.columns), 4)
        self.assertEqual(df.index[0], "NM_000600.3")
        self.assertEqual(df.index[-1], "NM_032965.4-2")

        df = test.onlyFPKM("gene name")
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 5)
        self.assertEqual(len(df.columns), 5)
        self.assertEqual(df.index[0], "NM_000600.3")
        # The column is indexed by label, so read first/last rows explicitly
        # by position with ``.iloc`` instead of relying on integer keys
        # being interpreted as positions.
        self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
        self.assertEqual(df.index[-1], "NM_032965.4-2")
        self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

        df = test.onlyFPKM("array")
        self.assertIsInstance(df, numpy.ndarray)
        self.assertEqual(len(df), 5)
        self.assertEqual(list(df[1]), [0.0, 3.0, 0.0, 0.0])
        self.assertEqual(list(df[-1]), [0.0, 0.0, 0.0, 3.0])

        # making extra_df
        # Columns 2..5 of the first four selected rows are transposed and
        # then relabelled with the original row index and column names.
        # NOTE(review): the transpose means the values no longer sit under
        # their original labels - confirm this is the intended fixture.
        test.get_isoform()
        extra_df = test.selected.iloc[:4, 2:6].T.copy()
        extra_df = pd.DataFrame(data=extra_df.values,
                                index=test.selected.index[:4],
                                columns=test.selected.columns[2:6])
        extra_df['gene_short_name'] = test.selected['gene_short_name'][:4]

        # testing extra_df
        df = test.onlyFPKM("df", extra_df=extra_df)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 4)
        self.assertEqual(len(df.columns), 4)
        self.assertEqual(df.index[0], "NM_000600.3")
        self.assertEqual(df.index[-1], "NM_032965.4")

        df = test.onlyFPKM("gene name", extra_df=extra_df)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 4)
        self.assertEqual(len(df.columns), 5)
        self.assertEqual(df.index[0], "NM_000600.3")
        self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
        self.assertEqual(df.index[-1], "NM_032965.4")
        self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

        df = test.onlyFPKM("array", extra_df=extra_df)
        self.assertIsInstance(df, numpy.ndarray)
        self.assertEqual(len(df), 4)
        self.assertEqual(list(df[0]), [0.0, 0.0, 4.0, 0.0])
        last_row = list(df[-1])
        # NOTE(review): places=0 only checks agreement after rounding the
        # difference to the nearest integer - confirm whether a tighter
        # tolerance was intended.
        self.assertAlmostEqual(last_row[0], 0.016800, places=0)

        # testing remove_FPKM_name
        df = test.onlyFPKM("df", extra_df=extra_df, remove_FPKM_name=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 4)
        self.assertEqual(len(df.columns), 4)
        self.assertEqual(list(df.columns), test.samples)
        self.assertEqual(df.index[0], "NM_000600.3")
        self.assertEqual(df.index[-1], "NM_032965.4")

        df = test.onlyFPKM("gene name", extra_df=extra_df,
                           remove_FPKM_name=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 4)
        self.assertEqual(len(df.columns), 5)
        self.assertEqual(list(df.columns[1:]), test.samples)
        self.assertEqual(df.index[0], "NM_000600.3")
        self.assertEqual(df["gene_short_name"].iloc[0], "IL6")
        self.assertEqual(df.index[-1], "NM_032965.4")
        self.assertEqual(df["gene_short_name"].iloc[-1], "CCL15")

        # Final
        test.get_isoform()
        self.assertEqual(len(test.selected), 5)
        self.assertEqual(len(test.selected.columns), 18)
Beispiel #18
0
import numpy
from scipy.stats import zscore
import unittest
import imagehash
from PIL import Image

sys.path.append(os.path.abspath(os.path.join('..')))

import papillon as pp

# Run from the directory that contains this file so that the relative
# ``path`` below resolves regardless of where the tests are launched from.
path_to_current_file = os.path.realpath(__file__)
current_directory = os.path.dirname(path_to_current_file)
os.chdir(current_directory)

# Folder with the input files used by the tests (relative to this file).
path="Test_files"
# Loaded once at import time; note that the tests visible here build their
# own objects with pp.read_folder(path).
test=pp.read_folder(path)


class papillon_Test(unittest.TestCase):
    def test_different_read(self):
        """Exercise the reading entry points with a bad and several good inputs.

        A non-existent folder must raise ``FileNotFoundError``; the test
        folder, its ``galaxy`` sub-folder and an explicit list of four files
        are then read without any further checks on the result.
        """
        self.assertRaises(FileNotFoundError, pp.read_folder, "Not working")

        pp.read_folder(path)

        galaxy_dir = path + "/galaxy"
        pp.read_folder(galaxy_dir)

        suffixes = (
            "/gene_exp.diff",
            "/genes.fpkm_tracking",
            "/isoform_exp.diff",
            "/isoforms.fpkm_tracking",
        )
        input_files = [path + suffix for suffix in suffixes]
        pp.read_files(input_files)
#        with self.assertWarns(DeprecationWarning):
#            pp.read_db(path)
    
    def test_functions_FPKM(self):
        """Check ``pp._FPKM`` in both directions.

        A bare name must gain the ``_FPKM`` suffix and a name that already
        carries the suffix must have it removed, as asserted below.
        """
        self.assertEqual(pp._FPKM("ciao"),"ciao_FPKM")
        self.assertEqual(pp._FPKM("ciao_FPKM"),"ciao")