Esempio n. 1
0
 def test_load_all_vars_kmer(self):
     """Exercise ``load_all_vars`` with the ``'kmers'`` type.

     The first call reads ``KMER`` restricted to the samples indexed in
     ``P`` and pins the shape, sum and first row of the returned matrix,
     plus the two other returned values (``sidx`` and ``vidx``).

     The remaining calls pin the exception raised when no samples are
     given, when the uncompressed flag is set on a gzip handle, when the
     variant type does not match the file, and when a different file is
     read as k-mers.

     Every gzip handle is opened in a ``with`` block so it is closed even
     if an assertion fails.
     """
     p = pd.read_csv(P,
                     index_col=0,
                     sep='\t')['binary']
     expected_first_row = np.array([1., 1., 0., 1., 0., 0., 0., 0., 1.,
                                    0., 0., 1., 1., 0., 1., 1., 0.,
                                    1., 1., 1., 0., 0., 0., 0., 0., 1.,
                                    0., 0., 1., 0., 0., 1., 1., 0.,
                                    1., 1., 1., 0., 1., 1., 0., 0., 0.,
                                    1., 0., 1., 1.,
                                    1., 0., 1.])
     with gzip.open(KMER) as infile:
         variants, sidx, vidx = load_all_vars('kmers', p, False, None,
                                              infile, set(p.index), None,
                                              0.45, 0.55, 1.0, False)
     self.assertEqual(variants.shape, (20, 50))
     self.assertEqual(variants.sum(), 474.0)
     # element-wise comparison of the first row with a small tolerance
     self.assertTrue(abs(variants.toarray()[0] -
                         expected_first_row).max() < 1E-7)
     self.assertEqual(len(sidx), 20)
     self.assertEqual(sidx, [2, 6, 20, 32, 39, 54,
                             58, 60, 69, 89, 93,
                             123, 127, 134, 153,
                             156, 179, 180, 184, 194])
     self.assertEqual(vidx, 200)
     # not providing samples
     with gzip.open(KMER) as infile:
         with self.assertRaises(ZeroDivisionError):
             load_all_vars('kmers', p, False, None,
                           infile, set(), None,
                           0.45, 0.55, 1.0, False)
     # uncompressed option - only with python3+
     if sys.version_info[0] >= 3:
         with gzip.open(KMER) as infile:
             with self.assertRaises(TypeError):
                 load_all_vars('kmers', p, False, None,
                               infile, set(p.index), None,
                               0.45, 0.55, 1.0, True)
     # different type
     with gzip.open(KMER) as infile:
         with self.assertRaises(ValueError):
             load_all_vars('Rtab', p, False, None,
                           infile, set(), None,
                           0.45, 0.55, 1.0, False)
     with gzip.open(KMER) as infile:
         with self.assertRaises(AttributeError):
             load_all_vars('vcf', p, False, None,
                           infile, set(), None,
                           0.45, 0.55, 1.0, False)
     # different file
     with gzip.open(PRES) as infile:
         with self.assertRaises(IndexError):
             load_all_vars('kmers', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, False)
     with gzip.open(VCF) as infile:
         with self.assertRaises(IndexError):
             load_all_vars('kmers', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, False)
Esempio n. 2
0
 def test_load_all_vars_rtab(self):
     """Exercise ``load_all_vars`` with the ``'Rtab'`` type.

     The first call reads ``PRES`` (opened through ``open_rtab``)
     restricted to the samples indexed in ``P`` and pins the shape, sum
     and first row of the returned matrix, plus the two other returned
     values (``sidx`` and ``vidx``).

     The remaining calls pin the exception raised for the small fixture,
     for an empty sample set, for a variant type that does not match the
     file, and for gzip handles on other fixtures read as Rtab.
     """
     p = pd.read_csv(P,
                     index_col=0,
                     sep='\t')['binary']
     expected_first_row = np.array([0., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
                                    0., 0., 1., 0., 0., 0., 0.,
                                    0., 1., 0., 0., 0., 0., 0., 0., 1., 0.,
                                    0., 1., 1., 0., 0., 0., 0.,
                                    0., 0., 0., 0., 1., 1., 0., 1., 0., 1.,
                                    0., 1., 0., 0., 1., 0.
                                    ])
     # NOTE(review): the handles returned by open_rtab are never closed in
     # this test - presumably file objects; confirm and close them.
     infile, sample_order = open_rtab(PRES)
     variants, sidx, vidx = load_all_vars('Rtab', p, False, None,
                                          infile, set(p.index),
                                          sample_order,
                                          0.25, 0.75, 1.0, False)
     self.assertEqual(variants.shape, (7, 50))
     self.assertEqual(variants.sum(), 103.0)
     # element-wise comparison of the first row with a small tolerance
     self.assertTrue(abs(variants.toarray()[0] -
                         expected_first_row).max() < 1E-7)
     self.assertEqual(len(sidx), 7)
     self.assertEqual(sidx, [1426, 1436, 1463, 1484, 1492, 1496, 1498])
     self.assertEqual(vidx, 1499)
     # too few OGs
     with self.assertRaises(ValueError):
         infile, sample_order = open_rtab(PRESSMALL, compressed=False)
         load_all_vars('Rtab', p, False, None,
                       infile, set(p.index), sample_order,
                       0.01, 0.99, 1.0, False)
     # not providing samples
     with self.assertRaises(ValueError):
         infile, sample_order = open_rtab(PRESSMALL, compressed=False)
         load_all_vars('Rtab', p, False, None,
                       infile, set(), [],
                       0.45, 0.55, 1.0, False)
     # different type
     with self.assertRaises(IndexError):
         # bind sample_order from the file just opened so the order passed
         # below belongs to this infile rather than to the previous fixture
         infile, sample_order = open_rtab(PRES)
         load_all_vars('kmers', p, False, None,
                       infile, set(p.index), sample_order,
                       0.01, 0.99, 1.0, False)
     with self.assertRaises(AttributeError):
         infile, sample_order = open_rtab(PRES)
         load_all_vars('vcf', p, False, None,
                       infile, set(p.index), sample_order,
                       0.01, 0.99, 1.0, False)
     # different file
     with gzip.open(KMER) as infile:
         with self.assertRaises(ValueError):
             load_all_vars('Rtab', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, False)
     with gzip.open(VCF) as infile:
         with self.assertRaises(ValueError):
             load_all_vars('Rtab', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, False)
Esempio n. 3
0
 def test_no_file(self):
     """Check the error raised per variant type when every input is ``None``.

     ``load_all_vars`` is called once per supported type name with ``None``
     for all ten remaining positional arguments.
     """
     missing_inputs = (None,) * 10
     expected_errors = (
         ('kmers', AttributeError),
         ('vcf', TypeError),
         ('Rtab', AttributeError),
     )
     for var_type, error in expected_errors:
         with self.assertRaises(error):
             load_all_vars(var_type, *missing_inputs)
Esempio n. 4
0
 def test_load_all_vars_vcf(self):
     """Exercise ``load_all_vars`` with the ``'vcf'`` type.

     The first call reads ``VCF`` through ``VariantFile`` restricted to
     the samples indexed in ``P`` and pins the shape, sum and first row of
     the returned matrix, plus the two other returned values (``sidx`` and
     ``vidx``).

     The remaining calls pin the exception raised for an empty sample set,
     for a variant type that does not match the handle, and for gzip
     handles on other fixtures read as VCF. The gzip handles are opened in
     ``with`` blocks so they are closed even if an assertion fails.
     """
     p = pd.read_csv(P,
                     index_col=0,
                     sep='\t')['binary']
     expected_first_row = np.array([
                                   0., 0., 0., 0., 0., 0., 0., 0., 0.,
                                   1., 1., 0., 1., 0., 0., 0., 0.,
                                   1., 1., 1., 0., 0., 1., 0., 1., 1.,
                                   0., 0., 0., 0., 1., 0., 0., 1.,
                                   0., 1., 0., 0., 0., 1., 0., 0., 0.,
                                   0., 1., 0., 0., 0., 0., 0.
                                   ])
     # NOTE(review): the VariantFile handles are never closed in this test;
     # confirm whether VariantFile offers close()/context-manager support.
     infile = VariantFile(VCF)
     variants, sidx, vidx = load_all_vars('vcf', p, False, None,
                                          infile, set(p.index),
                                          None,
                                          0.25, 0.75, 1.0, False)
     self.assertEqual(variants.shape, (8, 50))
     self.assertEqual(variants.sum(), 140)
     # element-wise comparison of the first row with a small tolerance
     self.assertTrue(abs(variants.toarray()[0] -
                         expected_first_row).max() < 1E-7)
     self.assertEqual(len(sidx), 8)
     self.assertEqual(sidx, [10, 29, 39, 95, 110, 153, 156, 164])
     self.assertEqual(vidx, 254)
     # not providing samples
     with self.assertRaises(ZeroDivisionError):
         infile = VariantFile(VCF)
         load_all_vars('vcf', p, False, None,
                       infile, set(), None,
                       0.45, 0.55, 1.0, False)
     # different type
     with self.assertRaises(AttributeError):
         infile = VariantFile(VCF)
         load_all_vars('kmers', p, False, None,
                       infile, set(p.index), None,
                       0.01, 0.99, 1.0, True)
     with self.assertRaises(AttributeError):
         infile = VariantFile(VCF)
         load_all_vars('Rtab', p, False, None,
                       infile, set(p.index), None,
                       0.01, 0.99, 1.0, False)
     # different file
     with gzip.open(KMER) as infile:
         with self.assertRaises(AttributeError):
             load_all_vars('vcf', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, False)
     with gzip.open(PRES) as infile:
         with self.assertRaises(AttributeError):
             load_all_vars('vcf', p, False, None,
                           infile, set(p.index), None,
                           0.45, 0.55, 1.0, True)
Esempio n. 5
0
 def test_unsupported(self):
     """Check that the type name ``'test'`` makes ``load_all_vars`` raise.

     All ten remaining positional arguments are passed as ``None``.
     """
     missing_inputs = (None,) * 10
     with self.assertRaises(ValueError):
         load_all_vars('test', *missing_inputs)