def test_load_all_vars_kmer(self):
    """Exercise ``load_all_vars`` in ``'kmers'`` mode on the KMER fixture.

    First checks the shape, sum, first row and returned indices for a valid
    call, then checks the exception raised for: an empty sample set, the
    last (uncompressed) flag set to ``True`` on a gzip handle (Python 3
    only), a mismatching variant type, and input files of another format.
    """
    def open_gz(path):
        # Register the close so the handle is released at teardown even when
        # an assertion fails part-way through the test.
        handle = gzip.open(path)
        self.addCleanup(handle.close)
        return handle

    p = pd.read_csv(P, index_col=0, sep='\t')['binary']

    infile = open_gz(KMER)
    variants, sidx, vidx = load_all_vars('kmers', p, False, None, infile,
                                         set(p.index), None,
                                         0.45, 0.55, 1.0, False)
    self.assertEqual(variants.shape, (20, 50))
    self.assertEqual(variants.sum(), 474.0)
    expected_first_row = np.array([
        1., 1., 0., 1., 0., 0., 0., 0., 1., 0.,
        0., 1., 1., 0., 1., 1., 0., 1., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
        0., 1., 1., 0., 1., 1., 1., 0., 1., 1.,
        0., 0., 0., 1., 0., 1., 1., 1., 0., 1.,
    ])
    self.assertTrue(
        abs(variants.toarray()[0] - expected_first_row).max() < 1E-7)
    self.assertEqual(len(sidx), 20)
    self.assertEqual(sidx, [2, 6, 20, 32, 39, 54, 58, 60, 69, 89,
                            93, 123, 127, 134, 153, 156, 179, 180, 184, 194])
    self.assertEqual(vidx, 200)

    # not providing samples
    infile = open_gz(KMER)
    with self.assertRaises(ZeroDivisionError):
        load_all_vars('kmers', p, False, None, infile,
                      set(), None,
                      0.45, 0.55, 1.0, False)

    # uncompressed option - only with python3+
    if sys.version_info[0] >= 3:
        infile = open_gz(KMER)
        with self.assertRaises(TypeError):
            load_all_vars('kmers', p, False, None, infile,
                          set(p.index), None,
                          0.45, 0.55, 1.0, True)

    # different type
    infile = open_gz(KMER)
    with self.assertRaises(ValueError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(), None,
                      0.45, 0.55, 1.0, False)
    infile = open_gz(KMER)
    with self.assertRaises(AttributeError):
        load_all_vars('vcf', p, False, None, infile,
                      set(), None,
                      0.45, 0.55, 1.0, False)

    # different file
    infile = open_gz(PRES)
    with self.assertRaises(IndexError):
        load_all_vars('kmers', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, False)
    infile = open_gz(VCF)
    with self.assertRaises(IndexError):
        load_all_vars('kmers', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, False)
def test_load_all_vars_rtab(self):
    """Exercise ``load_all_vars`` in ``'Rtab'`` mode on the PRES fixture.

    First checks the shape, sum, first row and returned indices for a valid
    call, then checks the exception raised for: the smaller Rtab fixture
    with wide frequency bounds, an empty sample set, a mismatching variant
    type, and input files of another format.
    """
    def open_rtab_tracked(*args, **kwargs):
        # Close the handle returned by open_rtab at teardown.
        handle, order = open_rtab(*args, **kwargs)
        self.addCleanup(handle.close)
        return handle, order

    def open_gz(path):
        # Close the gzip handle at teardown.
        handle = gzip.open(path)
        self.addCleanup(handle.close)
        return handle

    p = pd.read_csv(P, index_col=0, sep='\t')['binary']

    infile, sample_order = open_rtab_tracked(PRES)
    variants, sidx, vidx = load_all_vars('Rtab', p, False, None, infile,
                                         set(p.index), sample_order,
                                         0.25, 0.75, 1.0, False)
    self.assertEqual(variants.shape, (7, 50))
    self.assertEqual(variants.sum(), 103.0)
    expected_first_row = np.array([
        0., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
        0., 1., 0., 1., 0., 1., 0., 0., 1., 0.,
    ])
    self.assertTrue(
        abs(variants.toarray()[0] - expected_first_row).max() < 1E-7)
    self.assertEqual(len(sidx), 7)
    self.assertEqual(sidx, [1426, 1436, 1463, 1484, 1492, 1496, 1498])
    self.assertEqual(vidx, 1499)

    # Files are opened *outside* the assertRaises blocks below so that only
    # load_all_vars itself can satisfy the expected exception.

    # too few OGs
    infile, sample_order = open_rtab_tracked(PRESSMALL, compressed=False)
    with self.assertRaises(ValueError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(p.index), sample_order,
                      0.01, 0.99, 1.0, False)

    # not providing samples
    infile, _ = open_rtab_tracked(PRESSMALL, compressed=False)
    with self.assertRaises(ValueError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(), [],
                      0.45, 0.55, 1.0, False)

    # different type
    # (the sample order now comes from the file that was just opened,
    # not from a previous open of a different fixture)
    infile, sample_order = open_rtab_tracked(PRES)
    with self.assertRaises(IndexError):
        load_all_vars('kmers', p, False, None, infile,
                      set(p.index), sample_order,
                      0.01, 0.99, 1.0, False)
    infile, sample_order = open_rtab_tracked(PRES)
    with self.assertRaises(AttributeError):
        load_all_vars('vcf', p, False, None, infile,
                      set(p.index), sample_order,
                      0.01, 0.99, 1.0, False)

    # different file
    infile = open_gz(KMER)
    with self.assertRaises(ValueError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, False)
    infile = open_gz(VCF)
    with self.assertRaises(ValueError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, False)
def test_no_file(self):
    """Call ``load_all_vars`` with ``None`` for every argument but the type.

    Each variant-type string is followed by ten ``None`` placeholders and
    must raise the exception paired with it below.
    """
    placeholders = (None,) * 10
    expected_failures = (
        ('kmers', AttributeError),
        ('vcf', TypeError),
        ('Rtab', AttributeError),
    )
    for var_type, error in expected_failures:
        self.assertRaises(error, load_all_vars, var_type, *placeholders)
def test_load_all_vars_vcf(self):
    """Exercise ``load_all_vars`` in ``'vcf'`` mode on the VCF fixture.

    First checks the shape, sum, first row and returned indices for a valid
    call, then checks the exception raised for: an empty sample set, a
    mismatching variant type, and input files of another format.
    """
    def open_vcf():
        # Close the VariantFile at teardown.
        handle = VariantFile(VCF)
        self.addCleanup(handle.close)
        return handle

    def open_gz(path):
        # Close the gzip handle at teardown.
        handle = gzip.open(path)
        self.addCleanup(handle.close)
        return handle

    p = pd.read_csv(P, index_col=0, sep='\t')['binary']

    infile = open_vcf()
    variants, sidx, vidx = load_all_vars('vcf', p, False, None, infile,
                                         set(p.index), None,
                                         0.25, 0.75, 1.0, False)
    self.assertEqual(variants.shape, (8, 50))
    self.assertEqual(variants.sum(), 140)
    expected_first_row = np.array([
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        1., 0., 1., 0., 0., 0., 0., 1., 1., 1.,
        0., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 0., 1.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
    ])
    self.assertTrue(
        abs(variants.toarray()[0] - expected_first_row).max() < 1E-7)
    self.assertEqual(len(sidx), 8)
    self.assertEqual(sidx, [10, 29, 39, 95, 110, 153, 156, 164])
    self.assertEqual(vidx, 254)

    # Files are opened *outside* the assertRaises blocks below so that only
    # load_all_vars itself can satisfy the expected exception.

    # not providing samples
    infile = open_vcf()
    with self.assertRaises(ZeroDivisionError):
        load_all_vars('vcf', p, False, None, infile,
                      set(), None,
                      0.45, 0.55, 1.0, False)

    # different type
    infile = open_vcf()
    with self.assertRaises(AttributeError):
        load_all_vars('kmers', p, False, None, infile,
                      set(p.index), None,
                      0.01, 0.99, 1.0, True)
    infile = open_vcf()
    with self.assertRaises(AttributeError):
        load_all_vars('Rtab', p, False, None, infile,
                      set(p.index), None,
                      0.01, 0.99, 1.0, False)

    # different file
    infile = open_gz(KMER)
    with self.assertRaises(AttributeError):
        load_all_vars('vcf', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, False)
    infile = open_gz(PRES)
    with self.assertRaises(AttributeError):
        load_all_vars('vcf', p, False, None, infile,
                      set(p.index), None,
                      0.45, 0.55, 1.0, True)
def test_unsupported(self):
    """``load_all_vars`` must raise ``ValueError`` for the type ``'test'``.

    The remaining ten positional arguments are all ``None``.
    """
    dummy_args = [None] * 10
    self.assertRaises(ValueError, load_all_vars, 'test', *dummy_args)