def test_load_var_block_rtab(self):
    """Check block-wise loading of Rtab variants through ``load_var_block``.

    The first block is checked for its shape, its values and the pattern
    attached to each variant; the generator is then drained to confirm
    that it signals end-of-file and afterwards yields empty results.
    """
    phenotypes = pd.read_csv(P, index_col=0, sep='\t')['binary'].head(5)

    infile = open(PRES)
    # Close the handle even if an assertion below fails.
    self.addCleanup(infile.close)

    # The first line is consumed here; every field after the first one is
    # passed on as the sample order.
    header = infile.readline().rstrip()
    sample_order = header.split()[1:]

    # NOTE(review): the trailing positional arguments are presumably the
    # frequency/missingness filters, a flag and the block size (4, matching
    # the shape asserted below) -- confirm against load_var_block.
    i_var = load_var_block('Rtab', phenotypes, False, [], infile,
                           phenotypes.index, sample_order,
                           0.0, 1.0, 1.0, False, 4)

    variants, variant_mat, eof = next(i_var)
    self.assertFalse(eof)
    self.assertEqual(variant_mat.shape, (5, 4))
    self.assertEqual(len(variants), 4)

    expected_mat = np.ones((5, 4))
    # Take the absolute value element-wise *before* reducing, so that
    # deviations in either direction are detected.
    self.assertTrue(np.abs(variant_mat - expected_mat).max() < 1E-7)

    expected_patterns = [b'2UFzg+SaUQpQxlmqSQ5lmQ==\n'] * 4
    self.assertEqual([x[0].pattern for x in variants], expected_patterns)

    # read until the end of the file
    while not eof:
        variants, variant_mat, eof = next(i_var)

    # One more pull after end-of-file must yield an empty block.
    variants, variant_mat, eof = next(i_var)
    self.assertTrue(eof)
    self.assertIsNone(variants)
    self.assertIsNone(variant_mat)
def test_load_var_block_vcf(self):
    """Check block-wise loading of VCF variants through ``load_var_block``.

    The first block is checked for its shape, its values and the pattern
    attached to each variant; the generator is then drained to confirm
    that it signals end-of-file and afterwards yields empty results.
    """
    phenotypes = pd.read_csv(P, index_col=0, sep='\t')['binary'].head(5)

    infile = VariantFile(VCF)
    # Close the handle even if an assertion below fails.
    self.addCleanup(infile.close)

    # NOTE(review): the trailing positional arguments are presumably the
    # frequency/missingness filters, a flag and the block size (4, matching
    # the shape asserted below) -- confirm against load_var_block.
    i_var = load_var_block('vcf', phenotypes, False, [], infile,
                           phenotypes.index, [],
                           0.0, 1.0, 1.0, False, 4)

    variants, variant_mat, eof = next(i_var)
    self.assertFalse(eof)
    self.assertEqual(variant_mat.shape, (5, 4))
    self.assertEqual(len(variants), 4)

    expected_mat = np.zeros((5, 4))
    # Take the absolute value element-wise *before* reducing, so that
    # deviations in either direction are detected.
    self.assertTrue(np.abs(variant_mat - expected_mat).max() < 1E-7)

    # The second variant of the block is expected to carry no pattern.
    expected_patterns = [
        b'/Us46UKS4AJRufOcR+5XEA==\n',
        None,
        b'/Us46UKS4AJRufOcR+5XEA==\n',
        b'/Us46UKS4AJRufOcR+5XEA==\n',
    ]
    self.assertEqual([x[0].pattern for x in variants], expected_patterns)

    # read until the end of the file
    while not eof:
        variants, variant_mat, eof = next(i_var)

    # One more pull after end-of-file must yield an empty block.
    variants, variant_mat, eof = next(i_var)
    self.assertTrue(eof)
    self.assertIsNone(variants)
    self.assertIsNone(variant_mat)
def test_load_var_block_kmers(self):
    """Check block-wise loading of k-mer variants through ``load_var_block``.

    The first block is checked for its shape, its values and the pattern
    attached to each variant; the generator is then drained to confirm
    that it signals end-of-file and afterwards yields empty results.
    """
    phenotypes = pd.read_csv(P, index_col=0, sep='\t')['binary'].head(5)

    infile = gzip.open(KMER)
    # Close the handle even if an assertion below fails.
    self.addCleanup(infile.close)

    # NOTE(review): 0.2 / 0.8 / 1.0 are presumably frequency and
    # missingness thresholds, followed by a flag and the block size (4,
    # matching the shape asserted below) -- confirm against load_var_block.
    i_var = load_var_block('kmers', phenotypes, False, [], infile,
                           phenotypes.index, [],
                           0.2, 0.8, 1.0, False, 4)

    variants, variant_mat, eof = next(i_var)
    self.assertFalse(eof)
    self.assertEqual(variant_mat.shape, (5, 4))
    self.assertEqual(len(variants), 4)

    expected_mat = np.array([[0., 0., 1., 0.],
                             [0., 0., 1., 0.],
                             [0., 0., 0., 0.],
                             [0., 0., 1., 0.],
                             [0., 0., 0., 0.]])
    # Take the absolute value element-wise *before* reducing, so that
    # deviations in either direction are detected.
    self.assertTrue(np.abs(variant_mat - expected_mat).max() < 1E-7)

    # Only the third variant of the block is expected to carry a pattern.
    expected_patterns = [None, None, b'WZMmBpWOV4GTJ81l+lQgBA==\n', None]
    self.assertEqual([x[0].pattern for x in variants], expected_patterns)

    # read until the end of the file
    while not eof:
        variants, variant_mat, eof = next(i_var)

    # One more pull after end-of-file must yield an empty block.
    variants, variant_mat, eof = next(i_var)
    self.assertTrue(eof)
    self.assertIsNone(variants)
    self.assertIsNone(variant_mat)