def test_update_indel_deletion(self):
    '''update_indel should extend a deletion with the next deleted reference base'''
    deletion = variant.Variant(
        snp.Snp('42\tA\t.\t100\tx\tx\t300\t400\tx\tx\tref\tqry'))
    next_base = snp.Snp('43\tC\t.\t100\tx\tx\t300\t400\tx\tx\tref\tqry')

    # The start/end values below are one less than the positions written in
    # the input lines, because coordinates are stored zero-based.
    wanted = copy.copy(deletion)
    wanted_attributes = (
        ('ref_start', 41),
        ('ref_end', 42),
        ('ref_length', 300),
        ('ref_name', 'ref'),
        ('ref_base', 'AC'),
        ('qry_start', 99),
        ('qry_end', 99),
        ('qry_length', 400),
        ('qry_name', 'qry'),
        ('qry_base', '.'),
    )
    for attr_name, attr_value in wanted_attributes:
        setattr(wanted, attr_name, attr_value)

    self.assertTrue(deletion.update_indel(next_base))
    self.assertEqual(wanted, deletion)
def test_get_all_variants(self):
    '''Compare variants loaded from a snps file against hand-built expected variants'''

    def build_indel(snp_lines):
        # The first line seeds the variant; every later line is folded in
        # with update_indel (its return value is not checked here).
        parsed = [snp.Snp(text) for text in snp_lines]
        indel = variant.Variant(parsed[0])
        for extra in parsed[1:]:
            indel.update_indel(extra)
        return indel

    # Three consecutive reference positions, each with '.' as the query base
    deletion_variant = build_indel(
        '\t'.join([ref_pos, ref_base, '.', '124', '1', '124',
                   '500', '497', '1', '1', 'ref1', 'qry1'])
        for ref_pos, ref_base in (('125', 'T'), ('126', 'A'), ('127', 'C'))
    )

    snp_variant = variant.Variant(
        snp.Snp('386\tC\tT\t383\t115\t115\t500\t497\t1\t1\tref1\tqry1'))

    # Four consecutive query positions, each with '.' as the reference base
    insertion_variant = build_indel(
        '\t'.join(['479', '.', qry_base, qry_pos, '0', '22',
                   '500', '504', '1', '1', 'ref2', 'qry2'])
        for qry_base, qry_pos in (
            ('G', '480'), ('A', '481'), ('T', '482'), ('A', '483'))
    )

    infile = os.path.join(data_dir, 'snp_file_test_get_all_variants.snps')
    loaded = snp_file.get_all_variants(infile)

    self.assertEqual(len(loaded), 3)
    wanted_in_order = [deletion_variant, snp_variant, insertion_variant]
    for index, wanted in enumerate(wanted_in_order):
        self.assertEqual(loaded[index], wanted)
def test_snp_file(self):
    '''snp_file.reader should yield the same Snps from files with and without a header'''
    expected_lines = (
        '133\tG\t.\t122\t1\t122\t500\t489\t1\t1\tref\tqry',
        '143\t.\tC\t131\t1\t132\t500\t489\t1\t1\tref\tqry',
        '253\tT\tA\t242\t120\t242\t500\t489\t1\t1\tref\tqry',
    )
    expected = [snp.Snp(text) for text in expected_lines]

    basenames = (
        'snp_file_test_with_header.snps',
        'snp_file_test_no_header.snps',
    )
    for basename in basenames:
        snps = list(snp_file.reader(os.path.join(data_dir, basename)))
        self.assertEqual(snps, expected)
def reader(fname):
    '''Generator that yields a snp.Snp for each data line of the file fname.

    Lines that start with '[' and lines that contain no tab character are
    skipped; every other line is passed unchanged to snp.Snp.

    The file handle is released in a finally clause, so it is closed when
    the file has been read to the end, when snp.Snp raises on a line, and
    when the caller closes or discards the generator before exhausting it.
    '''
    f = pyfastaq.utils.open_file_read(fname)
    try:
        for line in f:
            if line.startswith('[') or '\t' not in line:
                continue
            yield snp.Snp(line)
    finally:
        pyfastaq.utils.close(f)
def test_str_no_c_option(self):
    '''str() of a Snp parsed from a 12-column line (format with no -C option)'''
    columns = (
        '187', 'A', 'C', '269', '187', '187',
        '654', '853', '1', '1', 'ref_name', 'qry_name',
    )
    parsed = snp.Snp('\t'.join(columns))
    # Eight columns are expected back: positions, bases, lengths and names
    wanted = '187\tA\tC\t269\t654\t853\tref_name\tqry_name'
    self.assertEqual(str(parsed), wanted)
def test_ref_coords_from_qry_coord_when_variant_not_in_nucmer_match(self):
    '''ref_coords_from_qry_coord should give the same answer with or without a variant that is not in the nucmer match'''
    aln = alignment.Alignment('\t'.join([
        '1', '606', '596', '1201', '606', '606', '100.00',
        '606', '1700', '1', '1', 'ref', 'qry',
    ]))
    indel = variant.Variant(snp.Snp('\t'.join([
        '127', 'A', '.', '77', '75', '77', '1', '0',
        '606', '1700', '1', '1', 'ref', 'qry',
    ])))

    # (query coordinate passed in, reference coordinate expected back)
    coord_pairs = ((595, 0), (995, 400), (1200, 605))
    for qry_coord, ref_coord in coord_pairs:
        # Each coordinate is checked first with no variants, then with the indel
        for variant_list in ([], [indel]):
            self.assertEqual(
                (ref_coord, False),
                aln.ref_coords_from_qry_coord(qry_coord, variant_list))
def test_intersects_variant(self):
    '''Test intersects_variant'''
    # Variant at position 100 in the reference and 600 in the query
    indel = variant.Variant(
        snp.Snp('100\tA\t.\t600\t75\t77\t1\t0\t606\t1700\t1\t1\tref\tqry'))

    # (reference start, query start) for each alignment; the other columns
    # are the same for all four.
    start_pairs = (('100', '600'), ('101', '600'), ('100', '601'), ('101', '601'))
    alignments = [
        alignment.Alignment('\t'.join([
            ref_start, '500', qry_start, '1000', '501', '501', '100.00',
            '600', '1700', '1', '1', 'ref', 'qry',
        ]))
        for ref_start, qry_start in start_pairs
    ]

    # Only the first alignment, starting at 100 in ref and 600 in qry, is
    # expected to intersect the variant.
    self.assertTrue(alignments[0].intersects_variant(indel))
    for other in alignments[1:]:
        self.assertFalse(other.intersects_variant(indel))
def test_init(self):
    '''Variant should get var_type SNP, DEL or INS depending on the Snp it is built from'''
    shared_tail = ['1000', '1000', '1', '1', 'ref', 'ref']
    leading_columns = [
        ['42', 'T', 'A', '42', '42', '42'],      # both bases present
        ['242', 'G', '.', '241', '1', '241'],    # '.' in the query base column
        ['300', '.', 'G', '298', '0', '298'],    # '.' in the reference base column
    ]
    variants = [
        variant.Variant(snp.Snp('\t'.join(head + shared_tail)))
        for head in leading_columns
    ]
    wanted_types = [variant.SNP, variant.DEL, variant.INS]
    for built, wanted_type in zip(variants, wanted_types):
        self.assertEqual(built.var_type, wanted_type)
def test_qry_coords_from_ref_coord_test_different_strand(self):
    '''Test qry_coords_from_ref_coord on different strand'''

    def make_snp(ref_pos, ref_base, qry_base, qry_pos):
        # Only the first four columns differ between the snps in this test
        return snp.Snp('\t'.join([
            ref_pos, ref_base, qry_base, qry_pos,
            'x', 'x', '300', '300', 'x', '1', 'ref', 'qry',
        ]))

    def swap_ends():
        # Exchange start and end of the hit in both query and reference
        aln.qry_start, aln.qry_end = aln.qry_end, aln.qry_start
        aln.ref_start, aln.ref_end = aln.ref_end, aln.ref_start

    aln = alignment.Alignment('\t'.join([
        '100', '200', '101', '1', '100', '100', '100.00',
        '300', '300', '1', '1', 'ref', 'qry',
    ]))

    # Substitution
    snp_var = variant.Variant(make_snp('140', 'A', 'T', '40'))

    # Bases missing from the query: one base long (del1), two long (del2)
    del_first = make_snp('140', 'A', '.', '40')
    del_second = make_snp('141', 'C', '.', '40')
    del1 = variant.Variant(del_first)
    del2 = variant.Variant(del_first)
    self.assertTrue(del2.update_indel(del_second))

    # Bases missing from the reference: one base long (ins1), three long (ins2)
    ins_first = make_snp('150', '.', 'A', '50')
    ins_second = make_snp('150', '.', 'C', '51')
    ins_third = make_snp('150', '.', 'G', '52')
    ins1 = variant.Variant(ins_first)
    ins2 = variant.Variant(ins_first)
    self.assertTrue(ins2.update_indel(ins_second))
    self.assertTrue(ins2.update_indel(ins_third))

    # (reference coordinate, variants to take into account, expected result)
    tests = [
        (99, [], (100, False)),
        (100, [], (99, False)),
        (199, [], (0, False)),
        (119, [], (80, False)),
        (119, [del1], (80, False)),
        (149, [], (50, False)),
        (149, [del1], (51, False)),
        (149, [del2], (52, False)),
        (159, [], (40, False)),
        (159, [ins1], (39, False)),
        (159, [ins2], (37, False)),
        (159, [del1, ins1], (40, False)),
        (159, [del1, ins2], (38, False)),
        (159, [del2, ins1], (41, False)),
        (159, [del2, ins2], (39, False)),
        (139, [del1], (39, True)),
        (139, [snp_var], (60, False)),
        (149, [ins1], (49, True)),
    ]

    for ref_coord, variant_list, expected in tests:
        self.assertEqual(
            expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

        # Reversing the direction of the hit in both query and reference
        # should give the same answer.
        swap_ends()
        self.assertEqual(
            expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

        # Put the alignment back as it was before the next case
        swap_ends()
def test_update_indel_no_change(self):
    '''update_indel should return False and leave the variant unchanged for each pairing below'''

    def make_snp(ref_pos, ref_base, qry_base, qry_pos='100',
                 ref_len='300', qry_len='400',
                 ref_name='ref', qry_name='qry'):
        # Columns that are identical in most lines of this test are defaults
        return snp.Snp('\t'.join([
            ref_pos, ref_base, qry_base, qry_pos, 'x', 'x',
            ref_len, qry_len, 'x', 'x', ref_name, qry_name,
        ]))

    far_away = {'qry_pos': '1000', 'ref_len': '2000', 'qry_len': '3000'}

    # Starting points: two substitutions, five lines with '.' as the query
    # base, five lines with '.' as the reference base. A separate Snp
    # object is built for every entry.
    initial_vars = (
        [make_snp('42', 'A', 'C') for _ in range(2)]
        + [make_snp('42', 'A', '.') for _ in range(5)]
        + [make_snp('42', '.', 'A') for _ in range(5)]
    )

    # Entry i is offered to the variant built from initial_vars[i]
    to_add = [
        make_snp('142', 'A', '.', **far_away),
        make_snp('142', '.', 'A', **far_away),
        make_snp('43', 'A', '.', ref_name='ref2'),
        make_snp('43', 'A', '.', qry_name='qry2'),
        make_snp('44', 'A', '.'),
        make_snp('42', 'A', '.'),
        make_snp('43', '.', 'A'),
        make_snp('43', '.', 'A', ref_name='ref2'),
        make_snp('43', '.', 'A', qry_name='qry2'),
        make_snp('44', '.', 'A'),
        make_snp('42', '.', 'A'),
        make_snp('42', 'A', '.'),
    ]
    assert len(initial_vars) == len(to_add)

    for start_snp, extra_snp in zip(initial_vars, to_add):
        var = variant.Variant(start_snp)
        untouched = copy.copy(var)
        self.assertFalse(var.update_indel(extra_snp))
        self.assertEqual(var, untouched)