def test_swap(self):
    '''Test _swap: one call gives the swapped record, a second call restores the original'''
    original_cols = ['1', '100', '2', '200', '101', '202', '42.42', '123', '456', '-1', '0', 'ref', 'qry']
    # Expected columns after a swap: the coordinate, hit length, sequence
    # length and name columns of the two sequences change places.
    swapped_cols = ['2', '200', '1', '100', '202', '101', '42.42', '456', '123', '-1', '0', 'qry', 'ref']
    original_line = '\t'.join(original_cols)
    swapped_line = '\t'.join(swapped_cols)

    aln = alignment.Alignment(original_line)
    aln._swap()
    self.assertEqual(aln, alignment.Alignment(swapped_line))

    # A second swap must take us back to where we started.
    aln._swap()
    self.assertEqual(aln, alignment.Alignment(original_line))
def test_reverse_query(self):
    '''Test reverse_query: compare the modified alignment against a hand-written expected record'''
    fields_before = ['100', '142', '1', '42', '43', '42', '100.00', '150', '100', '1', '1', 'ref', 'qry']
    # Only the third and fourth columns differ from the input record.
    fields_after = ['100', '142', '100', '59', '43', '42', '100.00', '150', '100', '1', '1', 'ref', 'qry']

    aln = alignment.Alignment('\t'.join(fields_before))
    expected = alignment.Alignment('\t'.join(fields_after))

    aln.reverse_query()
    self.assertEqual(expected, aln)
def test_intersects_variant(self):
    '''Test intersects_variant'''
    # Variant at 100 in ref, 600 in qry
    snp0 = snp.Snp('100\tA\t.\t600\t75\t77\t1\t0\t606\t1700\t1\t1\tref\tqry')
    indel = variant.Variant(snp0)

    # The four alignments differ only in their first column (100 vs 101)
    # and third column (600 vs 601). Only the one using 100 and 600 is
    # expected to intersect the variant.
    aln1 = alignment.Alignment('100\t500\t600\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry')
    aln2 = alignment.Alignment('101\t500\t600\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry')
    aln3 = alignment.Alignment('100\t500\t601\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry')
    aln4 = alignment.Alignment('101\t500\t601\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry')

    self.assertTrue(aln1.intersects_variant(indel))
    self.assertFalse(aln2.intersects_variant(indel))
    self.assertFalse(aln3.intersects_variant(indel))
    self.assertFalse(aln4.intersects_variant(indel))
def test_str(self):
    '''Test __str__: the string form is the input line minus one ignored column'''
    fields = ['1', '100', '2', '200', '101', '202', '42.42', '123', '456', '-1', '0', 'ref', 'qry']
    # the 10th column (counting from zero) is ignored and so not output by __str__
    expected_fields = [value for index, value in enumerate(fields) if index != 10]

    aln = alignment.Alignment('\t'.join(fields))
    self.assertEqual(str(aln), '\t'.join(expected_fields))
def test_is_self_hit(self):
    '''Test is_self_hit on one positive record and four near-miss records'''
    cases = [
        # baseline record, expected to count as a self hit
        (['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], True),
        # second column changed to 101
        (['1', '101', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
        # first column changed to 2
        (['2', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
        # last column (second sequence name) changed to ref2
        (['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref2'], False),
        # percent identity column lowered to 99.9
        (['1', '100', '1', '100', '100', '100', '99.9', '1000', '1000', '1', '1', 'ref', 'ref'], False),
    ]

    for fields, expected in cases:
        aln = alignment.Alignment('\t'.join(fields))
        self.assertEqual(aln.is_self_hit(), expected)
def test_ref_coords(self):
    '''Test ref_coords gives the same interval whichever way round the hit coordinates are written'''
    ascending = ['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']
    descending = ['100', '1', '100', '1', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']
    expected = pyfastaq.intervals.Interval(0, 99)

    for fields in (ascending, descending):
        aln = alignment.Alignment('\t'.join(fields))
        self.assertEqual(expected, aln.ref_coords())
def test_qry_coords_from_ref_coord_test_bad_ref_coord(self):
    '''Test qry_coords_from_ref_coord raises alignment.Error for bad ref coords'''
    fields = ['100', '200', '1', '100', '100', '100', '100.00', '300', '300', '1', '1', 'ref', 'qry']
    aln = alignment.Alignment('\t'.join(fields))

    # One coordinate on each side of the hit; both must be rejected.
    for bad_ref_coord in (98, 200):
        with self.assertRaises(alignment.Error):
            aln.qry_coords_from_ref_coord(bad_ref_coord, [])
def test_to_msp_crunch(self):
    '''Test to_msp_crunch against a hand-written space-separated line'''
    fields = ['100', '110', '1', '10', '10', '11', '80.00', '123', '456', '-1', '0', 'ref', 'qry']
    aln = alignment.Alignment('\t'.join(fields))
    self.assertEqual('8 80.00 1 10 qry 100 110 ref', aln.to_msp_crunch())
def test_ref_coords_from_qry_coord_when_variant_not_in_nucmer_match(self):
    '''Test ref_coords_from_qry_coord when variant not in nucmer match'''
    aln = alignment.Alignment('1\t606\t596\t1201\t606\t606\t100.00\t606\t1700\t1\t1\tref\tqry')
    indel = variant.Variant(snp.Snp('127\tA\t.\t77\t75\t77\t1\t0\t606\t1700\t1\t1\tref\tqry'))

    # (query coordinate, expected reference coordinate)
    coord_pairs = [(595, 0), (995, 400), (1200, 605)]

    for qry_coord, expected_ref_coord in coord_pairs:
        # The answer must be the same with and without the variant in the list.
        # Fresh lists are built on every pass, as in the straight-line version.
        for variants in ([], [indel]):
            self.assertEqual(
                (expected_ref_coord, False),
                aln.ref_coords_from_qry_coord(qry_coord, variants),
            )
def reader(fname):
    '''Helper function to open the results file (coords file) and
    create alignment objects with the values in it.

    Generator: yields one alignment.Alignment per usable line of fname.
    Lines that start with '[' or that contain no tab character are skipped
    (presumably header lines - confirm against the coords file format).

    The file is closed when iteration finishes, and also when the generator
    is closed early or an error is raised while parsing a line.
    '''
    f = pyfastaq.utils.open_file_read(fname)

    try:
        for line in f:
            if line.startswith('[') or '\t' not in line:
                continue

            yield alignment.Alignment(line)
    finally:
        # Runs on normal exhaustion, on generator.close() and on exceptions,
        # so the handle is never left open.
        pyfastaq.utils.close(f)
def test_on_same_strand(self):
    '''test on_same_strand for every combination of ascending/descending hit coordinates'''
    cases = [
        # both coordinate pairs ascending
        (['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], True),
        # both coordinate pairs descending
        (['100', '1', '100', '1', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], True),
        # first pair ascending, second pair descending
        (['1', '100', '100', '1', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
        # first pair descending, second pair ascending
        (['100', '1', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
    ]

    for fields, expect_same_strand in cases:
        check = self.assertTrue if expect_same_strand else self.assertFalse
        check(alignment.Alignment('\t'.join(fields)).on_same_strand())
def test_qry_coords_from_ref_coord_test_different_strand(self):
    '''Test qry_coords_from_ref_coord on different strand'''
    aln = alignment.Alignment('\t'.join(
        ['100', '200', '101', '1', '100', '100', '100.00', '300', '300', '1', '1', 'ref', 'qry']
    ))

    def make_snp(ref_pos, ref_base, qry_base, qry_pos):
        # Every Snp in this test shares the same trailing eight columns.
        columns = [ref_pos, ref_base, qry_base, qry_pos, 'x', 'x', '300', '300', 'x', '1', 'ref', 'qry']
        return snp.Snp('\t'.join(columns))

    def flip_hit_direction():
        # Exchange start and end of the hit in both query and reference.
        aln.qry_start, aln.qry_end = aln.qry_end, aln.qry_start
        aln.ref_start, aln.ref_end = aln.ref_end, aln.ref_start

    # snp
    substitution = variant.Variant(make_snp('140', 'A', 'T', '40'))

    # del from qry: del1 is built from one Snp, del2 is extended with a second
    qry_del_first = make_snp('140', 'A', '.', '40')
    qry_del_second = make_snp('141', 'C', '.', '40')
    del1 = variant.Variant(qry_del_first)
    del2 = variant.Variant(qry_del_first)
    self.assertTrue(del2.update_indel(qry_del_second))

    # del from ref: ins1 is built from one Snp, ins2 is extended with two more
    ref_del_first = make_snp('150', '.', 'A', '50')
    ref_del_second = make_snp('150', '.', 'C', '51')
    ref_del_third = make_snp('150', '.', 'G', '52')
    ins1 = variant.Variant(ref_del_first)
    ins2 = variant.Variant(ref_del_first)
    self.assertTrue(ins2.update_indel(ref_del_second))
    self.assertTrue(ins2.update_indel(ref_del_third))

    # (ref coordinate, variants passed in, expected return value)
    cases = [
        (99, [], (100, False)),
        (100, [], (99, False)),
        (199, [], (0, False)),
        (119, [], (80, False)),
        (119, [del1], (80, False)),
        (149, [], (50, False)),
        (149, [del1], (51, False)),
        (149, [del2], (52, False)),
        (159, [], (40, False)),
        (159, [ins1], (39, False)),
        (159, [ins2], (37, False)),
        (159, [del1, ins1], (40, False)),
        (159, [del1, ins2], (38, False)),
        (159, [del2, ins1], (41, False)),
        (159, [del2, ins2], (39, False)),
        (139, [del1], (39, True)),
        (139, [substitution], (60, False)),
        (149, [ins1], (49, True)),
    ]

    for ref_coord, variant_list, expected in cases:
        self.assertEqual(expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

        # if we reverse the direction of hit in query and reference, should get the same answer
        flip_hit_direction()
        self.assertEqual(expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

        # put the alignment back the way it was for the next case
        flip_hit_direction()
def test_coords_file(self):
    '''test coords_file: reading with and without a header gives the same three alignments'''
    expected_rows = [
        ['61', '900', '1', '840', '840', '840', '99.76', '1000', '840', '1', '1', 'test_ref1', 'test_qry1', '[CONTAINS]'],
        ['62', '901', '2', '841', '841', '850', '99.66', '999', '839', '1', '1', 'test_ref2', 'test_qry2', '[CONTAINS]'],
        ['63', '902', '3', '842', '842', '860', '99.56', '998', '838', '1', '1', 'test_ref3', 'test_qry3', '[CONTAINS]'],
    ]
    expected = [alignment.Alignment('\t'.join(row)) for row in expected_rows]

    basenames = ('coords_file_test_with_header.coords', 'coords_file_test_no_header.coords')
    for basename in basenames:
        fname = os.path.join(data_dir, basename)
        alignments = list(coords_file.reader(fname))
        self.assertEqual(alignments, expected)
def test_init_nucmer(self):
    '''test __init__ nucmer: check each attribute parsed from a 14-column line'''
    fields = ['1', '100', '2', '200', '101', '202', '42.42', '123', '456', '-1', '0', 'ref', 'qry', '[FOO]']
    aln = alignment.Alignment('\t'.join(fields))

    # (attribute name, expected value), checked in this order.
    # The start/end values are one less than the numbers in the input line.
    expected_attributes = [
        ('ref_start', 0),
        ('ref_end', 99),
        ('qry_start', 1),
        ('qry_end', 199),
        ('hit_length_ref', 101),
        ('hit_length_qry', 202),
        ('percent_identity', 42.42),
        ('ref_length', 123),
        ('qry_length', 456),
        ('frame', -1),
        ('ref_name', 'ref'),
        ('qry_name', 'qry'),
    ]

    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(aln, attribute), expected)
def test_init_promer(self):
    '''test __init__ promer: check each attribute parsed from a 16-column line'''
    fields = ['1', '1398', '4891054', '4892445', '1398', '1392', '89.55', '93.18', '0.21',
              '1398', '5349013', '1', '1', 'ref', 'qry', '[CONTAINED]']
    aln = alignment.Alignment('\t'.join(fields))

    # (attribute name, expected value), checked in this order.
    # The start/end values are one less than the numbers in the input line.
    expected_attributes = [
        ('ref_start', 0),
        ('ref_end', 1397),
        ('qry_start', 4891053),
        ('qry_end', 4892444),
        ('hit_length_ref', 1398),
        ('hit_length_qry', 1392),
        ('percent_identity', 89.55),
        ('ref_length', 1398),
        ('qry_length', 5349013),
        ('frame', 1),
        ('ref_name', 'ref'),
        ('qry_name', 'qry'),
    ]

    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(aln, attribute), expected)
def test_contig_overlap_trimmer(self):
    '''Test contig overlap trimming.

    Runs ContigOverlapTrimmer on TRIMMING_input_1.fa with a hand-written
    set of self-alignments, then checks that the output and summary files
    exist and that every contig in TRIMMING_output_1.fa equals the matching
    entry of the trimmer's contigs. Files written by the trimmer are
    removed afterwards, whether or not the assertions pass.
    '''
    # test data
    test_fasta_file = os.path.join(data_dir, "TRIMMING_input_1.fa")
    test_overlap_coords = [
        '\t'.join(['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig1', 'contig1']),
        '\t'.join(['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig2', 'contig2']),
        '\t'.join(['2', '4', '57', '59', '3', '3', '100.00', '60', '60', '1', '1', 'contig3', 'contig3']),
        '\t'.join(['2', '4', '54', '56', '3', '3', '100.00', '60', '60', '1', '1', 'contig4', 'contig4']),
        '\t'.join(['1', '3', '58', '60', '3', '3', '100.00', '60', '60', '1', '1', 'contig4', 'contig4']),
        '\t'.join(['1', '4', '57', '60', '4', '4', '100.00', '60', '60', '1', '1', 'contig5', 'contig5']),
        # Overlap too short
        '\t'.join(['1', '2', '59', '60', '2', '2', '100.00', '60', '60', '1', '1', 'contig6', 'contig6']),
        # Trimmed length would be too short
        '\t'.join(['1', '12', '49', '60', '12', '12', '100.00', '60', '60', '1', '1', 'contig7', 'contig7']),
        # overlap reversed
        '\t'.join(['1', '3', '60', '58', '3', '3', '100.00', '60', '60', '-1', '-1', 'contig8', 'contig8']),
        # No overlap for contig 9
        # beyond offset
        '\t'.join(['4', '7', '36', '38', '4', '4', '100.00', '60', '60', '1', '-1', 'contig10', 'contig10']),
    ]
    test_overlap_alignments = [alignment.Alignment(coord) for coord in test_overlap_coords]

    overlap_trimmer = contig_overlap_trimmer.ContigOverlapTrimmer(
        fasta_file=test_fasta_file,
        alignments=test_overlap_alignments,
        overlap_offset=10,
        overlap_min_length=3,
        overlap_max_length=12,
    )
    overlap_trimmer.run()

    try:
        self.assertTrue(os.path.isfile(overlap_trimmer.output_file))
        self.assertTrue(os.path.isfile(overlap_trimmer.summary_file))

        expected_contigs = {}
        tasks.file_to_dict(os.path.join(data_dir, "TRIMMING_output_1.fa"), expected_contigs)

        for contig_name in expected_contigs:
            # assertEqual reports both values on failure, unlike assertTrue(a == b)
            self.assertEqual(expected_contigs[contig_name], overlap_trimmer.contigs[contig_name])
    finally:
        # Clean up even when an assertion above fails, so a failing run
        # does not leave files behind. Only remove what actually exists,
        # so a missing file cannot mask the real assertion failure.
        for path in (overlap_trimmer.output_file, overlap_trimmer.summary_file):
            if os.path.isfile(path):
                os.remove(path)