def setUp(self):
    """Record the gbfetch script path, an output path and expected data paths.

    Calls ``self.mkTestDir()`` before asking ``self.getTestFile`` for the
    output path; the expected FASTA and GenBank files are looked up through
    ``package_paths.data_path``.
    """
    self.gbfetch = package_paths.scripts_path("gbfetch.py")
    self.mkTestDir()
    self.out = self.getTestFile('gbfetch_test.txt')
    locate = package_paths.data_path
    self.expected_fasta = locate('JF314863-JF314866.fasta')
    self.expected_gb = locate('JF314863-JF314866.gb')
def setUp(self):
    """Create three gapped sequence records plus expected values and paths.

    ``expected_means`` and ``expected_maxs`` are keyed by the record ids
    ('1', '2', '3'). Three FASTA data-file paths are stored as well.
    """
    labelled = (
        ('1', 'CCA--CGTAA'),
        ('2', 'CCG--CGTAA'),
        ('3', 'CCA--TATAA'),
    )
    self.seqs = [
        SeqRecord(Seq(residues), id=label) for label, residues in labelled
    ]
    self.expected_means = {
        '1': 1.5,
        '2': 2.0,
        '3': 2.5,
    }
    self.expected_maxs = {
        '1': 2,
        '2': 3,
        '3': 3,
    }
    locate = package_paths.data_path
    self.rc_path = locate('primates-rev-comp-error.fasta')
    self.gappy_path = locate('melittobia-its1.fasta')
    self.rc_gappy_path = locate('melittobia-its1-rev-comp-error.fasta')
def test_copy(self):
    """A record read from JF314862.gb equals ``self.seq`` until renamed.

    The second assertion checks that appending a character to the copy's
    ``name`` makes ``sequtils.sequences_are_equal`` report a difference.
    """
    gb_file = package_paths.data_path('JF314862.gb')
    duplicate = SeqIO.read(gb_file, format='gb',
                           alphabet=IUPAC.ambiguous_dna)
    self.assertTrue(sequtils.sequences_are_equal(self.seq, duplicate))
    duplicate.name = duplicate.name + 'a'
    self.assertFalse(sequtils.sequences_are_equal(self.seq, duplicate))
def test_format_conversion_protein(self):
    """Convert each caenophidia file to the other formats and compare data.

    For every input file, each entry of ``self.to_formats`` is tried except
    the input's own extension and the 'genbank' format. The conversion is
    run through ``self.exe_seqaid`` with ``-d aa``, and the input and output
    are then parsed and compared with ``assertSameSequenceData``.
    """
    protein = IUPAC.extended_protein
    filenames = (
        'caenophidia.fasta',
        'caenophidia.phylip',
        'caenophidia.nexus',
    )
    for filename in filenames:
        in_ext = os.path.splitext(filename)[-1]
        # The phylip files are parsed with the 'phylip-relaxed' format;
        # otherwise the format name is the extension without its dot.
        in_format = ('phylip-relaxed' if in_ext == '.phylip'
                     else in_ext.replace('.', ''))
        in_path = package_paths.data_path(filename)
        for out_ext, out_format in iteritems(self.to_formats):
            if out_ext == in_ext or out_format == 'genbank':
                continue
            out_path = self.getTestFile(filename.replace(in_ext, out_ext))
            _LOG.info('converting {0} to {1}'.format(filename, out_ext))
            self.exe_seqaid([in_path, out_path, '-d', 'aa'])
            seqs_in = SeqIO.parse(in_path, format=in_format,
                                  alphabet=protein)
            seqs_out = SeqIO.parse(out_path, format=out_format,
                                   alphabet=protein)
            self.assertSameSequenceData(seqs_in, seqs_out, aligned=True)
def test_copy(self):
    """Check equality with a re-read JF314862 record, then inequality.

    The re-read record must compare equal to ``self.seq``; after one
    character is appended to its ``name`` the comparison must fail.
    """
    reread = SeqIO.read(
        package_paths.data_path('JF314862.gb'),
        format='gb',
        alphabet=IUPAC.ambiguous_dna,
    )
    same_before = sequtils.sequences_are_equal(self.seq, reread)
    self.assertTrue(same_before)
    reread.name = reread.name + 'a'
    same_after = sequtils.sequences_are_equal(self.seq, reread)
    self.assertFalse(same_after)
def test_basic(self):
    """Summaries of the primates nexus and fasta files report length 898.

    The 'global' summary is expected to cover 24 sequences; it and every
    per-file summary must have max == min == 898, mean 898.0 and
    variance 0.0.
    """
    nexus_path = package_paths.data_path('primates.nexus')
    fasta_path = package_paths.data_path('primates.fasta')
    expected_len = 898
    summaries = seqstats.get_seq_summaries_from_files(
        [nexus_path, fasta_path])
    overall = summaries.pop('global')
    self.assertEqual(overall.n, 24)
    self.assertEqual(overall.maximum, expected_len)
    self.assertEqual(overall.minimum, expected_len)
    self.assertAlmostEqual(overall.mean, 898.0)
    self.assertAlmostEqual(overall.variance, 0.0)
    # Remaining keys must each end with one of the two input file names.
    known_suffixes = ('primates.nexus', 'primates.fasta')
    for key, summary in summaries.items():
        self.assertTrue(key.endswith(known_suffixes))
        self.assertEqual(summary.maximum, expected_len)
        self.assertEqual(summary.minimum, expected_len)
        self.assertAlmostEqual(summary.mean, 898.0)
        self.assertAlmostEqual(summary.variance, 0.0)
def setUp(self):
    """Prepare data paths, a reference sequence string and a small FASTA file.

    Calls ``self.mkTestDir()``, stores the JF314862 GenBank/FASTA data paths
    and a 478-character nucleotide string in ``self.seq_str``, then writes
    ``self.fasta_str`` to the path returned by
    ``self.getTestFile('small.fasta')``.
    """
    self.mkTestDir()
    self.gb_path = package_paths.data_path('JF314862.gb')
    self.fasta_path = package_paths.data_path('JF314862.fasta')
    # Adjacent literals are concatenated by the parser, giving the same
    # string as the former backslash-continued '+' chain.
    self.seq_str = (
        'CATCATCAACATCATCGTGCCCTGCGTGCTCATCTCCTTCGTGGCTGTGC'
        'TCGTCTACTTTCTGCCTGCCAAGGGTAACGCTGGCACCAGGCGGCTGTGG'
        'GACTGCCTGTGCCATAGGCGTGAAGAGGGCAGGCCATGTGGCTGGGCAGA'
        'GGGAGGGAAGTGGGGGACAGCCACCGCTGGGAGACTGGCACCTGGGCCCA'
        'GTGCCCGTCATTTCCCCATCACATGGGCTTGGGGACATGGAAGCCAGTCC'
        'TGTGGGAGCAGACAGACACTCCCGGCTGCCGTGTCAGTCCTTAGGGCTGG'
        'CTGGACTCTCTCTGCACAGCCTCCCACTGTCAGTCCCAGGACCATCCATG'
        'TCCTAGGCATGTCTAGGCAGAGCCAGGCCCTTTCCAGGTGCCCTGGGACC'
        'CCGTCTCACGTGTCGATCCCCTCACTCTCCACATCCTGGCAGCGGGTGGG'
        'CAGAAGTGCACCGTCTCCATCAATGTCC'
    )
    self.fasta_str = ">s1\nACGTGCTATCTATCGTATTTAG\n"
    self.small_fasta = self.getTestFile('small.fasta')
    # The context manager closes the handle even if write() raises, which
    # the previous open()/write()/close() sequence did not guarantee.
    with open(self.small_fasta, 'w') as out:
        out.write(self.fasta_str)
def setUp(self):
    """Store ids, accessions and data-file paths used by the tests.

    Covers a single record (``id``/``acc``), a four-record list and a
    fifteen-accession list, each with FASTA and GenBank data paths.
    ``self.ids`` is ``id_list`` joined with commas.
    """
    locate = package_paths.data_path

    # Single record.
    self.id = '354698774'
    self.acc = 'JF314862'
    self.singleton_fasta = locate('JF314862.fasta')
    self.singleton_gb = locate('JF314862.gb')

    # Four-record lists.
    self.id_list = [
        '354698776',
        '354698778',
        '354698780',
        '354698782',
    ]
    self.acc_list = [
        'JF314863',
        'JF314864',
        'JF314865',
        'JF314866',
    ]
    self.long_acc_list = [
        'JF314862', 'JF314863', 'JF314864', 'JF314865', 'JF314866',
        'JF314867', 'JF314868', 'JF314869', 'JF314870', 'JF314871',
        'JF314872', 'JF314873', 'JF314874', 'JF314875', 'JF314876',
    ]
    self.ids = ','.join(self.id_list)
    self.multi_fasta = locate('JF314863-JF314866.fasta')
    self.multi_gb = locate('JF314863-JF314866.gb')
    self.long_multi_fasta = locate('JF314862-JF314876.fasta')
    self.long_multi_gb = locate('JF314862-JF314876.gb')
def setUp(self):
    """Define the id/accession fixtures and matching data-file paths.

    Sets a singleton id and accession, a list of four ids with their four
    accessions, a list of fifteen accessions, the comma-joined id string
    ``self.ids``, and FASTA/GenBank paths for each group.
    """
    self.id = '354698774'
    self.acc = 'JF314862'
    self.singleton_fasta = package_paths.data_path('JF314862.fasta')
    self.singleton_gb = package_paths.data_path('JF314862.gb')
    self.id_list = ['354698776', '354698778', '354698780', '354698782']
    self.acc_list = ['JF314863', 'JF314864', 'JF314865', 'JF314866']
    self.long_acc_list = [
        'JF314862', 'JF314863', 'JF314864',
        'JF314865', 'JF314866', 'JF314867',
        'JF314868', 'JF314869', 'JF314870',
        'JF314871', 'JF314872', 'JF314873',
        'JF314874', 'JF314875', 'JF314876',
    ]
    separator = ','
    self.ids = separator.join(self.id_list)
    self.multi_fasta = package_paths.data_path('JF314863-JF314866.fasta')
    self.multi_gb = package_paths.data_path('JF314863-JF314866.gb')
    self.long_multi_fasta = package_paths.data_path(
        'JF314862-JF314876.fasta')
    self.long_multi_gb = package_paths.data_path('JF314862-JF314876.gb')
def test_simble_gb_seq(self):
    """Digest the first record of the multi-GenBank file with RecognitionSeq.

    The first record must be named 'JF314863' and its digest must yield six
    ``Fragment`` objects with lengths 117, 172, 62, 10, 10 and 102, in
    that order.
    """
    recognition = RecognitionSeq('TAG', 3)
    gb_file = package_paths.data_path('JF314863-JF314866.gb')
    records = SeqIO.parse(gb_file, format='gb',
                          alphabet=IUPAC.ambiguous_dna)
    first = next(records)
    self.assertEqual(first.name, 'JF314863')
    fragments = list(recognition.digest(first))
    self.assertEqual(len(fragments), 6)
    expected_lengths = [117, 172, 62, 10, 10, 102]
    # The count was asserted above, so pairing covers every fragment.
    for fragment, expected_length in zip(fragments, expected_lengths):
        self.assertIsInstance(fragment, Fragment)
        self.assertEqual(len(fragment), expected_length)
def test_caenophidia(self):
    """Convert the caenophidia protein data between every pair of formats.

    Each conversion through ``convert_format`` must return 114, and the
    parsed input and output must pass ``assertSameData``.
    """
    ext_by_format = {
        'fasta': '.fasta',
        'phylip-relaxed': '.phylip',
        'nexus': '.nexus',
    }
    protein = IUPAC.extended_protein
    for in_format, in_ext in iteritems(ext_by_format):
        in_file = package_paths.data_path('caenophidia' + in_ext)
        for out_format, out_ext in iteritems(ext_by_format):
            out_file = self.getTestFile('caenophidia' + out_ext)
            converted = convert_format(
                in_file=in_file,
                in_format=in_format,
                out_file=out_file,
                out_format=out_format,
                data_type='protein',
            )
            self.assertEqual(converted, 114)
            in_seqs = SeqIO.parse(in_file, format=in_format,
                                  alphabet=protein)
            out_seqs = SeqIO.parse(out_file, format=out_format,
                                   alphabet=protein)
            self.assertSameData(in_seqs, out_seqs)
def test_simble_gb_seq(self):
    """Build a DigestSummary for the first GenBank record and check fields.

    The summary's recognition sequence and molecule id, name, description
    and length must mirror the inputs, and its length distribution must be
    ``{13: 2, 65: 1, 175: 1}``.
    """
    recognition = RecognitionSeq('TAG', 3)
    gb_file = package_paths.data_path('JF314863-JF314866.gb')
    records = SeqIO.parse(gb_file, format='gb',
                          alphabet=IUPAC.ambiguous_dna)
    record = next(records)
    self.assertEqual(record.name, 'JF314863')

    summary = DigestSummary(recognition, record)
    self.assertIsInstance(summary, DigestSummary)
    self.assertEqual(summary.recognition_seq, str(recognition.seq))
    self.assertEqual(summary.molecule_id, record.id)
    self.assertEqual(summary.molecule_name, record.name)
    self.assertEqual(summary.molecule_description, record.description)
    self.assertIsInstance(summary.length_distribution, dict)
    expected_distribution = {13: 2, 65: 1, 175: 1}
    self.assertEqual(summary.length_distribution, expected_distribution)
    self.assertEqual(summary.molecule_length, len(record))
def test_limnonectes(self):
    """Convert the limnonectes DNA data between every pair of formats.

    Each conversion through ``convert_format`` must return 80, and the
    parsed input and output must pass ``assertSameData``.
    """
    ext_by_format = {
        'fasta': '.fasta',
        'phylip-relaxed': '.phylip',
        'nexus': '.nexus',
    }
    dna = IUPAC.ambiguous_dna
    for in_format, in_ext in iteritems(ext_by_format):
        in_file = package_paths.data_path('limnonectes' + in_ext)
        for out_format, out_ext in iteritems(ext_by_format):
            out_file = self.getTestFile('limnonectes' + out_ext)
            converted = convert_format(
                in_file=in_file,
                in_format=in_format,
                out_file=out_file,
                out_format=out_format,
                data_type='dna',
            )
            self.assertEqual(converted, 80)
            in_seqs = SeqIO.parse(in_file, format=in_format, alphabet=dna)
            out_seqs = SeqIO.parse(out_file, format=out_format,
                                   alphabet=dna)
            self.assertSameData(in_seqs, out_seqs)
def test_extra_length(self):
    """Run seqdigest with ``-g`` ids and ``-x 10`` and check result files.

    After the run, one ``<key>.txt`` file per expected key is looked up in
    ``self.test_dir``, registered with ``self.appendTestFile`` and parsed
    with ``self.parse_result_file``; the parsed mapping must equal
    ``expected``.
    """
    expected = {
        'JF314863': {23: 2, 75: 1, 185: 1},
        'JF314864': {23: 1, 75: 1, 190: 1},
        'JF314865': {23: 1, 85: 1, 188: 1},
        'JF314866': {23: 2, 75: 1, 185: 1},
        'combined': {23: 6, 75: 3, 85: 1, 185: 2, 188: 1, 190: 1},
    }
    s_value = 'TAG'
    c_value = '3'
    gb_file = package_paths.data_path('JF314865-JF314866.gb')
    self.exe_seqdigest([
        '-s', s_value,
        '-c', c_value,
        '-g', '354698776,354698778',
        '-x', '10',
        gb_file,
    ])
    results = {}
    for key in iterkeys(expected):
        result_path = os.path.join(self.test_dir, key + '.txt')
        self.appendTestFile(result_path)
        results[key] = self.parse_result_file(result_path)
    self.assertEqual(expected, results)
def test_accessions(self):
    """Run seqdigest with ``-a`` accessions plus a file and check results.

    After the run, one ``<key>.txt`` file per expected key is looked up in
    ``self.test_dir``, registered with ``self.appendTestFile`` and parsed
    with ``self.parse_result_file``; the parsed mapping must equal
    ``expected``.
    """
    expected = {
        'JF314863': {13: 2, 65: 1, 175: 1},
        'JF314864': {13: 1, 65: 1, 180: 1},
        'JF314865': {13: 1, 75: 1, 178: 1},
        'JF314866': {13: 2, 65: 1, 175: 1},
        'combined': {13: 6, 65: 3, 75: 1, 175: 2, 178: 1, 180: 1},
    }
    s_value = 'TAG'
    c_value = '3'
    gb_file = package_paths.data_path('JF314865-JF314866.gb')
    self.exe_seqdigest([
        '-s', s_value,
        '-c', c_value,
        '-a', 'JF314863,JF314864',
        gb_file,
    ])
    results = {}
    for key in iterkeys(expected):
        result_path = os.path.join(self.test_dir, key + '.txt')
        self.appendTestFile(result_path)
        results[key] = self.parse_result_file(result_path)
    self.assertEqual(expected, results)
def test_limnonectes(self):
    """Round-trip the limnonectes DNA files through all format pairs.

    For each input/output format pair ``convert_format`` must report 80,
    and re-parsing both files must satisfy ``assertSameData``.
    """
    suffixes = {
        'fasta': '.fasta',
        'phylip-relaxed': '.phylip',
        'nexus': '.nexus',
    }
    for src_format, src_ext in iteritems(suffixes):
        src_file = package_paths.data_path('limnonectes' + src_ext)
        for dst_format, dst_ext in iteritems(suffixes):
            dst_file = self.getTestFile('limnonectes' + dst_ext)
            conversion_args = dict(
                in_file=src_file,
                in_format=src_format,
                out_file=dst_file,
                out_format=dst_format,
                data_type='dna',
            )
            self.assertEqual(convert_format(**conversion_args), 80)
            in_seqs = SeqIO.parse(src_file, format=src_format,
                                  alphabet=IUPAC.ambiguous_dna)
            out_seqs = SeqIO.parse(dst_file, format=dst_format,
                                   alphabet=IUPAC.ambiguous_dna)
            self.assertSameData(in_seqs, out_seqs)
def test_caenophidia(self):
    """Round-trip the caenophidia protein files through all format pairs.

    For each input/output format pair ``convert_format`` must report 114,
    and re-parsing both files must satisfy ``assertSameData``.
    """
    suffixes = {
        'fasta': '.fasta',
        'phylip-relaxed': '.phylip',
        'nexus': '.nexus',
    }
    for src_format, src_ext in iteritems(suffixes):
        src_file = package_paths.data_path('caenophidia' + src_ext)
        for dst_format, dst_ext in iteritems(suffixes):
            dst_file = self.getTestFile('caenophidia' + dst_ext)
            conversion_args = dict(
                in_file=src_file,
                in_format=src_format,
                out_file=dst_file,
                out_format=dst_format,
                data_type='protein',
            )
            self.assertEqual(convert_format(**conversion_args), 114)
            in_seqs = SeqIO.parse(src_file, format=src_format,
                                  alphabet=IUPAC.extended_protein)
            out_seqs = SeqIO.parse(dst_file, format=dst_format,
                                   alphabet=IUPAC.extended_protein)
            self.assertSameData(in_seqs, out_seqs)
def setUp(self):
    """Store the multi-record GenBank path and load it with get_seq_dict."""
    gb_path = package_paths.data_path('JF314863-JF314866.gb')
    self.gb_path = gb_path
    self.seqs = get_seq_dict(
        gb_path,
        format='gb',
        data_type='dna',
    )
def setUp(self):
    """Store four ids and the path to the multi-record GenBank data file."""
    self.id_list = [
        '354698776',
        '354698778',
        '354698780',
        '354698782',
    ]
    gb_name = 'JF314863-JF314866.gb'
    self.multi_gb = package_paths.data_path(gb_name)
def setUp(self):
    """Set the gbfetch script path, the output path and expected data paths.

    ``self.mkTestDir()`` is called before ``self.getTestFile`` supplies the
    output path.
    """
    script_path = package_paths.scripts_path("gbfetch.py")
    self.gbfetch = script_path
    self.mkTestDir()
    self.out = self.getTestFile('gbfetch_test.txt')
    self.expected_fasta = package_paths.data_path(
        'JF314863-JF314866.fasta')
    self.expected_gb = package_paths.data_path(
        'JF314863-JF314866.gb')
def setUp(self):
    """Store the GenBank/FASTA data paths and the expected names and ids.

    ``names`` are 'JF314863' through 'JF314866'; ``ids`` are the same
    strings with '.1' appended.
    """
    locate = package_paths.data_path
    self.gb_path = locate('JF314863-JF314866.gb')
    self.fasta_path = locate('JF314863-JF314866.fasta')
    names = []
    for number in range(314863, 314867):
        names.append('JF' + str(number))
    self.names = names
    self.ids = [name + '.1' for name in self.names]
def setUp(self):
    """Read the JF314862 GenBank data file into ``self.seq``."""
    gb_file = package_paths.data_path('JF314862.gb')
    self.seq = SeqIO.read(
        gb_file,
        format='gb',
        alphabet=IUPAC.ambiguous_dna,
    )
def setUp(self):
    """Store the GenBank path and open it with get_buffered_seq_iter.

    The path is passed as a single-element list.
    """
    gb_path = package_paths.data_path('JF314863-JF314866.gb')
    self.gb_path = gb_path
    self.seqs = get_buffered_seq_iter(
        [gb_path],
        format='gb',
        data_type='dna',
    )
def setUp(self):
    """Load the single record in JF314862.gb as ``self.seq``."""
    read_options = {
        'format': 'gb',
        'alphabet': IUPAC.ambiguous_dna,
    }
    self.seq = SeqIO.read(package_paths.data_path('JF314862.gb'),
                          **read_options)
def setUp(self):
    """Store the primates partitions file path and a sample text line."""
    partitions_name = 'primates.partitions.txt'
    self.path = package_paths.data_path(partitions_name)
    self.line = 'another line\n'