def test_split_capillary_id(self):
    '''Tests that we get information from a sanger capillary read name OK'''
    # (read name, expected dict from split_capillary_id)
    cases = [
        ('abcde.p1k', {'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1k'}),
        ('abcde.x.p1k', {'prefix': 'abcde.x', 'dir': 'fwd', 'suffix': 'p1k'}),
        ('abcde.p1ka', {'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1ka'}),
        ('abcde.q1k', {'prefix': 'abcde', 'dir': 'rev', 'suffix': 'q1k'}),
        ('abcde.w2k', {'prefix': 'abcde', 'dir': 'unk', 'suffix': 'w2k'}),
    ]
    for read_name, want in cases:
        fa = sequences.Fasta(read_name, 'A')
        self.assertEqual(fa.split_capillary_id(), want)

    # a name with no capillary suffix should raise
    with self.assertRaises(sequences.Error):
        fa = sequences.Fasta('name', 'A')
        fa.split_capillary_id()
def make_long_reads(infile, outfile, method='tiling', fixed_read_length=20000, tile_step=10000, gamma_shape=1.2, gamma_scale=6000, coverage=10, gamma_min_length=20000, seed=None, ins_skip=None, ins_window=None,):
    '''Simulates long reads from each sequence in infile and writes them
    as FASTA to outfile.

    method        -- 'tiling': fixed-length reads every tile_step bases;
                     'gamma': read lengths drawn from a gamma distribution,
                     random start positions, until roughly `coverage` X;
                     'uniform': fixed-length reads at random start positions,
                     until roughly `coverage` X.
    seed          -- if given, seeds random.seed for reproducible output
                     (NOTE(review): numpy.random used by the 'gamma' method is
                     not seeded here — gamma read lengths stay nondeterministic;
                     confirm whether that is intended)
    ins_skip/ins_window -- passed to Fasta.add_insertions; must be given
                     together or not at all.

    Read names are '<seq id>_<start>_<end>' with 1-based inclusive coords.
    Sequences shorter than the minimum read length are skipped with a
    message on stderr.
    '''
    assert method in ['tiling', 'gamma', 'uniform']
    # ins_skip and ins_window must both be None, or both be set
    assert ins_skip == ins_window == None or None not in [ins_skip, ins_window]
    if seed is not None:
        random.seed(a=seed)
    seq_reader = sequences.file_reader(infile)
    f = utils.open_file_write(outfile)
    for seq in seq_reader:
        if method == 'tiling':
            if len(seq) < fixed_read_length:
                print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr)
                continue
            # one read starting every tile_step bases; the last read is
            # truncated at the sequence end and terminates the loop
            for i in range(0, len(seq), tile_step):
                end = min(len(seq), i + fixed_read_length)
                fa = sequences.Fasta('_'.join([seq.id, str(i + 1), str(end)]), seq[i:end])
                if ins_skip:
                    fa.add_insertions(skip=ins_skip, window=ins_window)
                print(fa, file=f)
                if end >= len(seq):
                    break
        elif method == 'gamma':
            if len(seq) < gamma_min_length:
                print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr)
                continue
            total_read_length = 0
            # keep sampling reads until total bases reach ~coverage X
            # (the 0.5 * gamma_min_length term avoids overshooting by a
            # whole extra read on average)
            while total_read_length < coverage * len(seq) - 0.5 * gamma_min_length:
                read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                # rejection-sample until length is within [gamma_min_length, len(seq)]
                while read_length < gamma_min_length or read_length > len(seq):
                    read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                start = random.randint(0, len(seq) - read_length)
                end = start + read_length - 1
                fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end + 1)]), seq[start:end+1])
                total_read_length += len(fa)
                if ins_skip:
                    fa.add_insertions(skip=ins_skip, window=ins_window)
                print(fa, file=f)
        elif method == 'uniform':
            if len(seq) < fixed_read_length:
                print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr)
                continue
            total_read_length = 0
            # fixed-length reads at uniformly random starts until ~coverage X
            while total_read_length < coverage * len(seq) - 0.5 * fixed_read_length:
                start = random.randint(0, len(seq) - fixed_read_length)
                end = start + fixed_read_length - 1
                fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end + 1)]), seq[start:end+1])
                total_read_length += len(fa)
                if ins_skip:
                    fa.add_insertions(skip=ins_skip, window=ins_window)
                print(fa, file=f)
    utils.close(f)
def test_get_next_from_file(self):
    '''get_next_from_file() should read seqs from OK, including weirdness in file'''
    f_in = utils.open_file_read(os.path.join(data_dir, 'sequences_test.fa'))
    fa = sequences.Fasta()
    expected_id = 1
    while fa.get_next_from_file(f_in):
        # every record in the test file is 'ACGTA' with ids 1, 2, 3, ...
        self.assertEqual(sequences.Fasta(str(expected_id), 'ACGTA'), fa)
        expected_id += 1
    utils.close(f_in)
def test_strip_after_first_whitespace(self):
    '''Test strip_after_first_whitespace()'''
    # all of these ids should be trimmed back to just 'name'
    for original_id in ['name', 'name foo', 'name foo bar', 'name\tfoo']:
        seq = sequences.Fasta(original_id, 'A')
        seq.strip_after_first_whitespace()
        self.assertEqual(seq.id, 'name')
def test_contig_coords(self):
    '''contig_coords() should get the coords of all contigs in a sequence correctly'''
    # (sequence string, expected list of contig intervals)
    cases = [
        ('ACGT', [intervals.Interval(0, 3)]),
        ('NACGT', [intervals.Interval(1, 4)]),
        ('NNACGT', [intervals.Interval(2, 5)]),
        ('ACGTN', [intervals.Interval(0, 3)]),
        ('ACGTNN', [intervals.Interval(0, 3)]),
        ('NANNCGT', [intervals.Interval(1, 1), intervals.Interval(4, 6)]),
        ('ACNNNGTNA', [
            intervals.Interval(0, 1),
            intervals.Interval(5, 6),
            intervals.Interval(8, 8)
        ]),
        ('ANNCGTNNAAAAA', [
            intervals.Interval(0, 0),
            intervals.Interval(3, 5),
            intervals.Interval(8, 12)
        ]),
    ]
    for seq_string, expected in cases:
        fa = sequences.Fasta('ID', seq_string)
        self.assertListEqual(expected, fa.contig_coords())
def test_file_reader_gff(self):
    '''Test read gff file'''
    # files whose embedded FASTA should parse cleanly
    for fname in ['sequences_test_gffv3.gff', 'sequences_test_gffv3.no_FASTA_line.gff']:
        reader = sequences.file_reader(os.path.join(data_dir, fname))
        for i, seq in enumerate(reader, start=1):
            self.assertEqual(sequences.Fasta('seq' + str(i), 'ACGTACGTAC'), seq)

    # files with missing/broken sequence sections should raise
    for fname in ['sequences_test_gffv3.no_seq.gff', 'sequences_test_gffv3.no_seq.2.gff']:
        with self.assertRaises(sequences.Error):
            reader = sequences.file_reader(os.path.join(data_dir, fname))
            for seq in reader:
                pass
def test_getitem(self):
    '''getitem() should return the right subsequence'''
    raw = 'AACGTGTCA'
    fa = sequences.Fasta('x', raw)
    # indexing and slicing a Fasta should behave like the raw string
    for key in (1, slice(0, 2), slice(1, None)):
        self.assertEqual(raw[key], fa[key])
def test_to_Fastq(self):
    '''Check to_Fastq converts OK, including out of range quality scores'''
    fa = sequences.Fasta('X', 'AAAAA')
    # -1 and 94 are out of the printable phred range and should be clamped
    fq = fa.to_Fastq([-1, 0, 40, 93, 94])
    self.assertEqual(sequences.Fastq('X', 'AAAAA', '!!I~~'), fq)
    # quality list longer than the sequence should raise
    with self.assertRaises(sequences.Error):
        fa.to_Fastq('AAAAAAAAAAAAA')
def test_file_reader_fasta(self):
    '''file_reader should iterate through a fasta file correctly'''
    fname = os.path.join(data_dir, 'sequences_test.fa')
    for i, seq in enumerate(sequences.file_reader(fname), start=1):
        self.assertEqual(sequences.Fasta(str(i), 'ACGTA'), seq)
def split_by_fixed_size(infile, outfiles_prefix, chunk_size, tolerance, skip_if_all_Ns=False):
    '''Splits fasta/q file into separate files, with up to (chunk_size + tolerance) bases in each file.

    Writes outfiles_prefix.1, outfiles_prefix.2, ... plus outfiles_prefix.coords
    mapping each chunk id to its parent sequence and offset. Sequences shorter
    than chunk_size are batched together into trailing output file(s). With
    skip_if_all_Ns=True, sequences/chunks made entirely of Ns are dropped.
    '''
    file_count = 1
    small_sequences = []  # sequences shorter than chunk_size
    seq_reader = sequences.file_reader(infile)
    f_coords = utils.open_file_write(outfiles_prefix + '.coords')

    for seq in seq_reader:
        if skip_if_all_Ns and seq.is_all_Ns():
            continue
        if len(seq) < chunk_size:
            # file_reader may reuse its sequence object, so store a copy
            small_sequences.append(copy.copy(seq))
        elif len(seq) <= chunk_size + tolerance:
            # whole sequence fits in one file within tolerance
            f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
            print(seq, file=f)
            utils.close(f)
            file_count += 1
        else:
            # make list of chunk coords (half-open, 0-based)
            chunks = [(x, x + chunk_size) for x in range(0, len(seq), chunk_size)]
            # Bug fix: condition was "chunks[-1][1] - 1 > len(seq)", which left
            # the final chunk unclamped when its end was exactly one base past
            # the sequence end, producing a chunk id like '9-12' for an
            # 11-base sequence. Clamp whenever the end overshoots.
            if chunks[-1][1] > len(seq):
                chunks[-1] = (chunks[-1][0], len(seq))
            # merge a too-small final chunk into the previous one
            if len(chunks) > 1 and (chunks[-1][1] - chunks[-1][0]) <= tolerance:
                chunks[-2] = (chunks[-2][0], chunks[-1][1])
                chunks.pop()

            # write one output file per chunk
            offset = 0
            for chunk in chunks:
                if not(skip_if_all_Ns and seq.is_all_Ns(start=chunk[0], end=chunk[1]-1)):
                    f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
                    chunk_id = seq.id + ':' + str(chunk[0]+1) + '-' + str(chunk[1])
                    print(sequences.Fasta(chunk_id, seq[chunk[0]:chunk[1]]), file=f)
                    print(chunk_id, seq.id, offset, sep='\t', file=f_coords)
                    utils.close(f)
                    file_count += 1

                offset += chunk[1] - chunk[0]

    # write files of small sequences, packing up to chunk_size + tolerance
    # bases per file
    if len(small_sequences):
        f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
        file_count += 1
        base_count = 0
        for seq in small_sequences:
            if base_count > 0 and base_count + len(seq) > chunk_size + tolerance:
                utils.close(f)
                f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
                file_count += 1
                base_count = 0
            print(seq, file=f)
            base_count += len(seq)
        utils.close(f)
def to_fasta_union(infile, outfile, seqname='union'):
    '''Concatenates all sequences in infile, in order, into a single FASTA
    record named seqname and writes it to outfile.'''
    pieces = [seq.seq for seq in sequences.file_reader(infile)]
    f_out = utils.open_file_write(outfile)
    print(sequences.Fasta(seqname, ''.join(pieces)), file=f_out)
    utils.close(f_out)
def test_search_string(self):
    '''Check that search_string() finds all the hits'''
    fa = sequences.Fasta('X', 'AAA')
    # no match on either strand
    self.assertEqual(0, len(fa.search('G')))
    # forward-strand hits, including overlapping ones
    self.assertListEqual([(0, '+')], fa.search('AAA'))
    self.assertListEqual([(0, '+'), (1, '+')], fa.search('AA'))
    # reverse-complement hit
    self.assertListEqual([(0, '-')], fa.search('TTT'))
def test_equality(self):
    '''== and != should depend on both the id and the sequence'''
    same = sequences.Fasta('ID', 'ACGTA')
    different_id = sequences.Fasta('I', 'ACGTA')
    different_seq = sequences.Fasta('ID', 'ACGT')
    self.assertTrue(self.fasta == same)
    self.assertFalse(self.fasta == different_id)
    self.assertFalse(self.fasta == different_seq)
    self.assertFalse(self.fasta != same)
    self.assertTrue(self.fasta != different_id)
    self.assertTrue(self.fasta != different_seq)
def test_file_reader_phylip(self):
    '''Test read phylip file'''
    test_files = [
        'sequences_test_phylip.interleaved',
        'sequences_test_phylip.interleaved2',
        'sequences_test_phylip.sequential'
    ]
    test_files = [os.path.join(data_dir, f) for f in test_files]
    expected_seqs = [
        sequences.Fasta('Turkey', 'AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT'),
        sequences.Fasta('Salmo_gair', 'AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT'),
        sequences.Fasta('H. Sapiens', 'ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA')
    ]
    for fname in test_files:
        reader = sequences.file_reader(fname)
        i = 0
        for seq in reader:
            self.assertEqual(expected_seqs[i], seq)
            i += 1

    # files made by seaview are a little different in the first line.
    # Test one of these
    expected_seqs = [
        sequences.Fasta('seq1', 96 * 'G' + 'T'),
        sequences.Fasta('seq2', 94 * 'A' + 'G')
    ]
    reader = sequences.file_reader(
        os.path.join(data_dir, 'sequences_test_phylip.made_by_seaview'))
    i = 0
    for seq in reader:
        # Bug fix: removed a leftover debug print(seq) that spammed test output
        self.assertEqual(expected_seqs[i], seq)
        i += 1
def test_file_to_dict(self):
    '''check file_to_dict fills dictionary correctly'''
    d_test = {}
    tasks.file_to_dict(os.path.join(data_dir, 'sequences_test.fa'), d_test)
    expected = {str(i): sequences.Fasta(str(i), 'ACGTA') for i in range(1, 5)}
    self.assertSequenceEqual(d_test.keys(), expected.keys())
    for key, fa in expected.items():
        self.assertEqual(d_test[key].id, fa.id)
        self.assertEqual(d_test[key].seq, fa.seq)
def test_gaps(self):
    '''gaps() should find the gaps in a sequence correctly'''
    # (sequence string, expected list of gap intervals)
    cases = [
        ('ACGT', []),
        ('NACGT', [intervals.Interval(0, 0)]),
        ('NACGTN', [intervals.Interval(0, 0), intervals.Interval(5, 5)]),
        ('ANNCGT', [intervals.Interval(1, 2)]),
        ('NANNCGTNN', [
            intervals.Interval(0, 0),
            intervals.Interval(2, 3),
            intervals.Interval(7, 8)
        ]),
    ]
    for seq_string, expected in cases:
        fa = sequences.Fasta('ID', seq_string)
        self.assertListEqual(expected, fa.gaps())
def deinterleave(infile, outfile_1, outfile_2, fasta_out=False):
    '''Splits an interleaved file of read pairs into two files, alternating
    reads between outfile_1 and outfile_2.

    With fasta_out=True, reads are written as FASTA regardless of input type.
    Raises Error (after closing both output files) if the file has an odd
    number of reads, i.e. the last read has no mate.
    '''
    seq_reader = sequences.file_reader(infile)
    f_1 = utils.open_file_write(outfile_1)
    f_2 = utils.open_file_write(outfile_2)
    for seq in seq_reader:
        if fasta_out:
            print(sequences.Fasta(seq.id, seq.seq), file=f_1)
        else:
            print(seq, file=f_1)

        try:
            # Bug fix: the original discarded next()'s return value and
            # printed `seq` again, which is only correct if file_reader
            # yields one object mutated in place. Bind the mate explicitly
            # so correctness doesn't depend on that implementation detail.
            seq = next(seq_reader)
        except StopIteration:
            utils.close(f_1)
            utils.close(f_2)
            raise Error('Error getting mate for sequence. Cannot continue')

        if fasta_out:
            print(sequences.Fasta(seq.id, seq.seq), file=f_2)
        else:
            print(seq, file=f_2)

    utils.close(f_1)
    utils.close(f_2)
def test_translate(self):
    '''Test nucleotide -> amino acid conversion works on Fasta'''
    fa = sequences.Fasta(
        'ID',
        'GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA'
    )
    # frame 0 (default)
    self.assertEqual(
        sequences.Fasta(
            'ID',
            'AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVVV***'
        ), fa.translate())
    # frame 1
    self.assertEqual(
        sequences.Fasta(
            'ID',
            'QPRLEGDAGVTMTIAVKSNREAGVTI*SFYCYSCLKRCSFHPRLAVHPRLQPRLGTM*SWFNS'
        ), fa.translate(frame=1))
    # frame 2
    # Bug fix: removed a leftover debug print(fa.translate(frame=1)) that
    # spammed test output
    self.assertEqual(
        sequences.Fasta(
            'ID',
            'SRG*KATPA*Q*RLL*RATGRRGSPYNHFIATPA*KDVLSTPA*QFILVYNHDLVLCSRGLIV'
        ), fa.translate(frame=2))
def test_get_next_from_embl_file(self):
    '''get_next_from_file() should iterate through an EMBL file correctly'''
    f_in = utils.open_file_read(
        os.path.join(data_dir, 'sequences_test.embl'))
    embl = sequences.Embl()
    i = 0
    while embl.get_next_from_file(f_in):
        self.assertEqual(
            sequences.Fasta('seq' + str(i + 1), expected_embl[i]), embl)
        i += 1
    utils.close(f_in)
def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False):
    '''Converts infile to FASTA format in outfile, wrapping sequence lines
    at line_length bases.

    With strip_after_first_whitespace=True, read names are truncated at the
    first whitespace. Fastq records are converted to Fasta; anything already
    Fasta is written as-is (with the new wrap width).
    '''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)
    original_line_length = sequences.Fasta.line_length
    sequences.Fasta.line_length = line_length
    try:
        for seq in seq_reader:
            if strip_after_first_whitespace:
                seq.strip_after_first_whitespace()

            # exact type check: only Fastq records need converting to Fasta
            if type(seq) == sequences.Fastq:
                print(sequences.Fasta(seq.id, seq.seq), file=f_out)
            else:
                print(seq, file=f_out)

        utils.close(f_out)
    finally:
        # Bug fix: restore the class-level wrap width even if reading or
        # writing raises, so other callers aren't left with a modified
        # Fasta.line_length
        sequences.Fasta.line_length = original_line_length
def test_trim_Ns(self):
    '''trim_Ns() should do the right trimming of a sequence'''
    expected = sequences.Fasta('ID', 'ANNANA')
    # leading/trailing Ns (upper or lower case) should go; internal ones stay
    for raw in ['ANNANA', 'NANNANA', 'NANNANAN', 'ANNANAN', 'NNNNNNANNANAN', 'NNANNANANn']:
        seq = sequences.Fasta('ID', raw)
        seq.trim_Ns()
        self.assertEqual(expected, seq)
def test_get_next_from_gbk_file(self):
    '''get_next_from_file() should iterate through a GenBank file correctly'''
    expected = [
        'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgatc',
        'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgaaa'
    ]
    f_in = utils.open_file_read(
        os.path.join(data_dir, 'sequences_test.gbk'))
    embl = sequences.Embl()
    i = 0
    while embl.get_next_from_file(f_in):
        self.assertEqual(
            sequences.Fasta('NAME' + str(i + 1), expected[i]), embl)
        i += 1
    utils.close(f_in)
def scaffolds_to_contigs(infile, outfile, number_contigs=False):
    '''Makes a file of contigs from scaffolds by splitting at every N.
       Use number_contigs=True to add .1, .2, etc onto end of each contig, instead of default to append coordinates.'''
    fout = utils.open_file_write(outfile)
    for seq in sequences.file_reader(infile):
        for contig_number, contig in enumerate(seq.contig_coords(), start=1):
            if number_contigs:
                name = seq.id + '.' + str(contig_number)
            else:
                name = '.'.join([seq.id, str(contig.start + 1), str(contig.end + 1)])
            print(sequences.Fasta(name, seq[contig.start:contig.end + 1]), file=fout)
    utils.close(fout)
def make_random_contigs(contigs, length, outfile, name_by_letters=False, prefix='', seed=None, first_number=1):
    '''Makes a multi fasta file of random sequences, all the same length'''
    random.seed(a=seed)
    fout = utils.open_file_write(outfile)
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    letter_pos = 0
    for contig_index in range(contigs):
        if name_by_letters:
            # cycle A..Z, wrapping back to A after Z
            name = alphabet[letter_pos]
            letter_pos = (letter_pos + 1) % len(alphabet)
        else:
            name = str(contig_index + first_number)
        bases = ''.join(random.choice('ACGT') for _ in range(length))
        print(sequences.Fasta(prefix + name, bases), file=fout)
    utils.close(fout)
def merge_to_one_seq(infile, outfile, seqname='union'):
    '''Takes a multi fasta or fastq file and writes a new file that contains just one sequence, with the original sequences catted together, preserving their order'''
    # copy each record: file_reader may reuse its sequence object
    seqs = [copy.copy(seq) for seq in sequences.file_reader(infile)]
    new_seq = ''.join(s.seq for s in seqs)

    if type(seqs[0]) == sequences.Fastq:
        merged = sequences.Fastq(seqname, new_seq, ''.join(s.qual for s in seqs))
    else:
        merged = sequences.Fasta(seqname, new_seq)

    seqs[:] = []
    f = utils.open_file_write(outfile)
    print(merged, file=f)
    utils.close(f)
def test_strip_illumina_suffix(self):
    '''Check that /1 and /2 removed correctly from IDs'''
    # (input id, id after stripping) — only a trailing /1 or /2 is removed
    cases = [
        ('name/1', 'name'),
        ('name/2', 'name'),
        ('name', 'name'),
        ('name/1/2', 'name/1'),
        ('name/2/1', 'name/2'),
        ('name/3', 'name/3'),
    ]
    for original_id, expected_id in cases:
        seq = sequences.Fasta(original_id, 'A')
        seq.strip_illumina_suffix()
        self.assertEqual(seq.id, expected_id)
def test_file_reader_embl(self):
    '''Test read embl file'''
    reader = sequences.file_reader(
        os.path.join(data_dir, 'sequences_test.embl'))
    for i, seq in enumerate(reader, start=1):
        self.assertEqual(
            sequences.Fasta('seq' + str(i), expected_embl[i - 1]), seq)

    # malformed embl files should raise while iterating
    for fname in ['sequences_test.embl.bad', 'sequences_test.embl.bad2']:
        with self.assertRaises(sequences.Error):
            reader = sequences.file_reader(os.path.join(data_dir, fname))
            for seq in reader:
                pass
def test_replace_interval(self):
    '''Test replace_interval()'''
    # Fasta: (start, end, expected sequence after replacing with 'NEW')
    for start, end, expected in [(0, 0, 'NEWCGTA'), (4, 4, 'ACGTNEW'), (2, 3, 'ACNEWA')]:
        fa = sequences.Fasta('ID', 'ACGTA')
        fa.replace_interval(start, end, 'NEW')
        self.assertEqual(sequences.Fasta('ID', expected), fa)

    # bad coordinates should raise
    fa = sequences.Fasta('ID', 'ACGTA')
    for start, end in [(3, 2), (1, 5), (5, 10)]:
        with self.assertRaises(sequences.Error):
            fa.replace_interval(start, end, 'x')

    # Fastq: (start, end, expected seq, expected qual)
    fastq_cases = [
        (0, 0, 'NEWCGTA', 'IIIBCDE'),
        (4, 4, 'ACGTNEW', 'ABCDIII'),
        (2, 3, 'ACNEWA', 'ABIIIE'),
    ]
    for start, end, expected_seq, expected_qual in fastq_cases:
        fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
        fq.replace_interval(start, end, 'NEW', 'III')
        self.assertEqual(sequences.Fastq('ID', expected_seq, expected_qual), fq)

    # replacement bases and qualities of different lengths should raise
    with self.assertRaises(sequences.Error):
        fq.replace_interval(1, 1, 'x', 'xx')
def test_expand_nucleotides(self):
    '''Test expand_nucleotides'''
    # Each case is (input sequence, expected list of expanded sequences).
    # Expanded reads get '.1', '.2', ... appended to the id, one output per
    # combination of the IUPAC ambiguity codes in the input, in alphabetical
    # base order. (Note: no case with id '5' — the numbering skips it.)
    tests = [
        # unambiguous bases expand to themselves (still renamed to '.1')
        (sequences.Fasta('1', 'A'), [sequences.Fasta('1.1', 'A')]),
        (sequences.Fasta('2', 'C'), [sequences.Fasta('2.1', 'C')]),
        (sequences.Fasta('3', 'G'), [sequences.Fasta('3.1', 'G')]),
        (sequences.Fasta('4', 'T'), [sequences.Fasta('4.1', 'T')]),
        # two-base ambiguity codes
        (sequences.Fasta('6', 'R'),
         [sequences.Fasta('6.1', 'A'), sequences.Fasta('6.2', 'G')]),
        (sequences.Fasta('7', 'Y'),
         [sequences.Fasta('7.1', 'C'), sequences.Fasta('7.2', 'T')]),
        (sequences.Fasta('8', 'S'),
         [sequences.Fasta('8.1', 'C'), sequences.Fasta('8.2', 'G')]),
        (sequences.Fasta('9', 'W'),
         [sequences.Fasta('9.1', 'A'), sequences.Fasta('9.2', 'T')]),
        (sequences.Fasta('10', 'K'),
         [sequences.Fasta('10.1', 'G'), sequences.Fasta('10.2', 'T')]),
        (sequences.Fasta('11', 'M'),
         [sequences.Fasta('11.1', 'A'), sequences.Fasta('11.2', 'C')]),
        # three-base ambiguity codes
        (sequences.Fasta('12', 'B'), [
            sequences.Fasta('12.1', 'C'),
            sequences.Fasta('12.2', 'G'),
            sequences.Fasta('12.3', 'T')
        ]),
        (sequences.Fasta('13', 'D'), [
            sequences.Fasta('13.1', 'A'),
            sequences.Fasta('13.2', 'G'),
            sequences.Fasta('13.3', 'T')
        ]),
        (sequences.Fasta('14', 'H'), [
            sequences.Fasta('14.1', 'A'),
            sequences.Fasta('14.2', 'C'),
            sequences.Fasta('14.3', 'T')
        ]),
        (sequences.Fasta('15', 'V'), [
            sequences.Fasta('15.1', 'A'),
            sequences.Fasta('15.2', 'C'),
            sequences.Fasta('15.3', 'G')
        ]),
        # N expands to all four bases
        (sequences.Fasta('16', 'N'), [
            sequences.Fasta('16.1', 'A'),
            sequences.Fasta('16.2', 'C'),
            sequences.Fasta('16.3', 'G'),
            sequences.Fasta('16.4', 'T')
        ]),
        # codes embedded in longer sequences; multiple codes give the
        # cartesian product of expansions
        (sequences.Fasta('17', 'ART'),
         [sequences.Fasta('17.1', 'AAT'), sequences.Fasta('17.2', 'AGT')]),
        (sequences.Fasta('18', 'ARRT'), [
            sequences.Fasta('18.1', 'AAAT'),
            sequences.Fasta('18.2', 'AAGT'),
            sequences.Fasta('18.3', 'AGAT'),
            sequences.Fasta('18.4', 'AGGT')
        ]),
        (sequences.Fasta('19', 'ARTR'), [
            sequences.Fasta('19.1', 'AATA'),
            sequences.Fasta('19.2', 'AATG'),
            sequences.Fasta('19.3', 'AGTA'),
            sequences.Fasta('19.4', 'AGTG')
        ]),
        # Fastq expansion keeps the quality string unchanged
        (sequences.Fastq('20', 'ART', 'GHI'), [
            sequences.Fastq('20.1', 'AAT', 'GHI'),
            sequences.Fastq('20.2', 'AGT', 'GHI')
        ]),
    ]
    for t in tests:
        self.assertListEqual(t[0].expand_nucleotides(), t[1])
def test_to_Fasta_and_qual(self):
    '''Check to_Fasta_and_qual converts quality scores correctly'''
    fq = sequences.Fastq('ID', 'ACGT', '>ADI')
    fa, qual = fq.to_Fasta_and_qual()
    # quality characters '>ADI' are phred+33 encoded 29, 32, 35, 40
    self.assertEqual(sequences.Fasta('ID', 'ACGT'), fa)
    self.assertListEqual([29, 32, 35, 40], qual)