Exemple #1
0
    def test_split_capillary_id(self):
        '''split_capillary_id() should break a sanger capillary read name into prefix, dir and suffix'''
        # (read name, dictionary expected back from split_capillary_id())
        cases = [
            ('abcde.p1k', {'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1k'}),
            ('abcde.x.p1k', {'prefix': 'abcde.x', 'dir': 'fwd', 'suffix': 'p1k'}),
            ('abcde.p1ka', {'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1ka'}),
            ('abcde.q1k', {'prefix': 'abcde', 'dir': 'rev', 'suffix': 'q1k'}),
            ('abcde.w2k', {'prefix': 'abcde', 'dir': 'unk', 'suffix': 'w2k'}),
        ]

        for read_name, parts in cases:
            record = sequences.Fasta(read_name, 'A')
            self.assertEqual(record.split_capillary_id(), parts)

        # The plain name 'name' must be rejected with sequences.Error
        with self.assertRaises(sequences.Error):
            unparsable = sequences.Fasta('name', 'A')
            unparsable.split_capillary_id()
Exemple #2
0
def make_long_reads(infile, outfile, method='tiling', fixed_read_length=20000, tile_step=10000, gamma_shape=1.2,  gamma_scale=6000, coverage=10, gamma_min_length=20000, seed=None, ins_skip=None, ins_window=None,):
    '''Makes simulated long reads from every sequence in infile and writes them to outfile.

    method chooses how the reads are placed on each input sequence:
      'tiling':  reads of fixed_read_length bases starting every tile_step
                 bases. The last read stops at the end of the sequence.
      'gamma':   read lengths are drawn from numpy.random.gamma(gamma_shape,
                 scale=gamma_scale), redrawing until the length is at least
                 gamma_min_length and no longer than the sequence. Start
                 positions are chosen uniformly at random.
      'uniform': reads of fixed_read_length bases, at start positions chosen
                 uniformly at random.

    For 'gamma' and 'uniform', reads are made until the total number of bases is
    at least coverage * len(seq) minus half of the minimum read length.

    A sequence shorter than the minimum read length (gamma_min_length for
    'gamma', otherwise fixed_read_length) is skipped, with a message to stderr.

    Each read is called <sequence id>_<start>_<end>, using 1-based inclusive
    coordinates. If ins_skip is given then ins_window must be given as well, and
    add_insertions(skip=ins_skip, window=ins_window) is called on every read
    before it is written.

    seed, if not None, seeds the random module. For method='gamma' it is also
    used to seed numpy.random, so that the read lengths are reproducible too.
    '''
    assert method in ['tiling', 'gamma', 'uniform']
    assert (ins_skip is None and ins_window is None) or None not in [ins_skip, ins_window]
    if seed is not None:
        random.seed(a=seed)
        if method == 'gamma':
            # numpy has its own generator, which random.seed() does not touch.
            # Derive a 32-bit seed from a private Random instance so that any
            # seed accepted by random.seed() works and the global random stream
            # is left exactly as it was after the call above.
            numpy.random.seed(random.Random(seed).getrandbits(32))

    min_length = gamma_min_length if method == 'gamma' else fixed_read_length

    def write_read(seq, start, end, f):
        # Writes seq[start:end] (0-based, end not included) as one read.
        # Returns the read length before any insertions were added.
        fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end)]), seq[start:end])
        length = len(fa)
        if ins_skip:
            fa.add_insertions(skip=ins_skip, window=ins_window)
        print(fa, file=f)
        return length

    seq_reader = sequences.file_reader(infile)
    f = utils.open_file_write(outfile)

    # try/finally so that the output file is closed if anything goes wrong
    try:
        for seq in seq_reader:
            if len(seq) < min_length:
                print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr)
                continue

            if method == 'tiling':
                for i in range(0, len(seq), tile_step):
                    end = min(len(seq), i + fixed_read_length)
                    write_read(seq, i, end, f)
                    # stop as soon as a read reaches the end of the sequence
                    if end >= len(seq):
                        break
            else:
                total_read_length = 0
                while total_read_length < coverage * len(seq) - 0.5 * min_length:
                    if method == 'gamma':
                        read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                        while read_length < gamma_min_length or read_length > len(seq):
                            read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                    else:
                        read_length = fixed_read_length

                    start = random.randint(0, len(seq) - read_length)
                    total_read_length += write_read(seq, start, start + read_length, f)
    finally:
        utils.close(f)
Exemple #3
0
    def test_get_next_from_file(self):
        '''get_next_from_file() should read every sequence from a FASTA file OK, including weirdness in file'''
        f_in = utils.open_file_read(os.path.join(data_dir,
                                                 'sequences_test.fa'))
        fa = sequences.Fasta()
        counter = 1

        # The records are expected to be called 1, 2, 3, ... and all have the
        # sequence ACGTA. try/finally closes the file even when an assertion
        # fails part of the way through.
        try:
            while fa.get_next_from_file(f_in):
                self.assertEqual(fa, sequences.Fasta(str(counter), 'ACGTA'))
                counter += 1
        finally:
            utils.close(f_in)
Exemple #4
0
    def test_strip_after_first_whitespace(self):
        '''strip_after_first_whitespace() should cut the ID at the first space or tab'''
        raw_names = ['name', 'name foo', 'name foo bar', 'name\tfoo']
        records = [sequences.Fasta(raw_name, 'A') for raw_name in raw_names]

        for record in records:
            record.strip_after_first_whitespace()

        # Every one of the IDs should have been reduced to 'name'
        for record in records:
            self.assertEqual(record.id, 'name')
Exemple #5
0
    def test_contig_coords(self):
        '''contig_coords() should return one interval for each run of non-N bases'''
        # (sequence, [(start, end) of each contig, 0-based inclusive])
        cases = [
            ('ACGT', [(0, 3)]),
            ('NACGT', [(1, 4)]),
            ('NNACGT', [(2, 5)]),
            ('ACGTN', [(0, 3)]),
            ('ACGTNN', [(0, 3)]),
            ('NANNCGT', [(1, 1), (4, 6)]),
            ('ACNNNGTNA', [(0, 1), (5, 6), (8, 8)]),
            ('ANNCGTNNAAAAA', [(0, 0), (3, 5), (8, 12)]),
        ]

        for bases, spans in cases:
            wanted = [intervals.Interval(first, last) for first, last in spans]
            found = sequences.Fasta('ID', bases).contig_coords()
            self.assertListEqual(wanted, found)
Exemple #6
0
    def test_file_reader_gff(self):
        '''file_reader should get the sequences from the good GFF files and raise Error on the bad ones'''
        good_names = (
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
        )
        bad_names = (
            'sequences_test_gffv3.no_seq.gff',
            'sequences_test_gffv3.no_seq.2.gff',
        )

        # Good files: records are called seq1, seq2, ... all with the same bases
        for good_path in [os.path.join(data_dir, name) for name in good_names]:
            records = sequences.file_reader(good_path)
            for number, record in enumerate(records, start=1):
                wanted = sequences.Fasta('seq' + str(number), 'ACGTACGTAC')
                self.assertEqual(record, wanted)

        # Bad files: making the reader or iterating over it must raise Error
        for bad_path in [os.path.join(data_dir, name) for name in bad_names]:
            with self.assertRaises(sequences.Error):
                records = sequences.file_reader(bad_path)
                for _ in records:
                    pass
Exemple #7
0
 def test_getitem(self):
     '''Indexing and slicing a Fasta should give the same result as on its sequence string'''
     bases = 'AACGTGTCA'
     record = sequences.Fasta('x', bases)
     # one index, a bounded slice and an open-ended slice
     for key in (1, slice(0, 2), slice(1, None)):
         self.assertEqual(bases[key], record[key])
Exemple #8
0
 def test_to_Fastq(self):
     '''to_Fastq() should convert quality scores, including out of range ones, and raise Error on a bad argument'''
     record = sequences.Fasta('X', 'AAAAA')

     # -1 and 0 are both expected to become '!', 93 and 94 both '~'
     wanted = sequences.Fastq('X', 'AAAAA', '!!I~~')
     converted = record.to_Fastq([-1, 0, 40, 93, 94])
     self.assertEqual(wanted, converted)

     with self.assertRaises(sequences.Error):
         record.to_Fastq('AAAAAAAAAAAAA')
Exemple #9
0
 def test_file_reader_fasta(self):
     '''file_reader should give back each record of a fasta file, in order'''
     fasta_path = os.path.join(data_dir, 'sequences_test.fa')
     # Records are expected to be called 1, 2, ... and all be ACGTA
     for number, record in enumerate(sequences.file_reader(fasta_path), start=1):
         self.assertEqual(record, sequences.Fasta(str(number), 'ACGTA'))
Exemple #10
0
def split_by_fixed_size(infile, outfiles_prefix, chunk_size, tolerance, skip_if_all_Ns=False):
    '''Splits fasta/q file into separate files, with up to (chunk_size + tolerance) bases in each file.

    The output files are called outfiles_prefix.1, outfiles_prefix.2, etc,
    numbered in the order that they get written:
      - A sequence of length at least chunk_size and at most
        chunk_size + tolerance is written to a file of its own.
      - A longer sequence is cut into pieces of chunk_size bases. Each piece
        is written to its own file as a Fasta called id:start-end, where the
        coordinates are 1-based inclusive. If the final piece has at most
        tolerance bases, it is joined onto the piece before it. For every
        piece written, a tab-separated line (piece name, original sequence id,
        0-based offset of the piece) is added to outfiles_prefix.coords.
      - Sequences shorter than chunk_size are written last, several to a file.
        A new file is started whenever adding the next sequence would take the
        current file over chunk_size + tolerance bases.

    If skip_if_all_Ns is True, then sequences, and pieces of long sequences,
    for which is_all_Ns() is true are not written.
    '''
    def write_seqs(number, to_write):
        # Writes the given sequences to output file number `number`, closing
        # the file even if writing fails
        f = utils.open_file_write(outfiles_prefix + '.' + str(number))
        try:
            for s in to_write:
                print(s, file=f)
        finally:
            utils.close(f)

    file_count = 1
    small_sequences = []  # sequences shorter than chunk_size
    seq_reader = sequences.file_reader(infile)
    f_coords = utils.open_file_write(outfiles_prefix + '.coords')

    try:
        for seq in seq_reader:
            if skip_if_all_Ns and seq.is_all_Ns():
                continue
            if len(seq) < chunk_size:
                # copy, in case the reader reuses the same object for each record
                small_sequences.append(copy.copy(seq))
            elif len(seq) <= chunk_size + tolerance:
                write_seqs(file_count, [seq])
                file_count += 1
            else:
                # make list of chunk coords (0-based, end not included)
                chunks = [(x, x + chunk_size) for x in range(0, len(seq), chunk_size)]
                # The last chunk must not run past the end of the sequence.
                # (Testing "end - 1 > len(seq)" here would miss the case where
                # it runs over by exactly one base.)
                if chunks[-1][1] > len(seq):
                    chunks[-1] = (chunks[-1][0], len(seq))
                # a short final chunk is joined onto the one before it
                if len(chunks) > 1 and (chunks[-1][1] - chunks[-1][0]) <= tolerance:
                    chunks[-2] = (chunks[-2][0], chunks[-1][1])
                    chunks.pop()

                # write one output file per chunk
                offset = 0
                for start, end in chunks:
                    if not (skip_if_all_Ns and seq.is_all_Ns(start=start, end=end - 1)):
                        chunk_id = seq.id + ':' + str(start + 1) + '-' + str(end)
                        write_seqs(file_count, [sequences.Fasta(chunk_id, seq[start:end])])
                        print(chunk_id, seq.id, offset, sep='\t', file=f_coords)
                        file_count += 1

                    # skipped chunks still count towards the offset
                    offset += end - start
    finally:
        utils.close(f_coords)

    # group the small sequences into batches, one batch per output file
    batches = []
    base_count = 0
    for seq in small_sequences:
        if not batches or (base_count > 0 and base_count + len(seq) > chunk_size + tolerance):
            batches.append([])
            base_count = 0

        batches[-1].append(seq)
        base_count += len(seq)

    # write files of small sequences
    for batch in batches:
        write_seqs(file_count, batch)
        file_count += 1
Exemple #11
0
def to_fasta_union(infile, outfile, seqname='union'):
    '''Writes a single Fasta called seqname to outfile, made by joining together every sequence of infile, in order'''
    pieces = [record.seq for record in sequences.file_reader(infile)]

    out_handle = utils.open_file_write(outfile)
    print(sequences.Fasta(seqname, ''.join(pieces)), file=out_handle)
    utils.close(out_handle)
Exemple #12
0
 def test_search_string(self):
     '''search() should report every hit as (position, strand), or nothing when there is no hit'''
     record = sequences.Fasta('X', 'AAA')
     self.assertTrue(len(record.search('G')) == 0)

     # (query, hits expected from searching AAA)
     expectations = [
         ('AAA', [(0, '+')]),
         ('AA', [(0, '+'), (1, '+')]),
         ('TTT', [(0, '-')]),
     ]
     for query, wanted in expectations:
         self.assertListEqual(record.search(query), wanted)
Exemple #13
0
 def test_equality(self):
     '''== and != should both take the ID and the sequence into account'''
     # NOTE(review): self.fasta is set up outside this method; these checks
     # only make sense if it is Fasta('ID', 'ACGTA') - confirm in setUp.
     identical = sequences.Fasta('ID', 'ACGTA')
     other_id = sequences.Fasta('I', 'ACGTA')
     other_seq = sequences.Fasta('ID', 'ACGT')

     self.assertTrue(self.fasta == identical)
     self.assertFalse(self.fasta == other_id)
     self.assertFalse(self.fasta == other_seq)

     self.assertFalse(self.fasta != identical)
     self.assertTrue(self.fasta != other_id)
     self.assertTrue(self.fasta != other_seq)
Exemple #14
0
    def test_file_reader_phylip(self):
        '''file_reader should read interleaved and sequential phylip files, and one made by seaview'''
        test_files = [
            'sequences_test_phylip.interleaved',
            'sequences_test_phylip.interleaved2',
            'sequences_test_phylip.sequential'
        ]

        test_files = [os.path.join(data_dir, f) for f in test_files]

        # All three files are expected to contain these same sequences
        expected_seqs = [
            sequences.Fasta('Turkey',
                            'AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT'),
            sequences.Fasta('Salmo_gair',
                            'AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT'),
            sequences.Fasta('H. Sapiens',
                            'ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA')
        ]

        for fname in test_files:
            for i, seq in enumerate(sequences.file_reader(fname)):
                self.assertEqual(expected_seqs[i], seq)

        # files made by seaview are a little different in the first line.
        # Test one of these
        expected_seqs = [
            sequences.Fasta('seq1', 96 * 'G' + 'T'),
            sequences.Fasta('seq2', 94 * 'A' + 'G')
        ]

        reader = sequences.file_reader(
            os.path.join(data_dir, 'sequences_test_phylip.made_by_seaview'))
        for i, seq in enumerate(reader):
            self.assertEqual(expected_seqs[i], seq)
Exemple #15
0
    def test_file_to_dict(self):
        '''check file_to_dict fills dictionary correctly'''
        d_test = {}
        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test.fa'), d_test)

        # Expect sequences called 1, 2, 3, 4, each one ACGTA
        d = {str(i): sequences.Fasta(str(i), 'ACGTA') for i in range(1, 5)}

        # Compare the keys as sets. Dictionary key views are not sequences,
        # so assertSequenceEqual cannot show what differs when they are
        # not equal.
        self.assertEqual(set(d_test), set(d))
        for key, expected in d.items():
            self.assertEqual(d_test[key].id, expected.id)
            self.assertEqual(d_test[key].seq, expected.seq)
Exemple #16
0
    def test_gaps(self):
        '''gaps() should return one interval for each run of Ns'''
        # (sequence, [(start, end) of each gap, 0-based inclusive])
        cases = [
            ('ACGT', []),
            ('NACGT', [(0, 0)]),
            ('NACGTN', [(0, 0), (5, 5)]),
            ('ANNCGT', [(1, 2)]),
            ('NANNCGTNN', [(0, 0), (2, 3), (7, 8)]),
        ]

        for bases, spans in cases:
            wanted = [intervals.Interval(first, last) for first, last in spans]
            found = sequences.Fasta('ID', bases).gaps()
            self.assertListEqual(wanted, found)
Exemple #17
0
def deinterleave(infile, outfile_1, outfile_2, fasta_out=False):
    '''Splits an interleaved file of pairs of sequences into two files.

    The first sequence of each pair is written to outfile_1 and the second
    to outfile_2. If fasta_out is True, every sequence is written as a Fasta
    made from its id and seq, otherwise it is written unchanged.

    Raises Error if infile has an odd number of sequences, so that the last
    one has no mate.
    '''
    def write(record, f):
        if fasta_out:
            print(sequences.Fasta(record.id, record.seq), file=f)
        else:
            print(record, file=f)

    seq_reader = sequences.file_reader(infile)
    f_1 = utils.open_file_write(outfile_1)
    f_2 = utils.open_file_write(outfile_2)

    # try/finally so both files get closed whatever happens
    try:
        for seq in seq_reader:
            write(seq, f_1)
            try:
                # Use the record that next() hands back, instead of relying on
                # the reader having updated seq in place as a side effect.
                mate = next(seq_reader)
            except StopIteration:
                raise Error('Error getting mate for sequence. Cannot continue')
            write(mate, f_2)
    finally:
        utils.close(f_1)
        utils.close(f_2)
Exemple #18
0
 def test_translate(self):
     '''Test nucleotide -> amino acid conversion works on Fasta, in each of frames 0, 1 and 2'''
     fa = sequences.Fasta(
         'ID',
         'GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA'
     )
     # default frame
     self.assertEqual(
         sequences.Fasta(
             'ID',
             'AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVVV***'
         ), fa.translate())
     self.assertEqual(
         sequences.Fasta(
             'ID',
             'QPRLEGDAGVTMTIAVKSNREAGVTI*SFYCYSCLKRCSFHPRLAVHPRLQPRLGTM*SWFNS'
         ), fa.translate(frame=1))
     self.assertEqual(
         sequences.Fasta(
             'ID',
             'SRG*KATPA*Q*RLL*RATGRRGSPYNHFIATPA*KDVLSTPA*QFILVYNHDLVLCSRGLIV'
         ), fa.translate(frame=2))
Exemple #19
0
    def test_get_next_from_embl_file(self):
        '''get_next_from_file() of Embl should read each record from an embl file'''
        f_in = utils.open_file_read(
            os.path.join(data_dir, 'sequences_test.embl'))
        embl = sequences.Embl()
        counter = 1

        # Records are expected to be called seq1, seq2, ... with sequences
        # taken in order from expected_embl. try/finally closes the file even
        # when an assertion fails part of the way through.
        try:
            while embl.get_next_from_file(f_in):
                self.assertEqual(
                    embl,
                    sequences.Fasta('seq' + str(counter),
                                    expected_embl[counter - 1]))
                counter += 1
        finally:
            utils.close(f_in)
Exemple #20
0
def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False):
    '''Writes every sequence of infile to outfile, with Fastq records converted to Fasta.

    line_length is assigned to sequences.Fasta.line_length while the file is
    being written, and the previous value is put back afterwards.

    If strip_after_first_whitespace is True, strip_after_first_whitespace()
    is called on each sequence before it is written.
    '''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)
    original_line_length = sequences.Fasta.line_length
    sequences.Fasta.line_length = line_length

    try:
        for seq in seq_reader:
            if strip_after_first_whitespace:
                seq.strip_after_first_whitespace()

            if type(seq) == sequences.Fastq:
                # drop the quality scores
                print(sequences.Fasta(seq.id, seq.seq), file=f_out)
            else:
                print(seq, file=f_out)
    finally:
        # line_length is a class attribute shared by all users of Fasta, so
        # it must be restored even if reading or writing fails. Restore it
        # before closing, in case the close itself raises.
        sequences.Fasta.line_length = original_line_length
        utils.close(f_out)
Exemple #21
0
    def test_trim_Ns(self):
        '''trim_Ns() should remove Ns (upper or lower case) from the ends only'''
        trimmed = sequences.Fasta('ID', 'ANNANA')
        untrimmed = [
            'ANNANA',
            'NANNANA',
            'NANNANAN',
            'ANNANAN',
            'NNNNNNANNANAN',
            'NNANNANANn',
        ]

        for bases in untrimmed:
            record = sequences.Fasta('ID', bases)
            record.trim_Ns()
            self.assertEqual(trimmed, record)
Exemple #22
0
    def test_get_next_from_gbk_file(self):
        '''get_next_from_file() of Embl should read each record from a genbank file'''
        f_in = utils.open_file_read(
            os.path.join(data_dir, 'sequences_test.gbk'))
        embl = sequences.Embl()
        counter = 1
        expected = [
            'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgatc',
            'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgaaa'
        ]

        # Records are expected to be called NAME1, NAME2, ... try/finally
        # closes the file even when an assertion fails part of the way through.
        try:
            while embl.get_next_from_file(f_in):
                self.assertEqual(
                    embl,
                    sequences.Fasta('NAME' + str(counter), expected[counter - 1]))
                counter += 1
        finally:
            utils.close(f_in)
Exemple #23
0
def scaffolds_to_contigs(infile, outfile, number_contigs=False):
    '''Writes the contigs of each scaffold in infile to outfile, where the
       contigs are the regions given by contig_coords().
       By default a contig is called id.start.end (1-based coordinates).
       With number_contigs=True it is called id.1, id.2, etc instead.'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for scaffold in reader:
        for number, span in enumerate(scaffold.contig_coords(), start=1):
            first, last = span.start, span.end
            if number_contigs:
                contig_name = scaffold.id + '.' + str(number)
            else:
                contig_name = '.'.join([scaffold.id, str(first + 1), str(last + 1)])
            print(sequences.Fasta(contig_name, scaffold[first:last+1]), file=out_handle)

    utils.close(out_handle)
Exemple #24
0
def make_random_contigs(contigs, length, outfile, name_by_letters=False, prefix='', seed=None, first_number=1):
    '''Writes a multi fasta file of `contigs` random ACGT sequences, each `length` bases long.

    Names are prefix followed by a number counting up from first_number, or,
    with name_by_letters=True, by a letter A-Z (going back to A after Z).
    The random module is seeded with seed before anything is generated.'''
    random.seed(a=seed)
    out_handle = utils.open_file_write(outfile)
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    for index in range(contigs):
        if name_by_letters:
            # wraps round to 'A' once the alphabet is used up
            label = alphabet[index % len(alphabet)]
        else:
            label = str(index + first_number)

        full_name = prefix + label
        bases = ''.join([random.choice('ACGT') for _ in range(length)])
        print(sequences.Fasta(full_name, bases), file=out_handle)

    utils.close(out_handle)
Exemple #25
0
def merge_to_one_seq(infile, outfile, seqname='union'):
    '''Writes a file containing one sequence called seqname, which is all the sequences of a multi fasta or fastq file joined together in their original order. The output is a Fastq (qualities joined as well) if the first input sequence is a Fastq, otherwise a Fasta'''
    # copy each record, in case the reader reuses the same object
    records = [copy.copy(record) for record in sequences.file_reader(infile)]

    joined_bases = ''.join([record.seq for record in records])
    is_fastq = type(records[0]) == sequences.Fastq
    joined_quals = ''.join([record.qual for record in records]) if is_fastq else None

    # the copies are not needed from here on
    del records[:]

    if is_fastq:
        merged = sequences.Fastq(seqname, joined_bases, joined_quals)
    else:
        merged = sequences.Fasta(seqname, joined_bases)

    out_handle = utils.open_file_write(outfile)
    print(merged, file=out_handle)
    utils.close(out_handle)
Exemple #26
0
    def test_strip_illumina_suffix(self):
        '''strip_illumina_suffix() should remove one /1 or /2 from the end of the ID, and nothing else'''
        # (ID before, ID expected after)
        cases = [
            ('name/1', 'name'),
            ('name/2', 'name'),
            ('name', 'name'),
            ('name/1/2', 'name/1'),
            ('name/2/1', 'name/2'),
            ('name/3', 'name/3'),
        ]
        records = [sequences.Fasta(before, 'A') for before, _ in cases]

        for record in records:
            record.strip_illumina_suffix()

        for record, (_, after) in zip(records, cases):
            self.assertEqual(record.id, after)
Exemple #27
0
    def test_file_reader_embl(self):
        '''file_reader should read the sequences from an embl file and raise Error on the bad files'''
        good_path = os.path.join(data_dir, 'sequences_test.embl')

        # Records are expected to be called seq1, seq2, ... with sequences
        # taken in order from expected_embl
        for number, record in enumerate(sequences.file_reader(good_path), start=1):
            wanted = sequences.Fasta('seq' + str(number), expected_embl[number - 1])
            self.assertEqual(record, wanted)

        bad_names = (
            'sequences_test.embl.bad',
            'sequences_test.embl.bad2',
        )

        # Making the reader or iterating over it must raise Error
        for bad_path in [os.path.join(data_dir, name) for name in bad_names]:
            with self.assertRaises(sequences.Error):
                records = sequences.file_reader(bad_path)
                for _ in records:
                    pass
Exemple #28
0
    def test_replace_interval(self):
        '''replace_interval() should swap in the new sequence (and qualities) and raise Error on bad input'''
        # ((start, end) to replace with NEW, sequence expected afterwards)
        fasta_cases = [
            ((0, 0), 'NEWCGTA'),
            ((4, 4), 'ACGTNEW'),
            ((2, 3), 'ACNEWA'),
        ]
        for (start, end), new_bases in fasta_cases:
            fa = sequences.Fasta('ID', 'ACGTA')
            fa.replace_interval(start, end, 'NEW')
            self.assertEqual(fa, sequences.Fasta('ID', new_bases))

        # These coordinates must all be rejected for a sequence of length 5
        fa = sequences.Fasta('ID', 'ACGTA')
        for start, end in [(3, 2), (1, 5), (5, 10)]:
            with self.assertRaises(sequences.Error):
                fa.replace_interval(start, end, 'x')

        # Same again for Fastq, where the qualities get replaced with III
        fastq_cases = [
            ((0, 0), 'NEWCGTA', 'IIIBCDE'),
            ((4, 4), 'ACGTNEW', 'ABCDIII'),
            ((2, 3), 'ACNEWA', 'ABIIIE'),
        ]
        for (start, end), new_bases, new_quals in fastq_cases:
            fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
            fq.replace_interval(start, end, 'NEW', 'III')
            self.assertEqual(fq, sequences.Fastq('ID', new_bases, new_quals))

        # fq is still the record from the last case above. One new base with
        # two new quality scores must be rejected.
        with self.assertRaises(sequences.Error):
            fq.replace_interval(1, 1, 'x', 'xx')
Exemple #29
0
    def test_expand_nucleotides(self):
        '''expand_nucleotides() should give one numbered sequence for each combination of bases that the redundant codes stand for'''
        def numbered(record_type, name, seqs, *extra):
            # Makes the records name.1, name.2, ... one for each of seqs
            return [
                record_type(name + '.' + str(number), seq, *extra)
                for number, seq in enumerate(seqs, start=1)
            ]

        # (name, one letter code, the bases it should expand to, in order)
        single_codes = [
            ('1', 'A', 'A'),
            ('2', 'C', 'C'),
            ('3', 'G', 'G'),
            ('4', 'T', 'T'),
            ('6', 'R', 'AG'),
            ('7', 'Y', 'CT'),
            ('8', 'S', 'CG'),
            ('9', 'W', 'AT'),
            ('10', 'K', 'GT'),
            ('11', 'M', 'AC'),
            ('12', 'B', 'CGT'),
            ('13', 'D', 'AGT'),
            ('14', 'H', 'ACT'),
            ('15', 'V', 'ACG'),
            ('16', 'N', 'ACGT'),
        ]
        tests = [
            (sequences.Fasta(name, code), numbered(sequences.Fasta, name, bases))
            for name, code, bases in single_codes
        ]

        # Longer sequences, the last one a Fastq whose qualities stay the same
        tests.extend([
            (sequences.Fasta('17', 'ART'),
             numbered(sequences.Fasta, '17', ['AAT', 'AGT'])),
            (sequences.Fasta('18', 'ARRT'),
             numbered(sequences.Fasta, '18', ['AAAT', 'AAGT', 'AGAT', 'AGGT'])),
            (sequences.Fasta('19', 'ARTR'),
             numbered(sequences.Fasta, '19', ['AATA', 'AATG', 'AGTA', 'AGTG'])),
            (sequences.Fastq('20', 'ART', 'GHI'),
             numbered(sequences.Fastq, '20', ['AAT', 'AGT'], 'GHI')),
        ])

        for original, expanded in tests:
            self.assertListEqual(original.expand_nucleotides(), expanded)
Exemple #30
0
 def test_to_Fasta_and_qual(self):
     '''to_Fasta_and_qual() should return the Fasta and the list of numeric quality scores'''
     fastq_record = sequences.Fastq('ID', 'ACGT', '>ADI')
     fasta_record, scores = fastq_record.to_Fasta_and_qual()
     self.assertEqual(fasta_record, sequences.Fasta('ID', 'ACGT'))
     # each expected score is the ASCII code of the quality character minus 33
     self.assertListEqual(scores, [29, 32, 35, 40])