Ejemplos de reverse_and_complement en Python, ejemplos de micall.utils.translation.reverse_and_complement en Python

Ejemplo n.º 1

0

Mostrar archivo

def merge_reads(reads):
    """ Generator over merged reads.

    Read 1 is aligned against the reverse complement of read 2. The pair is
    merged only when both reads have bases, the alignment score reaches
    MIN_PAIR_ALIGNMENT_SCORE, and the aligned read 1 does not start with a
    gap.

    :param reads: iterable of reads from FastqReader
    :return: a generator with items (merged_bases may be None if merge fails):
    (pair_name,
     (read1_name, bases, quality),
     (read2_name, bases, quality),
     merged_bases)
    """
    for pair_name, (r1_name, seq1, qual1), (r2_name, seq2, qual2) in reads:
        # Start from "merge failed" so a pair with an empty read can never
        # fall through to an alignment left over from an earlier pair.
        merged = None
        if seq1 and seq2:
            seq2_rev = reverse_and_complement(seq2)
            aligned1, aligned2, score = align_it(seq1,
                                                 seq2_rev,
                                                 GAP_OPEN_COST,
                                                 GAP_EXTEND_COST,
                                                 USE_TERMINAL_COST)
            if score >= MIN_PAIR_ALIGNMENT_SCORE and aligned1[0] != '-':
                aligned_qual1 = align_quality(aligned1, qual1)
                # Read 2 was reversed for the alignment, so its quality
                # scores are reversed as well.
                aligned_qual2 = align_quality(aligned2, reversed(qual2))
                merged = merge_pairs(aligned1,
                                     aligned2,
                                     aligned_qual1,
                                     aligned_qual2,
                                     q_cutoff=Q_CUTOFF)
        yield (pair_name, (r1_name, seq1, qual1), (r2_name, seq2, qual2),
               merged)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: samples_from_454.py Proyecto: pastvir/MiCall

def main():
    """ Convert grouped CSV rows into paired, gzipped FASTQ files.

    Rows from args.source are grouped by their 'run' and 'enum' columns.
    Each group becomes one R1/R2 pair of files under args.dest. Every row's
    'string' column (with '-' removed) is written three times as read 1, and
    its reverse complement as read 2, with a constant quality of 'A'.
    """
    args = parse_args()
    with args.source as source:
        reader = csv.DictReader(source)
        for (run, sample), rows in groupby(reader, itemgetter('run', 'enum')):
            sample_name = format_sample_name(run, sample)
            filename1 = os.path.join(args.dest, sample_name + '_R1_001.fastq.gz')
            filename2 = os.path.join(args.dest, sample_name + '_R2_001.fastq.gz')
            print(filename1)
            # The gzip streams are context-managed so they are flushed and
            # closed (before the underlying files) even if a write fails.
            # NOTE(review): the streams are binary but receive str, which
            # only works on Python 2 - confirm the target interpreter.
            with open(filename1, 'wb') as dest1, \
                    open(filename2, 'wb') as dest2, \
                    GzipFile(fileobj=dest1) as dest1_zip, \
                    GzipFile(fileobj=dest2) as dest2_zip:
                for i, row in enumerate(rows):
                    seq = row['string'].replace('-', '')
                    # These do not change between the three duplicates.
                    seq_rev = reverse_and_complement(seq)
                    quality = 'A' * len(seq)
                    for j in range(3):
                        # Three duplicates so that G2P doesn't ignore it.
                        prefix = '@M454:01:000000000-AAAAA:1:1101:{}:{}'.format(
                            10*i + j,
                            row['count'])
                        dest1_zip.write(prefix + ' 1:N:0:1\n')
                        dest2_zip.write(prefix + ' 2:N:0:1\n')
                        dest1_zip.write(seq + '\n')
                        dest2_zip.write(seq_rev + '\n')
                        dest1_zip.write('+\n')
                        dest2_zip.write('+\n')
                        dest1_zip.write(quality + '\n')
                        dest2_zip.write(quality + '\n')
    print('Done.')

Ejemplo n.º 3

0

Mostrar archivo

    def testSimple(self):
        """ Reverse complement of a short sequence with all four bases. """
        self.assertEqual('CAGT', reverse_and_complement('ACTG'))

Ejemplo n.º 4

0

Mostrar archivo

Archivo: fastq_g2p.py Proyecto: cfe-lab/MiCall

def merge_reads(reads):
    """ Yield each read pair along with its merged sequence.

    Read 1 is aligned with the reverse complement of read 2. A pair is merged
    when the alignment score reaches MIN_PAIR_ALIGNMENT_SCORE and the aligned
    read 1 does not begin with a gap.

    :param reads: iterable of reads from FastqReader
    :return: a generator with items (merged_bases may be None if merge fails):
    (pair_name,
     (read1_name, bases, quality),
     (read2_name, bases, quality),
     merged_bases)
    """
    for pair_name, (r1_name, seq1, qual1), (r2_name, seq2, qual2) in reads:
        if seq1 and seq2:
            aligned1, aligned2, score = align_it(
                seq1,
                reverse_and_complement(seq2),
                GAP_OPEN_COST,
                GAP_EXTEND_COST,
                USE_TERMINAL_COST)
        else:
            # Nothing to align when either read is empty.
            aligned1, aligned2, score = None, None, -1

        merged = None
        if score >= MIN_PAIR_ALIGNMENT_SCORE and aligned1[0] != '-':
            quality1 = align_quality(aligned1, qual1)
            # Read 2 was reversed for alignment, so reverse its quality too.
            quality2 = align_quality(aligned2, reversed(qual2))
            merged = merge_pairs(aligned1,
                                 aligned2,
                                 quality1,
                                 quality2,
                                 q_cutoff=Q_CUTOFF)

        read1 = (r1_name, seq1, qual1)
        read2 = (r2_name, seq2, qual2)
        yield (pair_name, read1, read2, merged)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: remap.py Proyecto: tarah28/MiCall

 def write_fastq(self, fields, fastq, is_reversed=False):
     """ Write one four-line FASTQ record built from a list of fields.

     :param fields: sequence holding the read name at index 0, the bases at
         index 9, and the quality string at index 10
     :param fastq: open file-like object to write the record to
     :param is_reversed: if True, write the reverse complement of the bases
         and the reversed quality string
     """
     qname, seq, quality = fields[0], fields[9], fields[10]
     if is_reversed:
         seq = reverse_and_complement(seq)
         quality = ''.join(reversed(quality))
     record = '@{}\n{}\n+\n{}\n'.format(qname, seq, quality)
     fastq.write(record)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: remap.py Proyecto: cfe-lab/MiCall

 def write_fastq(fields, fastq, is_reversed=False):
     """ Write one four-line FASTQ record built from a list of fields.

     :param fields: sequence holding the read name at index 0, the bases at
         index 9, and the quality string at index 10
     :param fastq: open file-like object to write the record to
     :param is_reversed: if True, write the reverse complement of the bases
         and the reversed quality string
     """
     qname, seq, quality = fields[0], fields[9], fields[10]
     if is_reversed:
         seq = reverse_and_complement(seq)
         quality = ''.join(reversed(quality))
     record = '@{}\n{}\n+\n{}\n'.format(qname, seq, quality)
     fastq.write(record)

Ejemplo n.º 7

0

Mostrar archivo

 def __init__(self, contig_seq: str, target_seq: str):
     """ Locate the best match for a target sequence within a contig.

     Every sequence produced by unpack_mixtures_and_reverse() for the target
     is aligned against the contig, and the first alignment with the highest
     score is kept. The results are stored as attributes:
     score - the best alignment score
     is_reversed - the is_reversed flag of the best candidate
     start - offset of the first non-gap target position in the alignment
     contig_match - contig bases aligned to the target, without gaps
     dist - Levenshtein distance between the best candidate and contig_match
     end_dist - same distance, after trimming the part of the candidate that
         lines up with gaps at one end of the aligned contig

     :param contig_seq: contig sequence to search
     :param target_seq: target sequence to look for
     """
     gap_open_penalty = 15
     gap_extend_penalty = 3
     use_terminal_gap_penalty = 1

     top_score = top_contig = top_target = top_nucs = top_reversed = None
     candidates = unpack_mixtures_and_reverse(target_seq)
     for candidate_nucs, candidate_reversed in candidates:
         contig_alignment, target_alignment, candidate_score = align_it(
             contig_seq,
             candidate_nucs,
             gap_open_penalty,
             gap_extend_penalty,
             use_terminal_gap_penalty)
         if top_score is not None and not candidate_score > top_score:
             # Not better than what we already have.
             continue
         top_score = candidate_score
         top_contig = contig_alignment
         top_target = target_alignment
         top_nucs = candidate_nucs
         top_reversed = candidate_reversed

     self.score = top_score
     self.is_reversed = top_reversed
     if top_reversed:
         top_contig = reverse_and_complement(top_contig)
         top_target = reverse_and_complement(top_target)

     # Group 1 spans from the first to the last non-gap target position.
     match = re.match('-*([^-](.*[^-])?)', top_target)
     self.start = match.start(1)
     match_end = match.end(1)
     self.contig_match = top_contig[self.start:match_end].replace('-', '')
     self.dist = Levenshtein.distance(top_nucs, self.contig_match)

     # Gaps at either end of the aligned contig mean the target hangs over
     # that end; leading gaps take priority over trailing ones.
     leading_gaps = len(top_contig) - len(top_contig.lstrip('-'))
     trailing_gaps = len(top_contig) - len(top_contig.rstrip('-'))
     if leading_gaps > 0:
         self.end_dist = Levenshtein.distance(top_nucs[leading_gaps:],
                                              self.contig_match)
     elif trailing_gaps == 0:
         self.end_dist = self.dist
     else:
         self.end_dist = Levenshtein.distance(top_nucs[:-trailing_gaps],
                                              self.contig_match)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_probe_finder.py Proyecto: pastvir/MiCall

def test_probe_finder_reversed():
    """ A reverse-complemented contig is matched and reported as reversed. """
    forward_contig = 'ATCGACCTGGCTAATTCCAGT'
    probe = 'ATCGACCTAGCT'
    contig_seq = reverse_and_complement(forward_contig)

    finder = ProbeFinder(contig_seq, probe)

    assert finder.contig_match == 'ATCGACCTGGCT'
    assert finder.is_reversed

Ejemplo n.º 9

0

Mostrar archivo

Archivo: make_sample.py Proyecto: Richardharrigan/MiCall

def main():
    """ Write the sample FASTQ files described in fastq_files.

    Each file is opened by name for writing. For every section, the matching
    slice of the reference is fetched, the file's codon mutations that fall
    inside the section are substituted in, and the result (reverse
    complemented for reversed files) is written section.count times as
    four-line FASTQ records with a constant quality of 'A'.
    """
    fastq_files = [
        FastqFile('2130A-HCV_S15_L001_R1_001.fastq', '2130', False,
                  (FastqSection('HCV2-JFH-1-NS5b', 1, 60, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 117, 176, 100)),
                  (CodonMutation(159, 'GTC'), )),
        FastqFile('2130A-HCV_S15_L001_R2_001.fastq', '2130', True,
                  (FastqSection('HCV2-JFH-1-NS5b', 57, 116, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 171, 230, 100)),
                  (CodonMutation(159, 'GTC'), )),
        FastqFile('2130AMIDI-MidHCV_S16_L001_R1_001.fastq', '2130', False,
                  (FastqSection('HCV2-JFH-1-NS5b', 231, 313, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 396, 478, 100)),
                  (CodonMutation(316, 'AGC'), )),
        FastqFile('2130AMIDI-MidHCV_S16_L001_R2_001.fastq', '2130', True,
                  (FastqSection('HCV2-JFH-1-NS5b', 313, 395, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 479, 561, 100)),
                  (CodonMutation(316, 'AGC'), ))
    ]
    projects = ProjectConfig.loadDefault()
    for fastq_file in fastq_files:
        with open(fastq_file.name, 'w') as f:
            # Cluster numbers keep counting up across a file's sections.
            next_cluster = 1
            for section in fastq_file.sections:
                ref_name, ref_start, ref_end = find_coord_pos(
                    projects, section.coord_name, section.start_pos,
                    section.end_pos)

                ref_nuc_seq = projects.getReference(ref_name)
                ref_nuc_section = list(ref_nuc_seq[ref_start:ref_end])
                for mutation in fastq_file.mutations:
                    if section.start_pos <= mutation.pos <= section.end_pos:
                        # Three nucleotides per section position.
                        section_pos = (mutation.pos - section.start_pos) * 3
                        ref_nuc_section[section_pos:section_pos + 3] = list(
                            mutation.codon)
                ref_nuc_section = ''.join(ref_nuc_section)
                if fastq_file.is_reversed:
                    ref_nuc_section = reverse_and_complement(ref_nuc_section)
                # A FASTQ quality line must be as long as its sequence line,
                # so measure the sequence that is actually written instead of
                # trusting the requested reference coordinates.
                phred_scores = 'A' * len(ref_nuc_section)
                file_num = '2' if fastq_file.is_reversed else '1'
                for cluster in range(section.count):
                    f.write(
                        '@M01234:01:000000000-AAAAA:1:1101:{}:{:04} {}:N:0:1\n'
                        .format(fastq_file.extract_num, cluster + next_cluster,
                                file_num))
                    f.write(ref_nuc_section + '\n')
                    f.write('+\n')
                    f.write(phred_scores + '\n')
                next_cluster += section.count

Ejemplo n.º 10

0

Mostrar archivo

def unpack_mixtures_and_reverse(
        seq: str) -> typing.Set[typing.Tuple[str, bool]]:
    """ Expand mixture codes into every plain sequence, in both directions.

    Each character that has an entry in mixture_dict is replaced by each of
    the nucleotides in that entry; other characters are kept as they are.

    :param seq: nucleotide sequence, possibly including mixture codes
    :return: set of (sequence, is_reversed) pairs, holding every expanded
        sequence with False and its reverse complement with True
    """
    expanded = {''}
    for code in seq:
        choices = mixture_dict.get(code, code)
        # Extend every sequence built so far with every possible nucleotide.
        expanded = {prefix + nuc for nuc in choices for prefix in expanded}

    results = set()
    for nucs in expanded:
        results.add((nucs, False))
        results.add((reverse_and_complement(nucs), True))
    return results

Ejemplo n.º 11

0

Mostrar archivo

Archivo: test_trim_fastqs.py Proyecto: pastvir/MiCall

def test_trim(tmpdir):
    """ Check that trim() writes this read pair back out unchanged. """
    forward = 'TATCTACTAACTGTCGGTCTAC'
    backward = reverse_and_complement(forward)
    expected_texts = [build_fastq(forward), build_fastq(backward)]

    work_dir = Path(str(tmpdir))
    input_paths = [work_dir / 'read1.fastq', work_dir / 'read2.fastq']
    output_paths = [work_dir / 'trimmed1.fastq', work_dir / 'trimmed2.fastq']
    for input_path, text in zip(input_paths, expected_texts):
        input_path.write_text(text)

    trim(input_paths,
         'no_bad_cycles.csv',
         [str(output_path) for output_path in output_paths],
         use_gzip=False)

    actual_texts = [output_path.read_text() for output_path in output_paths]
    assert actual_texts[0] == expected_texts[0]
    assert actual_texts[1] == expected_texts[1]

Ejemplo n.º 12

0

Mostrar archivo

Archivo: make_sample.py Proyecto: pastvir/MiCall

def main():
    """ Write the sample FASTQ files described in fastq_files.

    Each file is opened by name for writing. For every section, the matching
    slice of the reference is fetched, the section's mutations that fall
    inside it are substituted in, and the result (reverse complemented for
    reversed files) is written section.count times as four-line FASTQ
    records with a constant quality of 'A'.
    """
    projects = ProjectConfig.loadDefault()
    # Section lists are built up front, as (read 1, read 2) pairs, so they
    # can be shared between the R1 and R2 file definitions below.
    sections_2100hcv_1, sections_2100hcv_2 = make_random_sections(
        'HCV1A-H77-NS5a', 1, 300, projects, 400)
    sections_2100v3_1, sections_2100v3_2 = ([
        FastqSection('HIV1-B-FR-K03455-seed', 7056, 7312, 50),
        FastqSection('HIV1-B-FR-K03455-seed', 7062, 7312, 50)
    ], [
        FastqSection('HIV1-B-FR-K03455-seed', 7123, 7373, 50),
        FastqSection('HIV1-B-FR-K03455-seed', 7123, 7376, 50)
    ])
    sections_2100hiv_1, sections_2100hiv_2 = make_random_sections(
        'RT', 1, 300, projects, 400)
    sections_2160_1, sections_2160_2 = make_random_sections(
        'HCV2-JFH-1-NS5b',
        1,
        230,
        projects,
        mutations=(CodonMutation(159, 'GTC'), ))
    sections_2160midi_1, sections_2160midi_2 = make_random_sections(
        'HCV2-JFH-1-NS5b',
        231,
        561,
        projects,
        mutations=(CodonMutation(316, 'AGC'), ))
    # NOTE(review): the next three calls omit the projects argument that the
    # calls above pass - confirm make_random_sections has a suitable default.
    sections_2170_1a_1, sections_2170_1a_2 = make_random_sections(
        'HCV-1a', 6258, 9375)
    sections_2170_2_1, sections_2170_2_2 = make_random_sections(
        'HCV-2a', 6269, 9440)
    sections_2180_1, sections_2180_2 = make_random_sections(
        'HIV1-B-FR-K03455-seed', 6225, 7757)
    hxb2_ref = projects.getReference('HIV1-B-FR-K03455-seed')

    # Register an extra region made of two slices of the HXB2 reference
    # joined together, so sections can be generated from it.
    projects.config['regions']['HXB2-with-deletion'] = dict(
        reference=hxb2_ref[617:928] + hxb2_ref[9358:9652],
        is_nucleotide=True,
        seed_group=None)
    sections_2210_1, sections_2210_2 = make_random_sections(
        'HXB2-with-deletion', projects=projects)
    # Files come in R1/R2 pairs; the third argument is False for R1 and True
    # for R2, and it is read back below as fastq_file.is_reversed.
    fastq_files = [
        FastqFile('2010A-V3LOOP_S3_L001_R1_001.fastq', '2010', False,
                  (FastqSection('HIV1-CON-XX-Consensus-seed', 855, 906, 10),
                   FastqSection('HIV1-CON-XX-Consensus-seed', 912, 960, 10))),
        FastqFile('2010A-V3LOOP_S3_L001_R2_001.fastq', '2010', True,
                  (FastqSection('HIV1-CON-XX-Consensus-seed', 855, 906, 10),
                   FastqSection('HIV1-CON-XX-Consensus-seed', 912, 960, 10))),
        FastqFile('2020A-GP41_S4_L001_R1_001.fastq', '2020', False,
                  (FastqSection('HIV1-B-FR-KF716496-seed', 6957, 7065, 10,
                                (CodonMutation(6981, 'GGGATA'), )), )),
        FastqFile('2020A-GP41_S4_L001_R2_001.fastq', '2020', True,
                  (FastqSection('HIV1-B-FR-KF716496-seed', 6957, 7065, 10,
                                (CodonMutation(6981, 'GGGATA'), )), )),
        FastqFile('2040A-HLA-B_S6_L001_R1_001.fastq', '2040', False,
                  (FastqSection('HLA-B-seed', 201, 315, 80),
                   FastqSection('HLA-B-seed', 201, 315, 20,
                                (CodonMutation(207, 'TCT'), )))),
        FastqFile('2040A-HLA-B_S6_L001_R2_001.fastq', '2040', True,
                  (FastqSection('HLA-B-seed', 201, 315, 80),
                   FastqSection('HLA-B-seed', 201, 315, 20,
                                (CodonMutation(207, 'TCT'), )))),
        FastqFile(
            '2070A-PR_S9_L001_R1_001.fastq', '2070', False,
            (FastqSection('PR', 40, 80, 12, (CodonMutation(45, ''), )),
             FastqSection('PR', 40, 80, 3,
                          (CodonMutation(45, ''), CodonMutation(64, ''))))),
        FastqFile(
            '2070A-PR_S9_L001_R2_001.fastq', '2070', True,
            (FastqSection('PR', 40, 80, 12, (CodonMutation(45, ''), )),
             FastqSection('PR', 40, 80, 3,
                          (CodonMutation(45, ''), CodonMutation(64, ''))))),
        FastqFile('2100A-HCV-1337B-V3LOOP-PWND-HIV_S12_L001_R1_001.fastq',
                  '2100', False,
                  sections_2100hcv_1 + sections_2100v3_1 + sections_2100hiv_1),
        FastqFile('2100A-HCV-1337B-V3LOOP-PWND-HIV_S12_L001_R2_001.fastq',
                  '2100', True,
                  sections_2100hcv_2 + sections_2100v3_2 + sections_2100hiv_2),
        FastqFile('2130A-HCV_S15_L001_R1_001.fastq', '2130', False,
                  (FastqSection('HCV2-JFH-1-NS5b', 1, 66, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 115, 181, 100,
                                (CodonMutation(159, 'GTC'), )))),
        FastqFile('2130A-HCV_S15_L001_R2_001.fastq', '2130', True,
                  (FastqSection('HCV2-JFH-1-NS5b', 51, 114, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 165, 230, 100))),
        FastqFile('2130AMIDI-MidHCV_S16_L001_R1_001.fastq', '2130', False,
                  (FastqSection('HCV2-JFH-1-NS5b', 231, 315, 100),
                   FastqSection('HCV2-JFH-1-NS5b', 398, 485, 100))),
        FastqFile('2130AMIDI-MidHCV_S16_L001_R2_001.fastq', '2130', True,
                  (FastqSection('HCV2-JFH-1-NS5b', 305, 397, 100,
                                (CodonMutation(316, 'AGC'), )),
                   FastqSection('HCV2-JFH-1-NS5b', 470, 561, 100))),
        FastqFile('2140A-HIV_S17_L001_R1_001.fastq', '2140', False,
                  (FastqSection('PR', 1, 80, 100,
                                (CodonMutation(24, 'ATA'), )), )),
        FastqFile('2140A-HIV_S17_L001_R2_001.fastq', '2140', True,
                  (FastqSection('PR', 20, 99, 100,
                                (CodonMutation(24, 'ATA'), )), )),
        # Simplify with one_contig.
        FastqFile('2160A-HCV_S19_L001_R1_001.fastq', '2160', False,
                  sections_2160_1),
        FastqFile('2160A-HCV_S19_L001_R2_001.fastq', '2160', True,
                  sections_2160_2),
        # Simplify with one_contig.
        FastqFile('2160AMIDI-MidHCV_S20_L001_R1_001.fastq', '2160', False,
                  sections_2160midi_1),
        FastqFile('2160AMIDI-MidHCV_S20_L001_R2_001.fastq', '2160', True,
                  sections_2160midi_2),
        # Simplify with two_long_contigs.
        FastqFile('2170A-HCV_S21_L001_R1_001.fastq', '2170', False,
                  sections_2170_1a_1 + sections_2170_2_1),
        FastqFile('2170A-HCV_S21_L001_R2_001.fastq', '2170', True,
                  sections_2170_1a_2 + sections_2170_2_2),
        FastqFile('2180A-HIV_S22_L001_R1_001.fastq', '2180', False,
                  sections_2180_1),
        FastqFile('2180A-HIV_S22_L001_R2_001.fastq', '2180', True,
                  sections_2180_2),
        FastqFile('2190A-SARSCOV2_S23_L001_R1_001.fastq', '2190', False,
                  (FastqSection('SARS-CoV-2-ORF1ab', 4393, 4429, 50,
                                (CodonMutation(4400, 'TCA'), )),
                   FastqSection('SARS-CoV-2-ORF1ab', 4393, 4430, 50,
                                (CodonMutation(4400, 'TCA'), )))),
        FastqFile('2190A-SARSCOV2_S23_L001_R2_001.fastq', '2190', True,
                  (FastqSection('SARS-CoV-2-ORF1ab', 4393, 4429, 50,
                                (CodonMutation(4400, 'TCA'), )),
                   FastqSection('SARS-CoV-2-ORF1ab', 4393, 4430, 50,
                                (CodonMutation(4400, 'TCA'), )))),
        FastqFile('2200A-SARSCOV2_S24_L001_R1_001.fastq', '2200', False,
                  (FastqSection('SARS-CoV-2-nsp1', 20, 66, 100), )),
        FastqFile('2200A-SARSCOV2_S24_L001_R2_001.fastq', '2200', True,
                  (FastqSection('SARS-CoV-2-nsp1', 56, 102, 100), )),
        FastqFile('2210A-NFLHIVDNA_S25_L001_R1_001.fastq', '2210', False,
                  sections_2210_1),
        FastqFile('2210A-NFLHIVDNA_S25_L001_R2_001.fastq', '2210', True,
                  sections_2210_2)
    ]
    for fastq_file in fastq_files:
        with open(fastq_file.name, 'w') as f:
            # Cluster numbers keep counting up across a file's sections.
            next_cluster = 1
            for section in fastq_file.sections:
                ref_name, ref_start, ref_end = find_coord_pos(
                    projects, section.coord_name, section.start_pos,
                    section.end_pos)

                ref_nuc_seq = projects.getReference(ref_name)
                ref_nuc_section = list(ref_nuc_seq[ref_start:ref_end])
                # If find_coord_pos returned the section's own positions,
                # they are used directly as nucleotide offsets. Otherwise
                # each position is scaled by three below (presumably codon
                # positions - confirm against find_coord_pos).
                is_nucleotide = ((ref_start, ref_end) == (section.start_pos,
                                                          section.end_pos))
                for mutation in section.mutations:
                    if section.start_pos <= mutation.pos <= section.end_pos:
                        section_pos = mutation.pos - section.start_pos
                        if not is_nucleotide:
                            section_pos *= 3
                        # Slice assignment: a codon that is not three
                        # characters long changes the section's length.
                        ref_nuc_section[section_pos:section_pos + 3] = list(
                            mutation.codon)
                ref_nuc_section = ''.join(ref_nuc_section)
                if fastq_file.is_reversed:
                    ref_nuc_section = reverse_and_complement(ref_nuc_section)
                # One quality character for each base actually written.
                phred_scores = 'A' * len(ref_nuc_section)
                file_num = '2' if fastq_file.is_reversed else '1'
                # noinspection PyTypeChecker
                for cluster in range(section.count):
                    f.write(
                        '@M01234:01:000000000-AAAAA:1:1101:{}:{:04} {}:N:0:1\n'
                        .format(fastq_file.extract_num, cluster + next_cluster,
                                file_num))
                    f.write(ref_nuc_section + '\n')
                    f.write('+\n')
                    f.write(phred_scores + '\n')
                next_cluster += section.count

Ejemplo n.º 13

0

Mostrar archivo

""" Reverse a nucleotide sequence and replace with complementary nucleotides.
Mixtures are allowed, as well as *, N, and -.
If you want to compare the result to an expected sequence, put the expected
sequence in reverse_compare.
Source: https://github.com/ArtPoon/bioinfo/blob/master/seqUtils.py#L143
"""
from micall.utils.translation import reverse_and_complement

# Sequence to reverse and complement.
nuc_seq = ''.join([
    "TGTACAAGACCCAACAACAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGC",
    "AACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT"
])
# Expected result, to compare against the computed one.
reverse_compare = ''.join([
    "ACAATGTGCTTGTCTTATATCTCCTATTATTTCTCCTGTTGCATAAAATGCTCTCCCTGGTCCTA",
    "TATGTATACTTTTTCTTGTATTGTTGTTGGGTCTTGTACA"
])

reverse_seq = reverse_and_complement(nuc_seq)
# Mark each position where the result differs from the expected sequence.
# zip() stops at the shorter sequence, so extra trailing bases get no marker.
pairs = zip(reverse_seq, reverse_compare)
diffs = [' ' if a == b else '*' for a, b in pairs]
diff_text = ''.join(diffs) if reverse_seq != reverse_compare else 'no diffs'
# print() with a single argument gives the same output on Python 2 and 3;
# the original print statements are a syntax error on Python 3.
print(' '.join(('result ', reverse_seq)))
print(' '.join(('diffs  ', diff_text)))
print(' '.join(('compare', reverse_compare)))

Ejemplo n.º 14

0

Mostrar archivo

Archivo: reverse_and_complement.py Proyecto: tarah28/MiCall

""" Reverse a nucleotide sequence and replace with complementary nucleotides.
Mixtures are allowed, as well as *, N, and -.
If you want to compare the result to an expected sequence, put the expected
sequence in reverse_compare.
Source: https://github.com/ArtPoon/bioinfo/blob/master/seqUtils.py#L143
"""
from micall.utils.translation import reverse_and_complement

# Sequence to reverse and complement.
nuc_seq = ''.join([
        "TGTACAAGACCCAACAACAATACAAGAAAAAGTATACATATAGGACCAGGGAGAGCATTTTATGC",
        "AACAGGAGAAATAATAGGAGATATAAGACAAGCACATTGT"
      ])
# Expected result, to compare against the computed one.
reverse_compare = ''.join([
        "ACAATGTGCTTGTCTTATATCTCCTATTATTTCTCCTGTTGCATAAAATGCTCTCCCTGGTCCTA",
        "TATGTATACTTTTTCTTGTATTGTTGTTGGGTCTTGTACA"
      ])


reverse_seq = reverse_and_complement(nuc_seq)
# Mark each position where the result differs from the expected sequence.
# zip() stops at the shorter sequence, so extra trailing bases get no marker.
pairs = zip(reverse_seq, reverse_compare)
diffs = [' ' if a == b else '*' for a, b in pairs]
diff_text = ''.join(diffs) if reverse_seq != reverse_compare else 'no diffs'
# print() with a single argument gives the same output on Python 2 and 3;
# the original print statements are a syntax error on Python 3.
print(' '.join(('result ', reverse_seq)))
print(' '.join(('diffs  ', diff_text)))
print(' '.join(('compare', reverse_compare)))

Ejemplo n.º 15

0

Mostrar archivo

Archivo: make_sample.py Proyecto: cfe-lab/MiCall

def main():
    """ Write the sample FASTQ files described in fastq_files.

    Each file is opened by name for writing. For every section, the matching
    slice of the reference is fetched, the file's codon mutations that fall
    inside the section are substituted in, and the result (reverse
    complemented for reversed files) is written section.count times as
    four-line FASTQ records with a constant quality of 'A'.
    """
    fastq_files = [FastqFile('2130A-HCV_S15_L001_R1_001.fastq',
                             '2130',
                             False,
                             (FastqSection('HCV2-JFH-1-NS5b', 1, 60, 100),
                              FastqSection('HCV2-JFH-1-NS5b', 117, 176, 100)),
                             (CodonMutation(159, 'GTC'),)),
                   FastqFile('2130A-HCV_S15_L001_R2_001.fastq',
                             '2130',
                             True,
                             (FastqSection('HCV2-JFH-1-NS5b', 57, 116, 100),
                              FastqSection('HCV2-JFH-1-NS5b', 171, 230, 100)),
                             (CodonMutation(159, 'GTC'),)),
                   FastqFile('2130AMIDI-MidHCV_S16_L001_R1_001.fastq',
                             '2130',
                             False,
                             (FastqSection('HCV2-JFH-1-NS5b', 231, 313, 100),
                              FastqSection('HCV2-JFH-1-NS5b', 396, 478, 100)),
                             (CodonMutation(316, 'AGC'),)),
                   FastqFile('2130AMIDI-MidHCV_S16_L001_R2_001.fastq',
                             '2130',
                             True,
                             (FastqSection('HCV2-JFH-1-NS5b', 313, 395, 100),
                              FastqSection('HCV2-JFH-1-NS5b', 479, 561, 100)),
                             (CodonMutation(316, 'AGC'),)),
                   FastqFile('2140A-HIV_S17_L001_R1_001.fastq',
                             '2140',
                             False,
                             (FastqSection('PR', 1, 80, 100),),
                             (CodonMutation(24, 'ATA'),)),
                   FastqFile('2140A-HIV_S17_L001_R2_001.fastq',
                             '2140',
                             True,
                             (FastqSection('PR', 20, 99, 100),),
                             (CodonMutation(24, 'ATA'),))]
    projects = ProjectConfig.loadDefault()
    for fastq_file in fastq_files:
        with open(fastq_file.name, 'w') as f:
            # Cluster numbers keep counting up across a file's sections.
            next_cluster = 1
            for section in fastq_file.sections:
                ref_name, ref_start, ref_end = find_coord_pos(projects,
                                                              section.coord_name,
                                                              section.start_pos,
                                                              section.end_pos)

                ref_nuc_seq = projects.getReference(ref_name)
                ref_nuc_section = list(ref_nuc_seq[ref_start:ref_end])
                for mutation in fastq_file.mutations:
                    if section.start_pos <= mutation.pos <= section.end_pos:
                        # Three nucleotides per section position.
                        section_pos = (mutation.pos - section.start_pos) * 3
                        ref_nuc_section[section_pos:section_pos+3] = list(mutation.codon)
                ref_nuc_section = ''.join(ref_nuc_section)
                if fastq_file.is_reversed:
                    ref_nuc_section = reverse_and_complement(ref_nuc_section)
                # A FASTQ quality line must be as long as its sequence line,
                # so measure the sequence that is actually written instead of
                # trusting the requested reference coordinates.
                phred_scores = 'A' * len(ref_nuc_section)
                file_num = '2' if fastq_file.is_reversed else '1'
                for cluster in range(section.count):
                    f.write('@M01234:01:000000000-AAAAA:1:1101:{}:{:04} {}:N:0:1\n'.format(
                        fastq_file.extract_num,
                        cluster + next_cluster,
                        file_num))
                    f.write(ref_nuc_section+'\n')
                    f.write('+\n')
                    f.write(phred_scores+'\n')
                next_cluster += section.count