Ejemplo n.º 1
0
def count_triplets(reads, gene, codon_number):
    ''' Counts triplets that occur at codon_number in the coding sequence of
    gene in reads by aligning each read sequence to the context around the
    codon with the codon itself replaced with 'NNN'.

    codon_number is 1-based. Up to 28 bases of flanking sequence are used on
    each side of the codon; a flank is shorter when the codon lies close to
    the corresponding end of the coding sequence.

    Returns a Counter mapping each observed triplet to the number of reads in
    which it was found.
    '''
    sequence = gene.get_coding_sequence()

    start = (codon_number - 1) * 3
    around = 28

    # Clamp the left flank at the beginning of the sequence. Without this, a
    # codon within the first `around` bases yields a negative slice start,
    # which Python counts from the END of the sequence, so the flank comes
    # out wrong (usually empty) and the 'NNN' no longer sits at the index
    # that is looked up below.
    left_flank = sequence[max(0, start - around):start]
    right_flank = sequence[start + 3:start + 3 + around]
    context = left_flank + 'NNN' + right_flank

    # Index of the first 'N' within context; equals `around` unless the left
    # flank was truncated.
    codon_offset = len(left_flank)

    def extract_triplet(alignment, seq):
        ''' Returns the bases of seq aligned to the three 'N' positions, or
        None if any of them is unaligned or has fewer than 3 bases of seq on
        either side of it.
        '''
        # NOTE(review): assumes alignment['path'] holds (context index, read
        # index) pairs, matching the argument order of the call below -
        # confirm against sw.generate_alignments.
        path = dict(alignment['path'])
        bases = []
        for position in range(codon_offset, codon_offset + 3):
            if position not in path:
                return None
            read_index = path[position]
            if min(read_index, len(seq) - 1 - read_index) < 3:
                return None
            bases.append(seq[read_index])
        return ''.join(bases)

    triplets = Counter()

    for read in reads:
        # Only the first alignment reported by sw is considered.
        alignment = sw.generate_alignments(context, read.seq, 'overlap')[0]
        # NOTE(review): 'XO' is presumably the number of gap openings -
        # confirm against sw.
        if (len(alignment['path']) >= 20
                and len(alignment['mismatches']) <= 6
                and alignment['XO'] == 0):
            triplet = extract_triplet(alignment, read.seq)
            if triplet is not None:
                triplets[triplet] += 1

    return triplets
Ejemplo n.º 2
0
def is_synthetic(read, synthetic_sequences):
    ''' Reports whether read.seq aligns closely to any of synthetic_sequences.

    Each candidate is aligned to read.seq with sw in 'overlap' mode (exactly
    one alignment is expected back). A candidate counts as a hit when the
    alignment path is longer than 10 and the score falls at most 7 short of
    twice the path length. Returns True at the first hit, False otherwise.
    '''
    def resembles(synthetic_seq):
        (hit,) = sw.generate_alignments(read.seq, synthetic_seq, 'overlap')
        aligned_length = len(hit['path'])
        # NOTE(review): 2 * path length is presumably the score of a
        # penalty-free alignment - confirm against sw's scoring.
        shortfall = 2 * aligned_length - hit['score']
        return aligned_length > 10 and shortfall <= 7

    return any(resembles(candidate) for candidate in synthetic_sequences)
Ejemplo n.º 3
0
def is_synthetic(read, synthetic_sequences):
    """ Reports whether read.seq aligns closely to any of synthetic_sequences.

    Every candidate is aligned to read.seq with sw in "overlap" mode, and
    exactly one alignment is expected per candidate. The first candidate
    whose alignment path is longer than 10 and whose score is at most 7
    below twice the path length makes the function return True; if no
    candidate qualifies the result is False.
    """
    for candidate in synthetic_sequences:
        (hit,) = sw.generate_alignments(read.seq, candidate, "overlap")
        span = len(hit["path"])
        # NOTE(review): twice the path length is presumably the score of an
        # alignment without penalties - confirm against sw's scoring.
        shortfall = 2 * span - hit["score"]
        if span <= 10:
            continue
        if shortfall <= 7:
            return True
    return False
Ejemplo n.º 4
0
def trim_by_local_alignment(adapter, seq):
    ''' Try to find a near-exact match. If this fails, do a local alignment.

    The first adapter_prefix_length bases of adapter are searched for in seq
    with find_adapter. When the position found is greater than
    len(seq) - adapter_prefix_length, the whole adapter is aligned to seq
    with sw in 'unpaired_adapter' mode, and the start of that alignment in
    seq is used instead if the alignment is non-empty and good enough.

    Returns the position at which seq should be trimmed.
    '''
    prefix = adapter[:adapter_prefix_length]
    # NOTE(review): the literal 1 is presumably the number of mismatches
    # find_adapter tolerates - confirm against its definition.
    position = find_adapter(prefix, 1, seq)

    if position <= len(seq) - adapter_prefix_length:
        return position

    (hit,) = sw.generate_alignments(adapter,
                                    seq,
                                    'unpaired_adapter',
                                    max_alignments=1,
                                   )
    path = hit['path']

    score_diff = 2 * len(path) - hit['score']
    local_start = sw.first_target_index(path)

    # Allow a score shortfall of up to 10/22 per aligned position.
    allowed_diff = 10. / 22 * len(path)
    if path and score_diff <= allowed_diff:
        position = local_start

    return position
Ejemplo n.º 5
0
def trim_by_local_alignment(adapter, seq):
    ''' Try to find a near-exact match. If this fails, do a local alignment.

    find_adapter is used to look for the first adapter_prefix_length bases of
    adapter in seq. If its result is greater than
    len(seq) - adapter_prefix_length, the full adapter is aligned to seq with
    sw ('unpaired_adapter' mode, a single alignment) and, provided that
    alignment is non-empty and its score is close enough to twice its
    length, the alignment's first index in seq becomes the result.

    Returns the position at which seq should be trimmed.
    '''
    # NOTE(review): the literal 1 is presumably the number of mismatches
    # find_adapter tolerates - confirm against its definition.
    trim_at = find_adapter(adapter[:adapter_prefix_length], 1, seq)

    prefix_search_failed = trim_at > len(seq) - adapter_prefix_length
    if not prefix_search_failed:
        return trim_at

    alignments = sw.generate_alignments(
        adapter,
        seq,
        'unpaired_adapter',
        max_alignments=1,
    )
    # Exactly one alignment is expected back.
    alignment, = alignments
    path = alignment['path']

    score_diff = 2 * len(path) - alignment['score']
    adapter_start_in_seq = sw.first_target_index(path)

    # The tolerated shortfall grows with the alignment: 10/22 per position.
    tolerance = 10. / 22 * len(path)
    return adapter_start_in_seq if (path and score_diff <= tolerance) else trim_at
Ejemplo n.º 6
0
def get_edge_alignments(read, targets):
    ''' Collects high-scoring 'unpaired_adapter' alignments of read to targets.

    Both read.seq and its reverse complement are aligned against the seq of
    every target, asking sw for alignments with a score of at least 12. An
    alignment is kept only when its score is at least twice the length of
    its path. Kept alignments are annotated in place with 'query' (the
    sequence that was aligned), 'rname' (the target's name) and
    'is_reverse', and are returned as a list in the order they were found.
    '''
    min_score = 12

    forward = read.seq
    reverse = utilities.reverse_complement(read.seq)
    orientations = ((forward, False), (reverse, True))

    kept = []
    for target in targets:
        for query, is_reverse in orientations:
            candidates = sw.generate_alignments(query,
                                                target.seq,
                                                'unpaired_adapter',
                                                min_score=min_score,
                                               )
            for candidate in candidates:
                # NOTE(review): 2 per aligned position is presumably the
                # score of a penalty-free alignment - confirm against sw.
                if candidate['score'] < 2 * len(candidate['path']):
                    continue

                candidate['query'] = query
                candidate['rname'] = target.name
                candidate['is_reverse'] = is_reverse
                kept.append(candidate)

    return kept
Ejemplo n.º 7
0
def get_edge_alignments(read, targets):
    ''' Collects high-scoring 'unpaired_adapter' alignments of read to targets.

    For every target, read.seq and then its reverse complement are aligned
    to target.seq, requesting alignments that score at least 10. Only
    alignments whose score reaches twice their path length are retained;
    each retained alignment gets 'query', 'rname' and 'is_reverse' entries
    describing what was aligned to which target and in which orientation.

    Returns the retained alignments as a list, in discovery order.
    '''
    min_score = 10

    def passing(query, target):
        ''' Yields the alignments of query to target that pass the filter. '''
        found = sw.generate_alignments(
            query,
            target.seq,
            'unpaired_adapter',
            min_score=min_score,
        )
        for hit in found:
            # NOTE(review): twice the path length is presumably the score
            # of a penalty-free alignment - confirm against sw.
            if hit['score'] >= 2 * len(hit['path']):
                yield hit

    seq = read.seq
    seq_rc = utilities.reverse_complement(read.seq)

    collected = []
    for target in targets:
        for query, is_reverse in ((seq, False), (seq_rc, True)):
            for hit in passing(query, target):
                hit['query'] = query
                hit['rname'] = target.name
                hit['is_reverse'] = is_reverse
                collected.append(hit)

    return collected
Ejemplo n.º 8
0
def get_local_alignments(read, targets):
    ''' Collects 'local' alignments of read, in both orientations, to targets.

    For each target the minimum score requested from sw is 20, or twice the
    length of target.seq if that is smaller, and at most 3 alignments are
    requested per orientation. An alignment is kept when its score is at
    least 0.5 * 2 * the length of its path. Kept alignments are annotated in
    place with 'query', 'rname' and 'is_reverse' and returned as a list.
    '''
    forward = read.seq
    reverse = utilities.reverse_complement(read.seq)
    orientations = ((forward, False), (reverse, True))

    kept = []
    for target in targets:
        # Cap the requirement so that short targets can still reach it.
        score_floor = min(20, 2 * len(target.seq))
        for query, is_reverse in orientations:
            candidates = sw.generate_alignments(query,
                                                target.seq,
                                                'local',
                                                min_score=score_floor,
                                                max_alignments=3,
                                               )
            for candidate in candidates:
                if candidate['score'] < 0.5 * 2 * len(candidate['path']):
                    continue

                candidate['query'] = query
                candidate['rname'] = target.name
                candidate['is_reverse'] = is_reverse
                kept.append(candidate)

    return kept
Ejemplo n.º 9
0
def get_local_alignments(read, targets):
    ''' Collects 'local' alignments of read, in both orientations, to targets.

    read.seq and its reverse complement are each aligned to the seq of every
    target. Up to 3 alignments per orientation are requested from sw, with a
    minimum score of 20 or twice the target length, whichever is smaller.
    Alignments scoring at least 0.7 * 2 * their path length are annotated
    with 'query', 'rname' and 'is_reverse' and collected.

    Returns the collected alignments as a list, in discovery order.
    '''
    def passing(query, target, score_floor):
        ''' Yields the alignments of query to target that pass the filter. '''
        found = sw.generate_alignments(
            query,
            target.seq,
            'local',
            min_score=score_floor,
            max_alignments=3,
        )
        for hit in found:
            if hit['score'] >= 0.7 * 2 * len(hit['path']):
                yield hit

    seq = read.seq
    seq_rc = utilities.reverse_complement(read.seq)

    collected = []
    for target in targets:
        # Cap the requirement so that short targets can still reach it.
        score_floor = min(20, 2 * len(target.seq))
        for query, is_reverse in ((seq, False), (seq_rc, True)):
            for hit in passing(query, target, score_floor):
                hit['query'] = query
                hit['rname'] = target.name
                hit['is_reverse'] = is_reverse
                collected.append(hit)

    return collected
def count_triplets(reads, gene, codon_number):
    ''' Counts triplets that occur at codon_number in the coding sequence of
    gene in reads by aligning each read sequence to the context around the
    codon with the codon itself replaced with 'NNN'.

    codon_number is 1-based. Up to 28 bases of flanking sequence are used on
    each side of the codon; a flank is shorter when the codon lies close to
    the corresponding end of the coding sequence.

    Returns a Counter mapping each observed triplet to the number of reads in
    which it was found.
    '''
    sequence = gene.get_coding_sequence()

    start = (codon_number - 1) * 3
    around = 28

    # Clamp the left flank at the beginning of the sequence. Without this, a
    # codon within the first `around` bases yields a negative slice start,
    # which Python counts from the END of the sequence, so the flank comes
    # out wrong (usually empty) and the 'NNN' no longer sits at the index
    # that is looked up below.
    left_flank = sequence[max(0, start - around):start]
    right_flank = sequence[start + 3:start + 3 + around]
    context = left_flank + 'NNN' + right_flank

    # Index of the first 'N' within context; equals `around` unless the left
    # flank was truncated.
    codon_offset = len(left_flank)

    def extract_triplet(alignment, seq):
        ''' Returns the bases of seq aligned to the three 'N' positions, or
        None if any of them is unaligned or has fewer than 3 bases of seq on
        either side of it.
        '''
        # NOTE(review): assumes alignment['path'] holds (context index, read
        # index) pairs, matching the argument order of the call below -
        # confirm against sw.generate_alignments.
        path = dict(alignment['path'])
        bases = []
        for position in range(codon_offset, codon_offset + 3):
            if position not in path:
                return None
            read_index = path[position]
            if min(read_index, len(seq) - 1 - read_index) < 3:
                return None
            bases.append(seq[read_index])
        return ''.join(bases)

    triplets = Counter()

    for read in reads:
        # Only the first alignment reported by sw is considered.
        alignment = sw.generate_alignments(context, read.seq, 'overlap')[0]
        # NOTE(review): 'XO' is presumably the number of gap openings -
        # confirm against sw.
        if (len(alignment['path']) >= 20
                and len(alignment['mismatches']) <= 6
                and alignment['XO'] == 0):
            triplet = extract_triplet(alignment, read.seq)
            if triplet is not None:
                triplets[triplet] += 1

    return triplets