def count_triplets(reads, gene, codon_number):
    ''' Counts triplets that occur at codon_number in the coding sequence of
    gene in reads by local alignment of each read sequence to the context
    around the codon with the codon itself replaced with 'NNN'.

    Args:
        reads: iterable of objects exposing a seq attribute.
        gene: object exposing get_coding_sequence().
        codon_number: 1-based index of the codon (codon 1 starts at offset 0
            of the coding sequence).

    Returns:
        A Counter mapping each observed triplet to the number of reads in
        which it was found at the codon position.

    Raises:
        ValueError: if codon_number is less than 1.
    '''
    if codon_number < 1:
        raise ValueError(
            'codon_number must be >= 1, got {0}'.format(codon_number))

    sequence = gene.get_coding_sequence()
    start = (codon_number - 1) * 3
    around = 28

    # Clamp the left edge at 0: a negative slice start would wrap around to
    # the end of the sequence and yield an empty (or wrong) upstream flank.
    upstream = sequence[max(0, start - around):start]
    downstream = sequence[start + 3:start + 3 + around]
    context = upstream + 'NNN' + downstream

    # The codon sits right after whatever upstream flank was available,
    # which is shorter than `around` for codons near the start of the gene.
    codon_offset = len(upstream)
    codon_positions = range(codon_offset, codon_offset + 3)

    def extract_triplet(alignment, seq):
        ''' Returns the read bases aligned to the codon, or None if any codon
        position is unaligned or maps within 3 bases of either read end.
        '''
        # path pairs are (context position, read position).
        path = dict(alignment['path'])
        bases = []
        for position in codon_positions:
            if position not in path:
                return None
            read_index = path[position]
            if min(read_index, len(seq) - 1 - read_index) < 3:
                return None
            bases.append(seq[read_index])
        return ''.join(bases)

    triplets = Counter()
    for read in reads:
        alignment = sw.generate_alignments(context, read.seq, 'overlap')[0]
        # NOTE(review): 'XO' is presumably a gap-open count - confirm
        # against the sw module.
        acceptable = (len(alignment['path']) >= 20 and
                      len(alignment['mismatches']) <= 6 and
                      alignment['XO'] == 0)
        if not acceptable:
            continue
        triplet = extract_triplet(alignment, read.seq)
        if triplet is not None:
            triplets[triplet] += 1

    return triplets
def is_synthetic(read, synthetic_sequences):
    ''' Reports whether read.seq aligns closely to any of synthetic_sequences.

    Each candidate is aligned to read.seq in 'overlap' mode; exactly one
    alignment is expected back from sw.generate_alignments. A candidate
    counts as a hit when the alignment path has more than 10 entries and
    2 * len(path) - score is at most 7.

    Returns:
        True as soon as one candidate hits, otherwise False.
    '''
    def resembles(candidate):
        (hit,) = sw.generate_alignments(read.seq, candidate, 'overlap')
        # Shortfall of the score relative to 2 per path entry
        # (presumably the perfect-match score - confirm against sw).
        shortfall = 2 * len(hit['path']) - hit['score']
        return len(hit['path']) > 10 and shortfall <= 7

    return any(resembles(candidate) for candidate in synthetic_sequences)
def is_synthetic(read, synthetic_sequences):
    """ Reports whether read.seq aligns closely to a synthetic sequence.

    Every entry of synthetic_sequences is aligned against read.seq in
    "overlap" mode (a single alignment is unpacked from the result). The
    read is called synthetic when some alignment has a path longer than 10
    and a score no more than 7 below twice the path length.

    Returns:
        True if any candidate satisfies both conditions, else False.
    """
    for candidate in synthetic_sequences:
        (hit,) = sw.generate_alignments(read.seq, candidate, "overlap")
        path_length = len(hit["path"])
        # NOTE(review): 2 per path entry is presumably the maximum
        # attainable score - confirm against the sw scoring scheme.
        penalty = 2 * path_length - hit["score"]
        long_enough = path_length > 10
        close_enough = penalty <= 7
        if long_enough and close_enough:
            return True
    return False
def trim_by_local_alignment(adapter, seq):
    ''' Try to find a near-exact match. If this fails, do a local alignment.

    First searches seq for the first adapter_prefix_length characters of
    adapter via find_adapter. If the position found is past
    len(seq) - adapter_prefix_length, falls back to a single
    'unpaired_adapter' alignment of the whole adapter against seq and uses
    where that alignment starts in seq, provided the alignment is non-empty
    and its score shortfall is at most 10/22 per path entry.

    Returns:
        The index in seq at which to trim.
    '''
    # NOTE(review): the literal 1 is presumably the allowed number of
    # mismatches for find_adapter - confirm against its definition.
    prefix_position = find_adapter(adapter[:adapter_prefix_length], 1, seq)

    needs_alignment = prefix_position > len(seq) - adapter_prefix_length
    if not needs_alignment:
        return prefix_position

    (alignment,) = sw.generate_alignments(
        adapter,
        seq,
        'unpaired_adapter',
        max_alignments=1,
    )
    path = alignment['path']
    score_diff = 2 * len(path) - alignment['score']
    # Evaluated before the emptiness check, as in the original ordering.
    aligned_start = sw.first_target_index(path)

    if path and score_diff <= 10. / 22 * len(path):
        return aligned_start
    return prefix_position
def trim_by_local_alignment(adapter, seq):
    ''' Try to find a near-exact match. If this fails, do a local alignment.

    The near-exact search looks for adapter[:adapter_prefix_length] in seq
    with find_adapter. Only when its result exceeds
    len(seq) - adapter_prefix_length is the full adapter aligned to seq in
    'unpaired_adapter' mode (one alignment requested). The alignment's start
    in seq replaces the trim position if the path is non-empty and
    2 * len(path) - score <= 10/22 * len(path).

    Returns:
        The position in seq where trimming should happen.
    '''
    prefix = adapter[:adapter_prefix_length]
    trim_at = find_adapter(prefix, 1, seq)
    latest_full_prefix_start = len(seq) - adapter_prefix_length

    if trim_at > latest_full_prefix_start:
        (alignment,) = sw.generate_alignments(
            adapter, seq, 'unpaired_adapter', max_alignments=1,
        )
        path_length = len(alignment['path'])
        score_diff = 2 * path_length - alignment['score']
        adapter_start_in_seq = sw.first_target_index(alignment['path'])

        # Accept the alignment only if its score shortfall stays within
        # 10/22 per path entry.
        tolerated_diff = 10. / 22 * len(alignment['path'])
        accepted = alignment['path'] and score_diff <= tolerated_diff
        trim_at = adapter_start_in_seq if accepted else trim_at

    return trim_at
def get_edge_alignments(read, targets):
    ''' Collects 'unpaired_adapter' alignments of read against targets.

    Both read.seq and its reverse complement are aligned against the seq of
    every target with a minimum score of 12. Only alignments whose score is
    at least 2 * len(path) are kept. Each kept alignment is annotated in
    place with 'query' (the sequence that was aligned), 'rname' (the
    target's name) and 'is_reverse'.

    Returns:
        A list of the annotated alignments, in the order they were produced.
    '''
    min_score = 12
    orientations = [
        (read.seq, False),
        (utilities.reverse_complement(read.seq), True),
    ]

    kept = []
    for target in targets:
        for query, is_reverse in orientations:
            candidates = sw.generate_alignments(
                query,
                target.seq,
                'unpaired_adapter',
                min_score=min_score,
            )
            for alignment in candidates:
                # NOTE(review): a score of 2 per path entry is presumably
                # an error-free alignment - confirm against sw scoring.
                meets_threshold = (
                    alignment['score'] >= 2 * len(alignment['path'])
                )
                if not meets_threshold:
                    continue
                alignment['query'] = query
                alignment['rname'] = target.name
                alignment['is_reverse'] = is_reverse
                kept.append(alignment)

    return kept
def get_edge_alignments(read, targets):
    ''' Collects 'unpaired_adapter' alignments of read against targets.

    read.seq and its reverse complement are each aligned to every target's
    seq with min_score=10. An alignment is retained when its score is at
    least twice the length of its path; retained alignments get 'query',
    'rname' and 'is_reverse' entries set on them.

    Returns:
        A list of retained alignments in production order.
    '''
    forward = read.seq
    reverse = utilities.reverse_complement(read.seq)

    def tag(alignment, query, target, is_reverse):
        # Records which sequence/target/strand produced this alignment.
        alignment['query'] = query
        alignment['rname'] = target.name
        alignment['is_reverse'] = is_reverse
        return alignment

    collected = []
    for target in targets:
        for query, is_reverse in ((forward, False), (reverse, True)):
            found = sw.generate_alignments(
                query, target.seq, 'unpaired_adapter', min_score=10,
            )
            collected.extend(
                tag(alignment, query, target, is_reverse)
                for alignment in found
                if alignment['score'] >= 2 * len(alignment['path'])
            )
    return collected
def get_local_alignments(read, targets):
    ''' Collects 'local' alignments of read against targets.

    For each target, read.seq and its reverse complement are aligned to
    target.seq, requesting at most 3 alignments with a minimum score of
    min(20, 2 * len(target.seq)). Alignments scoring at least
    0.5 * 2 * len(path) are kept and annotated in place with 'query',
    'rname' and 'is_reverse'.

    Returns:
        A list of the annotated alignments, in the order they were produced.
    '''
    orientations = [
        (read.seq, False),
        (utilities.reverse_complement(read.seq), True),
    ]

    kept = []
    for target in targets:
        # Cap the required score so short targets can still qualify.
        required_score = min(20, 2 * len(target.seq))
        for query, is_reverse in orientations:
            candidates = sw.generate_alignments(
                query,
                target.seq,
                'local',
                min_score=required_score,
                max_alignments=3,
            )
            for alignment in candidates:
                good_enough = (
                    alignment['score'] >= 0.5 * 2 * len(alignment['path'])
                )
                if not good_enough:
                    continue
                alignment['query'] = query
                alignment['rname'] = target.name
                alignment['is_reverse'] = is_reverse
                kept.append(alignment)

    return kept
def get_local_alignments(read, targets):
    ''' Collects 'local' alignments of read against targets.

    read.seq and its reverse complement are each aligned to every target's
    seq (up to 3 alignments per query, minimum score
    min(20, 2 * len(target.seq))). An alignment is retained when its score
    is at least 0.7 * 2 * len(path); retained alignments get 'query',
    'rname' and 'is_reverse' entries set on them.

    Returns:
        A list of retained alignments in production order.
    '''
    forward = read.seq
    reverse = utilities.reverse_complement(read.seq)

    def tag(alignment, query, target, is_reverse):
        # Records which sequence/target/strand produced this alignment.
        alignment['query'] = query
        alignment['rname'] = target.name
        alignment['is_reverse'] = is_reverse
        return alignment

    collected = []
    for target in targets:
        score_floor = min(20, 2 * len(target.seq))
        for query, is_reverse in ((forward, False), (reverse, True)):
            found = sw.generate_alignments(
                query,
                target.seq,
                'local',
                min_score=score_floor,
                max_alignments=3,
            )
            collected.extend(
                tag(alignment, query, target, is_reverse)
                for alignment in found
                if alignment['score'] >= 0.7 * 2 * len(alignment['path'])
            )
    return collected
def count_triplets(reads, gene, codon_number):
    ''' Counts triplets that occur at codon_number in the coding sequence of
    gene in reads by local alignment of each read sequence to the context
    around the codon with the codon itself replaced with 'NNN'.

    Args:
        reads: iterable of objects exposing a seq attribute.
        gene: object exposing get_coding_sequence().
        codon_number: 1-based codon index into the coding sequence.

    Returns:
        A Counter keyed by triplet, valued by the number of reads whose
        alignment placed that triplet on the codon.

    Raises:
        ValueError: if codon_number is less than 1.
    '''
    if codon_number < 1:
        raise ValueError(
            'codon_number must be >= 1, got {0}'.format(codon_number))

    sequence = gene.get_coding_sequence()
    start = (codon_number - 1) * 3
    around = 28

    # For codons closer than `around` bases to the start of the sequence,
    # start - around is negative and would be interpreted as an offset from
    # the END of the sequence; clamp to 0 so the flank is simply shorter.
    flank_start = max(0, start - around)
    left_flank = sequence[flank_start:start]
    right_flank = sequence[start + 3:start + 3 + around]
    context = left_flank + 'NNN' + right_flank

    # Position of the first 'N' within context; equals `around` unless the
    # left flank was truncated.
    first_codon_position = len(left_flank)

    def relevant_alignment(alignment, seq):
        ''' Returns (relevant, triplet). relevant is False when a codon
        position is missing from the alignment path or maps to within 3
        bases of either end of seq; triplet is only meaningful when
        relevant is True.
        '''
        # path pairs are (context position, read position).
        path = dict(alignment['path'])
        read_indices = []
        for offset in range(3):
            position = first_codon_position + offset
            if position not in path:
                return False, ''
            index = path[position]
            if min(index, len(seq) - 1 - index) < 3:
                return False, ''
            read_indices.append(index)
        return True, ''.join(seq[index] for index in read_indices)

    triplets = Counter()
    for read in reads:
        alignment = sw.generate_alignments(context, read.seq, 'overlap')[0]
        if len(alignment['path']) < 20:
            continue
        if len(alignment['mismatches']) > 6:
            continue
        # NOTE(review): 'XO' is presumably the number of gap opens -
        # confirm against the sw module.
        if alignment['XO'] != 0:
            continue
        relevant, triplet = relevant_alignment(alignment, read.seq)
        if relevant:
            triplets[triplet] += 1

    return triplets