Beispiel #1
0
    def realign_variable(self, germline_gene, match=3, mismatch=-2, gap_open_penalty=22, gap_extend_penalty=1):
        '''
        Re-align the query sequence against the germline variable gene.

        BLASTn only offers a restricted set of scoring parameters, which can
        truncate the v-gene alignment incorrectly. This method repeats the
        alignment locally, in both orientations of the query, with the scoring
        parameters supplied here and hands the better-scoring alignment to
        `self._process_realignment`.

        Args:

            germline_gene (str): name of the germline variable gene
                (ex: 'IGHV1-2*02'). Currently unused by this method: the
                alignment target is whatever `self.germline_seq` already holds.

            match, mismatch, gap_open_penalty, gap_extend_penalty: scoring
                parameters forwarded unchanged to `local_alignment`.

        Side effects:

            When the forward alignment does not score strictly higher than the
            reverse-complement alignment (ties included), `self.strand` is set
            to 'minus' and `self.input_sequence` is set to the reverse
            complement before processing.
        '''
        scoring = dict(match=match, mismatch=mismatch,
                       gap_open_penalty=gap_open_penalty,
                       gap_extend_penalty=gap_extend_penalty)

        forward = local_alignment(self.seq.sequence, self.germline_seq, **scoring)
        revcomp = self.seq.reverse_complement
        reverse = local_alignment(revcomp, self.germline_seq, **scoring)

        # forward orientation wins only on a strictly higher score
        if forward.score > reverse.score:
            self._process_realignment(forward)
            return

        self.strand = 'minus'
        self.input_sequence = revcomp
        self._process_realignment(reverse)
Beispiel #2
0
 def _fallback_find_junc_nt_start(self, antibody):
     '''
     Fallback for locating the nucleotide start of the junction.

     Takes positions 196:309 of the IMGT gapped germline V nucleotide
     sequence (the FR3 region, per the original author's note), strips the
     '.' characters, and locally aligns the oriented input against it.
     The junction is taken to begin immediately after the end of FR3.

     Sets `self.fallback_5prime` to True and writes the alignment and the
     first junction codon to `antibody.log`.

     Returns the computed FR3 end position in `antibody.oriented_input`.
     '''
     self.fallback_5prime = True

     # FR3 slice of the gapped germline, with the '.' gap characters removed
     gapped_fr3 = antibody.v.imgt_germline.gapped_nt_sequence[196:309]
     fr3_germ = gapped_fr3.replace('.', '')
     antibody.log('GERM FR3 SEQUENCE:', fr3_germ)

     fr3_aln = local_alignment(antibody.oriented_input, fr3_germ)
     antibody.log('  QUERY: ', fr3_aln.aligned_query)
     antibody.log('         ', fr3_aln.alignment_midline)
     antibody.log('GERM FR3:', fr3_aln.aligned_target)

     # extend the query end by however much of the germline FR3 lies beyond
     # the end of the aligned target region
     query_end = fr3_aln.query_end
     germ_overhang = len(fr3_germ) - fr3_aln.target_end
     fr3_end = query_end + germ_overhang

     first_codon = antibody.oriented_input[fr3_end:fr3_end + 3]
     # NOTE(review): `codons[...]` is looked up unconditionally; presumably a
     # codon table that may not contain short or ambiguous codons -- confirm
     antibody.log('JUNC START:', first_codon, codons[first_codon], fr3_end)
     return fr3_end
Beispiel #3
0
    def _fallback_find_junc_nt_end(self, antibody):
        self.fallback_3prime = True

        # need to find the start of FR4 in the IMGT germline sequence
        end_res = 'W' if antibody.chain == 'heavy' else 'F'
        for i, res in enumerate(antibody.j.imgt_germline.ungapped_aa_sequence):
            if res == end_res and end_res not in antibody.j.imgt_germline.ungapped_aa_sequence[i + 1:]:
                fr4_nt_start_pos = (antibody.j.imgt_germline.coding_start - 1) + (i * 3)
                break
        germ_fr4_sequence = antibody.j.imgt_germline.gapped_nt_sequence[fr4_nt_start_pos:]

        # find the end of the junction (end of the first codon of FR4)
        aln = local_alignment(antibody.oriented_input, germ_fr4_sequence)
        fr4_start = aln.query_begin - aln.target_begin
        junc_end_codon = antibody.oriented_input[fr4_start:fr4_start + 3]
        antibody.log('JUNC END:', junc_end_codon, codons[junc_end_codon], fr4_start)
        return fr4_start + 3
Beispiel #4
0
    def realign_germline(self, antibody, query_start=None, query_end=None):
        '''
        Due to restrictions on the available scoring parameters in BLASTn, incorrect truncation
        of the v-gene alignment can occur. This function re-aligns the query sequence with
        the identified germline gene using more appropriate alignment parameters.

        If the assigner has already annotated all of `self.query_start`,
        `self.query_end`, `self.germline_start` and `self.germline_end`, the
        annotated query and germline regions are globally aligned. Otherwise
        the query (optionally truncated with `query_start`/`query_end`) is
        locally aligned against the full germline sequence.

        A successful alignment is passed to `self._process_realignment`;
        a failed one is reported through `antibody.log`.

        Args:

            antibody: object providing `oriented_input` (the raw input sequence,
                correctly oriented, exposing a `sequence` attribute) and a
                `log()` method.

            query_start (int): 5' position in the oriented input at which the sequence
                should be truncated prior to local alignment with the germline sequence.
                Also forwarded to `self._process_realignment`.

            query_end (int): 3' position in the oriented input at which the sequence
                should be truncated prior to local alignment with the germline sequence.
        '''
        oriented_input = antibody.oriented_input
        germline_seq = self._get_germline_sequence_for_realignment()
        aln_params = self._realignment_scoring_params(self.gene_type)
        # if the alignment start/end positions have been annotated by the assigner,
        # force re-alignment using those parameters
        annotated_positions = (self.query_start,
                               self.query_end,
                               self.germline_start,
                               self.germline_end)
        if all(pos is not None for pos in annotated_positions):
            query = oriented_input.sequence[self.query_start:self.query_end]
            germline = germline_seq[self.germline_start:self.germline_end]
            alignment = global_alignment(query, germline, **aln_params)
        # use local alignment to determine alignment start/end positions if
        # they haven't already been determined by the assigner
        else:
            query = oriented_input.sequence[query_start:query_end]
            alignment = local_alignment(query, germline_seq, **aln_params)
        if alignment:
            self._process_realignment(antibody, alignment, query_start)
        else:
            antibody.log('GERMLINE REALIGNMENT ERROR')
            antibody.log('REALIGNMENT QUERY SEQUENCE:', query)
            antibody.log('QUERY START:', query_start)
            antibody.log('QUERY END:', query_end)
Beispiel #5
0
def assign_d(seq, species):
    '''
    Identifies the germline diversity gene for a given sequence.

    The species-specific D-gene FASTA database is loaded from
    'ssw/dbs/<species>_D.fasta' (species lower-cased), located two directory
    levels above this file. Each germline and its reverse complement are
    aligned to the input using `local_alignment`, and the five
    highest-scoring alignments are used to build the result.

    Input is a junction sequence (as a string) and the species of origin.

    Output is a DiversityResult object, or None if building the result
    raises an IndexError.
    '''
    mod_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    db_file = os.path.join(mod_dir, 'ssw/dbs/{}_D.fasta'.format(species.lower()))
    # the context manager closes the handle even if parsing raises
    with open(db_file, 'r') as db_handle:
        germs = [Sequence(s) for s in SeqIO.parse(db_handle, 'fasta')]
    # D genes are also searched in the reverse-complement orientation
    rc_germs = [Sequence(s.reverse_complement, id=s.id) for s in germs]
    germs.extend(rc_germs)
    alignments = local_alignment(seq, targets=germs,
                                 gap_open_penalty=20, gap_extend_penalty=2)
    alignments.sort(key=lambda x: x.score, reverse=True)
    try:
        return blast.DiversityResult(seq, alignments[:5])
    except IndexError:
        return None
Beispiel #6
0
 def gapped_imgt_realignment(self):
     '''
     Aligns to gapped IMGT germline sequence. Used to determine
     IMGT-formatted position numberings so that identifying
     antibody regions is simplified.

     Populates `self.imgt_germline`, `self.imgt_gapped_alignment`,
     `self.alignment_reading_frame`, `self.coding_region` and
     `self.aa_sequence`, then attempts IMGT numbering. A failure during
     numbering is recorded via `self.exception` rather than propagated.
     '''
     self.imgt_germline = get_imgt_germlines(species=self.species,
                                             gene_type=self.gene_type,
                                             gene=self.full)
     # the query is the germline alignment string with '-' characters removed
     query = self.germline_alignment.replace('-', '')
     aln_params = self._realignment_scoring_params(self.gene_type)
     # override the gap-open score for this alignment
     aln_params['gap_open'] = -11
     aln_matrix = self._get_gapped_imgt_substitution_matrix()
     self.imgt_gapped_alignment = local_alignment(query,
                                                  self.imgt_germline.gapped_nt_sequence,
                                                  matrix=aln_matrix,
                                                  **aln_params)
     self.alignment_reading_frame = ((2 * (self.imgt_gapped_alignment.target_begin % 3)) % 3) + (self.imgt_germline.coding_start - 1)  # IMGT coding start is 1-based
     self.coding_region = self._get_coding_region()
     self.aa_sequence = self._get_aa_sequence()
     try:
         self._imgt_numbering()
     # only ordinary errors are logged; KeyboardInterrupt and SystemExit
     # are allowed to propagate
     except Exception:
         self.exception('IMGT NUMBERING', traceback.format_exc(), sep='\n')
Beispiel #7
0
 def _find_junction_nt_end(self, vdj):
     '''
     Locate the junction end by aligning the J-gene FR4 nucleotide
     sequence, minus its first three nucleotides, against `vdj.vdj_nt`.

     Returns the alignment's `target_begin`, or None when the alignment
     result is falsy.
     '''
     fr4_tail = vdj.j.regions.nt_seqs['FR4'][3:]
     hit = local_alignment(fr4_tail, vdj.vdj_nt)
     if not hit:
         return None
     return hit.target_begin
Beispiel #8
0
 def _find_junction_nt_start(self, vdj):
     '''
     Locate the junction start by aligning the V-gene FR3 nucleotide
     sequence, minus its last three nucleotides, against `vdj.vdj_nt`.

     Returns the alignment's `target_end` plus one, or None when the
     alignment result is falsy.
     '''
     fr3_head = vdj.v.regions.nt_seqs['FR3'][:-3]
     hit = local_alignment(fr3_head, vdj.vdj_nt)
     if not hit:
         return None
     return hit.target_end + 1
Beispiel #9
0
 def _get_d_start_position_nt(self, vdj):
     '''
     Return the position in `self.cdr3_nt` at which the local alignment
     of `self.d_nt` begins (the alignment's `target_begin`).

     The `vdj` argument is accepted but not used here.
     '''
     d_hit = local_alignment(self.d_nt, self.cdr3_nt,
                             gap_extend_penalty=1, gap_open_penalty=22)
     return d_hit.target_begin
Beispiel #10
0
 def _get_isotype_query_region(self, antibody):
     '''
     Return the part of `antibody.oriented_input` starting at the
     `target_end` of the local alignment of `antibody.vdj_nt` against it.
     '''
     vdj_nt = antibody.vdj_nt
     full_input = antibody.oriented_input
     vdj_hit = local_alignment(vdj_nt, full_input)
     return full_input[vdj_hit.target_end:]
Beispiel #11
0
 def _get_alignments(self, antibody, isotype_seqs):
     '''
     Locally align the isotype query region of `antibody` against every
     sequence in `isotype_seqs` and return the alignments as a new list
     ordered from highest to lowest score.
     '''
     region = self._get_isotype_query_region(antibody)
     scoring = {'gap_open_penalty': 22, 'gap_extend_penalty': 1}
     hits = local_alignment(region, targets=isotype_seqs, **scoring)
     ranked = list(hits)
     ranked.sort(key=lambda hit: hit.score, reverse=True)
     return ranked
Beispiel #12
0
 def _get_isotype_query_region(self, vdj):
     '''
     Return the part of `vdj.raw_input` starting at the `target_end` of
     the local alignment of `vdj.vdj_nt` against it.
     '''
     vdj_nt = vdj.vdj_nt
     raw = vdj.raw_input
     vdj_hit = local_alignment(vdj_nt, raw)
     return raw[vdj_hit.target_end:]
Beispiel #13
0
 def _get_alignments(self, vdj, isotype_seqs):
     '''
     Locally align the isotype query region of `vdj` against every
     sequence in `isotype_seqs` and return the alignments as a new list
     ordered from highest to lowest score.
     '''
     region = self._get_isotype_query_region(vdj)
     scoring = {'gap_open_penalty': 22, 'gap_extend_penalty': 1}
     hits = local_alignment(region, targets=isotype_seqs, **scoring)
     ranked = list(hits)
     ranked.sort(key=lambda hit: hit.score, reverse=True)
     return ranked