Ejemplo n.º 1
0
 def gapped_imgt_realignment(self):
     '''
     Aligns to gapped IMGT germline sequence. Used to determine
     IMGT-formatted position numberings so that identifying
     antibody regions is simplified.

     Sets the following attributes on ``self``: ``imgt_germline``,
     ``imgt_gapped_alignment``, ``alignment_reading_frame``,
     ``coding_region`` and ``aa_sequence``. Finally attempts IMGT
     numbering; a failure there is recorded through ``self.exception``
     rather than propagated.
     '''
     self.imgt_germline = get_imgt_germlines(species=self.species,
                                             gene_type=self.gene_type,
                                             gene=self.full)
     # the existing germline alignment with its gap characters removed
     query = self.germline_alignment.replace('-', '')
     aln_params = self._realignment_scoring_params(self.gene_type)
     # override the gap-open score for this alignment only
     aln_params['gap_open'] = -11
     aln_matrix = self._get_gapped_imgt_substitution_matrix()
     self.imgt_gapped_alignment = local_alignment(
         query,
         self.imgt_germline.gapped_nt_sequence,
         matrix=aln_matrix,
         **aln_params)
     # (2 * (b % 3)) % 3 maps b % 3 of 0, 1, 2 onto 0, 2, 1 respectively,
     # which is then shifted by the germline's coding start.
     self.alignment_reading_frame = (
         (2 * (self.imgt_gapped_alignment.target_begin % 3)) % 3) + (
             self.imgt_germline.coding_start - 1
         )  # IMGT coding start is 1-based
     self.coding_region = self._get_coding_region()
     self.aa_sequence = self._get_aa_sequence()
     try:
         self._imgt_numbering()
     except Exception:
         # Numbering problems are logged, not raised. Catching Exception
         # (instead of a bare except) lets KeyboardInterrupt and SystemExit
         # propagate normally.
         self.exception('IMGT NUMBERING', traceback.format_exc(), sep='\n')
Ejemplo n.º 2
0
 def _fallback_find_junc_nt_start(self, antibody):
     '''
     Fallback method for locating the nucleotide start of the junction.

     Locally aligns ``antibody.oriented_input`` against the FR3 portion of
     the V-gene's gapped IMGT germline and returns the position in the
     input immediately after the end of FR3. Also sets
     ``self.fallback_5prime`` to True and logs the alignment.
     '''
     self.fallback_5prime = True

     # FR3 of the gapped IMGT germline (slice 196:309), with the '.' gap
     # characters removed
     gapped_germline = antibody.v.imgt_germline.gapped_nt_sequence
     fr3_germline = gapped_germline[196:309].replace('.', '')
     antibody.log('GERM FR3 SEQUENCE:', fr3_germline)

     # the junction starts immediately after the end of FR3
     fr3_aln = local_alignment(antibody.oriented_input, fr3_germline)
     antibody.log('  QUERY: ', fr3_aln.aligned_query)
     antibody.log('         ', fr3_aln.alignment_midline)
     antibody.log('GERM FR3:', fr3_aln.aligned_target)

     # extend the end of the aligned query by however much of the germline
     # FR3 lies beyond the end of the alignment
     junction_start = fr3_aln.query_end + (
         len(fr3_germline) - fr3_aln.target_end)
     first_codon = antibody.oriented_input[junction_start:junction_start + 3]
     antibody.log('JUNC START:', first_codon, codons[first_codon],
                  junction_start)
     return junction_start
Ejemplo n.º 3
0
 def assign_dgene(self, seq, species):
     '''
     Assigns a D-gene germline to ``seq``.

     Reads the germline sequences from ``ungapped/d.fasta`` inside
     ``self.germline_directory``, locally aligns ``seq`` against each of
     them and against each reverse complement, and ranks the alignments
     by score.

     Returns a ``GermlineSegment`` for the top-scoring germline, with the
     next five candidates attached as ``others``, or ``None`` if no
     alignments were produced.
     '''
     db_file = os.path.join(self.germline_directory, 'ungapped/d.fasta')
     with open(db_file, 'r') as db_handle:
         forward = [Sequence(rec) for rec in SeqIO.parse(db_handle, 'fasta')]
     # reverse complements keep the ID of the germline they derive from
     reverse = [Sequence(g.reverse_complement, id=g.id) for g in forward]
     germs = forward + reverse

     alignments = local_alignment(seq, targets=germs,
                                  gap_open=-20, gap_extend=-2)
     alignments.sort(key=lambda aln: aln.score, reverse=True)
     ranked = [(aln.target.id, aln.score) for aln in alignments]
     if not ranked:
         return None

     top_gl, top_score = ranked[0]
     others = [GermlineSegment(gl, species, score=sc)
               for gl, sc in ranked[1:6]]
     return GermlineSegment(top_gl, species, score=top_score,
                            others=others, assigner_name=self.name)
Ejemplo n.º 4
0
    def realign_germline(self, antibody, query_start=None, query_end=None):
        '''
        Due to restrictions on the available scoring parameters in BLASTn, incorrect truncation
        of the v-gene alignment can occur. This function re-aligns the query sequence with
        the identified germline variable gene using more appropriate alignment parameters.

        If the assigner has annotated all of ``query_start``, ``query_end``,
        ``germline_start`` and ``germline_end`` on this segment, those positions are
        used to slice both sequences and a global alignment is performed. Otherwise,
        the query is sliced using the ``query_start``/``query_end`` arguments and a
        local alignment against the full germline sequence is performed.

        Args:

            antibody: the antibody being annotated. Its ``oriented_input.sequence``
                (the raw input sequence, correctly oriented) is used as the query,
                and its ``log`` method is used to report realignment failures.

            query_start (int): 5' position in the oriented input at which the sequence
                should be truncated prior to alignment with the germline sequence.

            query_end (int): 3' position in the oriented input at which the sequence
                should be truncated prior to alignment with the germline sequence.
        '''
        oriented_input = antibody.oriented_input
        germline_seq = self._get_germline_sequence_for_realignment()
        aln_params = self._realignment_scoring_params(self.gene_type)
        # if the alignment start/end positions have been annotated by the assigner,
        # force re-alignment using those parameters
        assigner_positions = (self.query_start, self.query_end,
                              self.germline_start, self.germline_end)
        if all(pos is not None for pos in assigner_positions):
            query = oriented_input.sequence[self.query_start:self.query_end]
            germline = germline_seq[self.germline_start:self.germline_end]
            alignment = global_alignment(query, germline, **aln_params)
        # use local alignment to determine alignment start/end positions if
        # they haven't already been determined by the assigner
        else:
            query = oriented_input.sequence[query_start:query_end]
            alignment = local_alignment(query, germline_seq, **aln_params)
        if alignment:
            # NOTE(review): the `query_start` argument is forwarded here even when
            # the assigner-annotated positions were used above -- confirm intended.
            self._process_realignment(antibody, alignment, query_start)
        else:
            antibody.log('GERMLINE REALIGNMENT ERROR')
            antibody.log('REALIGNMENT QUERY SEQUENCE:', query)
            antibody.log('QUERY START:', query_start)
            antibody.log('QUERY END:', query_end)
Ejemplo n.º 5
0
    def _fallback_find_junc_nt_end(self, antibody):
        '''
        Fallback method for locating the nucleotide end of the junction.

        Finds the last occurrence of the FR4-initiating residue ('W' for heavy
        chains, 'F' otherwise) in the J-gene's ungapped IMGT germline amino acid
        sequence, locally aligns ``antibody.oriented_input`` against the germline
        nucleotide sequence from that point onward, and returns the position in
        the input just past the first codon of FR4. Also sets
        ``self.fallback_3prime`` to True.

        Raises:
            ValueError: if the expected residue is not present in the germline
                amino acid sequence.
        '''
        self.fallback_3prime = True

        # need to find the start of FR4 in the IMGT germline sequence
        j_germline = antibody.j.imgt_germline
        end_res = 'W' if antibody.chain == 'heavy' else 'F'
        # last occurrence of the residue, found in a single pass
        last_res_pos = j_germline.ungapped_aa_sequence.rfind(end_res)
        if last_res_pos == -1:
            raise ValueError(
                'Residue {!r} not found in the J-gene germline amino acid '
                'sequence; cannot locate the start of FR4.'.format(end_res))
        # convert the residue index to a nucleotide offset
        # (coding_start is treated as 1-based, hence the -1)
        fr4_nt_start_pos = (j_germline.coding_start - 1) + (last_res_pos * 3)
        germ_fr4_sequence = j_germline.gapped_nt_sequence[fr4_nt_start_pos:]

        # find the end of the junction (end of the first codon of FR4)
        aln = local_alignment(antibody.oriented_input, germ_fr4_sequence)
        # back up by the number of germline positions preceding the alignment
        fr4_start = aln.query_begin - aln.target_begin
        junc_end_codon = antibody.oriented_input[fr4_start:fr4_start + 3]
        antibody.log('JUNC END:', junc_end_codon, codons[junc_end_codon],
                     fr4_start)
        return fr4_start + 3
Ejemplo n.º 6
0
 def _get_isotype_query_region(self, antibody):
     '''
     Returns the part of ``antibody.oriented_input`` that starts at the
     target end position of the local alignment of ``antibody.vdj_nt``
     against ``antibody.oriented_input``.
     '''
     vdj_aln = local_alignment(antibody.vdj_nt, antibody.oriented_input)
     downstream_start = vdj_aln.target_end
     return antibody.oriented_input[downstream_start:]
Ejemplo n.º 7
0
 def _get_alignments(self, antibody, isotype_seqs):
     '''
     Locally aligns the isotype query region of ``antibody`` against each
     sequence in ``isotype_seqs`` and returns the alignments as a list,
     ordered from highest to lowest score.
     '''
     ranked = list(local_alignment(
         self._get_isotype_query_region(antibody),
         targets=isotype_seqs,
         gap_open_penalty=22,
         gap_extend_penalty=1))
     ranked.sort(key=lambda aln: aln.score, reverse=True)
     return ranked