Example #1
0
 def Valign_chain(self,chain,verbose=False):
     """Align `chain` against the reference V segments and annotate it in place.

     Candidate V segments are pre-selected by shared k-mer score
     (`self.numCrudeVCandidates` best ones) and then aligned with
     `vdj_aligner.bestalignNW` using `self.minVscore`.

     When `bestalignNW` reports a segment (i.e. not None), `chain` is
     modified as follows:
       * features of the chosen reference are copied onto `chain`
       * `chain.annotations['gapped_query']` / `['gapped_reference']`
         are set to the gapped alignment strings
       * `chain.letter_annotations['alignment']` is updated in the
         aligned region
       * any copied 'CDR3-IMGT' feature is removed
       * the 'V-REGION' feature gets a `codon_start` qualifier derived
         from the start of the '2nd-CYS' feature

     Parameters:
         chain   -- record to align; must provide `seq`, `features`,
                    `annotations`, `letter_annotations`, `_features`
                    and `_update_feature_dict()`.
         verbose -- accepted but not used by this method.

     Returns:
         The score returned by `vdj_aligner.bestalignNW` (returned
         whether or not a segment was accepted).

     NOTE(review): after a successful alignment the '2nd-CYS' and
     'V-REGION' lookups are unguarded; presumably a KeyError propagates
     if the reference lacks them -- confirm against the reference data.
     """
     # The query string is needed several times before `chain` is touched,
     # so convert it once.
     # NOTE(review): `tostring()` looks like the legacy Biopython Seq API --
     # confirm before porting to a newer Biopython.
     query = chain.seq.tostring()

     # compute hashes from query seq
     querykeys = vdj_aligner.seq2kmers(query,self.seedpatterns)

     # for each reference V segment and each pattern, how many shared k-mers are there?
     Vscores_hash = vdj_aligner.hashscore(self.Vseqlistkeys,querykeys)

     # get numCrudeVCandidates highest scores in Vscores and store their names in descending order
     goodVseglist = sorted(self.refV.keys(),key=lambda k: Vscores_hash[k],reverse=True)[0:self.numCrudeVCandidates]
     goodVsegdict = dict((seg,self.refV[seg].seq.tostring()) for seg in goodVseglist)

     # Needleman-Wunsch of V segment
     (bestVseg,bestVscore,bestVscoremat,bestVtracemat) = vdj_aligner.bestalignNW(goodVsegdict,query,self.minVscore)

     # if successful alignment
     if bestVseg is not None:
         # copy features from ref to query
         Vrefaln,Vqueryaln = vdj_aligner.construct_alignment( self.refV[bestVseg].seq.tostring(), query, bestVscoremat, bestVtracemat )
         coord_mapping = vdj_aligner.ungapped_coord_mapping(Vrefaln, Vqueryaln)
         seqtools.copy_features(self.refV[bestVseg], chain, coord_mapping, erase=['translation'], replace=False)

         # store gapped aln
         chain.annotations['gapped_query'] = Vqueryaln
         chain.annotations['gapped_reference'] = Vrefaln

         # annotate mutations
         curr_annot = chain.letter_annotations['alignment']
         aln_annot = vdj_aligner.alignment_annotation(Vrefaln,Vqueryaln)
         # Drop every 'D' character.  `str.replace` behaves the same on
         # Python 2 and 3, unlike the two-argument `translate(None, 'D')`.
         aln_annot = aln_annot.replace('D','')
         # lNER: number of leading 'I' characters.
         # rNER: index just past the last non-'I' character.
         lNER = len(aln_annot) - len(aln_annot.lstrip('I'))
         rNER = len(aln_annot.rstrip('I'))
         # If the annotation is all 'I' (or empty), rstrip yields 0 < lNER and
         # the splice below would duplicate curr_annot; clamp so the splice
         # keeps the original length.
         rNER = max(lNER,rNER)
         # Keep the existing annotation outside [lNER, rNER) and overwrite inside.
         chain.letter_annotations['alignment'] = curr_annot[:lNER] + aln_annot[lNER:rNER] + curr_annot[rNER:]

         # perform some curating; esp, CDR3-IMGT is annotated in V
         # references, though it's not complete. I will recreate that
         # annotation manually.
         chain._update_feature_dict()
         try:    # some reference entries do not have CDR3 annotations
             chain.features.pop(chain._features['CDR3-IMGT'][0])
             chain._features.pop('CDR3-IMGT')
             chain._update_feature_dict()
         except KeyError:
             pass

         # update codon_start of V-REGION anchored to the CDR3 2nd-CYS
         cys = chain.features[ chain._features['2nd-CYS'][0] ]
         v_reg = chain.features[ chain._features['V-REGION'][0] ]
         # codon_start is 1-based: frame of the 2nd-CYS start, plus one
         v_reg.qualifiers['codon_start'] = [cys.location.start.position % 3 + 1]

     return bestVscore
Example #2
0
 def Valign_chain(self,chain,verbose=False):
     """Align `chain` against the reference V segments and annotate it in place.

     Candidate V segments are pre-selected by shared k-mer score
     (`self.numCrudeVCandidates` best ones) and then aligned with
     `vdj_aligner.bestalignNW` using `self.minVscore`.

     When `bestalignNW` reports a segment (i.e. not None), `chain` is
     modified as follows:
       * features of the chosen reference are copied onto `chain`
       * `chain.annotations['gapped_query']` / `['gapped_reference']`
         are set to the gapped alignment strings
       * `chain.letter_annotations['alignment']` is updated in the
         aligned region
       * any copied 'CDR3-IMGT' feature is removed
       * the 'V-REGION' feature gets a `codon_start` qualifier derived
         from the start of the '2nd-CYS' feature

     Parameters:
         chain   -- record to align; must provide `seq`, `features`,
                    `annotations`, `letter_annotations`, `_features`
                    and `_update_feature_dict()`.
         verbose -- accepted but not used by this method.

     Returns:
         The score returned by `vdj_aligner.bestalignNW` (returned
         whether or not a segment was accepted).

     NOTE(review): after a successful alignment the '2nd-CYS' and
     'V-REGION' lookups are unguarded; presumably a KeyError propagates
     if the reference lacks them -- confirm against the reference data.
     """
     # The query string is needed several times before `chain` is touched,
     # so convert it once.
     # NOTE(review): `tostring()` looks like the legacy Biopython Seq API --
     # confirm before porting to a newer Biopython.
     query = chain.seq.tostring()

     # compute hashes from query seq
     querykeys = vdj_aligner.seq2kmers(query,self.seedpatterns)

     # for each reference V segment and each pattern, how many shared k-mers are there?
     Vscores_hash = vdj_aligner.hashscore(self.Vseqlistkeys,querykeys)

     # get numCrudeVCandidates highest scores in Vscores and store their names in descending order
     goodVseglist = sorted(self.refV.keys(),key=lambda k: Vscores_hash[k],reverse=True)[0:self.numCrudeVCandidates]
     goodVsegdict = dict((seg,self.refV[seg].seq.tostring()) for seg in goodVseglist)

     # Needleman-Wunsch of V segment
     (bestVseg,bestVscore,bestVscoremat,bestVtracemat) = vdj_aligner.bestalignNW(goodVsegdict,query,self.minVscore)

     # if successful alignment
     if bestVseg is not None:
         # copy features from ref to query
         Vrefaln,Vqueryaln = vdj_aligner.construct_alignment( self.refV[bestVseg].seq.tostring(), query, bestVscoremat, bestVtracemat )
         coord_mapping = vdj_aligner.ungapped_coord_mapping(Vrefaln, Vqueryaln)
         seqtools.copy_features(self.refV[bestVseg], chain, coord_mapping, erase=['translation'], replace=False)

         # store gapped aln
         chain.annotations['gapped_query'] = Vqueryaln
         chain.annotations['gapped_reference'] = Vrefaln

         # annotate mutations
         curr_annot = chain.letter_annotations['alignment']
         aln_annot = vdj_aligner.alignment_annotation(Vrefaln,Vqueryaln)
         # Drop every 'D' character.  `str.replace` behaves the same on
         # Python 2 and 3, unlike the two-argument `translate(None, 'D')`.
         aln_annot = aln_annot.replace('D','')
         # lNER: number of leading 'I' characters.
         # rNER: index just past the last non-'I' character.
         lNER = len(aln_annot) - len(aln_annot.lstrip('I'))
         rNER = len(aln_annot.rstrip('I'))
         # If the annotation is all 'I' (or empty), rstrip yields 0 < lNER and
         # the splice below would duplicate curr_annot; clamp so the splice
         # keeps the original length.
         rNER = max(lNER,rNER)
         # Keep the existing annotation outside [lNER, rNER) and overwrite inside.
         chain.letter_annotations['alignment'] = curr_annot[:lNER] + aln_annot[lNER:rNER] + curr_annot[rNER:]

         # perform some curating; esp, CDR3-IMGT is annotated in V
         # references, though it's not complete. I will recreate that
         # annotation manually.
         chain._update_feature_dict()
         try:    # some reference entries do not have CDR3 annotations
             chain.features.pop(chain._features['CDR3-IMGT'][0])
             chain._features.pop('CDR3-IMGT')
             chain._update_feature_dict()
         except KeyError:
             pass

         # update codon_start of V-REGION anchored to the CDR3 2nd-CYS
         cys = chain.features[ chain._features['2nd-CYS'][0] ]
         v_reg = chain.features[ chain._features['V-REGION'][0] ]
         # codon_start is 1-based: frame of the 2nd-CYS start, plus one
         v_reg.qualifiers['codon_start'] = [cys.location.start.position % 3 + 1]

     return bestVscore
Example #3
0
 def Jalign_chain(self,chain,verbose=False):
     """Align `chain` against the reference J segments and annotate it in place.

     If `chain` exposes a '2nd-CYS' attribute, only the part of the
     sequence after the end of that feature is aligned; otherwise (an
     AttributeError is raised while looking it up) the whole sequence
     is used with an offset of 0.

     Candidate J segments are pre-selected by shared k-mer score
     (`self.numCrudeJCandidates` best ones) and then aligned with
     `vdj_aligner.bestalignNW` using `self.minJscore`.

     When `bestalignNW` reports a segment (i.e. not None), `chain` is
     modified as follows:
       * features of the chosen reference are copied onto `chain`,
         shifted by the 2nd-CYS offset
       * `chain.annotations['gapped_query']` / `['gapped_reference']`
         are extended with the J alignment
       * `chain.letter_annotations['alignment']` is updated in the
         aligned region

     Parameters:
         chain   -- record to align; must provide `seq`, `annotations`,
                    `letter_annotations` and `_update_feature_dict()`.
         verbose -- accepted but not used by this method.

     Returns:
         The score returned by `vdj_aligner.bestalignNW` (returned
         whether or not a segment was accepted).
     """
     # try pruning off V region for J alignment
     try:
         second_cys = chain.__getattribute__('2nd-CYS')
         second_cys_offset = second_cys.location.end.position
         query = chain.seq.tostring()[second_cys_offset:]
     except AttributeError:
         # no 2nd-CYS available: align against the full sequence
         query = chain.seq.tostring()
         second_cys_offset = 0

     # compute hashes from query seq
     querykeys = vdj_aligner.seq2kmers(query,self.seedpatterns)

     # for each reference J segment and each pattern, how many shared k-mers are there?
     Jscores_hash = vdj_aligner.hashscore(self.Jseqlistkeys,querykeys)

     # get numCrudeJCandidates highest scores in Jscores and store their names in descending order
     goodJseglist = sorted(self.refJ.keys(),key=lambda k: Jscores_hash[k],reverse=True)[0:self.numCrudeJCandidates]
     goodJsegdict = dict((seg,self.refJ[seg].seq.tostring()) for seg in goodJseglist)

     # Needleman-Wunsch of J segment
     (bestJseg,bestJscore,bestJscoremat,bestJtracemat) = vdj_aligner.bestalignNW(goodJsegdict,query,self.minJscore)

     # if successful alignment
     if bestJseg is not None:
         # copy features from ref to query
         Jrefaln,Jqueryaln = vdj_aligner.construct_alignment( self.refJ[bestJseg].seq.tostring(), query, bestJscoremat, bestJtracemat )
         coord_mapping = vdj_aligner.ungapped_coord_mapping(Jrefaln, Jqueryaln)
         seqtools.copy_features(self.refJ[bestJseg], chain, coord_mapping, offset=second_cys_offset, erase=['translation'], replace=False)
         chain._update_feature_dict()

         # update gapped aln
         # (both annotations default to '' when no earlier alignment stored them)
         gapped_query = chain.annotations.get('gapped_query','')
         gapped_reference = chain.annotations.get('gapped_reference','')
         # presumably maps second_cys_offset into gapped_query coordinates -- verify against helper
         gapped_CDR3_offset = vdj_aligner.ungapped2gapped_coord(chain.seq.tostring(),gapped_query,second_cys_offset)
         # length of the stored gapped reference once trailing '-' are stripped
         gapped_Vref_aln_end = len(gapped_reference.rstrip('-'))
         # Keep the stored strings up to that point and append the J alignment
         # from the matching position onward.
         # NOTE(review): a negative difference here would slice from the end of
         # the J alignment -- confirm it cannot occur.
         chain.annotations['gapped_query'] = gapped_query[:gapped_Vref_aln_end] + Jqueryaln[gapped_Vref_aln_end-gapped_CDR3_offset:]
         chain.annotations['gapped_reference'] = gapped_reference[:gapped_Vref_aln_end] + Jrefaln[gapped_Vref_aln_end-gapped_CDR3_offset:]

         # annotate mutations
         curr_annot = chain.letter_annotations['alignment']
         aln_annot = vdj_aligner.alignment_annotation(Jrefaln,Jqueryaln)
         # Drop every 'D' character.  `str.replace` behaves the same on
         # Python 2 and 3, unlike the two-argument `translate(None, 'D')`.
         aln_annot = aln_annot.replace('D','')
         # lNER: number of leading 'I' characters.
         # rNER: index just past the last non-'I' character.
         lNER = len(aln_annot) - len(aln_annot.lstrip('I'))
         rNER = len(aln_annot.rstrip('I'))
         # If the annotation is all 'I' (or empty), rstrip yields 0 < lNER and
         # the splice below would duplicate part of curr_annot; clamp so the
         # splice keeps the original length.
         rNER = max(lNER,rNER)
         # Indices are relative to the pruned query, so shift by the offset
         # when splicing into the full-length annotation.
         chain.letter_annotations['alignment'] = curr_annot[:second_cys_offset+lNER] + aln_annot[lNER:rNER] + curr_annot[second_cys_offset+rNER:]

     return bestJscore
Example #4
0
 def Jalign_chain(self,chain,verbose=False):
     """Align `chain` against the reference J segments and annotate it in place.

     If `chain` exposes a '2nd-CYS' attribute, only the part of the
     sequence after the end of that feature is aligned; otherwise (an
     AttributeError is raised while looking it up) the whole sequence
     is used with an offset of 0.

     Candidate J segments are pre-selected by shared k-mer score
     (`self.numCrudeJCandidates` best ones) and then aligned with
     `vdj_aligner.bestalignNW` using `self.minJscore`.

     When `bestalignNW` reports a segment (i.e. not None), `chain` is
     modified as follows:
       * features of the chosen reference are copied onto `chain`,
         shifted by the 2nd-CYS offset
       * `chain.annotations['gapped_query']` / `['gapped_reference']`
         are extended with the J alignment
       * `chain.letter_annotations['alignment']` is updated in the
         aligned region

     Parameters:
         chain   -- record to align; must provide `seq`, `annotations`,
                    `letter_annotations` and `_update_feature_dict()`.
         verbose -- accepted but not used by this method.

     Returns:
         The score returned by `vdj_aligner.bestalignNW` (returned
         whether or not a segment was accepted).
     """
     # try pruning off V region for J alignment
     try:
         second_cys = chain.__getattribute__('2nd-CYS')
         second_cys_offset = second_cys.location.end.position
         query = chain.seq.tostring()[second_cys_offset:]
     except AttributeError:
         # no 2nd-CYS available: align against the full sequence
         query = chain.seq.tostring()
         second_cys_offset = 0

     # compute hashes from query seq
     querykeys = vdj_aligner.seq2kmers(query,self.seedpatterns)

     # for each reference J segment and each pattern, how many shared k-mers are there?
     Jscores_hash = vdj_aligner.hashscore(self.Jseqlistkeys,querykeys)

     # get numCrudeJCandidates highest scores in Jscores and store their names in descending order
     goodJseglist = sorted(self.refJ.keys(),key=lambda k: Jscores_hash[k],reverse=True)[0:self.numCrudeJCandidates]
     goodJsegdict = dict((seg,self.refJ[seg].seq.tostring()) for seg in goodJseglist)

     # Needleman-Wunsch of J segment
     (bestJseg,bestJscore,bestJscoremat,bestJtracemat) = vdj_aligner.bestalignNW(goodJsegdict,query,self.minJscore)

     # if successful alignment
     if bestJseg is not None:
         # copy features from ref to query
         Jrefaln,Jqueryaln = vdj_aligner.construct_alignment( self.refJ[bestJseg].seq.tostring(), query, bestJscoremat, bestJtracemat )
         coord_mapping = vdj_aligner.ungapped_coord_mapping(Jrefaln, Jqueryaln)
         seqtools.copy_features(self.refJ[bestJseg], chain, coord_mapping, offset=second_cys_offset, erase=['translation'], replace=False)
         chain._update_feature_dict()

         # update gapped aln
         # (both annotations default to '' when no earlier alignment stored them)
         gapped_query = chain.annotations.get('gapped_query','')
         gapped_reference = chain.annotations.get('gapped_reference','')
         # presumably maps second_cys_offset into gapped_query coordinates -- verify against helper
         gapped_CDR3_offset = vdj_aligner.ungapped2gapped_coord(chain.seq.tostring(),gapped_query,second_cys_offset)
         # length of the stored gapped reference once trailing '-' are stripped
         gapped_Vref_aln_end = len(gapped_reference.rstrip('-'))
         # Keep the stored strings up to that point and append the J alignment
         # from the matching position onward.
         # NOTE(review): a negative difference here would slice from the end of
         # the J alignment -- confirm it cannot occur.
         chain.annotations['gapped_query'] = gapped_query[:gapped_Vref_aln_end] + Jqueryaln[gapped_Vref_aln_end-gapped_CDR3_offset:]
         chain.annotations['gapped_reference'] = gapped_reference[:gapped_Vref_aln_end] + Jrefaln[gapped_Vref_aln_end-gapped_CDR3_offset:]

         # annotate mutations
         curr_annot = chain.letter_annotations['alignment']
         aln_annot = vdj_aligner.alignment_annotation(Jrefaln,Jqueryaln)
         # Drop every 'D' character.  `str.replace` behaves the same on
         # Python 2 and 3, unlike the two-argument `translate(None, 'D')`.
         aln_annot = aln_annot.replace('D','')
         # lNER: number of leading 'I' characters.
         # rNER: index just past the last non-'I' character.
         lNER = len(aln_annot) - len(aln_annot.lstrip('I'))
         rNER = len(aln_annot.rstrip('I'))
         # If the annotation is all 'I' (or empty), rstrip yields 0 < lNER and
         # the splice below would duplicate part of curr_annot; clamp so the
         # splice keeps the original length.
         rNER = max(lNER,rNER)
         # Indices are relative to the pruned query, so shift by the offset
         # when splicing into the full-length annotation.
         chain.letter_annotations['alignment'] = curr_annot[:second_cys_offset+lNER] + aln_annot[lNER:rNER] + curr_annot[second_cys_offset+rNER:]

     return bestJscore