def Valign_chain(self,chain,verbose=False):
    """Align `chain` against the reference V segments and annotate it in place.

    Steps, as performed by the code below:

    1. k-mers of the query sequence are computed with
       ``vdj_aligner.seq2kmers`` using ``self.seedpatterns`` and scored
       against ``self.Vseqlistkeys`` with ``vdj_aligner.hashscore``.
    2. The ``self.numCrudeVCandidates`` best-scoring keys of ``self.refV``
       are passed to ``vdj_aligner.bestalignNW`` together with
       ``self.minVscore``.
    3. If a best segment is returned (not ``None``), the reference features
       are copied onto ``chain``, the gapped alignment strings are stored in
       ``chain.annotations``, the per-letter ``'alignment'`` annotation is
       updated, any ``'CDR3-IMGT'`` feature is removed, and the
       ``codon_start`` qualifier of the ``'V-REGION'`` feature is set from
       the start position of the ``'2nd-CYS'`` feature.

    Parameters:
        chain: record to align; it is modified in place.
        verbose: not read by this method.

    Returns:
        The score returned by ``vdj_aligner.bestalignNW``, whether or not a
        segment was selected.
    """
    # NOTE(review): the nesting here was reconstructed from a source whose
    # whitespace had been collapsed; everything after the alignment call is
    # assumed to belong to the "successful alignment" branch -- confirm.
    query_seq = chain.seq.tostring()

    # compute hashes from the query and count shared k-mers per reference
    # V segment and seed pattern
    query_kmers = vdj_aligner.seq2kmers(query_seq, self.seedpatterns)
    seed_scores = vdj_aligner.hashscore(self.Vseqlistkeys, query_kmers)

    # keep the numCrudeVCandidates highest-scoring segment names, best first
    ranked_names = sorted(self.refV.keys(),
                          key=lambda name: seed_scores[name],
                          reverse=True)
    candidates = {}
    for name in ranked_names[:self.numCrudeVCandidates]:
        candidates[name] = self.refV[name].seq.tostring()

    # Needleman-Wunsch of V segment
    best_seg, best_score, score_mat, trace_mat = vdj_aligner.bestalignNW(
        candidates, query_seq, self.minVscore)

    # nothing else to do unless a segment was selected
    if best_seg is None:
        return best_score

    # copy features from reference to query through the alignment
    reference = self.refV[best_seg]
    ref_aln, query_aln = vdj_aligner.construct_alignment(
        reference.seq.tostring(), query_seq, score_mat, trace_mat)
    mapping = vdj_aligner.ungapped_coord_mapping(ref_aln, query_aln)
    seqtools.copy_features(reference, chain, mapping,
                           erase=['translation'], replace=False)

    # store gapped alignment
    chain.annotations['gapped_query'] = query_aln
    chain.annotations['gapped_reference'] = ref_aln

    # annotate mutations: drop every 'D' from the new annotation string, then
    # overwrite only the span between its leading and trailing runs of 'I';
    # outside that span the existing annotation is kept.
    # (two-argument translate(None, ...) is the Python 2 str form)
    previous = chain.letter_annotations['alignment']
    fresh = vdj_aligner.alignment_annotation(ref_aln, query_aln)
    fresh = fresh.translate(None, 'D')
    left = len(fresh) - len(fresh.lstrip('I'))
    right = len(fresh.rstrip('I'))
    chain.letter_annotations['alignment'] = (
        previous[:left] + fresh[left:right] + previous[right:])

    # curating: CDR3-IMGT is annotated in the V references but is not
    # complete there, so the copied feature is removed here.
    chain._update_feature_dict()
    try:
        # some reference entries do not have CDR3 annotations
        cdr3_index = chain._features['CDR3-IMGT'][0]
        chain.features.pop(cdr3_index)
        chain._features.pop('CDR3-IMGT')
        chain._update_feature_dict()
    except KeyError:
        pass

    # update codon_start of V-REGION anchored to the CDR3 2nd-CYS
    second_cys = chain.features[chain._features['2nd-CYS'][0]]
    v_region = chain.features[chain._features['V-REGION'][0]]
    frame = second_cys.location.start.position % 3 + 1
    v_region.qualifiers['codon_start'] = [frame]

    return best_score
def Jalign_chain(self,chain,verbose=False):
    """Align `chain` against the reference J segments and annotate it in place.

    If ``chain`` exposes a ``'2nd-CYS'`` attribute, only the part of the
    sequence after that feature's end position is aligned; otherwise the
    whole sequence is used with an offset of 0.

    The query k-mers (``vdj_aligner.seq2kmers`` with ``self.seedpatterns``)
    are scored against ``self.Jseqlistkeys``; the
    ``self.numCrudeJCandidates`` best-scoring keys of ``self.refJ`` are then
    passed to ``vdj_aligner.bestalignNW`` with ``self.minJscore``. When a
    best segment is returned (not ``None``), reference features are copied
    onto ``chain`` with the offset applied, the stored gapped alignment
    strings are extended with the J alignment, and the per-letter
    ``'alignment'`` annotation is updated.

    Parameters:
        chain: record to align; it is modified in place.
        verbose: not read by this method.

    Returns:
        The score returned by ``vdj_aligner.bestalignNW``, whether or not a
        segment was selected.
    """
    # NOTE(review): the nesting here was reconstructed from a source whose
    # whitespace had been collapsed -- confirm against the original layout.

    # try pruning off the V region for J alignment; any AttributeError while
    # doing so falls back to the full sequence
    try:
        offset = chain.__getattribute__('2nd-CYS').location.end.position
        query = chain.seq.tostring()[offset:]
    except AttributeError:
        offset = 0
        query = chain.seq.tostring()

    # compute hashes from the query and count shared k-mers per reference
    # J segment and seed pattern
    query_kmers = vdj_aligner.seq2kmers(query, self.seedpatterns)
    seed_scores = vdj_aligner.hashscore(self.Jseqlistkeys, query_kmers)

    # keep the numCrudeJCandidates highest-scoring segment names, best first
    ranked_names = sorted(self.refJ.keys(),
                          key=lambda name: seed_scores[name],
                          reverse=True)
    candidates = {}
    for name in ranked_names[:self.numCrudeJCandidates]:
        candidates[name] = self.refJ[name].seq.tostring()

    # Needleman-Wunsch of J segment
    best_seg, best_score, score_mat, trace_mat = vdj_aligner.bestalignNW(
        candidates, query, self.minJscore)

    # nothing else to do unless a segment was selected
    if best_seg is None:
        return best_score

    # copy features from reference to query, shifted by the pruned prefix
    reference = self.refJ[best_seg]
    ref_aln, query_aln = vdj_aligner.construct_alignment(
        reference.seq.tostring(), query, score_mat, trace_mat)
    mapping = vdj_aligner.ungapped_coord_mapping(ref_aln, query_aln)
    seqtools.copy_features(reference, chain, mapping, offset=offset,
                           erase=['translation'], replace=False)
    chain._update_feature_dict()

    # update gapped alignment: keep the stored strings up to the end of the
    # stored reference (ignoring its trailing '-'), then append the J
    # alignment starting from the matching index.
    stored_query = chain.annotations.get('gapped_query', '')
    stored_reference = chain.annotations.get('gapped_reference', '')
    gapped_offset = vdj_aligner.ungapped2gapped_coord(
        chain.seq.tostring(), stored_query, offset)
    v_end = len(stored_reference.rstrip('-'))
    j_start = v_end - gapped_offset
    chain.annotations['gapped_query'] = (
        stored_query[:v_end] + query_aln[j_start:])
    chain.annotations['gapped_reference'] = (
        stored_reference[:v_end] + ref_aln[j_start:])

    # annotate mutations: drop every 'D' from the new annotation string, then
    # overwrite only the span between its leading and trailing runs of 'I',
    # shifted by the offset; the existing annotation is kept elsewhere.
    # (two-argument translate(None, ...) is the Python 2 str form)
    previous = chain.letter_annotations['alignment']
    fresh = vdj_aligner.alignment_annotation(ref_aln, query_aln)
    fresh = fresh.translate(None, 'D')
    left = len(fresh) - len(fresh.lstrip('I'))
    right = len(fresh.rstrip('I'))
    chain.letter_annotations['alignment'] = (
        previous[:offset + left] + fresh[left:right] + previous[offset + right:])

    return best_score