def predict_binding(df, predictor='netmhcpan', alleles=None, verbose=False,
                    cpus=1, cutoff=.95, cutoff_method='default'):
    """
    Predict binding scores for mutated peptides and their matching
    (wild type / closest) peptides from supplied variants.

    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'closest' - the peptide each mutant is compared against
                        (this is the column actually read below)
            this data could be generated from get_mutant_sequences or from an
            external program
        predictor: mhc binding prediction method, passed to base.get_predictor
        alleles: list of alleles; None is treated as an empty list
        verbose: print an extra progress message before the second prediction
        cpus: forwarded to P.predict_peptides
        cutoff: forwarded to P.predict_peptides
        cutoff_method: forwarded to P.predict_peptides
    Returns:
        dataframe with mutant and matched binding scores for all alleles,
        sorted by 'binding_rank' ascending, or None if the predictor
        returned no results for the mutant peptides
    """

    # avoid sharing one default list object between calls
    if alleles is None:
        alleles = []

    P = base.get_predictor(predictor, scoring='ligand')
    print(P)
    print('predicting mhc binding for %s peptides with %s' % (len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps, alleles=alleles, cpus=cpus,
                             cutoff=cutoff, cutoff_method=cutoff_method,
                             drop_columns=True)
    if res is None:
        print('no binding predictions!')
        return

    # predict closest matching peptide affinity
    if verbose:
        print('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps, alleles=alleles, cpus=cpus,
                              cutoff=cutoff, cutoff_method=cutoff_method,
                              drop_columns=True)

    # combine mutant and matching binding predictions
    # NOTE(review): combine_wt_scores presumably adds the 'matched_score'
    # column used below - confirm against its definition
    res = combine_wt_scores(res, b_wt, P.scorekey)
    # keyword form: pandas >= 2.0 no longer accepts axis positionally in drop()
    res = res.drop(columns=['pos', 'name'])

    # combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    res['binding_diff'] = res[P.scorekey] / res.matched_score
    # anchor position mutated in any 9-mers
    res['anchor'] = res.apply(anchor_mutated, axis=1)
    # hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    res['length'] = res.peptide.str.len()

    # no promiscuity measure is merged in here; the 'alleles' rename only
    # takes effect if such a column is already present
    res = res.rename(columns={'rank': 'binding_rank', 'alleles': 'promiscuity'})
    res = res.sort_values('binding_rank', ascending=True)
    return res
def analysis(self):
    """Do analysis of prediction results.

    For every predictor in self.preds the binders are retrieved (from the
    predictor's in-memory data, or from P.path when no data is loaded),
    promiscuous binders are computed and written to
    'final_<name>_<n>.csv' under self.path. If self.sequences is set, the
    promiscuous binders are additionally expanded to n-mers (overwriting
    that same file), clusters are written to 'clusters_<name>.csv' and a
    summary table to 'summary_<name>.csv'. When more than one predictor
    produced promiscuous binders, a combined table is written to
    'combined.csv'.

    Each predictor is paired with the cutoff at the same position in
    self.cutoffs; if fewer cutoffs than predictors are given, the first
    cutoff is used for all of them.

    Returns:
        None. Note that the whole analysis stops early (nothing further is
        written) as soon as one predictor yields zero binders.
    """

    preds = self.preds
    cutoffs = self.cutoffs
    if len(cutoffs) < len(preds):
        cutoffs = [cutoffs[0]] * len(preds)
    cutoff_method = self.cutoff_method
    n = self.n
    prom_binders = {}
    print('analysing results..')

    # zip keeps predictor and cutoff aligned even when an iteration is
    # skipped; a manually incremented index would fall behind on `continue`
    for P, cutoff in zip(preds, cutoffs):
        p = P.name
        print(P.path)
        if P.data is not None:
            b = P.get_binders(cutoff=cutoff, cutoff_method=cutoff_method)
        elif P.path is not None:
            b = P.get_binders(path=P.path, cutoff=cutoff,
                              cutoff_method=cutoff_method)
        else:
            print('empty results?')
            continue
        if b is None:
            continue
        print('%s: %s binders found' % (P, len(b)))
        if len(b) == 0:
            print('no binders found, check your cutoff value')
            return

        pb = P.promiscuous_binders(binders=b, n=n, cutoff=cutoff,
                                   cutoff_method=cutoff_method)
        print('found %s promiscuous binders at cutoff=%s, n=%s'
              % (len(pb), cutoff, n))
        final_file = os.path.join(self.path, 'final_%s_%s.csv' % (p, n))
        pb.to_csv(final_file, float_format='%g')
        prom_binders[p] = pb
        if len(pb) == 0:
            continue
        print('top promiscuous binders:')
        print(pb[:10])

        if self.sequences is not None:
            # replaces the plain table written above with the n-mer version
            x = analysis.get_nmer(pb, self.sequences, how='split', length=20)
            x = analysis.peptide_properties(x, 'n-mer')
            x.to_csv(final_file, float_format='%g')
            # do further analysis if using protein sequences
            cl = analysis.find_clusters(pb)
            if len(cl) > 0:
                # make peptide lists
                cl = analysis.get_nmer(cl, self.sequences, how='split',
                                       length=20)
                cl = analysis.peptide_properties(cl, 'n-mer')
                cl.to_csv(os.path.join(self.path, 'clusters_%s.csv' % p))
            # make summary table
            summary = self.get_summary(P, pb, self.sequences, clusters=cl)
            summary.to_csv(os.path.join(self.path, 'summary_%s.csv' % p))
        print('-----------------------------')

    if len(prom_binders) > 1:
        print('finding combined list of binders')
        comb = self.combine(prom_binders)
        comb.to_csv(os.path.join(self.path, 'combined.csv'),
                    float_format='%g')
    return
def predict_variants(df, predictor='tepitope', alleles=None, verbose=False,
                     cpus=1, cutoff=.95, cutoff_method='default'):
    """
    Predict binding scores for mutated and wt peptides (if present) from
    supplied variants, together with self/virus similarity measures.

    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'wt' - a corresponding wild type peptide
            this data could be generated from get_mutant_sequences or from an
            external source
        predictor: mhc binding prediction method, passed to base.get_predictor
        alleles: list of alleles; None is treated as an empty list
        verbose: print progress messages
        cpus: forwarded to the proteome matching helpers and the predictor
        cutoff: binder cutoff, used for the predictions and for the
            promiscuity measure
        cutoff_method: cutoff method, used for the predictions and for the
            promiscuity measure
    Returns:
        dataframe with mutant and wt binding scores for all alleles plus
        other score metrics, or None if the predictor returned no results
        for the mutant peptides
    """

    # avoid sharing one default list object between calls
    if alleles is None:
        alleles = []

    # find matches to self proteome, adds penalty score column to df
    # we should only blast non-duplicates....
    if verbose:
        print('finding matches to self proteome')
    df = self_matches(df, cpus=cpus)
    if verbose:
        print('finding matches to viral proteomes')
    df = virus_matches(df, cpus=cpus)

    # get similarity scores for wt and closest match to proteome
    df['wt_similarity'] = df.apply(wt_similarity, axis=1)
    df['self_similarity'] = df.apply(self_similarity, axis=1)
    df['virus_similarity'] = df.apply(virus_similarity, axis=1)
    # get closest peptide in another column, either wt or nearest self
    df['closest'] = df.apply(get_closest_match, axis=1)

    P = base.get_predictor(predictor)
    if verbose:
        print(P)
        print('predicting mhc binding for %s peptides with %s'
              % (len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps, alleles=alleles, cpus=cpus,
                             cutoff=cutoff, cutoff_method=cutoff_method,
                             drop_columns=True)
    if res is None:
        print('no binding predictions!')
        return

    # Promiscuity of the mutant peptides, using the same cutoff settings as
    # the prediction itself. This must stay before the wt prediction below:
    # presumably the predictor keeps only its most recent results - TODO confirm
    pb = P.promiscuous_binders(n=1, cutoff=cutoff, cutoff_method=cutoff_method)

    # predict closest matching peptide affinity
    if verbose:
        print('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps, alleles=alleles, cpus=cpus,
                              cutoff=cutoff, cutoff_method=cutoff_method,
                              drop_columns=True)

    # combine mutant and matching binding predictions
    # NOTE(review): combine_wt_scores presumably adds the 'matched_score'
    # column used below - confirm against its definition
    res = combine_wt_scores(res, b_wt, P.scorekey)
    # keyword form: pandas >= 2.0 no longer accepts axis positionally in drop()
    res = res.drop(columns=['pos', 'name'])

    # combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    res['binding_diff'] = res[P.scorekey] / res.matched_score

    # hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')

    # report (but do not remove) exact matches to self
    # NOTE(review): 'self_mismatches' is expected to come from self_matches
    print('%s peptides with exact matches to self'
          % len(res[res.self_mismatches == 0]))

    # merge promiscuity measure into results
    if len(pb) > 0:
        res = res.merge(pb[['peptide', 'alleles']], on='peptide', how='left')
    else:
        print('no promiscuous binders')
        res['alleles'] = 0

    # rename some columns
    res = res.rename(columns={'rank': 'binding_rank',
                              'alleles': 'promiscuity'})
    return res