Exemple #1
0
def predict_binding(df, predictor='netmhcpan', alleles=None,
                     verbose=False, cpus=1, cutoff=.95, cutoff_method='default'):
    """
    Predict binding scores for mutant peptides and for the peptide each one
    is compared against.

    Args:
        df: pandas dataframe of peptide sequences; two columns are read here
            'peptide' - the mutant peptide
            'closest' - the peptide the mutant is compared with
            (presumably the wild type or nearest self match - confirm with
            whatever produced df)
        predictor: mhc binding prediction method
        alleles: list of alleles, None is treated as an empty list
        verbose: print a progress message before the second prediction run
        cpus: passed through to the predictor
        cutoff: passed through to the predictor
        cutoff_method: passed through to the predictor
    Returns:
        dataframe of df merged with the binding results and sorted by
        'binding_rank', or None if the predictor returned no results
    """

    #a None default avoids sharing one list object between calls
    if alleles is None:
        alleles = []

    P = base.get_predictor(predictor, scoring='ligand')
    print (P)
    print ('predicting mhc binding for %s peptides with %s' %(len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps, alleles=alleles, cpus=cpus,
                             cutoff=cutoff, cutoff_method=cutoff_method, drop_columns=True)

    if res is None:
        print ('no binding predictions!')
        return

    #predict closest matching peptide affinity
    if verbose:
        print ('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps, alleles=alleles, cpus=cpus,
                               cutoff=cutoff, cutoff_method=cutoff_method, drop_columns=True)

    #combine mutant and matching binding predictions
    res = combine_wt_scores(res, b_wt, P.scorekey)
    #axis must be given by keyword, pandas 2.0 no longer accepts it positionally
    res = res.drop(['pos','name'], axis=1)

    #combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    #matched_score is expected to be added by combine_wt_scores - confirm
    res['binding_diff'] = res[P.scorekey]/res.matched_score

    #anchor position mutated in any 9-mers
    res['anchor'] = res.apply(anchor_mutated, axis=1)
    #hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    res['length'] = res.peptide.str.len()

    #no promiscuity measure is merged in here, so the 'alleles' rename only
    #applies if that column is already present in the merged frame
    res = res.rename(columns={'rank':'binding_rank','alleles':'promiscuity'})
    res = res.sort_values('binding_rank', ascending=True)
    return res
Exemple #2
0
    def analysis(self):
        """Do analysis of prediction results.

        For every predictor in self.preds the binders and promiscuous binders
        are found at that predictor's cutoff and written as csv files into
        self.path. When self.sequences is set, n-mer, cluster and summary
        tables are written as well. If more than one predictor produced
        promiscuous binders a combined table is also saved.

        Returns:
            None. Note that the method stops at the first predictor that
            yields zero binders, so later predictors are not processed.
        """

        preds = self.preds
        cutoffs = self.cutoffs
        #too few cutoffs supplied, reuse the first one for every predictor
        if len(cutoffs) < len(preds):
            cutoffs = [cutoffs[0]] * len(preds)
        cutoff_method = self.cutoff_method
        n = self.n
        prom_binders = {}
        print ('analysing results..')
        #pair each predictor with its own cutoff; a manually incremented
        #index falls out of step whenever an iteration is skipped
        for P, cutoff in zip(preds, cutoffs):
            p = P.name
            print (P.path)
            if P.data is not None:
                b = P.get_binders(cutoff=cutoff, cutoff_method=cutoff_method)
            elif P.path is not None:
                b = P.get_binders(path=P.path, cutoff=cutoff, cutoff_method=cutoff_method)
            else:
                print ('empty results?')
                continue
            if b is None:
                continue
            print ('%s: %s binders found' %(P, len(b)))
            if len(b) == 0:
                #aborts the whole analysis, not just this predictor
                print ('no binders found, check your cutoff value')
                return

            pb = P.promiscuous_binders(binders=b, n=n, cutoff=cutoff, cutoff_method=cutoff_method)
            print ('found %s promiscuous binders at cutoff=%s, n=%s' %(len(pb),cutoff,n))
            pb.to_csv(os.path.join(self.path,'final_%s_%s.csv' %(p,n)), float_format='%g')
            prom_binders[p] = pb

            if len(pb) == 0:
                continue
            print ('top promiscuous binders:')
            print (pb[:10])

            if self.sequences is not None:
                x = analysis.get_nmer(pb, self.sequences, how='split', length=20)
                x = analysis.peptide_properties(x, 'n-mer')
                #same file name as above, so this replaces the plain binders table
                x.to_csv(os.path.join(self.path,'final_%s_%s.csv' %(p,n)), float_format='%g')
                #do further analysis if using protein sequences
                cl = analysis.find_clusters(pb)
                if len(cl) > 0:
                    #make peptide lists
                    cl = analysis.get_nmer(cl, self.sequences, how='split', length=20)
                    cl = analysis.peptide_properties(cl, 'n-mer')
                    cl.to_csv(os.path.join(self.path,'clusters_%s.csv' %p))
                #make summary table
                summary = self.get_summary(P, pb, self.sequences, clusters=cl)
                summary.to_csv(os.path.join(self.path,'summary_%s.csv' %p))
            print ('-----------------------------')

        if len(prom_binders) > 1:
            print ('finding combined list of binders')
            comb = self.combine(prom_binders)
            comb.to_csv(os.path.join(self.path,'combined.csv'), float_format='%g')
        return
Exemple #3
0
def predict_variants(df,
                     predictor='tepitope',
                     alleles=None,
                     verbose=False,
                     cpus=1,
                     cutoff=.95,
                     cutoff_method='default'):
    """
    Predict binding scores for mutated peptides and their closest matching
    peptides from supplied variants, and add similarity and promiscuity columns.

    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'wt' - a corresponding wild type peptide (not read directly in
            this function, presumably used by the similarity helpers - confirm)
        this data could be generated from get_mutant_sequences or from an external source
        predictor: mhc binding prediction method
        alleles: list of alleles, None is treated as an empty list
        verbose: print progress messages
        cpus: passed through to the matching helpers and the predictor
        cutoff: passed through to the predictor and the promiscuous binder search
        cutoff_method: passed through to the predictor and the promiscuous binder search
    Returns:
        dataframe with mutant and wt binding scores for all alleles plus other
        score metrics, or None if the predictor returned no results
    """

    #a None default avoids sharing one list object between calls
    if alleles is None:
        alleles = []

    #find matches to self proteome, adds penalty score column to df
    #we should only blast non-duplicates....
    if verbose:
        print('finding matches to self proteome')
    df = self_matches(df, cpus=cpus)
    if verbose:
        print('finding matches to viral proteomes')
    df = virus_matches(df, cpus=cpus)
    #get similarity scores for wt and closest match to proteome
    df['wt_similarity'] = df.apply(wt_similarity, axis=1)
    df['self_similarity'] = df.apply(self_similarity, axis=1)
    df['virus_similarity'] = df.apply(virus_similarity, axis=1)
    #get closest peptide in another column, either wt or nearest self
    df['closest'] = df.apply(get_closest_match, axis=1)

    P = base.get_predictor(predictor)
    if verbose:
        print(P)
        print('predicting mhc binding for %s peptides with %s' %
              (len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps,
                             alleles=alleles,
                             cpus=cpus,
                             cutoff=cutoff,
                             cutoff_method=cutoff_method,
                             drop_columns=True)
    if res is None:
        print('no binding predictions!')
        return

    #promiscuous binders among the mutant peptides, using the caller's cutoff.
    #kept ahead of the wt prediction below, which presumably replaces the
    #results stored on the predictor - confirm
    pb = P.promiscuous_binders(n=1, cutoff=cutoff, cutoff_method=cutoff_method)

    #predict closest matching peptide affinity
    if verbose:
        print('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps,
                              alleles=alleles,
                              cpus=cpus,
                              cutoff=cutoff,
                              cutoff_method=cutoff_method,
                              drop_columns=True)

    #combine mutant and matching binding predictions
    res = combine_wt_scores(res, b_wt, P.scorekey)
    #axis must be given by keyword, pandas 2.0 no longer accepts it positionally
    res = res.drop(['pos', 'name'], axis=1)

    #combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    #matched_score is expected to be added by combine_wt_scores - confirm
    res['binding_diff'] = res[P.scorekey] / res.matched_score

    #hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    #report exact matches to self, they are counted but not removed here
    print('%s peptides with exact matches to self' %
          len(res[res.self_mismatches == 0]))

    #merge promiscuity measure into results
    if len(pb) > 0:
        res = res.merge(pb[['peptide', 'alleles']], on='peptide', how='left')
    else:
        print('no promiscuous binders')
        res['alleles'] = 0
    #rename some columns
    res = res.rename(columns={
        'rank': 'binding_rank',
        'alleles': 'promiscuity'
    })
    return res