コード例 #1
0
class ConfigForm(Form):
    """Web form collecting the configuration options for a prediction run.

    Choice lists are built at class-definition time from the ``base``
    module, so the available predictors, presets and alleles reflect the
    environment when this module is imported.
    """
    # output directory where results are written
    path = TextField('output path', default='results', validators=[DataRequired()],
                     render_kw={"class": "textbox"})
    # (value, label) choice pairs for the predictor multi-select
    pm = [(i,i) for i in base.predictors]
    predictors = SelectMultipleField('predictors', choices=pm,
                                     render_kw={"class": "combobox"})
    # peptide lengths per MHC class
    mhc1_length = IntegerField('mhc1 length', default=11)
    mhc2_length = IntegerField('mhc2 length', default=15)
    # input sequence file; must be present and a recognised sequence format
    sequence_file = TextField('sequence file',
                              validators=[DataRequired(), is_seqfile(), exists()], default='')
    overwrite = BooleanField('overwrite', default=False, false_values={False, 'n', ''})
    cpus = IntegerField('cpus', default=1)
    # preset allele lists; the blank first entry means "no preset selected"
    ps1 = [(i,i) for i in base.mhc1_presets]
    ps1.insert(0, ('',''))
    mhc1_presets = SelectField('MHC-I presets', choices=ps1, default='')
    ps2 = [(i,i) for i in base.mhc2_presets]
    ps2.insert(0, ('',''))
    mhc2_presets = SelectField('MHC-II presets', choices=ps2, default='')
    # allele choices taken from one representative predictor per MHC class
    p1 = base.get_predictor('iedbmhc1')
    x = [(i,i) for i in p1.get_alleles()]
    mhc1_alleles = SelectMultipleField('MHC-I alleles', choices=x,
                                      render_kw={"class": "combobox"})
    p2 = base.get_predictor('tepitope')
    x = [(i,i) for i in p2.get_alleles()]
    #drballeles = base.getDRBList(mhc2alleles)
    #dqpalleles = base.getDQPList(mhc2alleles)
    mhc2_alleles = SelectMultipleField('MHC-II alleles', choices=x,
                                     render_kw={"class": "combobox"})
    mhc2_alleles.size=5
    # local paths to the IEDB tool installations
    iedbmhc1_path = TextField('iedb MHC-I tools path')
    iedbmhc2_path = TextField('iedb MHC-II tools path')
コード例 #2
0
    def run(self):
        """Run the prediction workflow over all configured predictors.

        For each predictor: verify it is installed, pick the MHC-class
        specific alleles/length/method, then predict either the supplied
        peptides or the protein sequences. Completed predictors are kept
        in ``self.preds`` and the analysis step is run at the end.
        """

        preds = []
        for p in self.predictors:
            P = base.get_predictor(p)
            # keep == False: check_install's exact return contract is external
            if P.check_install() == False:
                print ('%s not installed' %P.name)
                continue
            savepath = os.path.join(self.path, p)
            # remove stale results when a rerun is requested
            if self.overwrite and os.path.exists(savepath):
                shutil.rmtree(savepath)
            if p in base.mhc1_predictors:
                a = self.mhc1_alleles
                length = self.mhc1_length
                check_mhc1_length(length)
                method = self.iedb_mhc1_method
            else:
                a = self.mhc2_alleles
                length = self.mhc2_length
                method = self.iedb_mhc2_method
            # empty string means "use the predictor's default method"
            if method == '':
                method = None
            print ('predictor:', p)
            print ('alleles:',', '.join(a))
            print ('length:',length)
            print ('cpus:', self.cpus)
            if 'iedb' in p:
                # keep == False: iedb_checks' return contract is external
                if iedb_checks(method) == False:
                    continue

            if self.peptides is not None:
                P.predict_peptides(self.peptides, length=length, alleles=a,
                                path=self.path, overwrite=self.overwrite, verbose=self.verbose,
                                method=method, cpus=self.cpus, compression=self.compression)
                #load the results into the predictor
                P.load()
            else:
                #data saved to disk to avoid large memory usage
                P.predict_proteins(self.sequences, length=length, alleles=a, names=self.names,
                                path=savepath, overwrite=self.overwrite, verbose=self.verbose,
                                method=method, cpus=self.cpus, compression=self.compression)
                #keep reference to path where results saved
                P.path = savepath
                #clear data as we will reload during analysis from disk
                P.data = None
            preds.append(P)
            print ('-----------------------------')

        self.preds = preds
        self.analysis()
        print ('results saved in the folder %s' %self.path)
        return
コード例 #3
0
def get_results(path, predictor, name):
    """Load previously saved prediction results for one predictor/sequence.

    Returns the predictor object with its data loaded from
    ``path/predictor/name.csv``.
    """

    P = base.get_predictor(predictor)
    resultfile = os.path.join(path, predictor, name) + '.csv'
    P.load(resultfile)
    return P
コード例 #4
0
File: neo.py  Project: hwang-happy/epitopepredict
def predict_binding(df, predictor='netmhcpan', alleles=None,
                     verbose=False, cpus=1, cutoff=.95, cutoff_method='default'):
    """
    Predict binding scores for mutated and wt peptides (if present) from supplied variants.
    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'wt' - a corresponding wild type peptide
        this data could be generated from get_mutant_sequences or from an external program
        predictor: mhc binding prediction method
        alleles: list of alleles
        verbose: print extra progress information
        cpus: number of processors to use
        cutoff: percentile cutoff passed to the predictor
        cutoff_method: how the cutoff is interpreted by the predictor
    Returns:
        dataframe with mutant and wt binding scores for all alleles, or None
        if no binding predictions could be made
    """

    #avoid the mutable default argument trap; [] as default is shared between calls
    if alleles is None:
        alleles = []
    P = base.get_predictor(predictor, scoring='ligand')
    print (P)
    print ('predicting mhc binding for %s peptides with %s' %(len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps, alleles=alleles, cpus=cpus,
                             cutoff=cutoff, cutoff_method=cutoff_method, drop_columns=True)

    if res is None:
        print ('no binding predictions!')
        return

    #predict closest matching peptide affinity
    if verbose:
        print ('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps, alleles=alleles, cpus=cpus,
                               cutoff=cutoff, cutoff_method=cutoff_method, drop_columns=True)

    #combine mutant and matching binding predictions
    res = combine_wt_scores(res, b_wt, P.scorekey)
    #use the axis keyword; positional axis was removed in pandas 2.0
    res = res.drop(['pos','name'], axis=1)

    #combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    res['binding_diff'] = res[P.scorekey]/res.matched_score

    #anchor position mutated in any 9-mers
    res['anchor'] = res.apply(anchor_mutated, axis=1)
    #hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    res['length'] = res.peptide.str.len()

    #rename some columns
    res = res.rename(columns={'rank':'binding_rank','alleles':'promiscuity'})
    res = res.sort_values('binding_rank', ascending=True)
    return res
コード例 #5
0
    def run(self):
        """Run the workflow for all selected predictors.

        Results are written under ``self.path/<predictor>`` and loaded back
        into each predictor object before the analysis step.
        """

        preds = []
        if self.names is None:
            self.names = self.sequences.locus_tag
        for p in self.predictors:
            P = base.get_predictor(p)
            savepath = os.path.join(self.path, p)
            #remove stale results when a rerun is requested
            if self.overwrite and os.path.exists(savepath):
                shutil.rmtree(savepath)
            if p in ['iedbmhc1', 'mhcflurry']:
                a = self.mhc1_alleles
                length = self.mhc1_length
                check_mhc1_length(length)
                method = self.iedb_mhc1_method
            else:
                a = self.mhc2_alleles
                length = self.mhc2_length
                method = self.iedb_mhc2_method
            #empty string means use the predictor's default method
            if method == '':
                method = None
            print('predictor:', p)
            print('alleles:', a)
            print('length:', length)
            print('cpus:', self.cpus)
            if 'iedb' in p:
                #keep == False: iedb_checks' return contract is external
                if iedb_checks(method) == False:
                    continue

            P.predictProteins(self.sequences,
                              length=length,
                              alleles=a,
                              names=self.names,
                              path=savepath,
                              overwrite=self.overwrite,
                              verbose=self.verbose,
                              method=method,
                              cpus=self.cpus)
            #load results into predictor
            P.load(path=savepath)
            if P.data is None:
                print('no results were found, did predictor run?')
                return
            preds.append(P)
            print('-----------------------------')

        self.preds = preds
        self.analysis()
        if self.plots:
            self.plot_results()
        return
コード例 #6
0
def check_installed():
    """Return the names of predictors whose tools are actually installed."""

    names = base.get_predictor_classes()
    return [name for name in names
            if base.get_predictor(name).check_install() is True]
コード例 #7
0
def list_alleles():
    """Print the supported alleles for every available predictor."""

    for p in base.predictors:
        print(p)
        print('-----------------------------')
        P = base.get_predictor(p)
        x = P.getAlleles()
        #some predictors may return a non-list structure; only print lists
        if isinstance(x, list):
            for i in x:
                print(i)
        print()
    return
コード例 #8
0
File: app.py  Project: partrita/epitopepredict
def list_alleles():
    """Print the supported alleles for every predictor, n per row."""

    n = 7
    for p in base.predictors:
        print (p)
        print ('-----------------------------')
        P = base.get_predictor(p)
        x = P.get_alleles()
        #some predictors may return a non-list structure; only print lists
        if isinstance(x, list):
            #print the alleles in rows of n for readability
            for i in range(0, len(x), n):
                print(', '.join(x[i:i+n]))

        print ()
    return
コード例 #9
0
File: app.py  Project: partrita/epitopepredict
def check_iedbmhc2_path():
    """Check that the IEDB MHC-II tools are present.

    Returns False (with a message) when the tools path does not exist,
    True otherwise. The explicit True replaces the previous implicit
    None so the function is a proper boolean predicate; callers that
    compare ``== False`` behave identically.
    """

    P = base.get_predictor('iedbmhc2')
    if not os.path.exists(P.iedbmhc2_path):
        print ('IEDB MHC-II tools not found, check path')
        return False
    return True
コード例 #10
0
    def run(self):
        """Run workflow for multiple samples and prediction methods.

        For each input vcf file the variant effects are computed (or
        reloaded from a previous run's pickle), saved as a csv table, and
        then binding predictions are made with every configured predictor
        and written to per-sample/per-predictor csv files under
        ``self.path``.
        """

        print('running neoepitope predictions')
        path = self.path
        overwrite = self.overwrite
        files = self.vcf_files
        preds = self.predictors
        labels = self.get_file_labels(files)
        cutoffs = self.cutoffs
        #pad cutoffs so there is one per predictor
        if len(cutoffs) < len(preds):
            cutoffs = [cutoffs[0] for p in preds]

        for f in labels:
            print(f)
            infile = labels[f]['filename']
            #file to save variants to, if present we can skip
            eff_csv = os.path.join(path, 'variant_effects_%s.csv' % f)
            eff_obj = os.path.join(path, 'variant_effects_%s.pickle' % f)
            if not os.path.exists(eff_obj) or overwrite:
                #get variant effects for each file and then iterate over predictors
                variants = load_variants(vcf_file=infile)
                labels[f]['variants'] = len(variants)
                print('getting variant effects')
                effects = get_variant_effects(variants, self.verbose)
                #serialize variant effects
                effects_to_pickle(effects, eff_obj)
            else:
                #reload cached effects from the last run; close the file
                #handle properly. NOTE: unpickling is only acceptable here
                #because the file was written by a previous run of this tool.
                with open(eff_obj, 'rb') as fh:
                    effects = pickle.load(fh)
            #save effects as table
            eff_data = effects_to_dataframe(effects)
            eff_data['sample'] = f
            eff_data.to_csv(eff_csv)

            for predictor in self.predictors:
                outfile = os.path.join(path,
                                       'results_%s_%s.csv' % (f, predictor))
                #skip predictors already run unless overwriting
                if os.path.exists(outfile) and not overwrite:
                    continue
                if predictor in base.mhc1_predictors:
                    alleles = self.mhc1_alleles
                    length = self.mhc1_length
                else:
                    alleles = self.mhc2_alleles
                    length = self.mhc2_length
                seqs = get_mutant_sequences(effects=effects,
                                            length=length,
                                            verbose=self.verbose)

                res = predict_variants(seqs,
                                       alleles=alleles,
                                       length=length,
                                       predictor=predictor,
                                       verbose=self.verbose,
                                       cpus=self.cpus)
                res['label'] = f
                res.to_csv(outfile, index=False)

        #combine results for multiple samples
        pd.DataFrame(labels).T.to_csv(os.path.join(path, 'sample_labels.csv'))
        print('finished, results saved to %s' % path)
        return
コード例 #11
0
def predict_variants(df,
                     predictor='tepitope',
                     alleles=None,
                     verbose=False,
                     cpus=1,
                     cutoff=.95,
                     cutoff_method='default'):
    """
    Predict binding scores for mutated and wt peptides (if present) from supplied variants.
    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'wt' - a corresponding wild type peptide
        this data could be generated from get_mutant_sequences or from an external source
        predictor: mhc binding prediction method
        alleles: list of alleles
        verbose: print extra progress information
        cpus: number of processors to use
        cutoff: percentile cutoff passed to the predictor
        cutoff_method: how the cutoff is interpreted by the predictor
    Returns:
        dataframe with mutant and wt binding scores for all alleles plus other
        score metrics, or None if no binding predictions could be made
    """

    #avoid the mutable default argument trap; [] as default is shared between calls
    if alleles is None:
        alleles = []

    #find matches to self proteome, adds penalty score column to df
    #we should only blast non-duplicates....
    if verbose:
        print('finding matches to self proteome')
    df = self_matches(df, cpus=cpus)
    if verbose:
        print('finding matches to viral proteomes')
    df = virus_matches(df, cpus=cpus)
    #get similarity scores for wt and closest match to proteome
    df['wt_similarity'] = df.apply(wt_similarity, axis=1)
    df['self_similarity'] = df.apply(self_similarity, axis=1)
    df['virus_similarity'] = df.apply(virus_similarity, axis=1)
    #get closest peptide in another column, either wt or nearest self
    df['closest'] = df.apply(get_closest_match, axis=1)

    P = base.get_predictor(predictor)
    if verbose:
        print(P)
        print('predicting mhc binding for %s peptides with %s' %
              (len(df), P.name))

    peps = list(df.peptide)
    res = P.predict_peptides(peps,
                             alleles=alleles,
                             cpus=cpus,
                             cutoff=cutoff,
                             cutoff_method=cutoff_method,
                             drop_columns=True)
    pb = P.promiscuous_binders(n=1, cutoff=.95)
    if res is None:
        print('no binding predictions!')
        return

    #predict closest matching peptide affinity
    if verbose:
        print('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps,
                              alleles=alleles,
                              cpus=cpus,
                              cutoff=cutoff,
                              cutoff_method=cutoff_method,
                              drop_columns=True)

    #combine mutant and matching binding predictions
    res = combine_wt_scores(res, b_wt, P.scorekey)
    #use the axis keyword; positional axis was removed in pandas 2.0
    res = res.drop(['pos', 'name'], axis=1)

    #combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    res['binding_diff'] = res[P.scorekey] / res.matched_score

    #hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    #report exact matches to self
    print('%s peptides with exact matches to self' %
          len(res[res.self_mismatches == 0]))

    #merge promiscuity measure into results
    if len(pb) > 0:
        res = res.merge(pb[['peptide', 'alleles']], on='peptide', how='left')
    else:
        print('no promiscuous binders')
        res['alleles'] = 0
    #rename some columns
    res = res.rename(columns={
        'rank': 'binding_rank',
        'alleles': 'promiscuity'
    })
    return res