예제 #1
0
def genomeAnalysis(datadir, label, gname, method):
    """Summarise binders per protein for one genome.

    Binders are read from ``<datadir>/<label>/<gname>/<method>``, aggregated
    per protein name and merged with the CDS annotation parsed from
    ``<genomespath>/<gname>.gb``.

    Args:
        datadir: root directory that holds the saved predictions.
        label: first sub-directory below ``datadir``.
        gname: genome name; also the stem of the genbank file.
        method: predictor name passed to ``base.getPredictor``.

    Returns:
        A DataFrame with one row per protein: aggregated score statistics,
        the annotation columns ``locus_tag``, ``length``, ``gene``,
        ``product`` and ``order``, a ``perc`` column and, when clusters were
        found, a ``density`` column. Rows are sorted by ``perc`` descending.
        ``None`` is returned when ``getAllBinders`` yields nothing (it
        returns ``None`` for the 'bcell' method).

    Note:
        The function still depends on the module-level ``genomespath``; it
        should be made independent of web app paths.
    """

    path = os.path.join(datadir, '%s/%s/%s' % (label, gname, method))
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    g = sequtils.genbank2Dataframe(gfile, cds=True)
    b = getAllBinders(path, method=method, n=5)
    if b is None:
        # getAllBinders has no binders to offer (e.g. method == 'bcell');
        # bail out instead of failing on b.groupby below.
        return None
    P = base.getPredictor(method)
    # NOTE(review): DataFrame.sort was removed in pandas 0.20 (use
    # sort_index / sort_values there); kept as-is because the pandas version
    # this project targets is not visible from here.
    res = b.groupby('name').agg({
        P.scorekey: [np.mean, np.size, np.max]
    }).sort()
    # drop the score-key level so columns are named after the aggregations
    res.columns = res.columns.get_level_values(1)
    res = res.merge(g[['locus_tag', 'length', 'gene', 'product', 'order']],
                    left_index=True,
                    right_on='locus_tag')
    # number of binders relative to the protein length, as a percentage
    res['perc'] = res['size'] / res.length * 100
    res = res.sort('perc', ascending=False)

    cl = findClusters(b, method, dist=9, minsize=3)
    if cl is not None:
        # attach the highest cluster density found for each protein
        gc = cl.groupby('name').agg({'density': np.max})
        res = res.merge(gc, left_on='locus_tag', right_index=True)

    return res
예제 #2
0
def testrun(gname):
    """Run a trial prediction for the 'VP24' protein of genome *gname*.

    Predictions are saved under the local ``test`` directory, the saved
    file is read back into the predictor, and the score distributions for
    that directory are then computed. Nothing is returned.
    """

    method = 'tepitope'  # alternatives tried: 'iedbmhc1', 'netmhciipan'
    path = 'test'
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    names = ['VP24']
    # not used by the call below; only alleles2 is passed on
    alleles1 = ["HLA-A*02:02", "HLA-A*11:01", "HLA-A*32:07",
                "HLA-B*15:17", "HLA-B*51:01", "HLA-C*04:01",
                "HLA-E*01:03"]
    alleles2 = ["HLA-DRB1*0101", "HLA-DRB1*0305", "HLA-DRB1*0812",
                "HLA-DRB1*1196", "HLA-DRB1*1346", "HLA-DRB1*1455",
                "HLA-DRB1*1457", "HLA-DRB1*1612", "HLA-DRB4*0107",
                "HLA-DRB5*0203"]

    predictor = base.getPredictor(method)
    predictor.iedbmethod = 'IEDB_recommended'  # alternative: 'netmhcpan'
    predictor.predictProteins(proteins, length=11, alleles=alleles2,
                              names=names, save=True, path=path)

    # reload what was just written and hand it back to the predictor
    saved = os.path.join('test', names[0] + '.mpk')
    predictor.data = pd.read_msgpack(saved)
    base.getScoreDistributions(method, path)
예제 #3
0
def genomeAnalysis(datadir,label,gname,method):
    """Summarise binders per protein for one genome.

    Binders are read from ``<datadir>/<label>/<gname>/<method>``, aggregated
    per protein name and merged with the CDS annotation parsed from
    ``<genomespath>/<gname>.gb``.

    Args:
        datadir: root directory that holds the saved predictions.
        label: first sub-directory below ``datadir``.
        gname: genome name; also the stem of the genbank file.
        method: predictor name passed to ``base.getPredictor``.

    Returns:
        A DataFrame with one row per protein: aggregated score statistics,
        the annotation columns ``locus_tag``, ``length``, ``gene``,
        ``product`` and ``order``, a ``perc`` column and, when clusters were
        found, a ``density`` column. Rows are sorted by ``perc`` descending.
        ``None`` is returned when ``getAllBinders`` yields nothing (it
        returns ``None`` for the 'bcell' method).

    Note:
        The function still depends on the module-level ``genomespath``; it
        should be made independent of web app paths.
    """

    path = os.path.join(datadir, '%s/%s/%s' % (label, gname, method))
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    g = sequtils.genbank2Dataframe(gfile, cds=True)
    b = getAllBinders(path, method=method, n=5)
    if b is None:
        # getAllBinders has no binders to offer (e.g. method == 'bcell');
        # bail out instead of failing on b.groupby below.
        return None
    P = base.getPredictor(method)
    # NOTE(review): DataFrame.sort was removed in pandas 0.20 (use
    # sort_index / sort_values there); kept as-is because the pandas version
    # this project targets is not visible from here.
    res = b.groupby('name').agg({P.scorekey: [np.mean, np.size, np.max]}).sort()
    # drop the score-key level so columns are named after the aggregations
    res.columns = res.columns.get_level_values(1)
    res = res.merge(g[['locus_tag', 'length', 'gene', 'product', 'order']],
                    left_index=True, right_on='locus_tag')
    # number of binders relative to the protein length, as a percentage
    res['perc'] = res['size'] / res.length * 100
    res = res.sort('perc', ascending=False)

    cl = findClusters(b, method, dist=9, minsize=3)
    if cl is not None:
        # attach the highest cluster density found for each protein
        gc = cl.groupby('name').agg({'density': np.max})
        res = res.merge(gc, left_on='locus_tag', right_index=True)

    return res
예제 #4
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testLoad(self):
        """Read saved predictions from the test directory into a predictor."""

        saved = os.path.join(self.testdir, 'ZEBOVgp1.mpk')
        loaded = pd.read_msgpack(saved)
        predictor = base.getPredictor('iedbmhc1')
        # the test only checks that loading and assigning does not raise
        predictor.data = loaded
예제 #5
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testBcell(self):
        """Run the 'bcell' predictor with the Chou-Fasman method on VP24."""

        proteins = self.df
        predictor = base.getPredictor('bcell')
        predictor.iedbmethod = 'Chou-Fasman'
        # results are saved into the test directory
        predictor.predictProteins(proteins, names=['VP24'], save=True,
                                  path=self.testdir)
예제 #6
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testFasta(self):
        """Run tepitope predictions on proteins loaded from a fasta file."""

        proteins = sequtils.fasta2Dataframe('testing/zaire-ebolavirus.faa')
        predictor = base.getPredictor('tepitope')
        # single allele; results are saved into the test directory
        predictor.predictProteins(proteins, length=11,
                                  alleles=["HLA-DRB1*0101"],
                                  save=True, path=self.testdir)
예제 #7
0
def testBcell(gname):
    """Run the 'bcell' predictor (Chou-Fasman) on VP24 of genome *gname*.

    The genbank file ``<genomespath>/<gname>.gb`` is parsed, predictions are
    saved under the local ``test`` directory and the predictor's data is
    printed. Nothing is returned.
    """
    path = 'test'
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    df = sequtils.genbank2Dataframe(gfile, cds=True)
    names = ['VP24']
    P = base.getPredictor('bcell')
    P.iedbmethod = 'Chou-Fasman'
    P.predictProteins(df, names=names, save=True, path=path)
    # parenthesised single-argument form prints the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3
    print(P.data)
예제 #8
0
def testBcell(gname):
    """Run the 'bcell' predictor (Chou-Fasman) on VP24 of genome *gname*.

    The genbank file ``<genomespath>/<gname>.gb`` is parsed, predictions are
    saved under the local ``test`` directory and the predictor's data is
    printed. Nothing is returned.
    """
    path = 'test'
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    df = sequtils.genbank2Dataframe(gfile, cds=True)
    names = ['VP24']
    P = base.getPredictor('bcell')
    P.iedbmethod = 'Chou-Fasman'
    P.predictProteins(df, names=names, save=True, path=path)
    # parenthesised single-argument form prints the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3
    print(P.data)
예제 #9
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testTepitope(self):
        """Run tepitope predictions for two alleles and extract binders."""

        df = self.df
        P = base.getPredictor('tepitope')
        alleles = ["HLA-DRB1*0101", "HLA-DRB1*0305"]
        # parenthesised form works on both Python 2 and 3; the former print
        # statement was a SyntaxError on Python 3
        print(P)
        P.predictProteins(df, length=11, alleles=alleles,
                          save=True, path=self.testdir)
        # only exercised for errors; the returned binders are not checked
        P.getBinders(data=P.data)
예제 #10
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testIEDB(self):
        """Run 'iedbmhc1' predictions for a set of HLA alleles."""

        df = self.df
        P = base.getPredictor('iedbmhc1')
        # parenthesised form works on both Python 2 and 3; the former print
        # statement was a SyntaxError on Python 3
        print(P)
        alleles = ["HLA-A*02:02", "HLA-A*11:01",
                   "HLA-B*15:17", "HLA-B*51:01",
                   "HLA-C*04:01", "HLA-E*01:03"]
        # results are saved into the test directory
        P.predictProteins(df, length=11, alleles=alleles,
                          save=True, path=self.testdir)
예제 #11
0
파일: tests.py 프로젝트: tazjel/epitopemap
    def testnetMHCIIpan(self):
        """Run 'netmhciipan' predictions for one protein and extract binders.

        Requires netMHCIIpan to be installed.
        """

        df = self.df
        P = base.getPredictor('netmhciipan')
        alleles = ["HLA-DRB1*0101"]
        names = ['ZEBOVgp1']
        # parenthesised form works on both Python 2 and 3; the former print
        # statement was a SyntaxError on Python 3
        print(P)
        P.predictProteins(df, length=11, alleles=alleles, names=names,
                          save=True, path=self.testdir)
        # only exercised for errors; the returned binders are not checked
        P.getBinders(data=P.data)
예제 #12
0
def getPredictions(path, tag, method='tepitope', q=0.96):
    """Load saved predictions from the file system into a predictor.

    Args:
        path: directory that contains the saved ``.mpk`` files.
        tag: file stem; ``<tag>.mpk`` is looked up inside ``path``.
        method: predictor name passed to ``base.getPredictor``.
        q: value rounded to two decimals and passed on to ``getCutoffs``.

    Returns:
        The predictor created from the loaded data, with its
        ``allelecutoffs`` attribute set from ``getCutoffs``, or ``None`` when
        ``<path>/<tag>.mpk`` does not exist.
    """

    q = round(q, 2)
    filename = os.path.join(path, tag + '.mpk')
    if not os.path.exists(filename):
        # nothing saved for this tag
        return None
    df = pd.read_msgpack(filename)
    pred = base.getPredictor(name=method, data=df)
    pred.allelecutoffs = getCutoffs(path, method, q)
    return pred
예제 #13
0
def getPredictions(path,tag,method='tepitope',q=0.96):
    """Load saved predictions from the file system into a predictor.

    Args:
        path: directory that contains the saved ``.mpk`` files.
        tag: file stem; ``<tag>.mpk`` is looked up inside ``path``.
        method: predictor name passed to ``base.getPredictor``.
        q: value rounded to two decimals and passed on to ``getCutoffs``.

    Returns:
        The predictor created from the loaded data, with its
        ``allelecutoffs`` attribute set from ``getCutoffs``, or ``None`` when
        ``<path>/<tag>.mpk`` does not exist.
    """

    q = round(q, 2)
    filename = os.path.join(path, tag + '.mpk')
    if not os.path.exists(filename):
        # nothing saved for this tag
        return None
    df = pd.read_msgpack(filename)
    pred = base.getPredictor(name=method, data=df)
    pred.allelecutoffs = getCutoffs(path, method, q)
    return pred
예제 #14
0
def testrun(gname):
    """Run a trial prediction for the 'VP24' protein of genome *gname*.

    Predictions are saved under the local ``test`` directory, the saved
    file is read back into the predictor, and the score distributions for
    that directory are then computed. Nothing is returned.
    """

    method = 'tepitope'  # alternatives tried: 'iedbmhc1', 'netmhciipan'
    path = 'test'
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    names = ['VP24']
    # not used by the call below; only alleles2 is passed on
    alleles1 = ["HLA-A*02:02", "HLA-A*11:01", "HLA-A*32:07",
                "HLA-B*15:17", "HLA-B*51:01", "HLA-C*04:01",
                "HLA-E*01:03"]
    alleles2 = ["HLA-DRB1*0101", "HLA-DRB1*0305", "HLA-DRB1*0812",
                "HLA-DRB1*1196", "HLA-DRB1*1346", "HLA-DRB1*1455",
                "HLA-DRB1*1457", "HLA-DRB1*1612", "HLA-DRB4*0107",
                "HLA-DRB5*0203"]

    predictor = base.getPredictor(method)
    predictor.iedbmethod = 'IEDB_recommended'  # alternative: 'netmhcpan'
    predictor.predictProteins(proteins, length=11, alleles=alleles2,
                              names=names, save=True, path=path)

    # reload what was just written and hand it back to the predictor
    saved = os.path.join('test', names[0] + '.mpk')
    predictor.data = pd.read_msgpack(saved)
    base.getScoreDistributions(method, path)
예제 #15
0
def getAllBinders(path, method='tepitope', n=3, cutoff=0.95, promiscuous=True):
    """Collect binders from every saved prediction file in *path*.

    Args:
        path: directory searched for ``*.mpk`` prediction files.
        method: predictor name passed to ``base.getPredictor``.
        n: forwarded to ``getPromiscuousBinders``; unused when
            ``promiscuous`` is not true.
        cutoff: forwarded to ``getCutoffs`` to obtain the allele specific
            cutoffs (recomputed with ``overwrite=True``).
        promiscuous: when equal to ``True`` use ``getPromiscuousBinders``,
            otherwise ``getBinders``.

    Returns:
        A DataFrame concatenating the binders of all files, with ``start``
        (copy of ``pos``) and ``end`` (``pos`` plus peptide length) columns
        added, or ``None`` when ``method`` is 'bcell' (not applicable).

    Raises:
        ValueError: from ``pd.concat`` when no ``.mpk`` file is found in
            ``path`` (there is nothing to concatenate).
    """

    # parenthesised single-argument form prints the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3
    print('getting binders..')
    if method == 'bcell':
        return None  # not applicable
    P = base.getPredictor(method)
    files = glob.glob(os.path.join(path, '*.mpk'))
    # get allele specific cutoffs
    P.allelecutoffs = getCutoffs(path, method, cutoff, overwrite=True)
    binders = []
    for f in files:
        df = pd.read_msgpack(f)
        # kept as an equality test so non-bool arguments behave as before
        if promiscuous == True:
            b = P.getPromiscuousBinders(data=df, n=n)
        else:
            b = P.getBinders(data=df)
        binders.append(b)
    result = pd.concat(binders)
    result['start'] = result.pos
    result['end'] = result.pos + result.peptide.str.len()
    return result
예제 #16
0
def getAllBinders(path, method='tepitope', n=3, cutoff=0.95, promiscuous=True):
    """Collect binders from every saved prediction file in *path*.

    Args:
        path: directory searched for ``*.mpk`` prediction files.
        method: predictor name passed to ``base.getPredictor``.
        n: forwarded to ``getPromiscuousBinders``; unused when
            ``promiscuous`` is not true.
        cutoff: forwarded to ``getCutoffs`` to obtain the allele specific
            cutoffs (recomputed with ``overwrite=True``).
        promiscuous: when equal to ``True`` use ``getPromiscuousBinders``,
            otherwise ``getBinders``.

    Returns:
        A DataFrame concatenating the binders of all files, with ``start``
        (copy of ``pos``) and ``end`` (``pos`` plus peptide length) columns
        added, or ``None`` when ``method`` is 'bcell' (not applicable).

    Raises:
        ValueError: from ``pd.concat`` when no ``.mpk`` file is found in
            ``path`` (there is nothing to concatenate).
    """

    # parenthesised single-argument form prints the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3
    print('getting binders..')
    if method == 'bcell':
        return None  # not applicable
    P = base.getPredictor(method)
    files = glob.glob(os.path.join(path, '*.mpk'))
    # get allele specific cutoffs
    P.allelecutoffs = getCutoffs(path, method, cutoff, overwrite=True)
    binders = []
    for f in files:
        df = pd.read_msgpack(f)
        # kept as an equality test so non-bool arguments behave as before
        if promiscuous == True:
            b = P.getPromiscuousBinders(data=df, n=n)
        else:
            b = P.getBinders(data=df)
        binders.append(b)
    result = pd.concat(binders)
    result['start'] = result.pos
    result['end'] = result.pos + result.peptide.str.len()
    return result