예제 #1
0
파일: btol.py 프로젝트: bh0085/compbio
  def makeRank(self, rank = 'phylum', subtree = None):
    """Tag each terminal node with the id of its ancestor at a taxonomic rank.

    For every terminal of the tree whose metadata mapping ``m`` contains a
    'taxid' entry, the node with that id is looked up in the 'taxdmp' db and
    its chain of ``parent`` links is followed until a node contained in
    ``ncbi.get_rank(rank)`` is found. That node's id is stored in
    ``terminal.m[rank]``. None is stored instead when the lookup yields
    nothing, or when the walk hits ``ncbi.get_root()`` or a missing parent
    before finding a node of the requested rank.

    Args:
      rank: rank name handed to ``ncbi.get_rank``; also the key written
        into each terminal's ``m``.
      subtree: tree to annotate. When None, ``self.t`` is used.

    Returns:
      None. Results are written into the terminals' ``m`` mappings.
    """
    # Get the subtree and db connection to build meta for
    tree = subtree if subtree is not None else self.t
    dbi = cbdb.getName('taxdmp')

    print('Fetching taxonomic nodes from the db')
    # Terminal nodes carrying a taxid, and the db node matching each of them
    # (presumably None when the db has no row for that taxid -- TODO confirm)
    terms = [t for t in tree.get_terminals() if 'taxid' in t.m]
    nodes = [dbi.S.q(dbi.Node).filter_by(id = t.m['taxid']).scalar()
             for t in terms]

    # Endpoints for the parental iteration
    taxa = ncbi.get_rank(rank)
    root = ncbi.get_root()

    print('Computing terminal node mappings for taxon: {0}'.format(rank))
    bar = pbar.simple(len(nodes))
    bar.start()

    for idx, node in enumerate(nodes):
      bar.update(idx)
      # Walk towards the root. The starting node itself is tested first, so
      # a terminal that is already at the requested rank (or is the root)
      # is handled here instead of failing on an empty list of ancestors.
      match = None
      while node is not None:
        if node in taxa:
          match = node
          break
        if node == root:
          break
        node = node.parent
      terms[idx].m[rank] = match.id if match is not None else None
    bar.finish()
    print('Done!')
예제 #2
0
파일: learner.py 프로젝트: bh0085/compbio
    def testParams(self, model_class, prediction="test", res=10, dim=1):
        """Train and score ``model_class`` at every point of a parameter grid.

        The grid has one axis per entry of ``res``. For each grid point a
        model is built with ``model_class(params=t, pdict=...)``, installed
        with ``self.setModel``, trained with ``self.learn()`` and then scored
        against either the training or the test data.

        Args:
            model_class: callable accepting ``params`` and ``pdict`` keyword
                arguments. ``params`` is a tuple holding one
                ``(index, resolution)`` pair per grid axis; ``pdict`` is the
                dict reserved for that grid point.
            prediction: "training" to score with ``self.xyTrain`` /
                ``self.predictTraining``; any other value uses
                ``self.xyTest`` / ``self.predictTest``.
            res: grid resolution. A scalar is repeated ``dim`` times; a
                sequence gives the resolution of each axis.
            dim: number of grid axes, used only when ``res`` is a scalar.

        Returns:
            dict with the keys
            "pdicts" (array of per-point dicts, shape ``res``),
            "test_rms" (std of ``ytest - ypred`` per point, shape ``res``),
            "test_preds" (predictions per point, shape ``res + (ntest,)``),
            "actual_preds" (``ytest`` from the last evaluated point, or None
            when the grid is empty).
        """
        # set up the grid of prediction parameters; res always ends up a
        # tuple so it can be concatenated with (ntest,) below
        if len(shape(res)) == 0:
            res = (res,) * dim
        else:
            res = tuple(res)
        test_vals = list(it.product(*[[(x, r) for x in arange(r)] for r in res]))

        # shall we predict holdout or training set?
        if prediction == "training":
            xyfun = self.xyTrain
            predictfun = self.predictTraining
        else:
            xyfun = self.xyTest
            predictfun = self.predictTest

        # set initial values for output variables
        ntest = len(xyfun()[1][0])
        rms = zeros(res)
        # one independent dict per grid point, held in an object array
        pdicts = reshape(array([{} for _ in range(product(res))]), res)
        test_preds = zeros(res + (ntest,))

        # test the learning method for each parameter
        # NOTE(review): bar.start() is never called here -- confirm that
        # pbar.simple does not require it before update().
        bar = pbar.simple(len(test_vals))
        ytest = None  # stays None when the grid has no points
        for ct, t in enumerate(test_vals, 1):
            bar.update(ct)
            # A plain tuple of ints addresses exactly one grid cell. The
            # previous list-of-1-tuples index relied on Python 2 zip() and
            # on NumPy treating a list as a tuple index.
            idxs = tuple(x[0] for x in t)

            self.setModel(model_class(params=t, pdict=pdicts[idxs]))
            self.learn()
            _xtest, ytest = xyfun()
            ypred = predictfun()
            rms[idxs] = std(ytest - ypred)
            test_preds[idxs] = ypred
        bar.finish()

        # create a dictionary of all of the output variables
        out = {
            "pdicts": pdicts,
            "test_rms": rms,
            "test_preds": test_preds,
            "actual_preds": ytest,
        }
        print(ytest)
        return out