Example #1
0
    def crossvalidate_delta(self, folds):
        import utils
        cv_scheme = utils.crossValidationScheme(folds, self.nTrain)
        ldeltas = SP.arange(-3, -1.5, .01)
        Ss = []
        Us = []
        Uys = []
        UCs = []
        err = 0.0
        errs = []
        for ldelta in ldeltas:
            for test_set in cv_scheme:
                train_set = ~test_set
                K_sub = self.kernel[SP.ix_(train_set, train_set)]
                K_cross = self.kernel[SP.ix_(~train_set, train_set)]
                # print LA.inv((K_sub + SP.eye(train_set.sum())*self.delta))
                Core = SP.dot(
                    K_cross,
                    LA.inv((K_sub + SP.eye(train_set.sum()) * SP.exp(ldelta))))
                diff = self.yTrain[test_set] -\
                    SP.dot(Core, self.yTrain[train_set])
                err += (diff**2).sum() / diff.size
                S, U = LA.eigh(self.kernel[SP.ix_(train_set, train_set)])
                Ss.append(S)
                Us.append(U)
                Uys.append(SP.dot(U.T, self.yTrain[train_set]))
                UCs.append(SP.dot(U.T, SP.ones_like(self.yTrain[train_set])))
            errs.append(err / len(cv_scheme))
            err = 0.0

        nll_scores = []
        for ldelta in ldeltas:
            # print 'ldelta equals', ldelta
            score = 0.0
            for i in xrange(len(cv_scheme)):
                score += lmm_fast.nLLeval(ldelta, (Uys[i])[:, 0], UCs[i],
                                          Ss[i])
            nll_scores.append(score / len(cv_scheme))
        print 'best ldelta found ll', ldeltas[SP.argmin(nll_scores)]
        return ldeltas[SP.argmin(errs)]
Example #2
0
    def crossvalidate_delta(self, folds):
        import utils
        cv_scheme = utils.crossValidationScheme(folds, self.nTrain)
        ldeltas = SP.arange(-3, -1.5, .01)
        Ss = []
        Us = []
        Uys = []
        UCs = []
        err = 0.0
        errs = []
        for ldelta in ldeltas:
            for test_set in cv_scheme:
                train_set = ~test_set
                K_sub = self.kernel[SP.ix_(train_set, train_set)]
                K_cross = self.kernel[SP.ix_(~train_set, train_set)]
                # print LA.inv((K_sub + SP.eye(train_set.sum())*self.delta))
                Core = SP.dot(K_cross, LA.inv((K_sub + SP.eye(train_set.sum()) *
                                               SP.exp(ldelta))))
                diff = self.yTrain[test_set] -\
                    SP.dot(Core, self.yTrain[train_set])
                err += (diff**2).sum()/diff.size
                S, U = LA.eigh(self.kernel[SP.ix_(train_set, train_set)])
                Ss.append(S)
                Us.append(U)
                Uys.append(SP.dot(U.T, self.yTrain[train_set]))
                UCs.append(SP.dot(U.T, SP.ones_like(self.yTrain[train_set])))
            errs.append(err/len(cv_scheme))
            err = 0.0

        nll_scores = []
        for ldelta in ldeltas:
            # print 'ldelta equals', ldelta
            score = 0.0
            for i in xrange(len(cv_scheme)):
                score += lmm_fast.nLLeval(ldelta, (Uys[i])[:, 0], UCs[i], Ss[i])
            nll_scores.append(score/len(cv_scheme))
        print 'best ldelta found ll', ldeltas[SP.argmin(nll_scores)]
        return ldeltas[SP.argmin(errs)]
Example #3
0
# TODO: this issue is likely common to Cython code; other people have hit similar problems.
# Smoke test comparing the fast implementation (lmm_fast) against the
# reference implementation (lmm) on identical inputs.
_UX = SP.array(UX[:,0:2])  # contiguous copy of the first two columns of UX
_UY = SP.array(UY[:,0])    # contiguous copy of the first column of UY

if 1:
    # Both implementations should report (approximately) the same delta.
    print "testing delta opt"
    delta0 = lmm_fast.optdelta(_UY,_UX,S)
    delta1 = lmm.optdelta(_UY,_UX,S)
    print "%.2f versus %.2f" % (delta0,delta1)

if 1:
    # Evaluate the log marginal likelihood for every column of UX with
    # both implementations; lmm.nLLeval must agree with itself whether
    # it gets a view or a copy, and lmm_fast is compared relatively.
    print "testing eval on all SNPs"
    for i in xrange(UX.shape[1]):
        _UX = UX[:,i:i+1]
        _UY = SP.array(UY[:,0])
        lml0=lmm_fast.nLLeval(ldelta,_UY,_UX,S)
        lml1=lmm.nLLeval(ldelta,_UY,SP.array(_UX),S)
        lml2=lmm.nLLeval(ldelta,_UY,_UX,S)
        assert SP.absolute(lml1-lml2)<1E-10, 'outch'
        print "lml: %.2f delta lml (rel) : %.2f " % (lml1,(lml1-lml0)/SP.absolute(lml1))


if 0:
    # Disabled: timing comparison of association vs. interaction training.
    covariates = SP.ones([X.shape[0],1])

    t0=time.time()
    LOD0 = lmm.train_associations(X,Y,K,covariates)
    print "t1"
    t1=time.time()
    LOD1 = lmm.train_interactions(X,Y,K,X[:,0:1],covariates,refit_delta0_snp=False)
    print "t2"
Example #4
0
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    """Find the best binary split of X[noderange] under the mixed model.

    Enumerates, for every predictor column, each midpoint between two
    consecutive unique values within the node, scores the resulting
    split indicator as an extra fixed effect via the log-likelihood
    improvement over the covariate-only model, and refits the winning
    split to obtain the left/right node means.

    Parameters
    ----------
    X : predictor matrix (samples x features).
    Uy : rotated response U.T*y; 2-D, only column 0 is used.
    C : covariate matrix.
    S, U : eigenvalues / eigenvectors of the kernel.
    noderange : indices of the samples belonging to the current node.
    delta : variance-component ratio; evaluated at ldelta = log(delta).

    Returns
    -------
    (mBest, sBest, left_mean, right_mean, score_best)
        mBest is -1 and the means are None when the node predictors are
        homogeneous or no split beats the null model.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    # Unique values of each predictor, restricted to this node.
    levels = map(SP.unique, X[noderange].T)
    feature_map = []  # candidate index -> feature index
    s = []            # candidate index -> split threshold
    UXt = []          # rotated 0/1 split-indicator vectors
    cnt = 0
    for i in xrange(X.shape[1]):
        lev = levels[i]
        for j in xrange(lev.size - 1):
            # Threshold halfway between two consecutive unique values.
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T
    if UXt.size == 0:  # predictors are homogeneous within the node
        return mBest, sBest, left_mean, right_mean, score_best

    scores = -NP.ones(cnt) * float('inf')
    UC = SP.dot(U.T, C)
    ########################
    #finding the best split#
    ########################
    # Null model: covariates only; each candidate is scored by its
    # log-likelihood improvement over this baseline.
    score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
    for snp_cnt in SP.arange(cnt):
        UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
    ############################
    ###evaluate the new means###
    ############################
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        # Refit with the winning indicator to get ML effect sizes.
        UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
        _, beta, _ = lmm_fast.nLLeval(ldelta,
                                      Uy[:, 0],
                                      UX,
                                      S,
                                      MLparams=True)
        mBest = feature_map[kBest]
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
        C_new = SP.hstack((CX, C))
        # TODO(review): confirm this is the correct way to form the means.
        mean = SP.dot(C_new, beta.reshape(beta.size, -1))
        left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
        right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
Example #5
0
def estimate_bias(Uy, U, S, ldelta):
    """Fit an intercept-only mixed model and return the bias estimate."""
    intercept = SP.ones_like(Uy)
    rotated_intercept = SP.dot(U.T, intercept)
    _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], rotated_intercept, S,
                                  MLparams=True)
    return beta[0]
Example #6
0
# TODO: this issue is likely common to Cython code; other people have hit similar problems.
# Smoke test comparing the fast implementation (lmm_fast) against the
# reference implementation (lmm) on identical inputs.
_UX = SP.array(UX[:, 0:2])  # contiguous copy of the first two columns of UX
_UY = SP.array(UY[:, 0])    # contiguous copy of the first column of UY

if 1:
    # Both implementations should report (approximately) the same delta.
    print "testing delta opt"
    delta0 = lmm_fast.optdelta(_UY, _UX, S)
    delta1 = lmm.optdelta(_UY, _UX, S)
    print "%.2f versus %.2f" % (delta0, delta1)

if 1:
    # Evaluate the log marginal likelihood for every column of UX with
    # both implementations; lmm.nLLeval must agree with itself whether
    # it gets a view or a copy, and lmm_fast is compared relatively.
    print "testing eval on all SNPs"
    for i in xrange(UX.shape[1]):
        _UX = UX[:, i:i + 1]
        _UY = SP.array(UY[:, 0])
        lml0 = lmm_fast.nLLeval(ldelta, _UY, _UX, S)
        lml1 = lmm.nLLeval(ldelta, _UY, SP.array(_UX), S)
        lml2 = lmm.nLLeval(ldelta, _UY, _UX, S)
        assert SP.absolute(lml1 - lml2) < 1E-10, 'outch'
        print "lml: %.2f delta lml (rel) : %.2f " % (lml1, (lml1 - lml0) /
                                                     SP.absolute(lml1))

if 0:
    # Disabled: timing comparison of association vs. interaction training.
    covariates = SP.ones([X.shape[0], 1])

    t0 = time.time()
    LOD0 = lmm.train_associations(X, Y, K, covariates)
    print "t1"
    t1 = time.time()
    LOD1 = lmm.train_interactions(X,
                                  Y,
Example #7
0
def best_split_full_model(X,
                          Uy,
                          C,
                          S,
                          U,
                          noderange,
                          delta):
    """Find the best binary split of X[noderange] under the mixed model.

    Every midpoint between consecutive unique values of each predictor
    column (within the node) is scored as an extra fixed effect by its
    log-likelihood improvement over the covariate-only null model; the
    winning split is refit to obtain the left/right node means.

    Parameters
    ----------
    X : predictor matrix (samples x features).
    Uy : rotated response U.T*y; 2-D, only column 0 is used.
    C : covariate matrix.
    S, U : eigenvalues / eigenvectors of the kernel.
    noderange : indices of the samples in the current node.
    delta : variance-component ratio; evaluated at ldelta = log(delta).

    Returns
    -------
    (mBest, sBest, left_mean, right_mean, score_best)
        mBest is -1 and the means are None when the node predictors are
        homogeneous or no split beats the null model.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    # Unique values of each predictor, restricted to this node.
    levels = map(SP.unique, X[noderange].T)
    feature_map = []  # candidate index -> feature index
    s = []            # candidate index -> split threshold
    UXt = []          # rotated 0/1 split-indicator vectors
    cnt = 0
    for i in xrange(X.shape[1]):
        lev = levels[i]
        for j in xrange(lev.size-1):
            # Threshold halfway between two consecutive unique values.
            split_point = SP.median(lev[j:j+2])
            x = SP.int_(X[noderange,i] > split_point)
            UXt.append(SP.dot(U.T[:,noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T
    if UXt.size == 0: #predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best

    # test all transformed predictors
    scores = -NP.ones(cnt)*float('inf')
    UC = SP.dot(U.T,C)
    ########################
    #finding the best split#
    ########################
    # Null model: covariates only; candidates are scored by their
    # improvement over this baseline.
    score_0 = lmm_fast.nLLeval(ldelta,Uy[:,0],UC,S)
    for snp_cnt in SP.arange(cnt):
        UX=SP.hstack((UXt[:,snp_cnt:snp_cnt+1], UC))
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta,Uy[:,0],UX,S)
    ############################
    ###evaluate the new means###
    ############################
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        # Refit with the winning indicator to get ML effect sizes.
        UX=SP.hstack((UXt[:,kBest:kBest+1], UC))
        _, beta,_ = lmm_fast.nLLeval(ldelta, Uy[:,0], UX, S, MLparams=True)
        mBest = feature_map[kBest]
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange,mBest:mBest+1] > sBest)
        C_new = SP.hstack((CX,C))
        # TODO(review): confirm this is the correct way to form the means.
        mean = SP.dot(C_new,beta.reshape(beta.size, -1))
        left_mean = ((mean[noderange])[CX[noderange]==0])[0]
        right_mean = ((mean[noderange])[CX[noderange]==1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
Example #8
0
def estimate_bias(Uy, U, S, ldelta):
    """Estimate the bias by fitting an intercept-only mixed model."""
    rotated_ones = SP.dot(U.T, SP.ones_like(Uy))
    fit = lmm_fast.nLLeval(ldelta, Uy[:,0], rotated_ones, S, MLparams=True)
    beta = fit[1]
    return beta[0]