def crossvalidate_delta(self, folds): import utils cv_scheme = utils.crossValidationScheme(folds, self.nTrain) ldeltas = SP.arange(-3, -1.5, .01) Ss = [] Us = [] Uys = [] UCs = [] err = 0.0 errs = [] for ldelta in ldeltas: for test_set in cv_scheme: train_set = ~test_set K_sub = self.kernel[SP.ix_(train_set, train_set)] K_cross = self.kernel[SP.ix_(~train_set, train_set)] # print LA.inv((K_sub + SP.eye(train_set.sum())*self.delta)) Core = SP.dot( K_cross, LA.inv((K_sub + SP.eye(train_set.sum()) * SP.exp(ldelta)))) diff = self.yTrain[test_set] -\ SP.dot(Core, self.yTrain[train_set]) err += (diff**2).sum() / diff.size S, U = LA.eigh(self.kernel[SP.ix_(train_set, train_set)]) Ss.append(S) Us.append(U) Uys.append(SP.dot(U.T, self.yTrain[train_set])) UCs.append(SP.dot(U.T, SP.ones_like(self.yTrain[train_set]))) errs.append(err / len(cv_scheme)) err = 0.0 nll_scores = [] for ldelta in ldeltas: # print 'ldelta equals', ldelta score = 0.0 for i in xrange(len(cv_scheme)): score += lmm_fast.nLLeval(ldelta, (Uys[i])[:, 0], UCs[i], Ss[i]) nll_scores.append(score / len(cv_scheme)) print 'best ldelta found ll', ldeltas[SP.argmin(nll_scores)] return ldeltas[SP.argmin(errs)]
def crossvalidate_delta(self, folds): import utils cv_scheme = utils.crossValidationScheme(folds, self.nTrain) ldeltas = SP.arange(-3, -1.5, .01) Ss = [] Us = [] Uys = [] UCs = [] err = 0.0 errs = [] for ldelta in ldeltas: for test_set in cv_scheme: train_set = ~test_set K_sub = self.kernel[SP.ix_(train_set, train_set)] K_cross = self.kernel[SP.ix_(~train_set, train_set)] # print LA.inv((K_sub + SP.eye(train_set.sum())*self.delta)) Core = SP.dot(K_cross, LA.inv((K_sub + SP.eye(train_set.sum()) * SP.exp(ldelta)))) diff = self.yTrain[test_set] -\ SP.dot(Core, self.yTrain[train_set]) err += (diff**2).sum()/diff.size S, U = LA.eigh(self.kernel[SP.ix_(train_set, train_set)]) Ss.append(S) Us.append(U) Uys.append(SP.dot(U.T, self.yTrain[train_set])) UCs.append(SP.dot(U.T, SP.ones_like(self.yTrain[train_set]))) errs.append(err/len(cv_scheme)) err = 0.0 nll_scores = [] for ldelta in ldeltas: # print 'ldelta equals', ldelta score = 0.0 for i in xrange(len(cv_scheme)): score += lmm_fast.nLLeval(ldelta, (Uys[i])[:, 0], UCs[i], Ss[i]) nll_scores.append(score/len(cv_scheme)) print 'best ldelta found ll', ldeltas[SP.argmin(nll_scores)] return ldeltas[SP.argmin(errs)]
#TODO: I am sure this issues is common to cython stuff and other people have similar problems _UX = SP.array(UX[:,0:2]) _UY = SP.array(UY[:,0]) if 1: print "testing delta opt" delta0 = lmm_fast.optdelta(_UY,_UX,S) delta1 = lmm.optdelta(_UY,_UX,S) print "%.2f versus %.2f" % (delta0,delta1) if 1: print "testing eval on all SNPs" for i in xrange(UX.shape[1]): _UX = UX[:,i:i+1] _UY = SP.array(UY[:,0]) lml0=lmm_fast.nLLeval(ldelta,_UY,_UX,S) lml1=lmm.nLLeval(ldelta,_UY,SP.array(_UX),S) lml2=lmm.nLLeval(ldelta,_UY,_UX,S) assert SP.absolute(lml1-lml2)<1E-10, 'outch' print "lml: %.2f delta lml (rel) : %.2f " % (lml1,(lml1-lml0)/SP.absolute(lml1)) if 0: covariates = SP.ones([X.shape[0],1]) t0=time.time() LOD0 = lmm.train_associations(X,Y,K,covariates) print "t1" t1=time.time() LOD1 = lmm.train_interactions(X,Y,K,X[:,0:1],covariates,refit_delta0_snp=False) print "t2"
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    """Find the best binary split of the samples in `noderange`.

    Every midpoint between consecutive observed levels of every feature
    is considered as a candidate split.  Each candidate indicator is
    rotated into the eigenbasis of the kernel (via U) and scored by the
    gain in LMM log-likelihood over the covariates-only model.

    Parameters
    ----------
    X : predictor matrix (samples x features)
    Uy : rotated response U.T*y (column vector)
    C : current fixed-effect / covariate matrix
    S, U : eigenvalues and eigenvectors of the kernel
    noderange : indices of the samples belonging to this node
    delta : variance ratio; its log is passed to the LMM evaluator

    Returns
    -------
    (mBest, sBest, left_mean, right_mean, score_best)
        Best feature index (-1 if no split), split threshold, predicted
        means of the two children (None if no positive-gain split), and
        the likelihood gain of the best candidate.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    levels = map(SP.unique, X[noderange].T)
    feature_map = []
    s = []
    UXt = []
    cnt = 0
    # Enumerate candidate splits: midpoints between consecutive unique
    # values of each feature within this node.
    for i in xrange(X.shape[1]):
        lev = levels[i]
        for j in xrange(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T
    if UXt.size == 0:
        # predictors are homogeneous: no split possible
        return mBest, sBest, left_mean, right_mean, score_best
    # Score every candidate against the covariates-only null model.
    # (was -NP.ones(...): use the SP alias consistently with the rest
    # of this function)
    scores = -SP.ones(cnt) * float('inf')
    UC = SP.dot(U.T, C)
    score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
    for snp_cnt in SP.arange(cnt):
        UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
        # gain = nLL(covariates only) - nLL(covariates + candidate)
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        # Refit with the winning indicator to get the ML effect sizes,
        # then derive the children's predicted means.  (The original
        # redundantly reassigned sBest/score_best here.)
        UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
        _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S,
                                      MLparams=True)
        mBest = feature_map[kBest]
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
        C_new = SP.hstack((CX, C))
        mean = SP.dot(C_new, beta.reshape(beta.size, -1))
        #TODO:is this the correct way?
        left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
        right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
def estimate_bias(Uy, U, S, ldelta):
    """Estimate the intercept (bias) term of the mixed model.

    Rotates a constant covariate into the kernel eigenbasis and returns
    the first ML fixed-effect weight fitted by lmm_fast.nLLeval.
    """
    ones_rotated = SP.dot(U.T, SP.ones_like(Uy))
    result = lmm_fast.nLLeval(ldelta, Uy[:, 0], ones_rotated, S,
                              MLparams=True)
    beta = result[1]
    return beta[0]
#TODO: I am sure this issues is common to cython stuff and other people have similar problems _UX = SP.array(UX[:, 0:2]) _UY = SP.array(UY[:, 0]) if 1: print "testing delta opt" delta0 = lmm_fast.optdelta(_UY, _UX, S) delta1 = lmm.optdelta(_UY, _UX, S) print "%.2f versus %.2f" % (delta0, delta1) if 1: print "testing eval on all SNPs" for i in xrange(UX.shape[1]): _UX = UX[:, i:i + 1] _UY = SP.array(UY[:, 0]) lml0 = lmm_fast.nLLeval(ldelta, _UY, _UX, S) lml1 = lmm.nLLeval(ldelta, _UY, SP.array(_UX), S) lml2 = lmm.nLLeval(ldelta, _UY, _UX, S) assert SP.absolute(lml1 - lml2) < 1E-10, 'outch' print "lml: %.2f delta lml (rel) : %.2f " % (lml1, (lml1 - lml0) / SP.absolute(lml1)) if 0: covariates = SP.ones([X.shape[0], 1]) t0 = time.time() LOD0 = lmm.train_associations(X, Y, K, covariates) print "t1" t1 = time.time() LOD1 = lmm.train_interactions(X, Y,
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    """Select the best binary split of the samples in `noderange`.

    Candidate splits are the midpoints between consecutive observed
    levels of each feature.  Each candidate indicator is rotated into
    the kernel eigenbasis (via U) and scored by the improvement in LMM
    log-likelihood over the covariates-only model.

    Parameters
    ----------
    X : predictor matrix (samples x features)
    Uy : rotated response U.T*y (column vector)
    C : current fixed-effect / covariate matrix
    S, U : eigenvalues and eigenvectors of the kernel
    noderange : indices of the samples in this node
    delta : variance ratio; its log is passed to the LMM evaluator

    Returns
    -------
    (mBest, sBest, left_mean, right_mean, score_best)
        Best feature index (-1 if no split), split threshold, predicted
        means of the two children (None when no positive-gain split),
        and the likelihood gain of the best candidate.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    levels = map(SP.unique, X[noderange].T)
    feature_map = []
    s = []
    UXt = []
    cnt = 0
    # Build every candidate split indicator, rotated into the eigenbasis.
    for i in xrange(X.shape[1]):
        lev = levels[i]
        for j in xrange(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T
    if UXt.size == 0:
        # predictors are homogeneous: nothing to split on
        return mBest, sBest, left_mean, right_mean, score_best
    # Score every candidate against the covariates-only null model.
    # (was -NP.ones(...): use the SP alias consistently with the rest
    # of this function)
    scores = -SP.ones(cnt) * float('inf')
    UC = SP.dot(U.T, C)
    score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
    for snp_cnt in SP.arange(cnt):
        UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
        # gain = nLL(covariates only) - nLL(covariates + candidate)
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        # Refit with the winning indicator to recover ML effect sizes,
        # then compute the children's predicted means.  (The original
        # redundantly reassigned sBest/score_best here.)
        UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
        _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S,
                                      MLparams=True)
        mBest = feature_map[kBest]
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
        C_new = SP.hstack((CX, C))
        mean = SP.dot(C_new, beta.reshape(beta.size, -1))
        #TODO:is this the correct way?
        left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
        right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
def estimate_bias(Uy, U, S, ldelta):
    """Return the ML estimate of the model intercept.

    A column of ones (the bias covariate) is rotated into the kernel
    eigenbasis; the first fitted fixed-effect weight is the bias.
    """
    rotated_ones = SP.dot(U.T, SP.ones_like(Uy))
    fit = lmm_fast.nLLeval(ldelta, Uy[:, 0], rotated_ones, S,
                           MLparams=True)
    return fit[1][0]