def lasso(X, Y, w0, Xteste, Yteste, Xdev, Ydev, max_iter=2000, lambdas=None):
    """Fit lasso weights for a grid of lambdas and keep the best on the dev set.

    For every lambda the weights are updated by repeatedly taking a gradient
    step (``it.GetGradL`` / ``it.getU``) followed by soft-thresholding
    (``it.softt``).  From the second iteration on, the step size is
    ``sigma / alpha`` with ``alpha`` supplied by ``it.get_step``, and the
    step is halved (at most 10 times per outer iteration) until the cost
    returned by ``it.get_func_lasso`` decreases sufficiently.  The commented
    code this replaces mentioned "sparsa", so this is presumably a
    SpaRSA-style solver -- confirm against the ``it`` module.

    After each lambda the dev and test errors are computed with
    ``RRegression_beta.erro`` and the percentage of exactly-zero weights is
    printed.  Four diagnostic plots are shown before returning.

    NOTE(review): this file contains a second ``def lasso`` with a different
    signature; if both live at module level the later one shadows this one.

    Args:
        X: design matrix; only used through ``X * w`` and the ``it`` helpers.
        Y: targets; subtracted from ``X * w`` to form the residual.
        w0: initial weights; must provide ``.todense()``.
        Xteste, Yteste: test data handed to ``RRegression_beta.erro``.
        Xdev, Ydev: dev data handed to ``RRegression_beta.erro``; the lambda
            with the smallest dev error wins.
        max_iter: iteration budget per lambda (shared between outer
            iterations and step-halving attempts).  Defaults to the
            previously hard-coded 2000.
        lambdas: iterable of regularisation strengths.  Defaults to the
            previously hard-coded ``10**0 .. 10**7``.

    Returns:
        Tuple ``(wfinal, yfinal, best_lambda)``: deep copies of the weights
        and final cost obtained for the lambda with the smallest dev error,
        and that lambda.

    Raises:
        ValueError: if ``max_iter < 1`` or ``lambdas`` is empty.
    """
    import matplotlib.pyplot as plt

    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")
    if lambdas is None:
        lambdas = [10 ** x for x in range(0, 8)]
    lambs = list(lambdas)
    if not lambs:
        raise ValueError("lambdas must contain at least one value")

    sigma = 1  # numerator of the adaptive step size: step_size = sigma / alpha
    sigma2 = 0.0000001  # weight of the decrease term in the acceptance test
    max_halvings = 10  # step-halving attempts allowed per outer iteration

    xg = [np.log10(x) for x in lambs]  # x axis of the per-lambda plots
    sparsity = []  # percentage of zero weights, one entry per lambda
    gmedia = []  # dev error, one entry per lambda
    gmediatest = []  # test error, one entry per lambda

    # None means "no lambda evaluated yet".  The previous code used 0 as the
    # sentinel, so a genuine dev error of 0 was overwritten by a worse lambda.
    best_dev = None
    best_lamb = None

    w0 = w0.todense()

    for lamb in lambs:
        print("A iniciar Iteracoes para lambda= %s" % (lamb,))
        w_old_old = w0
        w_old = w0
        yy = []  # cost after each outer iteration
        xx = []  # iteration counter matching each entry of yy
        i = 0
        while i < max_iter:
            # Residual at the current point; also serves as the "old" error
            # for it.get_step, so X * w_old is computed once per iteration.
            error = (X * w_old) - Y
            if i == 0:
                step_size = 0.0000001
            else:
                error_old_old = (X * w_old_old) - Y
                alpha = it.get_step(w_old, w_old_old, X, error,
                                    error_old_old, lamb)
                if alpha != 0:
                    step_size = sigma / alpha
                else:
                    # No usable step: take one last update with the previous
                    # step size, then leave the loop.
                    i = max_iter

            grad1 = it.GetGradL(error, X)
            U = it.getU(w_old, step_size, grad1)
            w_new = it.softt(U, step_size * lamb)
            dif = w_new - w_old
            dif = dif.transpose() * dif  # 1x1 squared norm of the update
            error = (X * w_new) - Y
            # NOTE(review): the weights are passed transposed here but
            # untransposed inside the halving loop below -- confirm that
            # it.get_func_lasso does not depend on the orientation.
            y_new = it.get_func_lasso(error, np.matrix(w_new).transpose(),
                                      lamb)

            if i != 0:
                w_temp = w_new
                y_temp = y_new
                count = 0
                while (y_new >= y_old - sigma2 * alpha * dif[0, 0]
                       and i < max_iter):
                    step_size = step_size / 2
                    U = it.getU(w_old, step_size, grad1)
                    w_new = it.softt(U, step_size * lamb)
                    error = (X * w_new) - Y
                    dif = w_new - w_old
                    dif = dif.transpose() * dif
                    y_new = it.get_func_lasso(error, w_new, lamb)
                    count = count + 1
                    i = i + 1
                    if count == max_halvings:
                        break
                else:
                    if i == max_iter:
                        # Budget exhausted: fall back to the first candidate.
                        # The cost is restored together with the weights so
                        # the recorded value belongs to the weights kept
                        # (previously only the weights were restored).
                        w_new = w_temp
                        y_new = y_temp

            i = i + 1
            y_old = y_new
            w_old_old = w_old
            w_old = w_new
            yy.append(y_new)
            xx.append(i)

        errod = RRegression_beta.erro(Xdev, Ydev, w_new)
        gmedia.append(errod)
        errot = RRegression_beta.erro(Xteste, Yteste, w_new)
        gmediatest.append(errot)
        if best_dev is None or best_dev > errod:
            best_dev = errod
            best_lamb = lamb
            graphFinal = deepcopy(yy)
            wfinal = deepcopy(w_new)
            yfinal = deepcopy(y_new)
            finalxx = deepcopy(xx)

        n_weights = w_new.shape[0]
        zero = float(sum(1 for J in range(n_weights) if w_new[J, 0] == 0))
        sp = (zero / n_weights) * 100
        print("percentagem: %s" % (sp,))
        sparsity.append(sp)

    plt.figure(1)
    plt.subplot(221)
    plt.title("Funcao de custo")
    # NOTE(review): 1.1959e15 is drawn as a horizontal reference line; its
    # origin is not visible here -- presumably a cost from another run.
    plt.plot(finalxx, graphFinal, "r", finalxx, [1.1959e15] * len(finalxx))
    plt.subplot(222)
    plt.title("Percentagem de W com valor =0")
    plt.plot(xg, sparsity, "b", xg, sparsity, "ro")
    plt.subplot(223)
    plt.title("Erro DEV ao longo dos lambdas")
    plt.plot(xg, gmedia, "b", xg, gmedia, "ro")
    plt.subplot(224)
    plt.title("Erro teste ao longo dos lambdas")
    plt.plot(xg, gmediatest, "b", xg, gmediatest, "ro")
    plt.show()
    return wfinal, yfinal, best_lamb
error=(X*w_old)-Y #print i if i==0: step_size=0.001 else: error_old=error=(X*w_old)-Y error_old_old=(X*w_old_old)-Y alpha=it.get_step(w_old,w_old_old,X,error_old,error_old_old,lamb) if alpha==0: print "**ERRO**" print "aplha=0, impossivel continuar o algorimto" break step_size=sigma/alpha error=(X*w_old)-Y grad1=it.GetGradL(error,X) U=it.getU(w_old,step_size,grad1).todense() w_new=list(itertools.imap(soft,U.tolist(),[step_size*lamb for x in xrange(len(U.tolist()))])) w_new=np.matrix(w_new) w_new=w_new.transpose() dif=w_new-w_old dif=dif.transpose()*dif w_new=sparse.csr_matrix(w_new) error=(X*w_new)-Y y_new=it.get_func(error,w_new,lamb) #funcao de erro count=0 if i!=0: while y_new>=y_old-sigma2*alpha*dif[0,0]: print "A diminuir step:",i step_size=step_size/10 U=it.getU(w_old,step_size,grad1).todense() w_new=list(itertools.imap(soft,U.tolist(),[step_size*lamb for x in xrange(len(U.tolist()))]))
def lasso(X, Y, w0, total, step_size=0.00000002, max_iter=1000, lambdas=None):
    """Fit lasso weights with a fixed step size for a grid of lambdas.

    ``Y`` is first replaced by its element-wise base-2 logarithm.  For each
    lambda, ``max_iter`` iterations are run; each takes a gradient step
    (``it.GetGradL`` / ``it.getU``) and applies the module-level ``soft``
    function to every row of the result with threshold ``step_size * lamb``.
    After each lambda the dev error is obtained from
    ``RRegression_beta.erro`` on ``"../le_ficheiro/dev.txt"`` and the
    percentage of exactly-zero weights is printed.  Two diagnostic plots are
    shown before returning.

    Side effect: ``it.soft`` is rebound to the module-level ``soft``.

    NOTE(review): the returned weights and cost belong to the LAST lambda
    processed, not to the lambda with the smallest dev error -- confirm that
    callers expect this.
    NOTE(review): this file contains another ``def lasso`` with a different
    signature; if both live at module level this later one shadows it.

    Args:
        X: design matrix; only used through ``X * w`` and the ``it`` helpers.
        Y: targets; must provide ``.todense()``.
        w0: initial weights, reused as the starting point for every lambda.
        total: passed through unchanged to ``RRegression_beta.erro``.
        step_size: fixed gradient step.  Defaults to the previously
            hard-coded 2e-8.
        max_iter: iterations per lambda.  Defaults to the previously
            hard-coded 1000.
        lambdas: iterable of regularisation strengths.  Defaults to the
            previously hard-coded ``10**-12 .. 10**-10``.

    Returns:
        Tuple ``(w_new, y_new)``: the dense weights and the cost of the final
        iteration for the last lambda.

    Raises:
        ValueError: if ``max_iter < 1`` or ``lambdas`` is empty.
    """
    import matplotlib.pyplot as plt

    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")
    if lambdas is None:
        lambdas = [10 ** x for x in range(-12, -9)]
    lambs = list(lambdas)
    if not lambs:
        raise ValueError("lambdas must contain at least one value")

    print("A fazer calculos...")
    Y = sparse.csr_matrix(np.log2(Y.todense()))
    it.soft = soft

    # None means "no lambda evaluated yet".  The previous code used 0 as the
    # sentinel, so a genuine dev error of 0 was overwritten by a worse lambda.
    best_dev = None
    sparsity = []  # percentage of zero weights, one entry per lambda

    for lamb in lambs:
        w_old = w0
        yy = []  # cost after each iteration
        xx = []  # iteration index matching each entry of yy
        print("calculos terminados, a fazer iterecoes")
        threshold = step_size * lamb  # constant within one lambda
        for i in range(max_iter):
            error = (X * w_old) - Y
            grad1 = it.GetGradL(error, X)
            U = it.getU(w_old, step_size, grad1).todense()
            w_new = np.matrix(
                [soft(row, threshold) for row in U.tolist()]
            ).transpose()
            error = (X * w_new) - Y
            w_sparse = sparse.csr_matrix(w_new)
            # Evaluate the cost at the new weights so the penalty term
            # matches the residual (previously the old weights were passed).
            y_new = it.get_func(error, w_sparse, lamb)
            w_old = w_sparse
            yy.append(y_new)
            xx.append(i)
        print("exceeded maximum number of iterations, leaving")

        media = RRegression_beta.erro("../le_ficheiro/dev.txt", w_new, total)
        print("%s %s" % (lamb, media))
        if best_dev is None or best_dev > media:
            best_dev = media
            graphFinal = yy

        n_weights = w_new.shape[0]
        zero = float(sum(1 for J in range(n_weights) if w_new[J, 0] == 0))
        sp = (zero / n_weights) * 100
        print("percentagem: %s" % (sp,))
        sparsity.append(sp)

    plt.figure(1)
    plt.subplot(211)
    plt.title("Funcao de custo")
    # xx comes from the last lambda; every lambda runs max_iter iterations,
    # so it has the same length as graphFinal.
    plt.plot(xx, graphFinal, "r")
    plt.subplot(212)
    plt.title("Percentagem de W com valor =0")
    print(sparsity)
    plt.plot(sparsity, "b", sparsity, "ro")
    plt.show()
    return w_new, y_new