# Example 1
 def fit(self, corpus, n_iter=1, offset=10, eta=1e-2, timevarying=True, **kwargs):
     """ online learning solution for the following optimization problem.
         max_w {softmax(dot(L2_w,xt)+L2_b)-0.5*nnz*mean(\|L2_w\|_F^2+\|L2_b\|^2)} + R
     """        
     vocab_size = corpus.shape[1]
     if self.t == 0:
         mu, sigma = corpus.data.mean(), 2.56*corpus.data.var()**0.5
         self.L2_w = mu + (mu if mu < sigma else sigma)*\
             self.rng.uniform(low=-1,high=1,size=(vocab_size,self.topic_num))
         self.L2_b = numpy.zeros(self.topic_num)
         
     L2_w = numpy.empty((vocab_size, self.topic_num))
     L2_b = numpy.empty(self.topic_num)
         
     P_z = numpy.empty(self.topic_num)      
     for t in xrange(n_iter):
         L2_w[:] = self.L2_w
         L2_b[:] = self.L2_b
         for s in xrange(corpus.shape[0]):
             #update parameter
             #1.learning rate
             alpha = eta/(s+1.) if timevarying else eta
             #2.difference between xt*L2_w*P_z - nnz*mean(L2_w**2) 
             euclid2prob(corpus[s].dot(L2_w)+L2_b, P_z, offset)
             
             idx, xt = corpus[s].nonzero()[1], corpus[s].data
             L2_w *= (1-alpha*xt.size/L2_w.size)                
             L2_w[idx] += (alpha*xt)[:,numpy.newaxis]*P_z
             L2_b += alpha*(1-L2_b)*P_z
         ##
         print "."
         self.t += 1; beta = 1./self.t
         self.L2_w *=(1-beta); self.L2_w += beta*L2_w
         self.L2_b *=(1-beta); self.L2_b += beta*L2_b
# Example 2
 def fit(self, corpus, n_iter=1, offset=10, eta=1e-2, timevarying=False, **kwargs):
     """ online learning solution for the following optimization problem.
         max_w softmax[dot(Pasterik_w,xt)+L2_b)-0.5*(nnz*mean(L2_w**2)+L2_b**2)]+R
     """
     vocab_size = corpus.shape[1]
     if self.t == 0:
         mu, sigma = corpus.data.mean(), 2.56*corpus.data.var()**0.5
         self.L2_w = mu + (mu if mu < sigma else sigma)*\
             self.rng.uniform(low=-1,high=1,size=(vocab_size, self.topic_num))
         self.L2_b = numpy.zeros(self.topic_num)
         
     L2_w = numpy.empty((vocab_size, self.topic_num))
     L2_b = numpy.empty(self.topic_num)
         
     P_topic = numpy.empty(self.topic_num)
     for t in xrange(n_iter):
         L2_w[:] = self.L2_w
         L2_b[:] = self.L2_b
         for s in xrange(corpus.shape[0]):
             #update parameter
             #1.learning rate
             alpha = eta/(s+1.) if timevarying else eta
             #2.difference between xt*prob(topic) - eta*L2_w 
             idx, xt = corpus[s].nonzero()[1], corpus[s].data
             P_w, Pasterik_w = euclid2sign(L2_w[idx])
             euclid2prob(numpy.dot(xt, Pasterik_w)+L2_b, P_topic, offset)
             
             scale = 1-float(xt.size)/L2_w.size*alpha
             L2_w *= scale; L2_b *= scale
             
             L2_w[idx] += (alpha*xt)[:,numpy.newaxis]*P_w*P_topic
             L2_b += alpha*P_topic
         ##
         print "."
         self.t += 1
         beta = 1./self.t
         self.L2_w *= beta; self.L2_w += (1-beta)*L2_w
         self.Pasterik_w = euclid2sign(self.L2_w, offset)[1]
         self.L2_b *= beta; self.L2_b += (1-beta)*L2_b
# Example 3
 def fit(self, corpus, n_iter=1, offset=10, eta=1e-2, timevarying=False, **kwargs):
     """ online learning solution for the following optimization problem.
         max_w logphi(dot(xt,w)+b-0.5*(w[I]**2+b**2)) - eta/2*\|w\|_F^2
         log_phi(.) := (.) - log_sum_exp(.)
     """
     vocab_size = corpus.shape[0]
     if self.t == 0:
         mu, sigma = corpus.data.mean(), 2.56*corpus.data.var()**0.5
         self.L2_w = mu + (mu if mu < sigma else sigma)*\
             self.rng.uniform(low=-1,high=1,size=(vocab_size, self.topic_num))
         self.L2_b = numpy.zeros(self.topic_num)
         
     L2_w = numpy.empty((vocab_size, self.topic_num))
     L2_b = numpy.empty(self.topic_num)
         
     P_z = numpy.empty(self.topic_num)      
     for t in xrange(n_iter):
         L2_w[:] = self.L2_w
         L2_b[:] = self.L2_b
         for s in xrange(corpus.shape[0]):
             #update parameter
             #1.learning rate
             alpha = eta/(s+1.) if timevarying else eta
             #2.probability of topic
             idx, xt = corpus[s].nonzero()[1], corpus[s].data
             L2_w_idx = L2_w[idx]
             euclid2prob(xt.dot(L2_w_idx)+L2_b-0.5*((L2_w_idx*L2_w_idx).sum(0)+L2_b*L2_b), P_z, offset)
             #w+= alpha*(xt-w)*P_z-> w-= alpha*w*Pz; w += alpha*xt'*P_z
             L2_w[idx] *= (1-alpha*P_z)
             L2_w[idx] += (alpha*xt)[:,numpy.newaxis]*P_z
             L2_b += alpha*(1-L2_b)*P_z
         ##
         print "."
         self.t += 1; beta = 1./self.t
         self.L2_w *=(1-beta); self.L2_w += beta*L2_w
         self.L2_b *=(1-beta); self.L2_b += beta*L2_b
         self.L2_w2 = self.L2_w*self.L2_w
         self.L2_b_b2 = self.L2_b-0.5*self.L2_b*self.L2_b