def __init__(self, model, type_model):
    super(LatentTypeWithTuningCurve, self).__init__(model, type_model)

    # Also initialize the tuning curves
    self.mu = self.type_model['mu']
    self.sigma = self.type_model['sigma']

    # Create a basis for the stimulus response
    self.spatial_basis = create_basis(self.type_model['spatial_basis'])
    self.spatial_shape = self.type_model['spatial_shape']
    self.spatial_ndim = len(self.spatial_shape)
    (_, Bx) = self.spatial_basis.shape

    self.temporal_basis = create_basis(self.type_model['temporal_basis'])
    (_, Bt) = self.temporal_basis.shape

    # Save the filter sizes
    self.Bx = Bx
    self.Bt = Bt

    # Initialize interpolated bases
    self.initialize_basis()

    # Initialize RxBx and RxBt matrices for the per-type tuning curves
    self.w_x = T.dmatrix('w_x')
    self.w_t = T.dmatrix('w_t')

    # Create function handles for the stimulus responses
    self.stim_resp_t = T.dot(self.temporal_basis, self.w_t)
    self.stim_resp_x = T.dot(self.spatial_basis, self.w_x)

    # Add the probability of these tuning curves to the log probability
    self.log_p += -0.5/self.sigma**2 * T.sum((self.w_x - self.mu)**2) + \
                  -0.5/self.sigma**2 * T.sum((self.w_t - self.mu)**2)
def test_pickle():
    """Test that a module can be pickled"""
    M = Module()
    M.x = (T.dmatrix())
    M.y = (T.dmatrix())
    a = T.dmatrix()
    M.f = Method([a], a + M.x + M.y)
    M.g = Method([a], a * M.x * M.y)
    mode = get_mode()
    m = M.make(x=numpy.zeros((4, 5)), y=numpy.ones((2, 3)), mode=mode)

    m_dup = cPickle.loads(cPickle.dumps(m, protocol=-1))

    assert numpy.all(m.x == m_dup.x) and numpy.all(m.y == m_dup.y)

    m_dup.x[0, 0] = 3.142
    assert m_dup.f.input_storage[1].data[0, 0] == 3.142
    assert m.x[0, 0] == 0.0  # ensure that m is not aliased to m_dup

    # check that the unpickled version has the same argument/property aliasing
    assert m_dup.x is m_dup.f.input_storage[1].data
    assert m_dup.y is m_dup.f.input_storage[2].data
    assert m_dup.x is m_dup.g.input_storage[1].data
    assert m_dup.y is m_dup.g.input_storage[2].data
def make_functions(num_features):
    W1_shape = (num_features / 4, num_features)
    b1_shape = num_features / 4
    W2_shape = (nb_classes, num_features / 4)
    b2_shape = nb_classes

    W1 = shared(np.random.random(W1_shape) - 0.5, name="W1")
    b1 = shared(np.random.random(b1_shape) - 0.5, name="b1")
    W2 = shared(np.random.random(W2_shape) - 0.5, name="W2")
    b2 = shared(np.random.random(b2_shape) - 0.5, name="b2")

    x = T.dmatrix("x")
    labels = T.dmatrix("labels")

    hidden = T.nnet.sigmoid(x.dot(W1.transpose()) + b1)
    output = T.nnet.softmax(hidden.dot(W2.transpose()) + b2)
    prediction = T.argmax(output, axis=1)

    reg_lambda = 0.0001
    regularization = reg_lambda * ((W1 * W1).sum() + (W2 * W2).sum()
                                   + (b1 * b1).sum() + (b2 * b2).sum())
    cost = T.nnet.binary_crossentropy(output, labels).mean() + regularization

    compute_prediction = function([x], prediction)

    alpha = T.dscalar("alpha")
    weights = [W1, W2, b1, b2]
    updates = [(w, w - alpha * grad(cost, w)) for w in weights]
    train_nn = function([x, labels, alpha], cost, updates=updates)

    return train_nn, compute_prediction
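# Illustrative usage sketch for make_functions (not from the original source).
# It assumes nb_classes is defined at module level, as the function above
# already does, and uses random data purely as a stand-in for a real minibatch.
import numpy as np
train_nn, compute_prediction = make_functions(100)                # 100 input features
X_batch = np.random.rand(32, 100)                                 # hypothetical minibatch
labels_onehot = np.eye(nb_classes)[np.random.randint(0, nb_classes, 32)]
for epoch in range(10):
    cost = train_nn(X_batch, labels_onehot, 0.1)                  # alpha = 0.1
preds = compute_prediction(X_batch)                               # class indices, shape (32,)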
def test_free_energy(self):
    self.setUpAssociativeRBM()
    rbm = self.rbm
    w = rbm.W.get_value(borrow=True)
    u = rbm.U.get_value(borrow=True)
    v = T.dmatrix("v")
    v2 = T.dmatrix("v2")
    v_bias = rbm.v_bias.eval()
    v_bias2 = rbm.v_bias2.eval()
    h_bias = rbm.h_bias.eval()

    res = rbm.free_energy(v, v2)
    f = theano.function([v, v2], [res])
    theano_res = f(self.x, self.y)

    # Test for case only v1 is present
    n1 = - np.dot(self.x, v_bias)
    n2 = - np.dot(self.y, v_bias2)
    n3 = - np.sum(np.log(1 + np.exp(h_bias + np.dot(self.x, w) + np.dot(self.y, u))))
    np_res = n1 + n2 + n3

    print theano_res
    print np_res

    diff = theano_res == np_res
    self.assertTrue(np.all(diff))
def test_prop_up(self):
    self.setUpSimpleRBM()
    rbm = self.rbm
    v1 = T.dmatrix("v1")
    v2 = T.dmatrix("v2")

    # Test Single
    out = rbm.prop_up(v1)
    out_fn = theano.function([], [out[0], out[1]], givens={v1: self.x1})
    out_sum, out_sum_mapped = out_fn()

    h_sum = np.dot(self.x1, rbm.W.get_value(borrow=True)) + rbm.h_bias.eval()
    h_sum_mapped = 1 / (1 + np.exp(-h_sum))

    self.assertTrue(np.all(out_sum == h_sum))
    self.assertTrue((np.all(out_sum_mapped == h_sum_mapped)))

    # Test Double
    out = rbm.prop_up(v1, v2)
    out_fn = theano.function([], [out[0], out[1]], givens={v1: self.x1, v2: self.x12})
    out_sum, out_sum_mapped = out_fn()

    h_sum = np.dot(self.x1, rbm.W.get_value(borrow=True)) + \
            np.dot(self.x12, rbm.U.get_value(borrow=True)) + rbm.h_bias.eval()
    h_sum_mapped = 1 / (1 + np.exp(-h_sum))
    # h_sum_mapped = theano.function([], [log_sig(h_sum)])()

    self.assertTrue(np.all(out_sum == h_sum))
    self.assertTrue((np.all(out_sum_mapped == h_sum_mapped)))
def test_prop_down(self):
    self.setUpRBM()
    self.assertTrue(self.rbm.h_n == 10)
    rbm = self.rbm
    W = rbm.W.get_value(borrow=True)
    U = rbm.U.get_value(borrow=True)
    v1 = T.dmatrix("v1")
    v2 = T.dmatrix("v2")
    h = np.array([[1, 2, 3, 4, 5, -1, -2, -3, -4, -5]])

    # Single
    x = T.dmatrix("x")
    out = rbm.prop_down(x)
    f = theano.function([x], out)
    out_sum, out_sum_mapped = f(h)

    h_sum = np.dot(h, W.T) + rbm.v_bias.eval()
    h_sum_mapped = theano.function([], [log_sig(h_sum)])()

    self.assertTrue(np.all(out_sum == h_sum))
    self.assertTrue(np.all(out_sum_mapped == h_sum_mapped))

    # Assoc
    out = rbm.prop_down_assoc(x)
    f = theano.function([x], out)
    out_sum, out_sum_mapped = f(h)

    h_sum2 = np.dot(h, U.T) + rbm.v_bias2.eval()
    h_sum_mapped2 = theano.function([], [log_sig(h_sum2)])()

    self.assertTrue(np.all(out_sum == h_sum2))
    self.assertTrue(np.all(out_sum_mapped == h_sum_mapped2))
def test_validity2(self):
    theano.config.on_unused_input = 'warn'
    a0_var = T.dmatrix('a0')
    r0_var = T.dmatrix('r0')
    fri_var = T.dmatrix("fri")
    out = T.dmatrix("out")
    out_stale = T.dmatrix("out_stale")

    f = theano.function([a0_var, r0_var, fri_var, out, out_stale],
                        dqn.build_loss(out, out_stale, a0_var, r0_var, fri_var, gamma=0.5))

    sqr_mean, mean, y, q = f(np.array([[1, 0, 0, 0, 0, 0],
                                       [0, 1, 0, 0, 0, 0],
                                       [0, 0, 0, 0, 0, 1]]),
                             np.array([[1], [0], [5]]),
                             np.array([[1], [1], [0]]),
                             np.array([[-5, 1, 2, 3, 4, 7],
                                       [1, 4, 3, 4, 5, 9],
                                       [0, 9, 0, 3, 2, 1]]),
                             np.array([[-5, 1, 2, 3, 4, 5],
                                       [1, 2, 3, 4, 5, 6],
                                       [8, 0, -1, -1, 2, 3]]))
    print(y, q)
def createMLP(layers, s):
    l_in = lasagne.layers.InputLayer(shape=(None, s))
    prev_layer = l_in
    Ws = []
    for layer in layers:
        enc = lasagne.layers.DenseLayer(prev_layer, num_units=layer,
                                        nonlinearity=rectify, W=init.Uniform(0.01))
        Ws += [enc.W]
        drop = lasagne.layers.DropoutLayer(enc, p=0.5)
        prev_layer = drop
    idx = 1

    # creating mask
    mask = lasagne.layers.InputLayer(shape=(None, layers[-1]))
    prev_layer = lasagne.layers.ElemwiseMergeLayer([prev_layer, mask], merge_function=T.mul)

    for layer in layers[-2::-1]:
        print layer
        dec = lasagne.layers.DenseLayer(prev_layer, num_units=layer,
                                        nonlinearity=rectify, W=Ws[-idx].T)
        idx += 1
        drop = lasagne.layers.DropoutLayer(dec, p=0.0)
        prev_layer = drop

    model = lasagne.layers.DenseLayer(prev_layer, num_units=s,
                                      nonlinearity=identity, W=Ws[0].T)

    x_sym = T.dmatrix()
    mask_sym = T.dmatrix()

    all_params = lasagne.layers.get_all_params(model)
    output = lasagne.layers.get_output(model, inputs={l_in: x_sym, mask: mask_sym})
    loss_eval = lasagne.objectives.squared_error(output, x_sym).sum()
    loss_eval /= (2. * batch_size)

    updates = lasagne.updates.adam(loss_eval, all_params)

    return l_in, mask, model, theano.function([x_sym, mask_sym], loss_eval, updates=updates)
def NNet(x=None, y=None, n_hid_layers=2):
    # our points, one point per row
    if x is None:
        x = T.dmatrix()

    # targets, one per row
    if y is None:
        y = T.dmatrix()

    layers = []
    _x = x
    for i in xrange(n_hid_layers):
        layers.append(Layer(x=_x))
        _x = layers[-1].y

    classif = LR(x=_x)

    @symbolicmethod
    def params():
        rval = classif.params()
        for l in layers:
            rval.extend(l.params())
        print([id(r) for r in rval])
        return rval

    if 0:
        @symbolicmethod
        def update(x, y):
            pp = params()
            gp = T.grad(classif.loss, pp)
            return dict((p, p - 0.01 * g) for p, g in zip(pp, gp))

    return locals()
def LR(x=None, y=None, v=None, c=None, l2_coef=None):
    # our points, one point per row
    if x is None:
        x = T.dmatrix()

    # targets, one per row
    if y is None:
        y = T.dmatrix()

    # first layer weights
    if v is None:
        v = T.dmatrix()

    # first layer biases
    if c is None:
        c = T.dvector()

    if l2_coef is None:
        l2_coef = T.dscalar()

    pred = T.dot(x, v) + c
    sse = T.sum((pred - y) * (pred - y))
    mse = sse / T.shape(y)[0]
    v_l2 = T.sum(T.sum(v * v))
    loss = mse + l2_coef * v_l2

    @symbolicmethod
    def params():
        return [v, c]

    return locals()
def __init__(self, N, Nsub, NRGC, prior=1):
    self.N = N
    self.Nsub = Nsub
    self.NRGC = NRGC

    # SYMBOLIC variables
    U = Th.dmatrix()
    V1 = Th.dvector()
    V2 = Th.dvector()
    STA = Th.dvector()
    STC = Th.dmatrix()

    theta = Th.dot(U.T, V1)
    UV1U = Th.dot(U, theta)
    UV1V2U = Th.dot(V1 * U.T, (V2 * U.T).T)
    posterior = -0.5 * Th.sum(V1 * V2 * U.T * U.T) \
                -0.25 * Th.sum(UV1V2U.T * UV1V2U) \
                -0.5 * Th.sum(UV1U * UV1U * UV1U * V2 * V2 * V1) \
                -0.5 * Th.sum(UV1U * UV1U * V2 * V1) \
                -0.5 * Th.sum(theta * theta) \
                + Th.dot(theta.T, STA) \
                + Th.sum(Th.dot(V1 * V2 * U.T, U) * (STC + STA.T * STA))

    dpost_dU = Th.grad(cost=posterior, wrt=U)
    dpost_dV1 = Th.grad(cost=posterior, wrt=V1)
    dpost_dV2 = Th.grad(cost=posterior, wrt=V2)

    # self.posterior = function([U, V2, V1, STA, STC], UV1V2U)
    self.posterior = function([U, V2, V1, STA, STC], posterior)
    self.dpost_dU = function([U, V2, V1, STA, STC], dpost_dU)
    self.dpost_dV1 = function([U, V2, V1, STA, STC], dpost_dV1)
    self.dpost_dV2 = function([U, V2, V1, STA, STC], dpost_dV2)
def make_theano_functions(self):
    x = T.dmatrix('x')
    h1 = T.dot(x, self.w1.T) + self.b1
    a1 = 1. / (1. + T.exp(-h1))
    h2 = T.dot(a1, self.w2.T) + self.b2
    a2 = T.nnet.softmax(h2)

    f = theano.function([x], a2)

    y = T.dmatrix('y')
    loss = T.mean(T.sum(y * -T.log(a2), axis=1))

    gradw1 = T.grad(loss, self.w1)
    gradw2 = T.grad(loss, self.w2)
    gradb1 = T.grad(loss, self.b1)
    gradb2 = T.grad(loss, self.b2)

    gradf = theano.function(
        [x, y],
        [loss, a2],
        updates=[(self.w1, self.w1 - self.lr * gradw1),
                 (self.w2, self.w2 - self.lr * gradw2),
                 (self.b1, self.b1 - self.lr * gradb1),
                 (self.b2, self.b2 - self.lr * gradb2)]
    )

    return f, gradf
def UV12_input(V1=Th.dmatrix(), STAs=Th.dmatrix(), STCs=Th.dtensor3(),
               N_spikes=Th.dvector(), **other):
    other.update(locals())
    return named(**other)
def theano_sed():
    """
    Function to create a theano function to compute the euclidian distances efficiently

    Returns:
        theano.compile.function_module.Function: Compiled function
    """
    theano.config.compute_test_value = "ignore"

    # Set symbolic variable as matrix (with the XYZ coords)
    coord_T_x1 = T.dmatrix()
    coord_T_x2 = T.dmatrix()

    # Euclidian distances function
    def squared_euclidean_distances(x_1, x_2):
        sqd = T.sqrt(T.maximum(
            (x_1 ** 2).sum(1).reshape((x_1.shape[0], 1)) +
            (x_2 ** 2).sum(1).reshape((1, x_2.shape[0])) -
            2 * x_1.dot(x_2.T), 0
        ))
        return sqd

    # Compiling function
    f = theano.function([coord_T_x1, coord_T_x2],
                        squared_euclidean_distances(coord_T_x1, coord_T_x2),
                        allow_input_downcast=False)
    return f
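# Illustrative usage sketch (not part of the original source): the compiled
# function takes two coordinate matrices with the same number of columns and
# returns the matrix of pairwise euclidean distances between their rows.
import numpy as np
sed = theano_sed()
dists = sed(np.random.rand(5, 3), np.random.rand(4, 3))
print(dists.shape)  # (5, 4)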
def test_infer_shape(self):
    admat = dmatrix()
    bdmat = dmatrix()
    admat_val = numpy.random.rand(3, 4)
    bdmat_val = numpy.random.rand(3, 4)
    self._compile_and_check([admat, bdmat],
                            [SoftmaxGrad()(admat, bdmat)],
                            [admat_val, bdmat_val], SoftmaxGrad)
def LQLEP_wBarrier(LQLEP=Th.dscalar(), ldet=Th.dscalar(), v1=Th.dvector(),
                   N_spike=Th.dscalar(), ImM=Th.dmatrix(), U=Th.dmatrix(),
                   V2=Th.dvector(), u=Th.dvector(), C=Th.dmatrix(), **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood,
    as a function of theta and M,
    with a barrier on the log-det term and a prior.
    '''
    sq_nonlinearity = V2**2. * Th.sum(Th.dot(U, C) * U, axis=[1])    # Th.sum(U**2, axis=[1])
    nonlinearity = V2 * Th.sqrt(Th.sum(Th.dot(U, C) * U, axis=[1]))  # Th.sum(U**2, axis=[1])
    if other.has_key('uc'):
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * (1. / (ldet + 250.)**2.
                       - 0.000001 * Th.sum(Th.log(1. - 4 * sq_nonlinearity))) \
                       + 10. * Th.sum((u[2:] + u[:-2] - 2 * u[1:-1])**2.) \
                       + 10. * Th.sum((other['uc'][2:] + other['uc'][:-2] - 2 * other['uc'][1:-1])**2.) \
                       + 0.000000001 * Th.sum(v1**2.)
                       # + 100. * Th.sum( v1 )
                       # + 0.0001 * Th.sum( V2**2 )
    else:
        LQLEP_wPrior = LQLEP + 0.5 * N_spike * (1. / (ldet + 250.)**2.
                       - 0.000001 * Th.sum(Th.log(1. - 4 * sq_nonlinearity))) \
                       + 10. * Th.sum((u[2:] + u[:-2] - 2 * u[1:-1])**2.) \
                       + 0.000000001 * Th.sum(v1**2.)
                       # + 100. * Th.sum( v1 )
                       # + 0.0001 * Th.sum( V2**2 )
    eigsImM, barrier = eig(ImM)
    barrier = 1 - (Th.sum(Th.log(eigsImM)) > -250) * \
                  (Th.min(eigsImM) > 0) * (Th.max(4 * sq_nonlinearity) < 1)
    other.update(locals())
    return named(**other)
def get_hidden_layers(dbn, layers):
    print "... getting hidden layers"

    test_data, test_label = get_test_set()
    index = T.lscalar()
    hidden_features = []
    total_layers = len(layers)

    w = T.dmatrix("w")
    t = T.dmatrix("t")
    b = T.vector("b")
    z = T.dot(w, t)

    # function for testing model
    test_f = theano.function([w, t], z)

    # loop through each layer
    for i in xrange(total_layers):
        weights = layers[i][0]
        bias = layers[i][1]

        if i == 0:
            hidden_features.append(test_f(test_data, weights))
        else:
            # use previous layer
            prev_layer = hidden_features[i - 1]
            hidden_features.append(test_f(prev_layer, weights))
        # apply sigmoid

    with open('hidden.pkl', 'w') as f:
        cPickle.dump(hidden_features, f)
def test_validity(self):
    theano.config.on_unused_input = 'warn'
    a0_var = T.dmatrix('a0')
    r0_var = T.dmatrix('r0')
    fri_var = T.dmatrix("fri")
    out = T.dmatrix("out")
    out_stale = T.dmatrix("out_stale")

    f = theano.function([a0_var, r0_var, fri_var, out, out_stale],
                        dqn.build_loss(out, out_stale, a0_var, r0_var, fri_var, gamma=0.5))

    loss, not_loss, y, q = f(np.array([[1, 0, 0, 0, 0, 0],
                                       [0, 1, 0, 0, 0, 0]]),
                             np.array([[1], [0]]),
                             np.array([[1], [1]]),
                             np.array([[-5, 1, 2, 3, 4, 7],
                                       [1, 4, 3, 4, 5, 9]]),
                             np.array([[-5, 1, 2, 3, 4, 5],
                                       [1, 2, 3, 4, 5, 6]]))

    self.assertTrue(np.all(y == [[3.5], [3]]))
    self.assertTrue(np.all(q == [[-5], [4]]))
    print(loss)
    print(not_loss)
    self.assertTrue(loss == 8.5)
def __init__(self, beta=0.1, n_in=1, n_out=1):
    self.__beta = beta
    self.__x = T.dmatrix('x')
    self.__y = T.dmatrix('y')
    self.__n_in = n_in
    self.__n_out = n_out

    self.__clf_model = _LogisticRegressionModel(d_input=self.__x,
                                                n_in=self.__n_in,
                                                n_out=self.__n_out)
    self.__cost = self.__clf_model.negative_log_likelihood(self.__y)

    # compute the gradient of cost with respect to theta = (W,b)
    self.__g_W = T.grad(cost=self.__cost, wrt=self.__clf_model.W)
    self.__g_b = T.grad(cost=self.__cost, wrt=self.__clf_model.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    self.__updates = [(self.__clf_model.W, self.__clf_model.W - self.__beta * self.__g_W),
                      (self.__clf_model.b, self.__clf_model.b - self.__beta * self.__g_b)]

    self.__train_model = theano.function(
        inputs=[self.__x, self.__y],
        outputs=[self.__cost, self.__clf_model.y_pred, self.__g_W, self.__g_b],
        updates=self.__updates,
    )

    self.__prediction_model = theano.function(
        inputs=[self.__clf_model.input],
        outputs=self.__clf_model.y_pred
    )
def test_mixin_composition():
    # Check composed expressions as parameters
    a = theano.shared(0.0)
    b = theano.shared(-1.0)
    mu = a + b - 1.0
    sigma = T.abs_(a * b)
    p = Normal(mu=mu, sigma=sigma)
    assert a in p.parameters_
    assert b in p.parameters_

    # Compose parameters with observed variables
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix(name="y")
    p = Normal(mu=a * y + b)
    assert len(p.parameters_) == 3
    assert a in p.parameters_
    assert b in p.parameters_
    assert p.sigma in p.parameters_
    assert p.mu not in p.parameters_
    assert len(p.observeds_) == 1
    assert y in p.observeds_

    # Check signatures
    data_X = np.random.rand(10, 1)
    data_y = np.random.rand(10, 1)
    p.pdf(X=data_X, y=data_y)
    p.cdf(X=data_X, y=data_y)
    p.rvs(10, y=data_y)

    # Check error
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix()  # y must be named
    assert_raises(ValueError, Normal, mu=a * y + b)
def asho_test():
    import theano.tensor as T
    x = T.dmatrix('x')
    w = T.dmatrix('w')
    y = T.dot(x, w)
    f = function([x, w], y)
def theano_setup(self):
    W = T.dmatrix('W')
    b = T.dvector('b')
    c = T.dvector('c')
    x = T.dmatrix('x')

    s = T.dot(x, W) + c
    # h = 1 / (1 + T.exp(-s))
    # h = T.nnet.sigmoid(s)
    h = T.tanh(s)

    # r = T.dot(h,W.T) + b
    # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b))
    ract = T.dot(h, W.T) + b
    r = self.output_scaling_factor * T.tanh(ract)

    # g = function([W,b,c,x], h)
    # f = function([W,b,c,h], r)
    # fg = function([W,b,c,x], r)

    # Another variable to be able to call a function
    # with a noisy x and compare it to a reference x.
    y = T.dmatrix('y')

    all_losses = ((r - y)**2)
    loss = T.sum(all_losses)
    # loss = ((r - y)**2).sum()

    self.theano_encode_decode = function([W, b, c, x], r)
    self.theano_all_losses = function([W, b, c, x, y],
                                      [all_losses, T.abs_(s), T.abs_(ract)])
    self.theano_gradients = function([W, b, c, x, y],
                                     [T.grad(loss, W), T.grad(loss, b), T.grad(loss, c)])
def __init__(self,
             np_rng=np.random.RandomState(1234),
             theano_rng=None,
             n_in=424 * 424 * 3,
             n_out=37,  # galaxy classes
             hidden_layer_sizes=[500, 500],
             corruption_levels=[0.1, 0.2]):
    self.np_rng = np_rng
    if not theano_rng:
        theano_rng = RandomStreams(np_rng.randint(2 ** 30))

    self.n_in = n_in
    self.n_out = n_out
    self.hidden_layer_sizes = hidden_layer_sizes
    self.corruption_levels = corruption_levels

    self.sigmoid_layers = []
    self.da_layers = []
    self.params = []
    self.n_layers = len(hidden_layer_sizes)

    assert self.n_layers > 0, 'must have some hidden layers'

    self.x = T.dmatrix('x')
    self.y = T.dmatrix('y')

    self.build_layers()
def train(self, train_set, batch_size=100):
    for i in xrange(len(self.layers) - 1):
        train_data = T.dmatrix('train_data')
        x = T.dmatrix('x')

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 10))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=self.layers[i],
            n_hidden=self.layers[i + 1]
        )

        cost, updates = da.get_cost_updates(
            corruption_level=0.,
            learning_rate=0.4
        )

        train_da = theano.function(
            [train_data],
            cost,
            updates=updates,
            givens={
                x: train_data
            }
        )

        for epoch in xrange(200):
            train_cost = []
            for index in xrange(len(train_set) / batch_size):
                train_cost.append(train_da(numpy.asarray(
                    train_set[index * batch_size: (index + 1) * batch_size])))
            print 'Training 1st ae epoch %d, cost ' % epoch, numpy.mean(train_cost)

        train_set = da.get_hidden_values(train_set).eval()
        self.dAs.append(da)
def neural_net(
        x=T.dmatrix(),        # our points, one point per row
        y=T.dmatrix(),        # our targets
        w=T.dmatrix(),        # first layer weights
        b=T.dvector(),        # first layer bias
        v=T.dmatrix(),        # second layer weights
        c=T.dvector(),        # second layer bias
        step=T.dscalar(),     # step size for gradient descent
        l2_coef=T.dscalar()   # l2 regularization amount
        ):
    """Idea A:
    """
    hid = T.tanh(T.dot(x, w) + b)
    pred = T.dot(hid, v) + c
    sse = T.sum((pred - y) * (pred - y))
    w_l2 = T.sum(T.sum(w * w))
    v_l2 = T.sum(T.sum(v * v))
    loss = sse + l2_coef * (w_l2 + v_l2)

    def symbolic_params(cls):
        return [cls.w, cls.b, cls.v, cls.c]

    def update(cls, x, y, **kwargs):
        params = cls.symbolic_params()
        gp = T.grad(cls.loss, params)
        return [], [In(p, update=p - cls.step * g) for p, g in zip(params, gp)]

    def predict(cls, x, **kwargs):
        return cls.pred, []

    return locals()
def theano_setup(self):
    # The matrices Wb and Wc were originally tied.
    # Because of that, I decided to keep Wb and Wc with
    # the same shape (instead of being transposed) to
    # avoid disturbing the code as much as possible.

    Wb = T.dmatrix('Wb')
    Wc = T.dmatrix('Wc')
    b = T.dvector('b')
    c = T.dvector('c')
    s = T.dscalar('s')
    x = T.dmatrix('x')

    h_act = T.dot(x, Wc) + c
    if self.act_func[0] == 'tanh':
        h = T.tanh(h_act)
    elif self.act_func[0] == 'sigmoid':
        h = T.nnet.sigmoid(h_act)
    elif self.act_func[0] == 'id':
        # bad idea
        h = h_act
    else:
        raise ValueError("Invalid act_func[0]")

    r_act = T.dot(h, Wb.T) + b
    if self.act_func[1] == 'tanh':
        r = s * T.tanh(r_act)
    elif self.act_func[1] == 'sigmoid':
        r = s * T.nnet.sigmoid(r_act)
    elif self.act_func[1] == 'id':
        r = s * r_act
    else:
        raise ValueError("Invalid act_func[1]")

    # Another variable to be able to call a function
    # with a noisy x and compare it to a reference x.
    y = T.dmatrix('y')

    loss = ((r - y)**2)
    sum_loss = T.sum(loss)

    # theano_encode_decode : vectorial function in argument X.
    # theano_loss          : vectorial function in argument X.
    # theano_gradients     : returns a quintet of gradients, each of
    #                        which involves all the data X summed,
    #                        so it's not a "vectorial" function.
    self.theano_encode_decode = function([Wb, Wc, b, c, s, x], r)
    self.theano_loss = function([Wb, Wc, b, c, s, x, y], loss)
    self.theano_gradients = function([Wb, Wc, b, c, s, x, y],
                                     [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                      T.grad(sum_loss, b), T.grad(sum_loss, c),
                                      T.grad(sum_loss, s)])

    # other useful theano functions for the experiments that involve
    # adding noise to the hidden states
    self.theano_encode = function([Wc, c, x], h)
    self.theano_decode = function([Wb, b, s, h], r)
def test_argsort():
    # Set up
    rng = np.random.RandomState(seed=utt.fetch_seed())
    m_val = rng.rand(3, 2)
    v_val = rng.rand(4)

    # Example 1
    a = tensor.dmatrix()
    w = argsort(a)
    f = theano.function([a], w)
    gv = f(m_val)
    gt = np.argsort(m_val)
    assert np.allclose(gv, gt)

    # Example 2
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    w = argsort(a, axis)
    f = theano.function([a, axis], w)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 3
    a = tensor.dvector()
    w2 = argsort(a)
    f = theano.function([a], w2)
    gv = f(v_val)
    gt = np.argsort(v_val)
    assert np.allclose(gv, gt)

    # Example 4
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    l = argsort(a, axis, "mergesort")
    f = theano.function([a, axis], l)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 5
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    a1 = ArgSortOp("mergesort", [])
    a2 = ArgSortOp("quicksort", [])
    # All the below should give true
    assert a1 != a2
    assert a1 == ArgSortOp("mergesort", [])
    assert a2 == ArgSortOp("quicksort", [])

    # Example 6: Testing axis=None
    a = tensor.dmatrix()
    w2 = argsort(a, None)
    f = theano.function([a], w2)
    gv = f(m_val)
    gt = np.argsort(m_val, None)
    assert np.allclose(gv, gt)
def test_infer_shape(self):
    x = tensor.dmatrix()
    y = tensor.dmatrix()
    self._compile_and_check([x, y],
                            [self.op_class()(x, y)],
                            [numpy.random.rand(5, 6), numpy.random.rand(5, 6)],
                            self.op_class)
def eigs(theta=Th.dvector('theta'), M=Th.dmatrix('M'),
         STA=Th.dvector('STA'), STC=Th.dmatrix('STC'), **other):
    '''
    Return eigenvalues of I-sym(M), for display/debugging purposes.
    '''
    ImM = Th.identity_like(M) - (M + M.T) / 2
    w, v = eig(ImM)
    return w
def ldet(theta=Th.dvector('theta'), M=Th.dmatrix('M'),
         STA=Th.dvector('STA'), STC=Th.dmatrix('STC'), **other):
    '''
    Return log-det of I-sym(M), for display/debugging purposes.
    '''
    ImM = Th.identity_like(M) - (M + M.T) / 2
    w, v = eig(ImM)
    return Th.sum(Th.log(w))
def fit_rkl(data, log_p, max_epochs=20):
    """
    Fit isotropic Gaussian by minimizing reverse Kullback-Leibler divergence.
    """

    # data dimensionality
    D = data.shape[0]

    # data and hidden states
    X = tt.dmatrix('X')
    Z = tt.dmatrix('Z')

    nr.seed(int(time() * 1000.) % 4294967295)
    idx = nr.permutation(data.shape[1])[:100]

    # initialize parameters
    b = th.shared(np.mean(data[:, idx], 1)[:, None], broadcastable=(False, True))
    a = th.shared(np.std(data[:, idx] - b.get_value(), 1)[:, None], broadcastable=[False, True])

    # model density
    q = lambda X: normal(X, b, a)
    log_q = lambda X: -0.5 * tt.sum(tt.square((X - b) / a), 0) \
        - D * tt.log(tt.abs_(a)) - D / 2. * np.log(np.pi)
    G = lambda Z: a * Z + b

    # reverse Kullback-Leibler divergence
    RKL = tt.mean(tt.exp(log_p(X)) * (log_p(X) - log_q(X))) + tt.mean(0.0 * Z)

    # function computing the reverse KL divergence and its gradient
    f_rkl = th.function([Z, X], [RKL, th.grad(RKL, a), th.grad(RKL, b)])

    # SGD hyperparameters
    B = 200
    mm = 0.8
    lr = .5

    da = 0.
    db = 0.

    try:
        # display initial divergence
        print('{0:>4} {1:.4f}'.format(0, float(f_rkl(nr.randn(*data.shape), data)[0])))

        for epoch in range(max_epochs):
            values = []

            # stochastic gradient descent
            for t in range(0, data.shape[1], B):
                Z = nr.randn(D, B)
                Y = data[:, t:t + B]

                v, ga, gb = f_rkl(Z, Y)

                da = mm * da - lr * ga
                db = mm * db - lr * gb

                values.append(v)

                a.set_value(a.get_value() + da)
                b.set_value(b.get_value() + db)

            # reduce learning rate
            lr /= 2.

            # display estimated divergence
            print('{0:>4} {1:.4f}'.format(epoch + 1, np.mean(values)))

    except KeyboardInterrupt:
        pass

    return a.get_value() * np.eye(D), b.get_value()
def evaluate_lenet5(learning_rate=0.05, n_epochs=2000, nkerns=[50], batch_size=1, window_width=4, maxSentLength=64, emb_size=300, hidden_size=200, margin=0.5, L2_weight=0.0003, update_freq=1, norm_threshold=5.0, max_truncate=40): maxSentLength = max_truncate + 2 * (window_width - 1) model_options = locals().copy() print "model options", model_options rootPath = '/mounts/data/proj/wenpeng/Dataset/WikiQACorpus/' rng = numpy.random.RandomState(23455) datasets, vocab_size = load_wikiQA_corpus( rootPath + 'vocab.txt', rootPath + 'WikiQA-train.txt', rootPath + 'test_filtered.txt', max_truncate, maxSentLength) #vocab_size contain train, dev and test #datasets, vocab_size=load_wikiQA_corpus(rootPath+'vocab_lower_in_word2vec.txt', rootPath+'WikiQA-train.txt', rootPath+'test_filtered.txt', maxSentLength)#vocab_size contain train, dev and test mtPath = '/mounts/data/proj/wenpeng/Dataset/WikiQACorpus/MT/BLEU_NIST/' mt_train, mt_test = load_mts_wikiQA( mtPath + 'result_train/concate_2mt_train.txt', mtPath + 'result_test/concate_2mt_test.txt') wm_train, wm_test = load_wmf_wikiQA( rootPath + 'train_word_matching_scores.txt', rootPath + 'test_word_matching_scores.txt') #wm_train, wm_test=load_wmf_wikiQA(rootPath+'train_word_matching_scores_normalized.txt', rootPath+'test_word_matching_scores_normalized.txt') indices_train, trainY, trainLengths, normalized_train_length, trainLeftPad, trainRightPad = datasets[ 0] indices_train_l = indices_train[::2, :] indices_train_r = indices_train[1::2, :] trainLengths_l = trainLengths[::2] trainLengths_r = trainLengths[1::2] normalized_train_length_l = normalized_train_length[::2] normalized_train_length_r = normalized_train_length[1::2] trainLeftPad_l = trainLeftPad[::2] trainLeftPad_r = trainLeftPad[1::2] trainRightPad_l = trainRightPad[::2] trainRightPad_r = trainRightPad[1::2] indices_test, testY, testLengths, normalized_test_length, testLeftPad, testRightPad = datasets[ 1] indices_test_l = indices_test[::2, :] indices_test_r = indices_test[1::2, :] testLengths_l = testLengths[::2] testLengths_r = testLengths[1::2] normalized_test_length_l = normalized_test_length[::2] normalized_test_length_r = normalized_test_length[1::2] testLeftPad_l = testLeftPad[::2] testLeftPad_r = testLeftPad[1::2] testRightPad_l = testRightPad[::2] testRightPad_r = testRightPad[1::2] n_train_batches = indices_train_l.shape[0] / batch_size n_test_batches = indices_test_l.shape[0] / batch_size train_batch_start = list(numpy.arange(n_train_batches) * batch_size) test_batch_start = list(numpy.arange(n_test_batches) * batch_size) indices_train_l = theano.shared(numpy.asarray(indices_train_l, dtype=theano.config.floatX), borrow=True) indices_train_r = theano.shared(numpy.asarray(indices_train_r, dtype=theano.config.floatX), borrow=True) indices_test_l = theano.shared(numpy.asarray(indices_test_l, dtype=theano.config.floatX), borrow=True) indices_test_r = theano.shared(numpy.asarray(indices_test_r, dtype=theano.config.floatX), borrow=True) indices_train_l = T.cast(indices_train_l, 'int64') indices_train_r = T.cast(indices_train_r, 'int64') indices_test_l = T.cast(indices_test_l, 'int64') indices_test_r = T.cast(indices_test_r, 'int64') rand_values = random_value_normal((vocab_size + 1, emb_size), theano.config.floatX, numpy.random.RandomState(1234)) rand_values[0] = numpy.array(numpy.zeros(emb_size), dtype=theano.config.floatX) #rand_values[0]=numpy.array([1e-50]*emb_size) rand_values = load_word2vec_to_init(rand_values, rootPath + 'vocab_embs_300d.txt') 
#rand_values=load_word2vec_to_init(rand_values, rootPath+'vocab_lower_in_word2vec_embs_300d.txt') embeddings = theano.shared(value=rand_values, borrow=True) #cost_tmp=0 error_sum = 0 # allocate symbolic variables for the data index = T.lscalar() x_index_l = T.lmatrix( 'x_index_l') # now, x is the index matrix, must be integer x_index_r = T.lmatrix('x_index_r') y = T.lvector('y') left_l = T.lscalar() right_l = T.lscalar() left_r = T.lscalar() right_r = T.lscalar() length_l = T.lscalar() length_r = T.lscalar() norm_length_l = T.dscalar() norm_length_r = T.dscalar() mts = T.dmatrix() wmf = T.dmatrix() cost_tmp = T.dscalar() #x=embeddings[x_index.flatten()].reshape(((batch_size*4),maxSentLength, emb_size)).transpose(0, 2, 1).flatten() ishape = (emb_size, maxSentLength) # this is the size of MNIST images filter_size = (emb_size, window_width) #poolsize1=(1, ishape[1]-filter_size[1]+1) #????????????????????????????? length_after_wideConv = ishape[1] + filter_size[1] - 1 ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer #layer0_input = x.reshape(((batch_size*4), 1, ishape[0], ishape[1])) layer0_l_input = embeddings[x_index_l.flatten()].reshape( (batch_size, maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2) layer0_r_input = embeddings[x_index_r.flatten()].reshape( (batch_size, maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2) conv_W, conv_b = create_conv_para(rng, filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1])) #layer0_output = debug_print(layer0.output, 'layer0.output') layer0_l = Conv_with_input_para(rng, input=layer0_l_input, image_shape=(batch_size, 1, ishape[0], ishape[1]), filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b) layer0_r = Conv_with_input_para(rng, input=layer0_r_input, image_shape=(batch_size, 1, ishape[0], ishape[1]), filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b) layer0_l_output = debug_print(layer0_l.output, 'layer0_l.output') layer0_r_output = debug_print(layer0_r.output, 'layer0_r.output') layer1 = Average_Pooling_for_Top(rng, input_l=layer0_l_output, input_r=layer0_r_output, kern=nkerns[0], left_l=left_l, right_l=right_l, left_r=left_r, right_r=right_r, length_l=length_l + filter_size[1] - 1, length_r=length_r + filter_size[1] - 1, dim=maxSentLength + filter_size[1] - 1) #layer2=HiddenLayer(rng, input=layer1_out, n_in=nkerns[0]*2, n_out=hidden_size, activation=T.tanh) sum_uni_l = T.sum(layer0_l_input, axis=3).reshape((1, emb_size)) aver_uni_l = sum_uni_l / layer0_l_input.shape[3] norm_uni_l = sum_uni_l / T.sqrt((sum_uni_l**2).sum()) sum_uni_r = T.sum(layer0_r_input, axis=3).reshape((1, emb_size)) aver_uni_r = sum_uni_r / layer0_r_input.shape[3] norm_uni_r = sum_uni_r / T.sqrt((sum_uni_r**2).sum()) uni_cosine = cosine(sum_uni_l, sum_uni_r) aver_uni_cosine = cosine(aver_uni_l, aver_uni_r) uni_sigmoid_simi = debug_print( T.nnet.sigmoid(T.dot(norm_uni_l, norm_uni_r.T)).reshape((1, 1)), 'uni_sigmoid_simi') ''' linear=Linear(sum_uni_l, sum_uni_r) poly=Poly(sum_uni_l, sum_uni_r) sigmoid=Sigmoid(sum_uni_l, sum_uni_r) rbf=RBF(sum_uni_l, sum_uni_r) gesd=GESD(sum_uni_l, sum_uni_r) ''' eucli_1 = 1.0 / (1.0 + EUCLID(sum_uni_l, sum_uni_r)) #25.2% #eucli_1_exp=1.0/T.exp(EUCLID(sum_uni_l, sum_uni_r)) len_l = norm_length_l.reshape((1, 1)) len_r = norm_length_r.reshape((1, 1)) ''' 
len_l=length_l.reshape((1,1)) len_r=length_r.reshape((1,1)) ''' #length_gap=T.log(1+(T.sqrt((len_l-len_r)**2))).reshape((1,1)) #length_gap=T.sqrt((len_l-len_r)**2) #layer3_input=mts layer3_input = T.concatenate( [ #mts, uni_cosine, #eucli_1_exp,#uni_sigmoid_simi, #norm_uni_l-(norm_uni_l+norm_uni_r)/2,#uni_cosine, # layer1. output_cosine, #layer1.output_eucli_to_simi_exp,#layer1.output_sigmoid_simi,#layer1.output_vector_l-(layer1.output_vector_l+layer1.output_vector_r)/2,#layer1.output_cosine, # len_l, len_r, wmf ], axis=1) #, layer2.output, layer1.output_cosine], axis=1) #layer3_input=T.concatenate([mts,eucli, uni_cosine, len_l, len_r, norm_uni_l-(norm_uni_l+norm_uni_r)/2], axis=1) #layer3=LogisticRegression(rng, input=layer3_input, n_in=11, n_out=2) layer3 = LogisticRegression(rng, input=layer3_input, n_in=(1) + (1) + 2 + 2, n_out=2) #L2_reg =(layer3.W** 2).sum()+(layer2.W** 2).sum()+(layer1.W** 2).sum()+(conv_W** 2).sum() L2_reg = debug_print( (layer3.W**2).sum() + (conv_W**2).sum(), 'L2_reg') #+(layer1.W** 2).sum()++(embeddings**2).sum() cost_this = debug_print(layer3.negative_log_likelihood(y), 'cost_this') #+L2_weight*L2_reg cost = debug_print( (cost_this + cost_tmp) / update_freq + L2_weight * L2_reg, 'cost') #cost=debug_print((cost_this+cost_tmp)/update_freq, 'cost') test_model = theano.function( [index], [layer3.prop_for_posi, layer3_input, y], givens={ x_index_l: indices_test_l[index:index + batch_size], x_index_r: indices_test_r[index:index + batch_size], y: testY[index:index + batch_size], left_l: testLeftPad_l[index], right_l: testRightPad_l[index], left_r: testLeftPad_r[index], right_r: testRightPad_r[index], length_l: testLengths_l[index], length_r: testLengths_r[index], norm_length_l: normalized_test_length_l[index], norm_length_r: normalized_test_length_r[index], mts: mt_test[index:index + batch_size], wmf: wm_test[index:index + batch_size] }, on_unused_input='ignore') #params = layer3.params + layer2.params + layer1.params+ [conv_W, conv_b] params = layer3.params + [conv_W, conv_b] #+[embeddings]# + layer1.params params_conv = [conv_W, conv_b] accumulator = [] for para_i in params: eps_p = numpy.zeros_like(para_i.get_value(borrow=True), dtype=theano.config.floatX) accumulator.append(theano.shared(eps_p, borrow=True)) # create a list of gradients for all model parameters grads = T.grad(cost, params) updates = [] for param_i, grad_i, acc_i in zip(params, grads, accumulator): grad_i = debug_print(grad_i, 'grad_i') acc = acc_i + T.sqr(grad_i) updates.append( (param_i, param_i - learning_rate * grad_i / T.sqrt(acc))) #AdaGrad updates.append((acc_i, acc)) train_model = theano.function( [index, cost_tmp], cost, updates=updates, givens={ x_index_l: indices_train_l[index:index + batch_size], x_index_r: indices_train_r[index:index + batch_size], y: trainY[index:index + batch_size], left_l: trainLeftPad_l[index], right_l: trainRightPad_l[index], left_r: trainLeftPad_r[index], right_r: trainRightPad_r[index], length_l: trainLengths_l[index], length_r: trainLengths_r[index], norm_length_l: normalized_train_length_l[index], norm_length_r: normalized_train_length_r[index], mts: mt_train[index:index + batch_size], wmf: wm_train[index:index + batch_size] }, on_unused_input='ignore') train_model_predict = theano.function( [index], [cost_this, layer3.errors(y), layer3_input, y], givens={ x_index_l: indices_train_l[index:index + batch_size], x_index_r: indices_train_r[index:index + batch_size], y: trainY[index:index + batch_size], left_l: trainLeftPad_l[index], right_l: 
trainRightPad_l[index], left_r: trainLeftPad_r[index], right_r: trainRightPad_r[index], length_l: trainLengths_l[index], length_r: trainLengths_r[index], norm_length_l: normalized_train_length_l[index], norm_length_r: normalized_train_length_r[index], mts: mt_train[index:index + batch_size], wmf: wm_train[index:index + batch_size] }, on_unused_input='ignore') ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 500000000000000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False svm_max = 0.0 best_epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 #for minibatch_index in xrange(n_train_batches): # each batch minibatch_index = 0 #shuffle(train_batch_start)#shuffle training data cost_tmp = 0.0 for batch_start in train_batch_start: # iter means how many batches have been runed, taking into loop iter = (epoch - 1) * n_train_batches + minibatch_index + 1 minibatch_index = minibatch_index + 1 #if epoch %2 ==0: # batch_start=batch_start+remain_train #time.sleep(0.5) #print batch_start if iter % update_freq != 0: cost_ij, error_ij, layer3_input, y = train_model_predict( batch_start) #print 'layer3_input', layer3_input cost_tmp += cost_ij error_sum += error_ij #print 'cost_acc ',cost_acc #print 'cost_ij ', cost_ij #print 'cost_tmp before update',cost_tmp else: cost_average = train_model(batch_start, cost_tmp) #print 'layer3_input', layer3_input error_sum = 0 cost_tmp = 0.0 #reset for the next batch #print 'cost_average ', cost_average #print 'cost_this ',cost_this #exit(0) #exit(0) if iter % n_train_batches == 0: print 'training @ iter = ' + str( iter) + ' average cost: ' + str( cost_average) + ' error: ' + str( error_sum) + '/' + str( update_freq) + ' error rate: ' + str( error_sum * 1.0 / update_freq) #if iter ==1: # exit(0) if iter % validation_frequency == 0: #write_file=open('log.txt', 'w') test_probs = [] test_y = [] test_features = [] for i in test_batch_start: prob_i, layer3_input, y = test_model(i) #test_losses = [test_model(i) for i in test_batch_start] test_probs.append(prob_i[0][0]) test_y.append(y[0]) test_features.append(layer3_input[0]) MAP, MRR = compute_map_mrr(rootPath + 'test_filtered.txt', test_probs) #now, check MAP and MRR print( ('\t\t\t\t\t\tepoch %i, minibatch %i/%i, test MAP of best ' 'model %f, MRR %f') % (epoch, minibatch_index, n_train_batches, MAP, MRR)) #now, see the results of LR #write_feature=open(rootPath+'feature_check.txt', 'w') train_y = [] train_features = [] count = 0 for batch_start in train_batch_start: cost_ij, error_ij, layer3_input, y = train_model_predict( batch_start) train_y.append(y[0]) train_features.append(layer3_input[0]) #write_feature.write(str(batch_start)+' '+' '.join(map(str,layer3_input[0]))+'\n') #count+=1 #write_feature.close() clf = svm.SVC(C=1.0, kernel='linear') clf.fit(train_features, train_y) results_svm = clf.decision_function(test_features) MAP_svm, MRR_svm = compute_map_mrr( rootPath + 'test_filtered.txt', results_svm) lr = LinearRegression().fit(train_features, train_y) results_lr = 
lr.predict(test_features) MAP_lr, MRR_lr = compute_map_mrr( rootPath + 'test_filtered.txt', results_lr) print '\t\t\t\t\t\t\tSVM, MAP: ', MAP_svm, ' MRR: ', MRR_svm, ' LR: ', MAP_lr, ' MRR: ', MRR_lr if patience <= iter: done_looping = True break #after each epoch, increase the batch_size if epoch % 2 == 1: update_freq = update_freq * 1 else: update_freq = update_freq / 1 #store the paras after epoch 15 if epoch == 15: store_model_to_file(params_conv) print 'Finished storing best conv params' exit(0) #print 'Batch_size: ', update_freq end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def evaluate_lenet5(self): #def evaluate_lenet5(learning_rate=0.1, n_epochs=2000, nkerns=[6, 12], batch_size=70, useAllSamples=0, kmax=30, ktop=5, filter_size=[10,7], # L2_weight=0.000005, dropout_p=0.5, useEmb=0, task=5, corpus=1): rng = numpy.random.RandomState(23455) n_train_batches=self.raw_data[0].shape[0]/self.batch_size n_valid_batches=self.raw_data[1].shape[0]/self.batch_size n_test_batches=self.raw_data[2].shape[0]/self.batch_size train_batch_start=[] dev_batch_start=[] test_batch_start=[] if self.useAllSamples: train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size)+[self.raw_data[0].shape[0]-self.batch_size] dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size)+[self.raw_data[1].shape[0]-self.batch_size] test_batch_start=list(numpy.arange(n_test_batches)*self.batch_size)+[self.raw_data[2].shape[0]-self.batch_size] n_train_batches=n_train_batches+1 n_valid_batches=n_valid_batches+1 n_test_batches=n_test_batches+1 else: train_batch_start=list(numpy.arange(n_train_batches)*self.batch_size) dev_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size) test_batch_start=list(numpy.arange(n_valid_batches)*self.batch_size) ''' indices_train_theano=theano.shared(numpy.asarray(indices_train, dtype=theano.config.floatX), borrow=True) indices_dev_theano=theano.shared(numpy.asarray(indices_dev, dtype=theano.config.floatX), borrow=True) indices_train_theano=T.cast(indices_train_theano, 'int32') indices_dev_theano=T.cast(indices_dev_theano, 'int32') ''' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.dmatrix('x') # now, x is the index matrix, must be integer y = T.dmatrix('y') ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' inputs=debug_print(x, 'inputs') labels=debug_print(y, 'labels') layer2 = HiddenLayer(rng, input=inputs, n_in=self.source_embedding_size, n_out=self.target_embedding_size, activation=None) layer2_output=debug_print(layer2.output, 'layer2_output') #J= debug_print(- T.sum(labels * T.log(layer2_output) + (1 - labels) * T.log(1 - layer2_output), axis=1), 'J') # a vector of cross-entropy J=T.sum((layer2_output - labels)**2, axis=1) L2_reg = (layer2.W** 2).sum() self.cost = T.mean(J) + self.L2_weight*L2_reg validate_model = theano.function([index], self.cost, givens={ x: self.dev_source[index: index + self.batch_size], y: self.dev_target[index: index + self.batch_size]}) test_model = theano.function([index], layer2_output, givens={ x: self.test_source[index: index + self.batch_size], y: self.test_source[index: index + self.batch_size]}) # create a list of all model parameters to be fit by gradient descent self.params = layer2.params #params = layer3.params + layer2.params + layer0.params+[embeddings] accumulator=[] for para_i in self.params: eps_p=numpy.zeros_like(para_i.get_value(borrow=True),dtype=theano.config.floatX) accumulator.append(theano.shared(eps_p, borrow=True)) # create a list of gradients for all model parameters grads = T.grad(self.cost, self.params) updates = [] for param_i, grad_i, acc_i in zip(self.params, grads, accumulator): acc = acc_i + T.sqr(grad_i) updates.append((param_i, param_i - self.ini_learning_rate * grad_i / T.sqrt(acc))) #AdaGrad updates.append((acc_i, acc)) train_model = theano.function([index], self.cost, updates=updates, givens={ x: self.train_source[index: index + self.batch_size], y: self.train_target[index: index + self.batch_size]}) ############### # TRAIN MODEL # ############### print '... 
training' # early-stopping parameters patience = 500000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False vali_loss_list=[] lowest_vali_loss=0 OOV_embs=numpy.zeros((len(self.OOV),self.target_embedding_size), dtype=theano.config.floatX) while (epoch < self.n_epochs) and (not done_looping): epoch = epoch + 1 #for minibatch_index in xrange(n_train_batches): # each batch minibatch_index=0 for batch_start in train_batch_start: # iter means how many batches have been runed, taking into loop iter = (epoch - 1) * n_train_batches + minibatch_index +1 minibatch_index=minibatch_index+1 cost_of_each_iteration= train_model(batch_start) #exit(0) #print 'sentence embeddings:' #print sentences_embs[:6,:] #if iter ==1: # exit(0) if iter % validation_frequency == 0: print 'training @ iter = '+str(iter)+' cost: '+str(cost_of_each_iteration)# +' error: '+str(error_ij) if iter % validation_frequency == 0: #print '\t iter: '+str(iter) # compute zero-one loss on validation set #validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] validation_losses=[] for batch_start in dev_batch_start: vali_loss_i=validate_model(batch_start) validation_losses.append(vali_loss_i) this_validation_loss = numpy.mean(validation_losses) print('\t\tepoch %i, minibatch %i/%i, validation cost %f ' % \ (epoch, minibatch_index , n_train_batches, \ this_validation_loss)) if this_validation_loss < (minimal_of_list(vali_loss_list)-1.0): #is very small #print str(minimal_of_list(vali_loss_list))+'-'+str(this_validation_loss)+'='+str(minimal_of_list(vali_loss_list)-this_validation_loss) del vali_loss_list[:] vali_loss_list.append(this_validation_loss) lowest_vali_loss=this_validation_loss #store params self.best_params=self.params for batch_start in test_batch_start: predicted_embeddings=test_model(batch_start) for row in range(batch_start, batch_start + self.batch_size): OOV_embs[row]=predicted_embeddings[row-batch_start] if len(vali_loss_list)==self.vali_cost_list_length: # only happen when self.vali_cost_list_length==1 print 'Training over, best model got at vali_cost:'+str(lowest_vali_loss) return OOV_embs, self.OOV elif len(vali_loss_list)<self.vali_cost_list_length: if this_validation_loss < minimal_of_list(vali_loss_list): #if it's small, but not small enough self.best_params=self.params lowest_vali_loss=this_validation_loss for batch_start in test_batch_start: predicted_embeddings=test_model(batch_start) for row in range(batch_start, batch_start + self.batch_size): OOV_embs[row]=predicted_embeddings[row-batch_start] vali_loss_list.append(this_validation_loss) if len(vali_loss_list)==self.vali_cost_list_length: print 'Training over, best model got at vali_cost:'+str(lowest_vali_loss) return OOV_embs, self.OOV #print vali_loss_list if patience <= iter: done_looping = True break end_time = time.clock() ''' print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) ''' print >> sys.stderr, ('The code for file ' + 
os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) return OOV_embs, self.OOV
###############################################
# EXAMPLE 2
###############################################
print('Building the model...')

x = T.dmatrix('x')  # Symbolic input matrix

# Initializing the weight matrix and bias vector
W = theano.shared(value=np.zeros((28*28, 10), dtype=theano.config.floatX), name='W')
b = theano.shared(value=np.zeros((10,), dtype=theano.config.floatX), name='b')

p_y_given_x = T.exp(T.dot(x, W) + b)
p_y_given_x = p_y_given_x / T.sum(p_y_given_x, axis=1)[:, None]

# Symbolic description of how to compute prediction as class whose
# probability is maximal
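# A minimal sketch completing the comment above (these lines are assumed, not
# from the original source): the predicted class is the argmax of the class
# probabilities, compiled into a callable function.
y_pred = T.argmax(p_y_given_x, axis=1)
predict = theano.function(inputs=[x], outputs=y_pred)
print(predict(np.zeros((2, 28 * 28))))  # with zero W and b all classes tie, so argmax returns 0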
def __init__(self, state_length, action_length, state_bounds, action_bounds, settings_): super(NeuralNetwork,self).__init__(state_length, action_length, state_bounds, action_bounds, 0, settings_) batch_size=32 # data types for model State = T.dmatrix("State") State.tag.test_value = np.random.rand(batch_size,self._state_length) # ResultState = T.dmatrix("ResultState") # ResultState.tag.test_value = np.random.rand(batch_size,self._state_length) Action = T.dmatrix("Action") Action.tag.test_value = np.random.rand(batch_size, self._action_length) # create a small convolutional neural network inputLayerState = lasagne.layers.InputLayer((None, self._state_length), input_var=State) # inputLayerAction = lasagne.layers.InputLayer((None, self._action_length), Action) # concatLayer = lasagne.layers.ConcatLayer([inputLayerState, inputLayerAction]) l_hid2ActA = lasagne.layers.DenseLayer( inputLayerState, num_units=128, nonlinearity=lasagne.nonlinearities.leaky_rectify, W=lasagne.init.Uniform()) num_layers=1 """ l_hid2ActA = lasagne.layers.DenseLayer( inputLayerState, num_units=128, nonlinearity=lasagne.nonlinearities.leaky_rectify) l_hid2ActA = lasagne.layers.DenseLayer( l_hid2ActA, num_units=64, nonlinearity=lasagne.nonlinearities.leaky_rectify) l_hid2ActA = lasagne.layers.DenseLayer( l_hid2ActA, num_units=32, nonlinearity=lasagne.nonlinearities.leaky_rectify) """ for i in range(num_layers): l_hid2ActA = lasagne.layers.DenseLayer( l_hid2ActA, num_units=64, nonlinearity=lasagne.nonlinearities.leaky_rectify # ,W=lasagne.init.Uniform() ) self._l_out = lasagne.layers.DenseLayer( l_hid2ActA, num_units=self._action_length, nonlinearity=lasagne.nonlinearities.linear # ,W=lasagne.init.Uniform() ) # print "Initial W " + str(self._w_o.get_value()) self._learning_rate = 0.01 self._rho = 0.95 self._rms_epsilon = 0.001 self._updates=0 self._states_shared = theano.shared( np.zeros((batch_size, self._state_length), dtype=theano.config.floatX)) """self._next_states_shared = theano.shared( np.zeros((batch_size, self._state_length), dtype=theano.config.floatX)) """ self._actions_shared = theano.shared( np.zeros((batch_size, self._action_length), dtype=theano.config.floatX), ) inputs_ = { State: State, # Action: Action, } self._forward = lasagne.layers.get_output(self._l_out, inputs_, deterministic=True) # self._target = (Reward + self._discount_factor * self._q_valsB) self._diff = Action - self._forward self._loss = 0.5 * self._diff ** 2 self._loss = T.mean(self._loss) + (1e-5 * lasagne.regularization.regularize_network_params(self._l_out, lasagne.regularization.l2)) self._loss2 = T.mean(self._loss) self._params = lasagne.layers.helper.get_all_params(self._l_out) self._givens_ = { State: self._states_shared, # ResultState: self._next_states_shared, Action: self._actions_shared, } # SGD update # self._updates_ = lasagne.updates.rmsprop(self._loss, self._params, self._learning_rate, self._rho, # self._rms_epsilon) self._all_grads = T.grad(self._loss, self._params) # gself._all_grads = lasagne.updates.total_norm_constraint(self._all_grads, 0.5) # self._params = lasagne.updates.norm_constraint(self._params, max_norm=0.4) self._updates_ = lasagne.updates.momentum(self._all_grads, self._params, self._learning_rate, 0.9) # self._updates_ = lasagne.updates.norm_constraint(self._updates_, self._params, max_norm=0.4) # updates = lasagne.updates.nesterov(loss, params) # updates = norm_constraint(updates, someparam, abs_max=15) # TD update # minimize Value function error #self._updates_ = 
lasagne.updates.rmsprop(T.mean(self._q_func) + (1e-4 * lasagne.regularization.regularize_network_params( #self._l_outA, lasagne.regularization.l2)), self._params, # self._learning_rate * -T.mean(self._diff), self._rho, self._rms_epsilon) # actDiff1 = (Action - self._q_valsActB) #TODO is this correct? # actDiff = (actDiff1 - (Action - self._q_valsActA)) # actDiff = ((Action - self._q_valsActB2)) # Target network does not work well here? #self._actDiff = ((Action - self._q_valsActA)) # Target network does not work well here? #self._actLoss = 0.5 * self._actDiff ** 2 + (1e-4 * lasagne.regularization.regularize_network_params( self._l_outActA, lasagne.regularization.l2)) #self._actLoss = T.mean(self._actLoss) self._train = theano.function([], [self._loss], updates=self._updates_, givens=self._givens_) self._forwardDynamics = theano.function([], self._forward, givens={State: self._states_shared, # Action: self._actions_shared }) inputs_ = [State, # ResultState, Action] self._bellman_error = theano.function(inputs=inputs_, outputs=self._diff, allow_input_downcast=True) # self._diffs = theano.function(input=[State]) # grad_params_ = [self._states_shared] # grad_params_.extend(self._params) self._get_grad = theano.function([], outputs=lasagne.updates.get_or_compute_grads(self._loss, [lasagne.layers.get_all_layers(self._l_out)[0].input_var] + self._params), allow_input_downcast=True, givens=self._givens_)
def __init__(self, numpy_rng, theano_rng=None, input=None,
             n_visible=300, n_hidden=150,
             W=None, bhid=None, bvis=None):
    self.n_visible = n_visible
    self.n_hidden = n_hidden

    # create a Theano random generator that gives symbolic random values
    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2**30))

    # note : W' was written as `W_prime` and b' as `b_prime`
    if W is None:
        # W is initialized with `initial_W` which is uniformly sampled
        # from -4*sqrt(6./(n_visible+n_hidden)) and
        # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
        # converted using asarray to dtype theano.config.floatX so that
        # the code is runnable on GPU
        print('W is empty here')
        initial_W = numpy.asarray(numpy_rng.uniform(
            low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
            high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)),
            dtype=theano.config.floatX)
        W = theano.shared(value=initial_W, name='W', borrow=True)

    if bvis is None:
        print('bvis is empty here')
        bvis = theano.shared(value=numpy.zeros(n_visible,
                                               dtype=theano.config.floatX),
                             name='bvis', borrow=True)

    if bhid is None:
        print('bhid is empty here')
        bhid = theano.shared(value=numpy.zeros(n_hidden,
                                               dtype=theano.config.floatX),
                             name='bhid', borrow=True)

    self.W = W
    # b corresponds to the bias of the hidden
    self.b = bhid
    # b_prime corresponds to the bias of the visible
    self.b_prime = bvis
    # tied weights, therefore W_prime is W transpose
    self.W_prime = self.W.T
    self.theano_rng = theano_rng

    # if no input is given, generate a variable representing the input
    if input is None:
        # we use a matrix because we expect a minibatch of several
        # examples, each example being a row
        self.x = T.dmatrix(name='input')
    else:
        self.x = input

    self.params = [self.W, self.b, self.b_prime]
import gzip
import cPickle

f = gzip.open('C:/nnets/mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()

n_train, n_test = map(lambda x: len(x[0]), [train_set, test_set])
dims = train_set[0].shape[1]
n_classes = len(set(train_set[1]))

import numpy
import theano
import theano.tensor as T

X = T.dmatrix()
y = T.ivector()

prepare_data = lambda x: (theano.shared(x[0].astype('float64')),
                          theano.shared(x[1].astype('int32')))

(training_x, training_y), (test_x, test_y), (validation_x, validation_y) = map(
    prepare_data, [train_set, test_set, valid_set])

W = theano.shared(numpy.zeros([dims, n_classes]))
b = theano.shared(numpy.zeros(n_classes))

y_hat = T.nnet.softmax(T.dot(X, W) + b)
y_pred = T.argmax(y_hat, axis=1)

test_error = T.mean(T.neq(y_pred, y))
training_error = -T.mean(T.log(y_hat)[T.arange(y.shape[0]), y])
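# Hypothetical continuation (a plain gradient-descent training step; the
# learning rate, batch size, and use of `givens` below are illustrative
# choices, not from the original source).
g_W, g_b = T.grad(training_error, [W, b])
learning_rate = 0.1
batch_size = 500
index = T.lscalar()
train = theano.function(
    [index], training_error,
    updates=[(W, W - learning_rate * g_W), (b, b - learning_rate * g_b)],
    givens={X: training_x[index * batch_size:(index + 1) * batch_size],
            y: training_y[index * batch_size:(index + 1) * batch_size]})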
class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(numpy.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(numpy.zeros(out_size) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.out_puts = self.Wx_plus_b
        else:
            self.out_puts = self.activation_function(self.Wx_plus_b)


x_data = numpy.linspace(-1, 1, 300)[:, numpy.newaxis]
noise = numpy.random.normal(0, 0.05, x_data.shape)
y_data = numpy.square(x_data) - 0.5 + noise

x = T.dmatrix('x')
y = T.dmatrix('y')

l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.out_puts, 10, 1, None)

cost = T.mean(T.square(l2.out_puts - y))
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

learning_rate = 0.05
train = theano.function(inputs=[x, y],
                        outputs=cost,
                        updates=[(l1.W, l1.W - learning_rate * gW1),
                                 (l1.b, l1.b - learning_rate * gb1),
                                 (l2.W, l2.W - learning_rate * gW2),
                                 (l2.b, l2.b - learning_rate * gb2)])
predict = theano.function(inputs=[x], outputs=l2.out_puts)

flg = plt.figure()
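# Illustrative training loop (iteration count and print frequency are
# arbitrary choices, not from the original source).
for step in range(1000):
    err = train(x_data, y_data)
    if step % 50 == 0:
        print(err)
predicted = predict(x_data)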
import numpy
import theano
import theano.tensor as T
from math import sqrt

rng = numpy.random

N = 400                 # training sample size
feats = 784             # number of input variables
hidden_layer = 100      # size of the hidden layer

# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.dmatrix("x")
y = T.dvector("y")

# initialize the weight vector w randomly
#
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w0 = theano.shared(rng.randn(feats, hidden_layer), name="w0")
w1 = theano.shared(rng.randn(hidden_layer) * sqrt(2.0 / hidden_layer), name="w1")

# initialize the bias term
b0 = theano.shared(0., name="b0")
b1 = theano.shared(0., name="b1")

print("Initial model:")
def __init__(self, numpy_rng, theano_rng=None, input=None,
             n_visible=784, n_hidden=500,
             W=None, bhid=None, bvis=None):
    self.n_hidden = n_hidden
    self.n_visible = n_visible

    # create a symbolic random variable:
    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(23))

    if not W:
        W_values = numpy.asarray(numpy_rng.uniform(
            low=12 * numpy.sqrt(6. / (n_hidden + n_visible)),
            high=16 * numpy.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)),
            dtype=theano.config.floatX)
        W = theano.shared(value=W_values, name='W', borrow=True)

    if not bvis:
        bvis = theano.shared(
            value=4 * numpy.zeros(n_visible, dtype=theano.config.floatX),
            borrow=True)

    if not bhid:
        bhid = theano.shared(
            value=4 * numpy.zeros(n_hidden, dtype=theano.config.floatX),
            borrow=True)

    # we are using tied weights, in which the output weights are just the
    # transpose of the input ones.
    self.W = W
    self.W_prime = W.T
    self.b = bhid
    self.b_prime = bvis

    # deltas = numpy.zeros(shape=(n_visible, n_hidden), dtype=theano.config.floatX)
    self.deltaW = theano.shared(value=numpy.zeros(shape=(n_visible, n_hidden),
                                                  dtype=theano.config.floatX),
                                borrow=True)
    self.deltaBvis = theano.shared(value=numpy.zeros(n_visible,
                                                     dtype=theano.config.floatX),
                                   borrow=True)
    self.deltaBhid = theano.shared(value=numpy.zeros(n_hidden,
                                                     dtype=theano.config.floatX),
                                   borrow=True)

    self.theano_rng = theano_rng

    if input is None:
        self.x = T.dmatrix(name='input')
    else:
        self.x = input

    # bundle up all the params. No W_prime as it is updated whenever W is updated.
    self.params = [self.W, self.b, self.b_prime]
    self.gparams = []
    self.deltaParams = [self.deltaW, self.deltaBhid, self.deltaBvis]
def fit_mmd(data): """ Fit isotropic Gaussian by minimizing maximum mean discrepancy. B{References:} - A. Gretton et al., I{A Kernel Method for the Two-Sample-Problem}, NIPS, 2007 - Y. Li et al., I{Generative Moment Matching Networks}, ICML, 2015 """ def gaussian_kernel(x, y, sigma=1.): return tt.exp(-tt.sum(tt.square(x - y)) / sigma**2) def mixed_kernel(x, y, sigma=[.5, 1., 2., 4., 8.]): return tt.sum([gaussian_kernel(x, y, s) for s in sigma]) def gram_matrix(X, Y, kernel): M = X.shape[0] N = Y.shape[0] G, _ = th.scan( fn=lambda k: kernel(X[k // N], Y[k % N]), sequences=[tt.arange(M * N)]) return G.reshape([M, N]) # hiddens Z = tt.dmatrix('Z') # parameters b = th.shared(np.mean(data, 1)[None], broadcastable=[True, False]) A = th.shared(np.std(data - b.get_value().T)) # model samples X = Z * A + b # data Y = tt.dmatrix('Y') M = X.shape[0] N = Y.shape[0] Kyy = gram_matrix(Y, Y, mixed_kernel) Kxy = gram_matrix(X, Y, mixed_kernel) Kxx = gram_matrix(X, X, mixed_kernel) MMDsq = tt.sum(Kxx) / M**2 - 2. / (N * M) * tt.sum(Kxy) + tt.sum(Kyy) / N**2 MMD = tt.sqrt(MMDsq) f = th.function([Z, Y], [MMD, tt.grad(MMD, A), tt.grad(MMD, b)]) # batch size, momentum, learning rate schedule B = 100 mm = 0.8 kappa = .7 tau = 1. values = [] try: for t in range(0, data.shape[1], B): if t % 10000 == 0: # reset momentum dA = 0. db = 0. Z = nr.randn(B, data.shape[0]) Y = data.T[t:t + B] lr = np.power(tau + (t + B) / B, -kappa) v, gA, gb = f(Z, Y) dA = mm * dA - lr * gA db = mm * db - lr * gb values.append(v) A.set_value(A.get_value() + dA) b.set_value(b.get_value() + db) print('{0:>6} {1:.4f}'.format(t, np.mean(values[-100:]))) except KeyboardInterrupt: pass return A.get_value() * np.eye(data.shape[0]), b.get_value().T
import theano.tensor as T
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
import numpy

random = RandomStreams(seed=42)
a = random.normal((1, 3))
b = T.dmatrix('a')
f1 = a * b
g1 = function([b], f1)

print('numpy.ones((1, 3)) =', numpy.ones((1, 3)))

for i in range(50):
    print("Invocation 1:", g1(numpy.ones((1, 3))))
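Each call to g1 above yields a different multiplier because the random stream's state is advanced by the function's default updates. The short sketch below is an added illustration: passing no_default_updates=True to theano.function freezes the stream, so repeated calls reuse the same draw.

g2 = function([b], f1, no_default_updates=True)
for i in range(3):
    # prints the same values every time, since the random state is not advanced
    print("Invocation 2:", g2(numpy.ones((1, 3))))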
def test_sparse(): print '\n\n*************************************************' print ' TEST SPARSE' print '*************************************************' # fixed parameters bsize = 10 # batch size imshp = (28, 28) kshp = (5, 5) nkern = 1 # per output pixel ssizes = ((1, 1), (2, 2)) convmodes = ( 'full', 'valid', ) # symbolic stuff bias = T.dvector() kerns = T.dvector() input = T.dmatrix() rng = N.random.RandomState(3423489) import theano.gof as gof #Mode(optimizer='fast_run', linker=gof.OpWiseCLinker(allow_gc=False)),): ntot, ttot = 0, 0 for conv_mode in convmodes: for ss in ssizes: output, outshp = sp.applySparseFilter(kerns, kshp,\ nkern, input, imshp, ss, bias=bias, mode=conv_mode) f = function([kerns, bias, input], output) # build actual input images img2d = N.arange(bsize * N.prod(imshp)).reshape((bsize, ) + imshp) img1d = img2d.reshape(bsize, -1) zeropad_img = N.zeros((bsize,\ img2d.shape[1]+2*(kshp[0]-1),\ img2d.shape[2]+2*(kshp[1]-1))) zeropad_img[:, kshp[0] - 1:kshp[0] - 1 + img2d.shape[1], kshp[1] - 1:kshp[1] - 1 + img2d.shape[2]] = img2d # build kernel matrix -- flatten it for theano stuff filters = N.arange(N.prod(outshp)*N.prod(kshp)).\ reshape(nkern,N.prod(outshp[1:]),N.prod(kshp)) spfilt = filters.flatten() biasvals = N.arange(N.prod(outshp)) # compute output by hand ntime1 = time.time() refout = N.zeros((bsize, nkern, outshp[1], outshp[2])) patch = N.zeros((kshp[0], kshp[1])) for b in xrange(bsize): for k in xrange(nkern): pixi = 0 # pixel index in raster order for j in xrange(outshp[1]): for i in xrange(outshp[2]): n = j * ss[0] m = i * ss[1] patch = zeropad_img[b, n:n + kshp[0], m:m + kshp[1]] refout[b,k,j,i] = N.dot(filters[k,pixi,:],\ patch.flatten()) pixi += 1 refout = refout.reshape(bsize, -1) + biasvals ntot += time.time() - ntime1 # need to flatten images ttime1 = time.time() out1 = f(spfilt, biasvals, img1d) ttot += time.time() - ttime1 temp = refout - out1 assert (temp < 1e-10).all() # test downward propagation vis = T.grad(output, input, output) downprop = function([kerns, output], vis) temp1 = time.time() for zz in range(100): visval = downprop(spfilt, out1) indices, indptr, spmat_shape, sptype, outshp, kmap = \ sp.convolution_indices.sparse_eval(imshp,kshp,nkern,ss,conv_mode) spmat = sparse.csc_matrix((spfilt[kmap], indices, indptr), spmat_shape) visref = N.dot(out1, spmat.todense()) assert N.all(visref == visval) print '**** Sparse Profiling Results ****' print 'Numpy processing time: ', ntot print 'Theano processing time: ', ttot
def mogaussian(D=2, K=10, N=100000, seed=2, D_max=100): """ Creates a random mixture of Gaussians and corresponding samples. @rtype: C{tuple} @return: a function representing the density and samples """ nr.seed(seed) # mixture weights p = nr.dirichlet([.5] * K) # variances v = 1. / np.square(nr.rand(K) + 1.) # means; D_max makes sure that data only depends on seed and not on D m = nr.randn(D_max, K) * 1.5 m = m[:D] # density function X = tt.dmatrix('X') C = [np.eye(D) * _ for _ in v] def log_p(X): """ @type X: C{ndarray}/C{TensorVariable} @param X: data points stored column-wise @rtype: C{ndarray}/C{TensorVariable} """ if isinstance(X, tt.TensorVariable): return tt.log( tt.sum( [p[i] * normal(X, m[:, [i]], C[i]) for i in range(len(p))], 0)) else: if log_p.f is None: Y = tt.dmatrix('Y') log_p.f = th.function([Y], log_p(Y)) return log_p.f(X) log_p.f = None def nonlog_p(X): """ @type X: C{ndarray}/C{TensorVariable} @param X: data points stored column-wise @rtype: C{ndarray}/C{TensorVariable} """ if isinstance(X, tt.TensorVariable): return tt.sum( [p[i] * normal(X, m[:, [i]], C[i]) for i in range(len(p))], 0) else: if nonlog_p.f is None: Y = tt.dmatrix('Y') nonlog_p.f = th.function([Y], nonlog_p(Y)) return nonlog_p.f(X) nonlog_p.f = None # sample data M = nr.multinomial(N, p) data = np.hstack( nr.randn(D, M[i]) * np.sqrt(v[i]) + m[:, [i]] for i in range(len(p))) data = data[:, nr.permutation(N)] return nonlog_p, log_p, data
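A brief, hypothetical usage sketch for the helper above: the returned callables evaluate the mixture density on column-wise data, and the argument values here are illustrative only.

nonlog_p, log_p, data = mogaussian(D=2, K=10, N=10000)
# density and log-density of the first five generated samples (stored column-wise)
print(nonlog_p(data[:, :5]))
print(log_p(data[:, :5]))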
def test_searchsortedOp_on_no_1d_inp(self):
    no_1d = tt.dmatrix("no_1d")
    with pytest.raises(ValueError):
        searchsorted(no_1d, self.v)
    with pytest.raises(ValueError):
        searchsorted(self.x, self.v, sorter=no_1d)
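For contrast with the error cases exercised above, a correct call passes a 1-D array to search in; the sketch below is an added example (the import path for searchsorted and the sample values are assumptions).

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.extra_ops import searchsorted

a = tt.dvector("a")    # 1-D, sorted array to search in
v = tt.dvector("v")    # values whose insertion points we want
f = theano.function([a, v], searchsorted(a, v))
print(f(np.arange(10.0), np.array([2.5, 7.0])))    # -> [3 7]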
self.outputs = self.Wx_plus_b else: self.outputs = self.activation_function(self.Wx_plus_b) # Make up some fake data x_data = np.linspace(-1, 1, 300)[:, np.newaxis] noise = np.random.normal(0, 0.05, x_data.shape) y_data = np.square(x_data) - 0.5 + noise # y = x^2 - 0.5 # show the fake data plt.scatter(x_data, y_data) plt.show() # determine the inputs dtype x = T.dmatrix("x") y = T.dmatrix("y") # add layers l1 = Layer(x, 1, 10, T.nnet.relu) l2 = Layer(l1.outputs, 10, 1, None) # compute the cost cost = T.mean(T.square(l2.outputs - y)) # compute the gradients gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b]) # apply gradient descent learning_rate = 0.05 train = theano.function(inputs=[x, y],
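This record is the same tutorial as the Layer example earlier in this section but cuts off inside the theano.function call; based on that complete version, the call most likely continues as sketched in the comments below.

# likely continuation (mirrors the complete variant of this tutorial above):
#                         outputs=cost,
#                         updates=[(l1.W, l1.W - learning_rate * gW1),
#                                  (l1.b, l1.b - learning_rate * gb1),
#                                  (l2.W, l2.W - learning_rate * gW2),
#                                  (l2.b, l2.b - learning_rate * gb2)])
# predict = theano.function(inputs=[x], outputs=l2.outputs)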
def evaluate_lenet5(learning_rate=0.085, n_epochs=2000, nkerns=[50], batch_size=1, window_width=3, maxSentLength=60, emb_size=300, hidden_size=200, margin=0.5, L2_weight=0.0001, update_freq=1, norm_threshold=5.0): model_options = locals().copy() print "model options", model_options rootPath='/mounts/data/proj/wenpeng/Dataset/MicrosoftParaphrase/tokenized_msr/'; rng = numpy.random.RandomState(23455) datasets, vocab_size=load_msr_corpus(rootPath+'vocab.txt', rootPath+'tokenized_train.txt', rootPath+'tokenized_test.txt', maxSentLength) mtPath='/mounts/data/proj/wenpeng/Dataset/paraphraseMT/' mt_train, mt_test=load_mts(mtPath+'concate_15mt_train.txt', mtPath+'concate_15mt_test.txt') wm_train, wm_test=load_wmf_wikiQA(rootPath+'train_word_matching_scores_normalized.txt', rootPath+'test_word_matching_scores_normalized.txt') indices_train, trainY, trainLengths, normalized_train_length, trainLeftPad, trainRightPad= datasets[0] indices_train_l=indices_train[::2,:] indices_train_r=indices_train[1::2,:] trainLengths_l=trainLengths[::2] trainLengths_r=trainLengths[1::2] normalized_train_length_l=normalized_train_length[::2] normalized_train_length_r=normalized_train_length[1::2] trainLeftPad_l=trainLeftPad[::2] trainLeftPad_r=trainLeftPad[1::2] trainRightPad_l=trainRightPad[::2] trainRightPad_r=trainRightPad[1::2] indices_test, testY, testLengths,normalized_test_length, testLeftPad, testRightPad= datasets[1] indices_test_l=indices_test[::2,:] indices_test_r=indices_test[1::2,:] testLengths_l=testLengths[::2] testLengths_r=testLengths[1::2] normalized_test_length_l=normalized_test_length[::2] normalized_test_length_r=normalized_test_length[1::2] testLeftPad_l=testLeftPad[::2] testLeftPad_r=testLeftPad[1::2] testRightPad_l=testRightPad[::2] testRightPad_r=testRightPad[1::2] n_train_batches=indices_train_l.shape[0]/batch_size n_test_batches=indices_test_l.shape[0]/batch_size train_batch_start=list(numpy.arange(n_train_batches)*batch_size) test_batch_start=list(numpy.arange(n_test_batches)*batch_size) indices_train_l=theano.shared(numpy.asarray(indices_train_l, dtype=theano.config.floatX), borrow=True) indices_train_r=theano.shared(numpy.asarray(indices_train_r, dtype=theano.config.floatX), borrow=True) indices_test_l=theano.shared(numpy.asarray(indices_test_l, dtype=theano.config.floatX), borrow=True) indices_test_r=theano.shared(numpy.asarray(indices_test_r, dtype=theano.config.floatX), borrow=True) indices_train_l=T.cast(indices_train_l, 'int64') indices_train_r=T.cast(indices_train_r, 'int64') indices_test_l=T.cast(indices_test_l, 'int64') indices_test_r=T.cast(indices_test_r, 'int64') rand_values=random_value_normal((vocab_size+1, emb_size), theano.config.floatX, numpy.random.RandomState(1234)) rand_values[0]=numpy.array(numpy.zeros(emb_size)) #rand_values[0]=numpy.array([1e-50]*emb_size) rand_values=load_word2vec_to_init(rand_values, rootPath+'vocab_embs_300d.txt') embeddings=theano.shared(value=rand_values, borrow=True) cost_tmp=0 error_sum=0 # allocate symbolic variables for the data index = T.lscalar() x_index_l = T.lmatrix('x_index_l') # now, x is the index matrix, must be integer x_index_r = T.lmatrix('x_index_r') y = T.lvector('y') left_l=T.lscalar() right_l=T.lscalar() left_r=T.lscalar() right_r=T.lscalar() length_l=T.lscalar() length_r=T.lscalar() norm_length_l=T.dscalar() norm_length_r=T.dscalar() mts=T.dmatrix() wmf=T.dmatrix() cost_tmp=T.dscalar() #x=embeddings[x_index.flatten()].reshape(((batch_size*4),maxSentLength, emb_size)).transpose(0, 2, 1).flatten() ishape = (emb_size, 
maxSentLength) # this is the size of MNIST images filter_size=(emb_size,window_width) #poolsize1=(1, ishape[1]-filter_size[1]+1) #????????????????????????????? length_after_wideConv=ishape[1]+filter_size[1]-1 ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer #layer0_input = x.reshape(((batch_size*4), 1, ishape[0], ishape[1])) layer0_l_input = embeddings[x_index_l.flatten()].reshape((batch_size,maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2) layer0_r_input = embeddings[x_index_r.flatten()].reshape((batch_size,maxSentLength, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2) conv_W, conv_b=create_conv_para(rng, filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1])) #layer0_output = debug_print(layer0.output, 'layer0.output') layer0_l = Conv_with_input_para(rng, input=layer0_l_input, image_shape=(batch_size, 1, ishape[0], ishape[1]), filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b) layer0_r = Conv_with_input_para(rng, input=layer0_r_input, image_shape=(batch_size, 1, ishape[0], ishape[1]), filter_shape=(nkerns[0], 1, filter_size[0], filter_size[1]), W=conv_W, b=conv_b) layer0_l_output=debug_print(layer0_l.output, 'layer0_l.output') layer0_r_output=debug_print(layer0_r.output, 'layer0_r.output') layer1=Average_Pooling_for_Top(rng, input_l=layer0_l_output, input_r=layer0_r_output, kern=nkerns[0], left_l=left_l, right_l=right_l, left_r=left_r, right_r=right_r, length_l=length_l+filter_size[1]-1, length_r=length_r+filter_size[1]-1, dim=maxSentLength+filter_size[1]-1) #layer2=HiddenLayer(rng, input=layer1_out, n_in=nkerns[0]*2, n_out=hidden_size, activation=T.tanh) sum_uni_l=T.sum(layer0_l_input, axis=3).reshape((1, emb_size)) norm_uni_l=sum_uni_l/T.sqrt((sum_uni_l**2).sum()) sum_uni_r=T.sum(layer0_r_input, axis=3).reshape((1, emb_size)) norm_uni_r=sum_uni_r/T.sqrt((sum_uni_r**2).sum()) uni_cosine=cosine(sum_uni_l, sum_uni_r) ''' linear=Linear(sum_uni_l, sum_uni_r) poly=Poly(sum_uni_l, sum_uni_r) sigmoid=Sigmoid(sum_uni_l, sum_uni_r) rbf=RBF(sum_uni_l, sum_uni_r) gesd=GESD(sum_uni_l, sum_uni_r) ''' eucli_1=1.0/(1.0+EUCLID(sum_uni_l, sum_uni_r))#25.2% #eucli_1=EUCLID(sum_uni_l, sum_uni_r) len_l=norm_length_l.reshape((1,1)) len_r=norm_length_r.reshape((1,1)) ''' len_l=length_l.reshape((1,1)) len_r=length_r.reshape((1,1)) ''' #length_gap=T.log(1+(T.sqrt((len_l-len_r)**2))).reshape((1,1)) #length_gap=T.sqrt((len_l-len_r)**2) #layer3_input=mts layer3_input=T.concatenate([mts, eucli_1, #uni_cosine,#norm_uni_l-(norm_uni_l+norm_uni_r)/2,#uni_cosine, # layer1.output_eucli_to_simi, #layer1.output_cosine,#layer1.output_vector_l-(layer1.output_vector_l+layer1.output_vector_r)/2,#layer1.output_cosine, # len_l, len_r, #layer1.output_attentions, #wmf, ], axis=1)#, layer2.output, layer1.output_cosine], axis=1) #layer3_input=T.concatenate([mts,eucli, uni_cosine, len_l, len_r, norm_uni_l-(norm_uni_l+norm_uni_r)/2], axis=1) #layer3=LogisticRegression(rng, input=layer3_input, n_in=11, n_out=2) layer3=LogisticRegression(rng, input=layer3_input, n_in=15+(2)+(2)+2, n_out=2) #L2_reg =(layer3.W** 2).sum()+(layer2.W** 2).sum()+(layer1.W** 2).sum()+(conv_W** 2).sum() L2_reg =debug_print((layer3.W** 2).sum()+(conv_W** 2).sum(), 'L2_reg')#+(layer1.W** 2).sum() cost_this =debug_print(layer3.negative_log_likelihood(y), 'cost_this')#+L2_weight*L2_reg 
cost=debug_print((cost_this+cost_tmp)/update_freq+L2_weight*L2_reg, 'cost') test_model = theano.function([index], [layer3.errors(y), layer3.y_pred, layer3_input, y], givens={ x_index_l: indices_test_l[index: index + batch_size], x_index_r: indices_test_r[index: index + batch_size], y: testY[index: index + batch_size], left_l: testLeftPad_l[index], right_l: testRightPad_l[index], left_r: testLeftPad_r[index], right_r: testRightPad_r[index], length_l: testLengths_l[index], length_r: testLengths_r[index], norm_length_l: normalized_test_length_l[index], norm_length_r: normalized_test_length_r[index], mts: mt_test[index: index + batch_size], wmf: wm_test[index: index + batch_size]}, on_unused_input='ignore') #params = layer3.params + layer2.params + layer1.params+ [conv_W, conv_b] params = layer3.params+ [conv_W, conv_b]# + layer1.params accumulator=[] for para_i in params: eps_p=numpy.zeros_like(para_i.get_value(borrow=True),dtype=theano.config.floatX) accumulator.append(theano.shared(eps_p, borrow=True)) # create a list of gradients for all model parameters grads = T.grad(cost, params) updates = [] for param_i, grad_i, acc_i in zip(params, grads, accumulator): #grad_i=debug_print(grad_i,'grad_i') #norm=T.sqrt((grad_i**2).sum()) #if T.lt(norm_threshold, norm): # print 'big norm' # grad_i=grad_i*(norm_threshold/norm) acc = acc_i + T.sqr(grad_i) updates.append((param_i, param_i - learning_rate * grad_i / T.sqrt(acc))) #AdaGrad updates.append((acc_i, acc)) train_model = theano.function([index,cost_tmp], [cost,layer3.errors(y), layer3_input], updates=updates, givens={ x_index_l: indices_train_l[index: index + batch_size], x_index_r: indices_train_r[index: index + batch_size], y: trainY[index: index + batch_size], left_l: trainLeftPad_l[index], right_l: trainRightPad_l[index], left_r: trainLeftPad_r[index], right_r: trainRightPad_r[index], length_l: trainLengths_l[index], length_r: trainLengths_r[index], norm_length_l: normalized_train_length_l[index], norm_length_r: normalized_train_length_r[index], mts: mt_train[index: index + batch_size], wmf: wm_train[index: index + batch_size]}, on_unused_input='ignore') train_model_predict = theano.function([index], [cost_this,layer3.errors(y), layer3_input, y , sum_uni_l, sum_uni_r, uni_cosine], givens={ x_index_l: indices_train_l[index: index + batch_size], x_index_r: indices_train_r[index: index + batch_size], y: trainY[index: index + batch_size], left_l: trainLeftPad_l[index], right_l: trainRightPad_l[index], left_r: trainLeftPad_r[index], right_r: trainRightPad_r[index], length_l: trainLengths_l[index], length_r: trainLengths_r[index], norm_length_l: normalized_train_length_l[index], norm_length_r: normalized_train_length_r[index], mts: mt_train[index: index + batch_size], wmf: wm_train[index: index + batch_size]}, on_unused_input='ignore') ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 500000000000000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches/5, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False max_acc=0.0 best_epoch=0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 #for minibatch_index in xrange(n_train_batches): # each batch minibatch_index=0 #shuffle(train_batch_start)#shuffle training data cost_tmp=0.0 for batch_start in train_batch_start: # iter means how many batches have been runed, taking into loop iter = (epoch - 1) * n_train_batches + minibatch_index +1 minibatch_index=minibatch_index+1 #if epoch %2 ==0: # batch_start=batch_start+remain_train #time.sleep(0.5) if iter%update_freq != 0: cost_ij, error_ij, layer3_input, y, sum_uni_l, sum_uni_r, uni_cosine=train_model_predict(batch_start) #print 'cost_ij: ', cost_ij cost_tmp+=cost_ij error_sum+=error_ij else: cost_average, error_ij, layer3_input= train_model(batch_start,cost_tmp) #print 'training @ iter = '+str(iter)+' average cost: '+str(cost_average)+' sum error: '+str(error_sum)+'/'+str(update_freq) error_sum=0 cost_tmp=0.0#reset for the next batch #print layer3_input #exit(0) #exit(0) if iter % n_train_batches == 0: print 'training @ iter = '+str(iter)+' average cost: '+str(cost_average)+' error: '+str(error_sum)+'/'+str(update_freq)+' error rate: '+str(error_sum*1.0/update_freq) #if iter ==1: # exit(0) if iter % validation_frequency == 0: #write_file=open('log.txt', 'w') test_losses=[] for i in test_batch_start: test_loss, pred_y, layer3_input, y=test_model(i) #test_losses = [test_model(i) for i in test_batch_start] test_losses.append(test_loss) #write_file.write(str(pred_y[0])+'\n')#+'\t'+str(testY[i].eval())+ #write_file.close() test_score = numpy.mean(test_losses) print(('\t\t\t\t\t\tepoch %i, minibatch %i/%i, test acc of best ' 'model %f %%') % (epoch, minibatch_index, n_train_batches, (1-test_score) * 100.)) #now, see the results of svm write_feature=open('feature_check.txt', 'w') train_y=[] train_features=[] for batch_start in train_batch_start: cost_ij, error_ij, layer3_input, y, sum_uni_l, sum_uni_r, uni_cosine=train_model_predict(batch_start) train_y.append(y[0]) train_features.append(layer3_input[0]) write_feature.write(' '.join(map(str,layer3_input[0]))+'\n') write_feature.close() test_y=[] test_features=[] for i in test_batch_start: test_loss, pred_y, layer3_input, y=test_model(i) test_y.append(y[0]) test_features.append(layer3_input[0]) clf = svm.SVC(kernel='linear')#OneVsRestClassifier(LinearSVC()) #linear 76.11%, poly 75.19, sigmoid 66.50, rbf 73.33 clf.fit(train_features, train_y) results=clf.predict(test_features) lr=LinearRegression().fit(train_features, train_y) results_lr=lr.predict(test_features) corr_count=0 corr_lr=0 test_size=len(test_y) for i in range(test_size): if results[i]==test_y[i]: corr_count+=1 if numpy.absolute(results_lr[i]-test_y[i])<0.5: corr_lr+=1 acc=corr_count*1.0/test_size acc_lr=corr_lr*1.0/test_size if acc > max_acc: max_acc=acc best_epoch=epoch if acc_lr> max_acc: max_acc=acc_lr best_epoch=epoch print '\t\t\t\t\t\t\t\t\t\t\tsvm acc: ', acc, 'LR acc: ', acc_lr, ' max acc: ', max_acc , ' at epoch: ', best_epoch #exit(0) if patience <= iter: done_looping = True break #after each epoch, increase the batch_size if epoch%2==1: update_freq=update_freq*1 else: update_freq=update_freq/1 #print 'Batch_size: ', update_freq end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' 
ran for %.2fm' % ((end_time - start_time) / 60.))
def train(self, input_train, target_train=None, input_test=None, target_test=None, epochs=100, epsilon=None, summary_type='table'): """ Method train neural network. Parameters ---------- input_train : array-like target_train : array-like or None input_test : array-like or None target_test : array-like or None epochs : int Defaults to `100`. epsilon : float or None Defaults to ``None``. """ show_epoch = self.show_epoch logs = self.logs training = self.training = AttributeKeyDict() if epochs <= 0: raise ValueError("Number of epochs needs to be greater than 0.") if epsilon is not None and epochs <= 2: raise ValueError("Network should train at teast 3 epochs before " "check the difference between errors") if summary_type == 'table': logging_info_about_the_data(self, input_train, input_test) logging_info_about_training(self, epochs, epsilon) logs.newline() summary = SummaryTable( table_builder=table.TableBuilder( table.Column(name="Epoch #"), table.NumberColumn(name="Train err"), table.NumberColumn(name="Valid err"), table.TimeColumn(name="Time", width=10), stdout=logs.write ), network=self, delay_limit=1., delay_history_length=10, ) elif summary_type == 'inline': summary = InlineSummary(network=self) else: raise ValueError("`{}` is unknown summary type" "".format(summary_type)) iterepochs = create_training_epochs_iterator(self, epochs, epsilon) show_epoch = parse_show_epoch_property(self, epochs, epsilon) training.show_epoch = show_epoch # Storring attributes and methods in local variables we prevent # useless __getattr__ call a lot of times in each loop. # This variables speed up loop in case on huge amount of # iterations. training_errors = self.errors validation_errors = self.validation_errors shuffle_data = self.shuffle_data train_epoch = self.train_epoch epoch_end_signal = self.epoch_end_signal train_end_signal = self.train_end_signal on_epoch_start_update = self.on_epoch_start_update is_first_iteration = True can_compute_validation_error = (input_test is not None) last_epoch_shown = 0 symMatrix = tt.dmatrix("symMatrix") symEigenvalues, eigenvectors = tt.nlinalg.eig(symMatrix) get_Eigen = theano.function([symMatrix], [symEigenvalues, eigenvectors] ) epsilon = [] alpha = [] alpha0 = [] with logs.disable_user_input(): for epoch in iterepochs: validation_error = None epoch_start_time = time.time() on_epoch_start_update(epoch) if shuffle_data: input_train, target_train = shuffle(input_train, target_train) try: train_error = train_epoch(input_train, target_train) H = self.variables.hessian.get_value() ev, _ = get_Eigen(H) if can_compute_validation_error: validation_error = self.prediction_error(input_test, target_test) epsilon.append(train_error) alpha.append(numpy.sum(ev < 0)) alpha0.append(numpy.sum(ev == 0)) training_errors.append(train_error) validation_errors.append(validation_error) epoch_finish_time = time.time() training.epoch_time = epoch_finish_time - epoch_start_time if epoch % training.show_epoch == 0 or is_first_iteration: summary.show_last() last_epoch_shown = epoch if epoch_end_signal is not None: epoch_end_signal(self) is_first_iteration = False except StopNetworkTraining as err: # TODO: This notification breaks table view in terminal. # I need to show it in a different way. logs.message("TRAIN", "Epoch #{} stopped. 
{}" "".format(epoch, str(err))) break if epoch != last_epoch_shown: summary.show_last() if train_end_signal is not None: train_end_signal(self) summary.finish() logs.newline() plt.plot(alpha,epsilon,'r') plt.plot(alpha0,epsilon,'b') plt.xlabel('alpha') plt.ylabel('epsilon') # want to collect the output of stdout in a variable capture = StringIO() capture.truncate(0) save_stdout = sys.stdout sys.stdout = capture print self.connection sys.stdout=save_stdout s = capture.getvalue() s=s.split('\n')[0:][0] str = self.class_name() str1 = s+'---'+str+'-alpha-epsilon'+'.eps' plt.savefig(str1,format='eps',dpi=1000) plt.plot(iterepochs,epsilon) plt.xlabel('iterepochs') plt.ylabel('epsilon') str2=s+'---'+str+'-epsilon-iterepochs'+'.eps' plt.savefig(str2,format='eps',dpi=1000)
def train(num_epochs, batch_size, X_train, y_train, X_val, y_val, input_dim, output_dim, depth, num_units, drop_input=None, drop_hidden=None, report=50): """ Train neural network. :param num_epochs: training epochs count :param batch_size: integer :param X_train: numpy array with train data :param y_train: numpy array with train targets :param X_val: numpy array with validation data :param y_val: numpy arrays with validation targets :param input_dim: count of input units :param output_dim: count of output units :param depth: hidden layers count :param num_units: count of units in hidden layers :param drop_input: input dropout value :param drop_hidden: hidden dropout value :param report: report output frequency :return: lasagne network """ input_var = T.dmatrix('inputs') target_var = T.imatrix('targets') network = build_mlp(input_dim, output_dim, depth, num_units, drop_input, drop_hidden, input_var) # create a loss expression for training prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() # create update expressions for training params = lasagne.layers.get_all_params(network, trainable=True) updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9) # create a loss expression for validation with deterministic forward pass (disable dropout layers) test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() # create an expression for the classification accuracy test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis=1)), dtype=theano.config.floatX) # compile a function performing a training step on a mini-batch train_fn = theano.function([input_var, target_var], loss, updates=updates) # compile a function computing the validation loss and accuracy val_fn = theano.function([input_var, target_var], [test_loss, test_acc]) for epoch in range(num_epochs): # full pass over the training data train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # full pass over the validation data val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 if (epoch + 1) % report == 0: print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time)) print("\ttraining loss:\t\t\t{:.6f}".format(train_err / train_batches)) print("\tvalidation loss:\t\t{:.6f}".format(val_err / val_batches)) print("\tvalidation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100)) return network
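A hypothetical invocation of the train() helper above; the data shapes, train/validation split, and hyper-parameters are illustrative assumptions (build_mlp and iterate_minibatches are assumed to be defined alongside this function).

import numpy as np

X = np.random.rand(1000, 20)                                        # 20 input features
y = np.eye(3)[np.random.randint(0, 3, size=1000)].astype('int32')   # one-hot targets
network = train(num_epochs=200, batch_size=50,
                X_train=X[:800], y_train=y[:800],
                X_val=X[800:], y_val=y[800:],
                input_dim=20, output_dim=3,
                depth=2, num_units=64,
                drop_input=0.1, drop_hidden=0.3, report=50)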
def test_convolution(self): # print '\n\n*************************************************' # print ' TEST CONVOLUTION' # print '*************************************************' # fixed parameters bsize = 10 # batch size imshp = (28, 28) kshp = (5, 5) nkern = 5 ssizes = ((1, 1), (2, 2), (3, 3), (4, 4)) convmodes = ('full', 'valid') # symbolic stuff bias = tensor.dvector() kerns = tensor.dmatrix() input = tensor.dmatrix() rng = numpy.random.RandomState(3423489) filters = rng.randn(nkern, numpy.prod(kshp)) biasvals = rng.randn(nkern) for mode in ('FAST_COMPILE', 'FAST_RUN'): # , profmode): ttot, ntot = 0, 0 for conv_mode in convmodes: for ss in ssizes: output, outshp = sp.convolve(kerns, kshp, nkern, input,\ imshp, ss, bias=bias, mode=conv_mode) f = function([kerns, bias, input], output, mode=mode) # now test with real values img2d = numpy.arange(bsize * numpy.prod(imshp)).reshape(( \ bsize,) + imshp) img1d = img2d.reshape(bsize, -1) # create filters (need to be flipped to use convolve2d) filtersflipped = numpy.zeros((nkern, ) + kshp) for k in range(nkern): it = reversed(filters[k, :]) for i in range(kshp[0]): for j in range(kshp[1]): filtersflipped[k, i, j] = it.next() # compute output with convolve2d if conv_mode == 'valid': fulloutshp = numpy.array(imshp) - numpy.array(kshp) + 1 else: fulloutshp = numpy.array(imshp) + numpy.array(kshp) - 1 ntime1 = time.time() refout = numpy.zeros((bsize, ) + tuple(fulloutshp) + (nkern, )) for b in range(bsize): for n in range(nkern): refout[b, ..., n] = convolve2d(img2d[b, :, :], filtersflipped[n, ...], conv_mode) ntot += time.time() - ntime1 # need to flatten images bench1 = refout[:, 0::ss[0], 0::ss[1], :].reshape(bsize, -1, nkern) bench1 += biasvals.reshape(1, 1, nkern) # swap the last two dimensions (output needs to be nkern x outshp) bench1 = numpy.swapaxes(bench1, 1, 2) ttime1 = time.time() out1 = f(filters, biasvals, img1d) ttot += time.time() - ttime1 temp = bench1.flatten() - out1.flatten() assert (temp < 1e-5).all()
def __init__(self, theano_rng=None, input=None, n_visible=None, n_hidden=None,
             W=None, bhid=None, bvis=None, activation=None, firstlayer=1,
             variance=None):
    self.n_visible = n_visible
    self.n_hidden = n_hidden

    if not W:
        initial_W = numpy.asarray(
            theano_rng.normal(0.0, 1.0 / numpy.sqrt(n_visible),
                              size=(n_visible, n_hidden)),
            dtype=theano.config.floatX)
        W = theano.shared(value=initial_W, name='W')
        #initial_W = numpy.asarray( numpy_rng.uniform(
        #      low = -4*numpy.sqrt(6./(n_hidden+n_visible)),
        #      high = 4*numpy.sqrt(6./(n_hidden+n_visible)),
        #      size = (n_visible, n_hidden)),
        #      dtype = theano.config.floatX)

    if not bvis:
        bvis = theano.shared(
            value=numpy.zeros(n_visible, dtype=theano.config.floatX))

    if not bhid:
        bhid = theano.shared(value=numpy.zeros(n_hidden,
                                               dtype=theano.config.floatX),
                             name='b')

    self.W = W
    self.b = bhid
    self.b_prime = bvis
    self.W_prime = self.W.T
    self.theano_rng = theano_rng
    self.activation = activation

    if input is None:
        self.x = T.dmatrix(name='input')
    else:
        self.x = input

    self.params = [self.W, self.b, self.b_prime]

    # first layer, use Gaussian noise
    self.firstlayer = firstlayer
    if self.firstlayer == 1:
        if variance is None:
            self.var = T.vector(name='input')
        else:
            self.var = variance
    else:
        self.var = None
"eval()" takes a dictionary with names of variables and values to be assigned to them eval() utimately imports a "function()", so we end up in the same situation, so it is slower the first time we invoke this, subsequent invocations are faster since it saves "function()" imported already that way we don't need to import "function()", but importing and using it is more flexible than relyin on "eval()" itself ''' ''' Addition of two matrices is also very simple, the only difference is using T.dmatrix instead of T.dscalar ''' x = T.dmatrix('x') y = T.dmatrix('y') z = x + y f2 = function([x, y], z) ''' now we do not assign the matrices to x and y directly, but instead we pass matrices to function as variables, invoking it like: "f2([matrix_nr_1], [matrix_nr_2]) here we add, elementwise, 2 2-dimensional matrices ''' a1 = [[1, 2], [3, 4]] a2 = [[10, 20], [30, 40]]
def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_print=True, repeat=1, unroll_patch=False, unroll_patch_size=False, verbose=0): # build actual input images imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2]) a = T.dmatrix() kerns = [a for i in nkerns] inputs4 = dmatrix4() kerns4 = dmatrix4() # for each layer ntot = 0 tctot = 0 tpytot = 0 for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): if do_print: print '************* layer %i ***************' % n_layer print conv_mode, ss, n_layer, kshp, nkern # actual values w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp]) w_flip = flip(w, kshp).reshape(w.shape) # manual implementation # check first stage padimg = imgval if conv_mode == 'full': padimg_shp = N.array( imshp[1:]) + 2 * (N.array(kshp) - N.array([1, 1])) padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp]) padimg[:, :, kshp[0] - 1:-kshp[0] + 1, kshp[1] - 1:-kshp[1] + 1] = imgval outshp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) time1 = time.time() outval = N.zeros(N.r_[bsize, outshp]) if validate: # causes an atexit problem from scipy.signal.sigtools import _convolve2d from scipy.signal.signaltools import _valfrommode, _bvalfromboundary val = _valfrommode(conv_mode) bval = _bvalfromboundary('fill') for b in range(bsize): # loop over batches for n in range(nkern): # loop over filters for i in range(imshp[0]): # loop over input feature maps outval[b, n, ...] += _convolve2d(\ imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0)[0::ss[0], 0::ss[1]] ntot += time.time() - time1 # ConvOp if unroll_patch and not unroll_patch_size: conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) else: conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) l1shp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) propup2 = function([inputs4, kerns4], conv_op) propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py")) time1 = time.time() for i in range(repeat): hidval2_ = propup2(imgval, w_flip) hidval2 = hidval2_ # [:,:,0::ss[0],0::ss[1]] tctot += time.time() - time1 if conv_op_py: time1 = time.time() for i in range(repeat): hidval3_ = propup3(imgval, w_flip) hidval3 = hidval3_ # [:,:,0::ss[0],0::ss[1]] tpytot += time.time() - time1 assert (N.abs(hidval2 - hidval3) < 1e-5).all() else: tpytot += 0 if validate: temp = N.abs(outval - hidval2) assert (temp < 1e-5).all() if validate and conv_op_py: temp = N.abs(outval - hidval3) assert (temp < 1e-5).all() imshp = tuple(outshp) imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) return tctot, tpytot, ntot
# View more python tutorials on my Youtube and Youku channel!!!
# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial
# 5 - theano.function
"""
Please note, this code is only for python 3+.
If you are using python 2+, please modify the code accordingly.
"""
from __future__ import print_function
import numpy as np
import theano
import theano.tensor as T

# activation function example
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))    # logistic or soft step
logistic = theano.function([x], s)
print(logistic([[0, 1], [-1, -2]]))

# multiple outputs for a function
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
print(f(np.ones((2, 2)), np.arange(4).reshape((2, 2))))

# default value and name for a function
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
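The record stops right after defining z = (x + y) * w; this tutorial usually continues by giving inputs default values and names via theano.In, sketched below (the particular default values are assumptions).

from theano import In

f_default = theano.function([x, In(y, value=1), In(w, value=2, name='weights')], z)
print(f_default(23))                   # y and w take their defaults: (23 + 1) * 2 = 48
print(f_default(23, 2))                # override y:                  (23 + 2) * 2 = 50
print(f_default(23, 2, weights=4))     # override w by its name:      (23 + 2) * 4 = 100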
def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), do_print=True, repeat=1, unroll_patch=False, unroll_patch_size=False, verbose=0): # build actual input images imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2]) a = T.dmatrix() kerns = [a for i in nkerns] inputs4 = dmatrix4() kerns4 = dmatrix4() # for each layer ntot = 0 tctot = 0 tpytot = 0 for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): if do_print: print '************* layer %i ***************' % n_layer print conv_mode, ss, n_layer, kshp, nkern # actual values w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp]) w_flip = flip(w, kshp).reshape(w.shape) outshp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) time1 = time.time() outval = N.zeros(N.r_[bsize, outshp]) # ConvOp if unroll_patch and not unroll_patch_size: conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) else: conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) l1shp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) propup2 = function([inputs4, kerns4], conv_op) time1 = time.time() for i in range(repeat): hidval2_ = propup2(imgval, w_flip) hidval2 = hidval2_ # [:,:,0::ss[0],0::ss[1]] tctot += time.time() - time1 imshp = tuple(outshp) imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) return tctot, tpytot, ntot
def test1(self):
    a = tensor.dmatrix()
    w = sort(a)
    f = theano.function([a], w)
    utt.assert_allclose(f(self.m_val), np.sort(self.m_val))
def __init__( self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500, W=None, bhid=None, bvis=None ): """ Initialize the dA class by specifying the number of visible units (the dimension d of the input ), the number of hidden units ( the dimension d' of the latent or hidden space ) and the corruption level. The constructor also receives symbolic variables for the input, weights and bias. Such a symbolic variables are useful when, for example the input is the result of some computations, or when weights are shared between the dA and an MLP layer. When dealing with SdAs this always happens, the dA on layer 2 gets as input the output of the dA on layer 1, and the weights of the dA are used in the second stage of training to construct an MLP. :type numpy_rng: numpy.random.RandomState :param numpy_rng: number random generator used to generate weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type input: theano.tensor.TensorType :param input: a symbolic description of the input or None for standalone dA :type n_visible: int :param n_visible: number of visible units :type n_hidden: int :param n_hidden: number of hidden units :type W: theano.tensor.TensorType :param W: Theano variable pointing to a set of weights that should be shared belong the dA and another architecture; if dA should be standalone set this to None :type bhid: theano.tensor.TensorType :param bhid: Theano variable pointing to a set of biases values (for hidden units) that should be shared belong dA and another architecture; if dA should be standalone set this to None :type bvis: theano.tensor.TensorType :param bvis: Theano variable pointing to a set of biases values (for visible units) that should be shared belong dA and another architecture; if dA should be standalone set this to None """ self.n_visible = n_visible self.n_hidden = n_hidden # create a Theano random generator that gives symbolic random values if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # note : W' was written as `W_prime` and b' as `b_prime` if not W: # W is initialized with `initial_W` which is uniformely sampled # from -4*sqrt(6./(n_visible+n_hidden)) and # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if # converted using asarray to dtype # theano.config.floatX so that the code is runable on GPU initial_W = numpy.asarray( numpy_rng.uniform( low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)), high=4 * numpy.sqrt(6. / (n_hidden + n_visible)), size=(n_visible, n_hidden) ), dtype=theano.config.floatX ) W = theano.shared(value=initial_W, name='W', borrow=True) if not bvis: bvis = theano.shared( value=numpy.zeros( n_visible, dtype=theano.config.floatX ), borrow=True ) if not bhid: bhid = theano.shared( value=numpy.zeros( n_hidden, dtype=theano.config.floatX ), name='b', borrow=True ) self.W = W # b corresponds to the bias of the hidden self.b = bhid # b_prime corresponds to the bias of the visible self.b_prime = bvis # tied weights, therefore W_prime is W transpose self.W_prime = self.W.T self.theano_rng = theano_rng # if no input is given, generate a variable representing the input if input is None: # we use a matrix because we expect a minibatch of several # examples, each example being a row self.x = T.dmatrix(name='input') else: self.x = input self.params = [self.W, self.b, self.b_prime]
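This constructor matches the canonical denoising-autoencoder class, which typically continues with corruption, encoding, decoding and cost methods; a condensed sketch of those methods is given below (they follow the well-known tutorial structure and are not part of the original record).

    def get_corrupted_input(self, input, corruption_level):
        # zero out a random fraction of the inputs with a binomial mask
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input

    def get_hidden_values(self, input):
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # cross-entropy reconstruction cost, averaged over the minibatch
        L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        cost = T.mean(L)
        gparams = T.grad(cost, self.params)
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]
        return cost, updates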
def speed_multilayer_conv(): # calculate the speed up of different combination of unroll # put the paramter to the same you will try. validate = False # we don't validate the result to have it much faster! repeat = 3 verbose = 1 unroll_batch = [1, 2, 3, 4, 5, 6, 10] # 15, 30, 60 always much slower unroll_kern = [1, 2, 3, 4, 5, 6, 10] # 15, 30, 60 always much slower #unroll_batch = [1,4,5] #unroll_kern = [1,4,5] #unroll_batch = [1,4] #unroll_kern = [1,4] unroll_patch = [True, False] bsize = 60 # batch size imshp_start = (1, 48, 48) # un square shape to test more corner case. kshps = ([11, 12], ) # un square shape to test more corner case. nkerns = [60] # per output pixel ssizes = [ (1, 1), ] # (1,1)]#(2,2) bugged convmodes = ['valid', 'full'] do_convolve2 = False a = T.dmatrix() kerns = [a for i in nkerns] assert len(kshps) == len(nkerns) == len(kerns) timing = N.zeros( (len(unroll_batch), len(unroll_kern), 3, len(convmodes) * len(ssizes))) t_b_k = [] # calculate the timing with unrolling print 'time unroll batch kern' best = [] worst = [] t_ = [] for unroll_b, n_b in zip(unroll_batch, range(len(unroll_batch))): for unroll_k, n_k in zip(unroll_kern, range(len(unroll_kern))): t_b_k.append(str(unroll_b) + "/" + str(unroll_k)) if not t_: tctot, tpytot, ntot = [], [], [] for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): for ss, n_ss in zip(ssizes, range(len(ssizes))): # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, verbose=verbose, do_print=False, repeat=repeat) tctot += [tctot_] tpytot += [tpytot_] ntot += [ntot_] if unroll_b == 4 and unroll_k == 4: # print "unroll 4/4",tctot best = tctot if unroll_b == 1 and unroll_k == 1: # print "unroll 1/1",tctot worst = tctot timing[n_b, n_k] = [tctot, tpytot, ntot] # [sum(tctot), sum(tpytot), sum(ntot)] if not t_: t = timing[:, :, 0, :] # We select only the c timing. 
else: t = t_ t = N.asarray(t) # calculate the old timing print 'time old version' tctot, tpytot, ntot = [], [], [] tctot_ = [] if not tctot_: for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): for ss, n_ss in zip(ssizes, range(len(ssizes))): # tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, verbose=verbose, do_print=False, repeat=repeat) tctot += [tctot_] tpytot += [tpytot_] ntot += [ntot_] else: tctot = N.asarray(tctot_) print "old code timing %.3fs" % sum(tctot), tctot best = N.asarray(best) worst = N.asarray(worst) print "timing for unrolled version" print "unroll_batch/unroll_kern valid_mode full_mode" for n_b in range(len(unroll_batch)): for n_k in range(len(unroll_kern)): print(unroll_batch[n_b], unroll_kern[n_k]) + tuple(t[n_b, n_k]), ',' t_detail = t t = t.sum(axis=2) print "max %.3fs" % t.max( ), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()] print "min %.3fs" % t.min( ), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()] print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t.min(), sum(tctot) / t.min()) print worst / best, tctot / best # calculate the timing of unroll_patch print 'time unroll_patch' tctot_patch = [] tctot_patch_size = [] for conv_mode, n_mode in zip(convmodes, range(len(convmodes))): for ss, n_ss in zip(ssizes, range(len(ssizes))): #tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, unroll_patch=True, verbose=verbose, do_print=False, repeat=repeat) tctot_patch += [tctot_] #tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False,unroll_patch_size=True) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet( conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, unroll_patch=True, verbose=verbose, do_print=False, unroll_patch_size=True, repeat=repeat) tctot_patch_size += [tctot_] t_patch = sum(tctot_patch) print "unroll_patch without shape time", tctot_patch print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t_patch, sum(tctot) / t_patch) print best / tctot_patch, worst / tctot_patch t_patch_size = sum(tctot_patch_size) print "unroll_patch with shape time", tctot_patch_size print "speedup vs (1/1)%.3fx, vs old %.3fx" % (t.max() / t_patch_size, sum(tctot) / t_patch_size) print best / tctot_patch_size, worst / tctot_patch_size return
class GaussianLikelihoodModel(LikelihoodModel): def __init__(self, **parameters): super(GaussianLikelihoodModel, self).__init__(**parameters) self.sigma0inv = np.linalg.inv(self.sigma0) self.D = self.sigma.shape[0] self.compile() def transition_probability(self, parent, child): child_latent, child_time = child.get_state( 'latent_value'), child.get_state('time') if parent is None: return self.calculate_transition(child_latent, self.mu0, child_time, -1) parent_latent, parent_time = parent.get_state( 'latent_value'), parent.get_state('time') assert parent_time < child_time, (parent_time, child_time) return self.calculate_transition(child_latent, parent_latent, child_time, parent_time) @theanify(T.dvector('state'), T.dvector('parent'), T.dscalar('time'), T.dscalar('parent_time')) def calculate_transition(self, state, parent, time, parent_time): sigma = (time - parent_time) * self.sigma mu = parent logdet = T.log(T.nlinalg.det(sigma)) delta = state - mu pre = -(self.D / 2.0 * np.log(2 * np.pi) + 1 / 2.0 * logdet) return pre + -0.5 * (T.dot( delta, T.dot(T.nlinalg.matrix_inverse(sigma), delta))) @theanify(T.dvector('mean'), T.dmatrix('cov')) def sample(self, mean, cov): e, v = T.nlinalg.eigh(cov) x = RandomStreams().normal(size=(self.D, )) x = T.dot(x, T.sqrt(e)[:, None] * v) return x + mean def sample_transition(self, node, parent): children = node.children time = node.get_state('time') if parent is None: mu0 = self.mu0 sigma0 = self.sigma0 sigma0inv = self.sigma0inv else: mu0 = parent.get_state('latent_value') sigma0 = self.sigma * (time - parent.get_state('time')) sigma0inv = np.linalg.inv(sigma0) mus = [c.get_state('latent_value') for c in children] sigmas = [self.sigma * (c.get_state('time') - time) for c in children] sigmas_inv = [np.linalg.inv(s) for s in sigmas] sigman = np.linalg.inv(sigma0inv + sum(sigmas_inv)) mun = np.dot( sigman, np.dot(sigma0inv, mu0) + sum([np.dot(a, b) for a, b in zip(sigmas_inv, mus)])) return self.sample(mun, sigman) def get_parameters(self): return {"sigma", "sigma0", "mu0"}
from blocks.serialization import load from theano import tensor, function # theano variables features_car_cat = tensor.dmatrix('features_car_cat') features_car_int = tensor.dmatrix('features_car_int') features_nocar_cat = tensor.dmatrix('features_nocar_cat') features_nocar_int = tensor.dmatrix('features_nocar_int') features_cp = tensor.imatrix('codepostal') features_hascar = tensor.imatrix('features_hascar') main_loop = load(open("./model", "rb")) model = main_loop.model f = model.get_theano_function() from fuel.datasets.hdf5 import H5PYDataset submit_set = H5PYDataset('./data/data.hdf5', which_sets=('submit', ), load_in_memory=True) print model.inputs print submit_set.provides_sources m = [] for i in model.inputs: m.append(submit_set.provides_sources.index(i.name)) from fuel.schemes import SequentialScheme from fuel.streams import DataStream
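The record ends at the fuel imports; a plausible continuation, sketched below, wraps the submit set in a sequential stream and pushes each batch through the compiled function (the batch size and the use of the index list m are assumptions based on the code above).

scheme = SequentialScheme(examples=submit_set.num_examples, batch_size=100)
stream = DataStream(submit_set, iteration_scheme=scheme)

predictions = []
for batch in stream.get_epoch_iterator():
    # reorder the batch sources so they match the order of the model's inputs
    predictions.append(f(*[batch[i] for i in m]))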