Code Example #1
File: mnist.py Project: wead-hsu/semi-vae
def build(params):
    image_shape = params['image_shape']
    image_layer = layers.InputLayer([params['batch_size'], image_shape[0] * image_shape[1]])
    label_layer = layers.InputLayer([params['batch_size'], params['num_classes']])

    # reweighted alpha
    reweighted_alpha = (params['alpha'] * params['num_samples_train'] / params['num_samples_train_label'])
    semi_vae_layer = semi_vae.SemiVAE(
        [image_layer, label_layer],
        params['num_units_hidden_common'],
        params['dim_z'],
        reweighted_alpha
    )

    sym_label_images = T.matrix('label_images')
    sym_label_labels = T.matrix('label_labels')
    sym_unlabel_images = T.matrix('unlabel_images')

    cost_for_label = semi_vae_layer.get_cost_for_label([sym_label_images, sym_label_labels])
    cost_for_unlabel = semi_vae_layer.get_cost_for_unlabel(sym_unlabel_images)
    cost_together = semi_vae_layer.get_cost_together([sym_label_images, sym_label_labels, sym_unlabel_images])
    cost_test, acc_test = semi_vae_layer.get_cost_test([sym_label_images, sym_label_labels])

    network_params = semi_vae_layer.get_params()

    for param in network_params:
        print(param, param.get_value().shape)

    update_for_label = updates.adam(cost_for_label, network_params)
    update_for_unlabel = updates.adam(cost_for_unlabel, network_params)
    update_together = updates.adam(cost_together, network_params, learning_rate=3e-4)

    fn_train = theano.function([sym_label_images, sym_label_labels, sym_unlabel_images],
                                cost_together,
                                updates = update_together,
                                on_unused_input = 'warn'
                                )
    '''
    fn_for_label = theano.function([sym_label_images, sym_label_labels], cost_for_label,
                                   updates = update_for_label,
                                   #on_unused_input = 'warn',
                                   )
    fn_for_unlabel = theano.function([sym_unlabel_images], cost_for_unlabel,
                                     updates = update_for_unlabel,
                                     #on_unused_input = 'warn'
                                     )
    '''
    fn_for_label = None
    fn_for_unlabel = None


    fn_for_test = theano.function([sym_label_images, sym_label_labels], [cost_test, acc_test])

    return semi_vae_layer, fn_for_label, fn_for_unlabel, fn_train, fn_for_test
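
Most of the examples on this page follow the same basic recipe: build a scalar loss expression, collect the trainable parameters, pass both to adam() to obtain an update dictionary, and compile a training function with theano.function. Below is a minimal, self-contained sketch of that pattern; the toy network, layer sizes, and variable names are illustrative assumptions, not code from any of the projects listed here.

# Minimal sketch (assumes Theano and Lasagne are installed).
import theano
import theano.tensor as T
import lasagne
from lasagne.updates import adam

x = T.matrix('x')
y = T.ivector('y')

# toy two-layer classifier, sizes chosen arbitrarily for illustration
l_in = lasagne.layers.InputLayer((None, 20), input_var=x)
l_hid = lasagne.layers.DenseLayer(l_in, num_units=50,
                                  nonlinearity=lasagne.nonlinearities.tanh)
l_out = lasagne.layers.DenseLayer(l_hid, num_units=3,
                                  nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(l_out)
loss = lasagne.objectives.categorical_crossentropy(prediction, y).mean()
params = lasagne.layers.get_all_params(l_out, trainable=True)

# adam(loss_or_grads, params, ...) accepts either a scalar loss expression
# or a list of gradient expressions as its first argument
updates = adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([x, y], loss, updates=updates)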
Code Example #2
File: multi_gpu.py Project: yaoli/theano_multi_gpu
    def build_theano_fn_simple(self):
        print '%s build theano fn simple' % self.rank
        x = T.fmatrix('x')
        y = T.ivector('y')
        W_1, b_1 = common.init_tparams_fc(784, 1000, 'l1')
        out_1 = T.tanh(T.dot(x, W_1) + b_1)
        W_2, b_2 = common.init_tparams_fc(1000, 2000, 'l2')
        out_2 = T.tanh(T.dot(out_1, W_2) + b_2)
        W_3, b_3 = common.init_tparams_fc(2000, 3000, 'l3')
        out_3 = T.tanh(T.dot(out_2, W_3) + b_3)
        W_4, b_4 = common.init_tparams_fc(3000, 10, 'softmax')
        prob = T.nnet.softmax((T.dot(out_3, W_4) + b_4))
        self.params = [W_1, b_1, W_2, b_2, W_3, b_3, W_4, b_4]
        # cost
        cost = -T.log(prob[T.arange(prob.shape[0]), y] + 1e-6).mean()
        pred = T.argmax(prob, 1)
        grads = T.grad(cost, self.params)
        grads_all_reduced = self.grad_all_reduce(grads)
        updates = adam(grads_all_reduced, self.params)
        #updates = adam(grads, self.params)
        self.train_fn = theano.function([x, y], [cost, prob, pred, y],
                                        updates=updates,
                                        accept_inplace=True)

        # the following code is used for debugging only
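        # NOTE: 'model' is not defined in this snippet; it presumably refers to a
        # module-level setting in the original project.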
        if model == 'debug':
            self.debug_var = theano.shared(numpy.float32(1.))
            debug_var_global = AllReduceSum(self.debug_var,
                                            inplace=True,
                                            worker=self.worker)
            updates = {self.debug_var: debug_var_global}
            self.debug_fn = theano.function([], [],
                                            updates=updates,
                                            accept_inplace=True)
Code Example #3
def generate_theano_func(args, network, penalty, input_dict, target_var):

    prediction = get_output(network, input_dict)

    # loss = T.mean( target_var * ( T.log(target_var) - prediction ))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(network) )
    # penalty = sum ( T.sum(lstm_param**2) for lstm_param in lstm_params )
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param**2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1) )

    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean( target_var * ( T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

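    # NOTE: input1_var, input1_mask_var, input2_var, and input2_mask_var are not
    # defined in this snippet; they are assumed to come from the enclosing scope
    # of the original project.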
    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss,
        updates=updates,
        allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction],
            allow_input_downcast=True,
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc],
            allow_input_downcast=True,
        )

    return train_fn, val_fn
Code Example #4
def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in xrange(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads,
                       params=params,
                       learning_rate=learning_rate,
                       beta1=beta1,
                       beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads,
                                    params=params,
                                    learning_rate=learning_rate,
                                    momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)

    return updates
Code Example #5
def prep_train(alpha=0.0002, nz=100):
    E, D = build_net(nz=nz)

    x = T.tensor4('x')

    #Get outputs z=E(x), x_hat=D(z)
    encoding = get_output(E, x)
    decoding = get_output(D, encoding)

    #Get parameters of E and D
    params_e = get_all_params(E, trainable=True)
    params_d = get_all_params(D, trainable=True)
    params = params_e + params_d

    #Calc cost and updates
    cost = T.mean(squared_error(x, decoding))
    grad = T.grad(cost, params)

    updates = adam(grad, params, learning_rate=alpha)

    train = theano.function(inputs=[x], outputs=cost, updates=updates)
    rec = theano.function(inputs=[x], outputs=decoding)
    test = theano.function(inputs=[x], outputs=cost)

    return train, test, rec, E, D
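
Note that Example #5 (like Examples #15 and #26 further down) passes an explicit list of gradients from T.grad to adam() rather than the loss itself, which makes it possible to clip or norm-constrain the gradients before the update rules are built. The following is a minimal sketch of that convention; the toy parameter and loss are assumptions for illustration, not code from the project above.

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import adam, total_norm_constraint

# toy parameter and loss, purely for illustration
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')
loss = T.sqr(T.dot(w, x) - 1.0)

grads = T.grad(loss, [w])
grads = [T.clip(g, -3, 3) for g in grads]          # element-wise clipping
grads = total_norm_constraint(grads, max_norm=3)   # rescale by the total gradient norm

# a list passed to adam() is treated as precomputed gradients; it must align with params
updates = adam(grads, [w], learning_rate=1e-3)
train_fn = theano.function([x], loss, updates=updates)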
Code Example #6
    def set_train_data(self, train_data, train_target):
        self.train_data = theano.shared(
            np.asarray(train_data, dtype=theano.config.floatX))
        self.train_target = theano.shared(
            np.asarray(train_target, dtype=theano.config.floatX))

        i = T.iscalar()
        sigma_prior = T.exp(-3)
        learning_rate = 0.001
        batch_size = 100

        if self.learning_task == "classification":
            objective = self.cross_entropy(batch_size=batch_size,
                                           sigma_prior=sigma_prior)
        elif self.learning_task == "regression":
            objective = self.mean_square_loss(batch_size=batch_size,
                                              sigma_prior=sigma_prior)

        # train function setting
        updates = adam(objective, self.all_params, learning_rate=learning_rate)

        self.train_function = theano.function(
            inputs=[i],
            outputs=objective,
            updates=updates,
            givens={
                self.x: self.train_data[i * batch_size:(i + 1) * batch_size],
                self.y: self.train_target[i * batch_size:(i + 1) * batch_size]
            })

        self.n_train_batches = int(self.train_data.get_value().shape[0] /
                                   float(batch_size))
Code Example #7
def get_updates(nnet, train_obj, trainable_params):

    implemented_solvers = ("nesterov", "adagrad", "adadelta", "adam")

    if not hasattr(nnet, "solver") or nnet.solver not in implemented_solvers:
        nnet.sgd_solver = "nesterov"
    else:
        nnet.sgd_solver = nnet.solver

    if nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=0.9)

    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)

    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate)

    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)

    return updates
Code Example #8
File: net_functions.py Project: mercred/HLCV16
def getFunctions(pixel, LR = 0.001):
    X = T.tensor4('X')
    Y = T.ivector('y')

    # set up theano functions to generate output by feeding data through network, any test outputs should be deterministic
    output_layer = ZFTurboNet(pixel,X)
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # set up the loss that we aim to minimize, when using cat cross entropy our Y should be ints not one-hot
    loss = lasagne.objectives.categorical_crossentropy(output_train, Y)
    penalty = lasagne.regularization.regularize_layer_params(output_layer, l1) * 5e-4
    loss = loss + penalty
    loss = loss.mean()

    # set up loss functions for validation dataset
    valid_loss = lasagne.objectives.categorical_crossentropy(output_test, Y)
    valid_loss = valid_loss.mean()

    valid_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX)

    # get parameters from network and set up sgd with nesterov momentum to update parameters
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = adam(loss, params, learning_rate=LR)

    # set up training and prediction functions
    train_fn = theano.function(inputs=[X,Y], outputs=loss, updates=updates)
    valid_fn = theano.function(inputs=[X,Y], outputs=[valid_loss, valid_acc])

    # set up prediction function
    predict_proba = theano.function(inputs=[X], outputs=output_test)
    
    return train_fn, valid_fn, predict_proba, output_layer
Code Example #9
File: neuralnetwork.py Project: peter-koo/Deepomics
def build_updates(grad, params, optimization, learning_rate):
	""" setup optimization algorithm """

	if optimization['optimizer'] == 'sgd':
		update_op = updates.sgd(grad, params, learning_rate=learning_rate) 
 
	elif optimization['optimizer'] == 'nesterov_momentum':
		if 'momentum' in optimization:
			momentum = optimization['momentum']
		else:
			momentum = 0.9
		update_op = updates.nesterov_momentum(grad, params, learning_rate=learning_rate, momentum=momentum)
	
	elif optimization['optimizer'] == 'adagrad':
		update_op = updates.adagrad(grad, params, learning_rate=learning_rate)
	
	elif optimization['optimizer'] == 'rmsprop':
		if 'rho' in optimization:
			rho = optimization['rho']
		else:
			rho = 0.9
		update_op = updates.rmsprop(grad, params, learning_rate=learning_rate, rho=rho)
	
	elif optimization['optimizer'] == 'adam':
		if 'beta1' in optimization:
			beta1 = optimization['beta1']
		else:
			beta1 = 0.9
		if 'beta2' in optimization:
			beta2 = optimization['beta2']
		else:
			beta2 = 0.999
		update_op = updates.adam(grad, params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
  
	return update_op
Code Example #10
 def optimize(grads, params):
     if state['optim_method'] == 'adam':
         updates = adam(grads, params, lrt, state['momentum'])
     elif state['optim_method'] == 'adagrad':
         updates = adagrad(grads, params, lrt)
     elif state['optim_method'] == 'sgd':
         updates = sgd(grads, params, lrt)
     return updates
Code Example #11
def prep_train(alpha=0.002, beta1=0.5, beta2=0.9, nz=200):

    G, D = build_net(nz=nz)
    x = T.tensor4('x')
    z = T.matrix('z')

    # get network output for D and G
    G_z = get_output(G, z)
    D_G_z = get_output(D, G_z)  # fake
    D_x = get_output(D, x)  # real

    # create new variable e to sample X along straight lines
    e = T.TensorType(dtype=floatX, broadcastable=(False, True, True, True))()
    mixed_X = (e * G_z) + (1 - e) * x
    output_D_mixed = get_output(D, mixed_X)

    #compute gradients + penalty
    grad_mixed = T.grad(T.sum(output_D_mixed), mixed_X)
    norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=[1, 2, 3]))
    grad_penalty = T.mean(T.square(norm_grad_mixed - 1))

    # get parameters
    params_d = get_all_params(D, trainable=True)
    params_g = get_all_params(G, trainable=True)

    # compute losses for the discriminator J_D and the generator J_G
    J_D = D_G_z.mean() - D_x.mean() + 10 * grad_penalty
    J_G = -D_G_z.mean()

    # update parameters for both
    update_D = adam(J_D,
                    params_d,
                    learning_rate=alpha,
                    beta1=beta1,
                    beta2=beta2)
    update_G = adam(J_G,
                    params_g,
                    learning_rate=alpha,
                    beta1=beta1,
                    beta2=beta2)

    # define training functions
    train_G = theano.function(inputs=[z], outputs=J_G, updates=update_G)
    train_D = theano.function(inputs=[x, z, e], outputs=J_D, updates=update_D)

    return train_G, train_D, G, D
Code Example #12
    def __init__(self,
                 steps        = 1,
                 num_layers   = 2,
                 num_units    = 32,
                 eps          = 1e-2,
                 recurrent    = False,
                 nonlinearity = tanh,
                 ):
        self.steps = steps

        self.X = T.fmatrix()
        self.Y = T.fmatrix()

        def network(l):
            if recurrent:
                l = ReshapeLayer(l,
                                 shape = (-1, steps, 1))
                l = LSTMLayer(l, num_units)

            for k in range(num_layers):
                l = DenseLayer(l,
                               num_units    = num_units,
                               nonlinearity = nonlinearity)
            l = DenseLayer(l,
                           num_units    = 1,
                           nonlinearity = linear)

            return l

        self.network = network

        l = InputLayer(input_var = self.X,
                       shape     = (None, steps))
        l = self.network(l)

        self.l_ = l
        self.x_ = get_output(self.l_)

        self.f  = theano.function([self.X],
                                  self.x_,
                                  allow_input_downcast=True)

        l2_penalty = regularize_network_params(l,L2)
        error = squared_error(self.x_, self.Y).mean()
        loss = error + eps * l2_penalty
        params = get_all_params(l)
        updates = adam(loss,
                       params)

        self.error = theano.function([self.X,self.Y],
                                     error,
                                     allow_input_downcast=True)

        self.train = theano.function([self.X,self.Y],
                                     loss,
                                     updates=updates,
                                     allow_input_downcast=True)
Code Example #13
File: bow.py Project: passalis/eo-bow
    def __init__(self,
                 labels,
                 g=0.1,
                 m=0.01,
                 feature_dimension=128,
                 n_codewords=16,
                 n_feature_samples=100,
                 eta=0.01):
        """
        The labels of the objects used for the optimization.
        The objects must be in the same order when the fit function is called
        :param labels: labels of the objects used for the optimization
        :param g: BoW quantization parameter
        :param m: entropy softness parameter
        :param feature_dimension: dimension of the extracted feature vectors
        :param n_codewords: number of codewords in the dictionary
        :param n_feature_samples: number of feature vectors to use in each iteration
        :param eta: learning rate
        """

        SoftBoW.__init__(self,
                         g=g,
                         feature_dimension=feature_dimension,
                         n_codewords=n_codewords)

        self.entropy = SoftEntropy(m=m, labels=labels)
        self.entropy_loss = None
        self.learning_rate = eta
        self.n_feature_samples = n_feature_samples

        # Histograms
        self.S = self._sym_histograms(self.X)

        # Entropy loss
        self.entropy_loss = self.entropy._sym_entropy(self.S)

        # Compile loss function
        self.calculate_loss_theano = theano.function([self.X],
                                                     self.entropy_loss)

        # Define gradients w.r.t. V (and take care of NaNs)
        entropy_grad = T.grad(self.entropy_loss, self.S)
        entropy_grad = T.switch(T.isnan(entropy_grad), 0, entropy_grad)
        dictionary_grad = T.grad(self.entropy._sym_entropy(self.S),
                                 self.V,
                                 known_grads={self.S: entropy_grad})
        dictionary_grad = T.switch(T.isnan(dictionary_grad), 0,
                                   dictionary_grad)

        # Define and compile the training function
        self.updates = adam([dictionary_grad], [self.V],
                            learning_rate=self.learning_rate)
        self.train_theano = theano.function(inputs=[self.X],
                                            outputs=[self.entropy_loss],
                                            updates=self.updates)
Code Example #14
def buildFunctions(net, input_var, target_var):

    params = lasagne.layers.get_all_params(net['h0_inv'], trainable=True)
    out = lasagne.layers.get_output(net['h0_inv'], deterministic=True)

    loss = lasagne.objectives.squared_error(out, target_var)
    adam_update = adam (loss.mean(), params)

    train_function = theano.function([input_var, target_var], loss, updates=adam_update)

    return train_function
Code Example #15
File: deep_model_s.py Project: youngflyasd/ssvae
 def train_function(self, semi_supervised= True, unlabel_stable=False):
     '''
     semi_supervised == True: semi-supervised learning (train with unlabeled data)
     return: train function for one epoch of use
     '''
     self.semi_supervised = semi_supervised
     sym_klw = T.scalar('sym_klw',dtype=theano.config.floatX) # symbolic scalar of warming up
     sym_cw = T.scalar('sym_cw',dtype=theano.config.floatX) # classifier warm up
     sym_s = T.matrix('sym_s',dtype='int64')
     sym_mask = T.matrix('sym_mask',dtype=theano.config.floatX)
     sym_y = T.matrix('sym_label',dtype=theano.config.floatX)
     sym_s_u = T.matrix('sym_s_u',dtype='int64')
     sym_mask_u = T.matrix('sym_mask_u', dtype=theano.config.floatX)
     num_l, num_u = sym_s.shape[0].astype(theano.config.floatX), 0.0
     if self.semi_supervised:
         print 'Train with unlabel data.'
         num_u = sym_s_u.shape[0].astype(theano.config.floatX)
     #get labeled/unlabeled cost
     outs1 = self.cost_label([sym_s, sym_mask, sym_y], dev_stage=False, return_mode = 'mean')
     loss_recons, loss_kl, valid_words, word_drop_num, loss_classifier, batch_ppl, acc = outs1
     loss_recons_u, loss_kl_u,loss_entropy_u, batch_ppl_u = 0.0,0.0,0.0,0.0
     valid_words_u = 0
     if self.semi_supervised:
         outs2 = self.cost_unlabel([sym_s_u, sym_mask_u], dev_stage=unlabel_stable, sample_by_prob=self.sample_unlabel)
         loss_recons_u, loss_kl_u, valid_words_u, loss_entropy_u, batch_ppl_u = outs2
     '''
     total Loss:
     L = Loss_labeled(s,mask,y) + beta*(n_l+n_u)/n_l * Loss_classifier(s,mask,y)
         + Loss_unlabel(s_u, mask_u)
     L = recons_term + sym_klw_term + loss_classifier_term - loss_entropy_u
     '''
     alpha = sym_cw * self.cost_beta * ( num_l + num_u ) / num_l
     total_cost = loss_recons * num_l + loss_recons_u * num_u\
                  + sym_klw * ( loss_kl * num_l + loss_kl_u * num_u)\
                  + alpha * loss_classifier * num_l\
                  - loss_entropy_u * num_u
     total_cost /= (num_l + num_u)
     train_params = self.get_params(only_trainable=True)
     all_grads = theano.grad(total_cost,train_params)
     all_grads = [T.clip(g, -self.grad_clipping, self.grad_clipping) for g in all_grads]
     all_grads = total_norm_constraint( all_grads, max_norm=self.max_norm )
     #all_grads = [T.clip(g, -self.grad_clipping, self.grad_clipping) for g in all_grads]
     updates = adam(all_grads,train_params, self.lr, self.beta1, self.beta2)
     if self.semi_supervised:
         train_input = [sym_s, sym_mask, sym_y, sym_s_u, sym_mask_u, sym_klw, sym_cw]
         train_output = [total_cost,
                         loss_recons, loss_recons_u, loss_kl, loss_kl_u, alpha, loss_classifier, loss_entropy_u,
                         batch_ppl, batch_ppl_u, valid_words, valid_words_u, word_drop_num, acc]
     else:
         train_input = [sym_s, sym_mask, sym_y, sym_klw, sym_cw]
         train_output = [total_cost, loss_recons, loss_kl, loss_classifier,
                         batch_ppl, valid_words, word_drop_num, acc]
     train_f = theano.function(inputs=train_input, outputs=train_output,updates=updates, name='train_function')
     return train_f
Code Example #16
 def build_training_function(self):
     self._loss = self.get_loss_function()
     self._params = get_all_params(self.network, trainable=True)
     self._updates_net = adam(self._loss,
                              self._params,
                              learning_rate=self.learning_rate,
                              beta1=0.)
     return theano.function(
         [self.states, self.actions, self.next_states, self.rewards],
         self._loss,
         updates=self._updates_net)
Code Example #17
File: fuckit_network.py Project: zmjjmz/thesiswork
def contrastive_loss_iter(embedder, update_params={}):
    X_pairs = {
            'img1':T.tensor4(),
            'img2':T.tensor4(),
            }
    y = T.ivector() # basically class labels

    final_emb_layer = embedder[-1]
    all_layers = ll.get_all_layers(embedder)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but should work w/just one pred)
    # another assumption (which must hold when the network is being made)
    # the last prediction layer is a) the end of the network and b) what we ultimately care about
    # however the other prediction layers will be incorporated into the training loss
    predicted_embeds_train = {k:ll.get_output(embedder, X)[-1] for k, X in X_pairs.items()}
    predicted_embeds_valid = {k:ll.get_output(final_emb_layer, X, deterministic=True) for k, X in X_pairs.items()}

    margin = 1

    # if distance is 0 that's bad
    distance = lambda pred: (pred['img1'] - pred['img2'] + 1e-7).norm(2, axis=1)
    contrastive_loss = lambda pred: T.mean(y*(distance(pred)) + (1 - y)*(margin - distance(pred)).clip(0,np.inf))
    failed_matches = lambda pred: T.switch(T.eq(T.sum(y),0), 0, T.sum((y*distance(pred)) > margin) / T.sum(y))
    failed_nonmatches = lambda pred: T.switch(T.eq(T.sum(1-y),0), 0, T.sum(((1-y)*distance(pred)) < margin) / T.sum(1-y))
    failed_pairs = lambda pred: 0.5*failed_matches(pred) + 0.5*failed_nonmatches(pred)

    decay = 0.0001
    reg = regularize_network_params(final_emb_layer, l2) * decay
    losses_reg = lambda pred: contrastive_loss(pred) + reg
    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'CL' # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in embedder]))
    all_params = ll.get_all_params(embedder, trainable=True) # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y], [loss_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y], [
                                    contrastive_loss(predicted_embeds_valid),
                                    losses_reg(predicted_embeds_valid),
                                    failed_pairs(predicted_embeds_valid)])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Code Example #18
File: fuckit_network.py Project: zmjjmz/thesiswork
def triplet_loss_iter(embedder, update_params={}):
    X_triplets = {
            'anchor':T.tensor4(),
            'positive':T.tensor4(),
            'negative':T.tensor4(),
            } # each will be a batch of images

    final_emb_layer = embedder[-1]
    all_layers = ll.get_all_layers(embedder)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but should work w/just one pred)
    # another assumption (which must hold when the network is being made)
    # the last prediction layer is a) the end of the network and b) what we ultimately care about
    # however the other prediction layers will be incorporated into the training loss
    predicted_embeds_train = {k:ll.get_output(embedder, X)[-1] for k, X in X_triplets.items()}
    predicted_embeds_valid = {k:ll.get_output(final_emb_layer, X, deterministic=True) for k, X in X_triplets.items()}

    # each output should be batch_size x embed_size

    # should give us a vector of batch_size of distances btw anchor and positive
    alpha = 0.2 # FaceNet alpha
    triplet_pos = lambda pred: (pred['anchor'] - pred['positive']).norm(2,axis=1)
    triplet_neg = lambda pred: (pred['anchor'] - pred['negative']).norm(2,axis=1)
    triplet_distances = lambda pred: (triplet_pos(pred) - triplet_neg(pred) + alpha).clip(0, np.inf)
    triplet_failed = lambda pred: T.mean(triplet_distances(pred) > alpha)
    triplet_loss = lambda pred: T.sum(triplet_distances(pred))

    decay = 0.001
    reg = regularize_network_params(final_emb_layer, l2) * decay
    losses_reg = lambda pred: triplet_loss(pred) + reg
    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'TL' # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in embedder]))
    all_params = ll.get_all_params(embedder, trainable=True) # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X_triplets['anchor'], X_triplets['positive'], X_triplets['negative']], [loss_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X_triplets['anchor'], X_triplets['positive'], X_triplets['negative']], [triplet_loss(predicted_embeds_valid),
                                                                                                          losses_reg(predicted_embeds_valid),
                                                                                                          triplet_failed(predicted_embeds_valid)])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Code Example #19
File: neural_network.py Project: frw/2048-DRL
    def __init__ (self):
        self.learning_rate = 0.001
        self.L1_reg = 0.0000
        self.L2_reg = 0.0001
        self.n_hidden = 50
        self.num_inputs = 20
        self.num_outputs = 1

        # allocate symbolic variables for the data
        x = T.ivector('x')  
        y = T.iscalar('y') 

        rng = np.random.RandomState(None)

        # construct the neural network's Architecture
        architecture = Architecture(
            rng=rng,
            input=[x],
            n_in=self.num_inputs,
            n_hidden=self.n_hidden,
            n_out=self.num_outputs
        )

        cost = (
            architecture.error_function(y)
            + self.L1_reg * architecture.L1
            + self.L2_reg * architecture.L2_sqr
        )
        
        #stochastic gradient descent with adaptive learning with Lasagne--using Adam
        updates = adam(cost, architecture.params, learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08)

        # backpropagation that also includes a forward pass
        self.train_model = theano.function(
            inputs=[x, y],
            outputs=[cost, architecture.get_result()],
            updates=updates,
            allow_input_downcast=True
        )

        # forward pass
        self.run_model = theano.function(
            inputs=[x],
            outputs=architecture.get_result(),
            allow_input_downcast=True
        )

        self.grab_weights = theano.function(
            inputs=[],
            outputs=architecture.params,
            allow_input_downcast=True
        )
Code Example #20
File: teext_network.py Project: zmjjmz/thesiswork
def loss_iter(segmenter, update_params={}):
    X = T.tensor4()
    y = T.tensor4()
    pixel_weights = T.tensor3()

    final_pred_layer = segmenter[-1]
    all_layers = ll.get_all_layers(segmenter)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but should work w/just one pred)
    # another assumption (which must hold when the network is being made)
    # the last prediction layer is a) the end of the network and b) what we ultimately care about
    # however the other prediction layers will be incorporated into the training loss
    predicted_masks_train = ll.get_output(segmenter, X)
    predicted_mask_valid = ll.get_output(final_pred_layer, X, deterministic=True)

    thresh = 0.5
    accuracy = lambda pred: T.mean(T.eq(T.argmax(pred, axis=1), T.argmax(y, axis=1)))
    true_pos = lambda pred: T.sum((pred[:,0,:,:] > thresh) * (y[:,0,:,:] > thresh))
    false_pos = lambda pred: T.sum((pred[:,0,:,:] > thresh) - (y[:,0,:,:] > thresh))
    precision = lambda pred: (true_pos(pred) / (true_pos(pred) + false_pos(pred)))

    pixel_weights_1d = pixel_weights.flatten(ndim=1)
    losses = lambda pred: T.mean(crossentropy_flat(pred + 1e-7, y + 1e-7) * pixel_weights_1d)

    decay = 0.0001
    reg = regularize_network_params(final_pred_layer, l2) * decay
    losses_reg = lambda pred: losses(pred) + reg
    loss_train = T.sum([losses_reg(mask) for mask in predicted_masks_train])
    loss_train.name = 'CE' # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in segmenter]))
    all_params = ll.get_all_params(segmenter, trainable=True) # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])
    acc_train = accuracy(predicted_masks_train[-1])
    acc_valid = accuracy(predicted_mask_valid)
    prec_train = precision(predicted_masks_train[-1])
    prec_valid = precision(predicted_mask_valid)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X, y, pixel_weights], [loss_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X, y, pixel_weights], [losses(predicted_mask_valid), losses_reg(predicted_mask_valid), prec_valid])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Code Example #21
def optimize(content_targets,
             vgg_path,
             im_size,
             epochs=2,
             period=1000,
             batch_size=4,
             save_path='saver/fns.ckpt',
             learning_rate=1e-3,
             checkpoint_model=None):

    assert content_targets.shape[1:] == (3, *im_size)

    print('=== CREATE VGG NET ===')
    images = T.ftensor4()
    vgg_net = vgg.Net(im_size)
    vgg_net.set_params(vgg.load_params(vgg_path))

    print('=== CREATE EMUL NET ===')
    emul_net = emul.Net(im_size)
    if checkpoint_model is not None:
        emul_net.set_params(checkpoint_model)

    content_losses = []
    for layer in vgg.CONTENT_LAYERS + vgg.STYLE_LAYERS:
        content_vgg_features = vgg_net(images, layer)
        content_emul_features = emul_net(images, layer)
        size = content_emul_features.size
        loss = l2_loss(content_vgg_features, content_emul_features) / size
        content_losses.append(loss)

    loss = sum(content_losses)
    updates = adam(loss, emul_net.get_params(False), learning_rate)
    print('=== FUNCTION COMPILE ===')
    train_fn = theano.function([images], loss, updates=updates)
    valid_fn = theano.function([images], loss)
    print('=== START TRAIN ===')
    it = 0
    time_in_train = 0
    for epoch in range(epochs):
        for i in range(0, len(content_targets), batch_size):
            batch = content_targets[i:i + batch_size]
            batch = np.float32(batch)
            start = time()
            loss = train_fn(batch)
            time_in_train += time() - start
            it += 1
            if it % period == 0 or i + batch_size >= len(content_targets):
                print('Time in train: %1.3lf' % time_in_train)
                time_in_train = 0
                save(save_path, emul_net.get_params())
                yield epoch, it, loss
Code Example #22
File: spotlight.py Project: robintibor/braindecode
def create_spotlight_fn(final_layer, blur_axes, free_axes, weight_axes, trials_shape):
    ones_shape = [trials_shape[i_ax] if i_ax in blur_axes + free_axes else 1 
                  for i_ax in xrange(len(trials_shape))]

    means_stds_shape = [trials_shape[i_ax] if i_ax in free_axes else 1 
                  for i_ax in xrange(len(trials_shape))]
    means_stds_shape = [len(blur_axes)] + means_stds_shape
    #toadd: mixture of gaussians
    full_mask = T.ones(ones_shape, dtype=np.float32)
    broadcast_pattern = [True if ax not in (free_axes) else False 
                         for ax in xrange(len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern

    means = theano.shared((np.ones(means_stds_shape)* 0.5).astype(np.float32),
                          broadcastable=broadcast_pattern)
    stds = theano.shared((np.ones(means_stds_shape)* 1).astype(np.float32),
                          broadcastable=broadcast_pattern)

    for i_blur_axis, axis in enumerate(blur_axes):
        ax_mask = T.constant(np.linspace(0,1, trials_shape[axis], dtype=np.float32))
        dimshuffle_pattern = [0 if ax == axis else 'x' for ax in xrange(len(trials_shape))]
        ax_mask = ax_mask.dimshuffle(*dimshuffle_pattern)
        # todo maybe have to fix this here?
        ax_gaussian = T.exp(-T.square((ax_mask - means[i_blur_axis]) / stds[i_blur_axis]) * 0.5)
        full_mask = full_mask * ax_gaussian
    
    weights_shape = [trials_shape[i_ax] if i_ax in weight_axes else 1 
                  for i_ax in xrange(1,len(trials_shape))]
    weights_shape = [trials_shape[0]] + weights_shape
    broadcast_pattern = [True if ax not in (weight_axes) else False 
                         for ax in xrange(1, len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern
    weights = theano.shared((np.ones(weights_shape)).astype(np.float32),
                          broadcastable=broadcast_pattern)
    full_mask = full_mask * (T.maximum(weights,0) / 
        T.mean(T.maximum(weights,0), axis=0, keepdims=True))
    
    trials_var = T.ftensor4()
    scaled_trials = trials_var * full_mask
    targets = T.ivector()

    outputs = lasagne.layers.get_output(final_layer, inputs=scaled_trials, input_var=scaled_trials)

    loss = categorical_crossentropy(outputs, targets).sum()
    loss += T.mean(T.sqr(stds)) * 0.1
    loss -= T.mean(T.abs_(weights - T.mean(weights, axis=0, keepdims=True))) * 10
    adam_updates = adam(loss,[means, stds, weights], learning_rate=0.01)
    adam_grad_fn = theano.function([trials_var, targets], 
                                   [loss,outputs, scaled_trials, full_mask, weights], 
                                   updates=adam_updates)
    return adam_grad_fn
Code Example #23
File: scenari.py Project: rsumner31/Mariana-212
    def run(self, parameter, parameterName, loss, **kwargs) :
        pVar = parameter.getVar()
        gparam = tt.grad(loss, pVar)
        updates = LUP.adam( [ gparam ], [pVar], learning_rate=self.getHP("lr"), beta1=self.getHP("beta1"), beta2=self.getHP("beta2"), epsilon=self.getHP("epsilon"))

        ret = OptimizerResult(pVar, parameterName, gparam, updates[pVar])
        i = 0
        for param, update in updates.items() :
            if param is not pVar :
                name = "%s_adam_%s" % (parameterName, i)
                ret.addCoParameter(param, name, None, update)
                i += 1

        return ret
Code Example #24
    def _build(self, X, y):
        """
        Builds the network and associated training functions, for the specific
        shapes of the inputs
        """
        n_x = X.shape[-1]
        n_y = y.shape[-1]
        n_c = X.shape[1]

        # Defining input layers
        self.l_x = InputLayer(shape=(self.batch_size, n_c, n_x, n_x),
                              input_var=self._x,
                              name='x')
        self.l_y = InputLayer(shape=(self.batch_size, n_y),
                              input_var=self._y,
                              name='y')

        net = self._model_definition(self.l_x)

        # Output classifier
        out = DenseLayer(net, num_units=n_y, nonlinearity=identity)

        self._network = NonlinearityLayer(out, nonlinearity=sigmoid)

        # Compute network loss
        q, p = get_output([out, self.l_y],
                          inputs={
                              self.l_x: self._x,
                              self.l_y: self._y
                          })

        # Define loss function
        loss = weighted_sigmoid_binary_crossentropy(q, p, self.pos_weight)

        # Average over batch
        loss = loss.mean()

        # Get trainable parameters and generate updates
        params = get_all_params([self._network], trainable=True)
        grads = T.grad(loss, params)
        updates = adam(grads, params, learning_rate=self._lr)
        self._trainer = theano.function([self._x, self._y, self._lr], [loss],
                                        updates=updates)

        # Get detection probability from the network
        qdet = get_output(self._network,
                          inputs={self.l_x: self._x},
                          deterministic=True)
        self._output = theano.function([self._x], qdet)
Code Example #25
    def train_expectation_function(self):
        '''
        unlabeled-data training with expectation
        '''
        print "Train Function: Calculate the Expectation of unlabeled data."
        sym_klw = T.scalar('sym_klw', dtype=theano.config.floatX)  # symbolic scalar of warming up
        sym_sents = T.matrix('sym_s', dtype='int64')
        sym_mask = T.matrix('sym_mask', dtype=theano.config.floatX)  # one hot!
        sym_label = T.matrix('sym_label', dtype=theano.config.floatX)
        sym_sents_u = T.matrix('sym_s_u', dtype='int64')
        sym_mask_u = T.matrix('sym_mask_u', dtype=theano.config.floatX)
        num_l, num_u = sym_sents.shape[0].astype(theano.config.floatX), \
                       sym_sents_u.shape[0].astype(theano.config.floatX)
        num_all = num_l + num_u

        # forward the network and get cost values
        enc_sents, dec_sents, _ = self._forward_sents(sym_sents, dev_stage=False)
        enc_sents_u, dec_sents_u, _ = self._forward_sents(sym_sents_u, dev_stage=False)

        # classifier loss
        y_pred, loss_class, acc = self._forward_classifier([enc_sents, sym_mask], sym_label, dev_stage=False)
        y_pred_u, loss_entropy, _ = self._forward_classifier([enc_sents_u, sym_mask_u], None, dev_stage=False)

        # reconstruction and kl loss
        loss_rec, loss_kl, ppl = self.cost_label([sym_sents, enc_sents, dec_sents, sym_mask, sym_label], dev_stage=False)
        loss_rec_u, loss_kl_u, ppl_u = self.cost_unlabel_expectation([sym_sents_u, enc_sents_u, dec_sents_u,
                                                        sym_mask_u, y_pred_u], dev_stage=False)
        
        # use baseline
        if self.use_baseline:
            baselines_u = self._get_baselines([sym_sents_u, enc_sents_u, sym_mask_u])
            loss_rec_u -= baselines_u
        
        total_cost = T.sum(loss_rec) + T.sum(loss_rec_u) - T.sum(loss_entropy)
        total_cost += sym_klw * (T.sum(loss_kl) + T.sum(loss_kl_u))
        total_cost += self.alpha * T.sum(loss_class) * num_all / num_l
        total_cost /= num_all

        all_params = self.get_params(tag='all')
        all_grads = theano.grad(total_cost, all_params)
        all_grads = total_norm_constraint(all_grads, max_norm=self.max_norm)
        updates = adam(all_grads, all_params, self.lr)

        train_input = [sym_sents, sym_mask, sym_label, sym_sents_u, sym_mask_u, sym_klw]
        train_output = [total_cost,
                        T.mean(loss_rec), T.mean(loss_rec_u), T.mean(loss_kl), T.mean(loss_kl_u),
                        T.mean(loss_class), T.mean(loss_entropy), ppl, ppl_u, acc, self.b]
        train_f = theano.function(inputs=train_input, outputs=train_output, updates=updates, name='train_expectation')
        return train_f
Code Example #26
    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target
        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('index')
        self.b_slice = slice(self.index * self.num_batch, (self.index + 1) * self.num_batch)

        sym_x = T.dtensor3()
        sym_mask_x = T.dmatrix()
        sym_target = T.dtensor3()
        sym_mask_out = T.dtensor3()
        # sym_mask_out = T.dtensor3() should not be useful since output is still zero
        # TODO think about this if it is true

        out = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out = self.get_output_y(out)
        loss = T.mean(lasagne.objectives.squared_error(out_out, sym_target)) / self.num_batch

        out_test = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out_test = self.get_output_y(out_test)
        loss_test = T.mean(lasagne.objectives.squared_error(out_out_test, sym_target)) / self.num_batch_test

        all_params = [self.W] + [self.b] +lasagne.layers.get_all_params(self.model)
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function([self.index],
                                      [loss, out_out],
                                      givens={sym_x: self.train_x[self.b_slice],
                                              sym_mask_x: self.train_mask_x[self.b_slice],
                                              sym_target: self.train_target[self.b_slice],
                                              },
                                      updates=updates_target)
        test_model = theano.function([self.num_batch_test],
                                     [loss_test, out_out_test],
                                     givens={sym_x: self.test_x,
                                             sym_mask_x: self.test_mask_x,
                                             sym_target: self.test_target,
                                             })

        return train_model, test_model
Code Example #27
def get_updates(nnet, train_obj, trainable_params, solver=None):

    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad", "rmsprop",
                           "adadelta", "adam", "adamax")

    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj,
                                trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj,
                                   trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates
Code Example #28
    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target
        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('index')
        self.b_slice = slice(self.index * self.num_batch, (self.index + 1) * self.num_batch)

        sym_x = T.dtensor3()
        sym_mask_x = T.dmatrix()
        sym_target = T.dtensor3()
        # sym_mask_out = T.dtensor3() should not be useful since output is still zero
        # TODO think about this if it is true

        output = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        theta = self.get_output_y(output)
        log_px = self.get_log_x(sym_target, theta)
        log_px_sum_time = log_px.sum(axis=1, dtype=theano.config.floatX) # sum over time
        loss = - T.sum(log_px_sum_time) / self.num_batch # average over batch
        ##
        log_px_test = self.get_log_x(sym_target, theta)
        log_px_sum_time_test = log_px_test.sum(axis=1, dtype=theano.config.floatX) # sum over time
        loss_test = - T.sum(log_px_sum_time_test) / self.num_batch_test  # average over batch
        # loss = T.mean(lasagne.objectives.squared_error(mu, sym_target))
        all_params = [self.W_y_theta] + [self.b_y_theta] + lasagne.layers.get_all_params(self.model)
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function([self.index],
                                      [loss, theta, log_px],
                                      givens={sym_x: self.train_x[self.b_slice],
                                              sym_mask_x: self.train_mask_x[self.b_slice],
                                              sym_target: self.train_target[self.b_slice]},
                                      updates=updates_target)
        test_model = theano.function([self.num_batch_test],
                                     [loss_test, theta],
                                     givens={sym_x: self.test_x,
                                             sym_mask_x: self.test_mask_x,
                                             sym_target: self.test_target})

        return train_model, test_model
Code Example #29
def build(layer_heads, params):
    """"""
    fns = {}  # model methods
    x = T.tensor4('input')

    for target in params['targets']:
        fns[target['name']] = {}
        out_layer = layer_heads[target['name']]

        y = T.matrix('target')
        o = L.get_output(out_layer, inputs=x)
        o_vl = L.get_output(out_layer, inputs=x, deterministic=True)

        if 'class_weight' in params and params['class_weight']:
            loss_fn = partial(weighted_cce, weights=params['class_weight'])
        else:
            loss_fn = obj.categorical_crossentropy

        loss = loss_fn(o, y).mean()
        loss_vl = loss_fn(o_vl, y).mean()
        wd_l2 = reg.regularize_network_params(out_layer, reg.l2)
        wd_l2 *= params['beta']

        acc_vl = obj.categorical_accuracy(o_vl, y).mean()

        updates_ = updates.adam(loss + wd_l2,
                                L.get_all_params(out_layer, trainable=True),
                                learning_rate=params['learning_rate'],
                                epsilon=params['epsilon'])

        fns[target['name']]['train'] = theano.function(
            [x, y], updates=updates_, allow_input_downcast=True)
        fns[target['name']]['predict'] = theano.function(
            [x], o_vl, allow_input_downcast=True)
        fns[target['name']]['cost'] = theano.function(
            [x, y], loss_vl, allow_input_downcast=True)
        fns[target['name']]['acc'] = theano.function([x, y],
                                                     acc_vl,
                                                     allow_input_downcast=True)
        fns[target['name']]['transform'] = theano.function(
            [x],
            L.get_output(L.get_all_layers(layer_heads[target['name']])[-2],
                         inputs=x,
                         deterministic=True),
            allow_input_downcast=True)

    return fns, layer_heads
Code Example #30
    def __init__(self, architecture, dim, params):
        t1 = time.time()
        self.t_in = tensor.ftensor3('inputs')  #  =X     float32
        self.t_out = tensor.imatrix('targets')  # =Y_true int32
        self.input_shape = (
            None,
            dim,
            params['segment_size'],
        )
        self.output_shape = (
            None,
            dim,
            params['segment_size'],
        )
        self.architecture = architecture
        self.model = build_lasagne_model(architecture, dim, self.t_in,
                                         self.input_shape)
        self.params = params
        self.trained = False
        self.dim = dim

        test_pred = get_output(self.model, deterministic=False)
        test_loss = my_loss(self.model, test_pred, self.t_out, False, params)
        test_loss_with_reg = my_loss(self.model, test_pred, self.t_out, True,
                                     params)
        test_acc = tensor.mean(tensor.eq(tensor.argmax(test_pred, axis=1),
                                         self.t_out),
                               dtype=config.floatX)

        self.eval_fn = function([self.t_in, self.t_out],
                                [test_loss, test_loss_with_reg, test_acc],
                                allow_input_downcast=True)
        self.evaluate = function([self.t_in],
                                 get_output(self.model, self.t_in),
                                 allow_input_downcast=True)

        pred = get_output(self.model)
        loss_with_reg = my_loss(self.model, pred, self.t_out, True, params)
        params = get_all_params(self.model, trainable=True)
        updates = adam(loss_with_reg, params=params, learning_rate=0.0001)
        self.train_fn = function([self.t_in, self.t_out],
                                 loss_with_reg,
                                 updates=updates,
                                 allow_input_downcast=True)

        print('Neural network initialized in {:.2f}s'.format(time.time() - t1))
Code Example #31
def net_updates(net, loss, lr):

    # Get all trainable parameters (weights) of our net
    params = l.get_all_params(net, trainable=True)

    # We use the adam update, other options are available
    if cfg.OPTIMIZER == 'adam':
        param_updates = updates.adam(loss, params, learning_rate=lr, beta1=0.9)
    elif cfg.OPTIMIZER == 'nesterov':
        param_updates = updates.nesterov_momentum(loss,
                                                  params,
                                                  learning_rate=lr,
                                                  momentum=0.9)
    elif cfg.OPTIMIZER == 'sgd':
        param_updates = updates.sgd(loss, params, learning_rate=lr)
    else:
        raise ValueError('Unknown optimizer: {}'.format(cfg.OPTIMIZER))

    return param_updates
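
# A minimal, hypothetical usage sketch (a built `net`, a scalar `loss` expression,
# symbolic `input_var`/`target_var` and the project's `cfg` object are assumed from
# the surrounding code, not defined here):
#
#     param_updates = net_updates(net, loss, lr=cfg.LEARNING_RATE)
#     train_fn = theano.function([input_var, target_var], loss, updates=param_updates)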
コード例 #32
0
ファイル: main.py プロジェクト: giahy2507/lstm-lstm
def build_model(vocab_size=200, embsize=25, hiddensize=50, ydim=2):

    X = T.matrix('X', dtype='int64')
    Mask = T.matrix('mask', dtype=config.floatX)
    Y = T.vector('Y', dtype='int64')

    nstep = X.shape[0]
    mini_batch_size = X.shape[1]

    emblayer = ProjectionLayer(X, vocab_size, embsize,
                               (nstep, mini_batch_size))

    lstmlayer = LSTM(emblayer.output,
                     Mask,
                     embsize,
                     hiddensize,
                     name="lstm-encode")
    lstmlayer.build_lstm()
    proj = lstmlayer.output

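    # Mask-weighted mean pooling of the LSTM hidden states over time steps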
    proj = (proj * Mask[:, :, None]).sum(axis=0)
    proj = proj / Mask.sum(axis=0)[:, None]

    softmax_layer = SoftmaxLayer(proj, hiddensize, ydim)

    cost = softmax_layer.negative_log_likelihood(Y)

    err = softmax_layer.errors(Y)

    params = emblayer.params + lstmlayer.params + softmax_layer.params

    updates = adam(cost, params)

    train_function = theano.function(inputs=[X, Mask, Y],
                                     outputs=[cost, err],
                                     updates=updates)
    valid_function = theano.function(inputs=[X, Mask, Y], outputs=[cost, err])
    predict_function = theano.function(inputs=[X, Mask],
                                       outputs=softmax_layer.y_pred)

    # see_func = theano.function(inputs=[X,Mask], outputs=softmax_layer.p_y_given_x)
    # hyhy = see_func(data_X,mask_X)
    # print hyhy.shape

    return X, Mask, Y, cost, err, train_function, valid_function, predict_function
コード例 #33
0
ファイル: multi_gpu.py プロジェクト: yaoli/theano_multi_gpu
    def build_theano_fn_resnet(self):
        t0 = time.time()
        print '%s build theano fn resnet' % self.rank
        x = T.ftensor4('images')
        y = T.ivector('label')
        model = resnet50.build_model(x)
        prob = lasagne.layers.get_output(model['prob'], deterministic=True)
        self.params = lasagne.layers.get_all_params(model['prob'],
                                                    trainable=True)
        cost = -T.log(prob[T.arange(prob.shape[0]), y] + 1e-6).mean()
        grads = T.grad(cost, self.params)
        grads_all_reduced = self.grad_all_reduce(grads)
        updates = adam(grads_all_reduced, self.params)
        self.train_fn = theano.function([x, y], [cost, y],
                                        updates=updates,
                                        accept_inplace=True)
        print '%s finished build theano fn, used %.3f' % (self.rank,
                                                           time.time() - t0)
コード例 #34
0
    def __init__(self, learning_rate, hdim, ldim, input_tensor, n_in, n_out):
        # Set variables
        self.learning_rate = learning_rate
        self.hdim = hdim
        self.ldim = ldim
        self.input_tensor = input_tensor
        self.n_in = n_in
        self.n_out = n_out
        # Build the network
        self.srng = RandomStreams(seed=234)
        # Encoder part (phi)
        self.encoder = utils.EncoderMLP(self.srng, input_tensor, n_in, hdim,
                                        ldim)
        # Decoder part (theta)
        self.decoder = utils.DecoderMLP(self.encoder.output, ldim, hdim, n_out)

        # Prediction
        self.predict = self.decoder.bern

        # Cost function
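        # Negative ELBO: KL divergence of the approximate posterior from a unit
        # Gaussian prior plus the Bernoulli reconstruction cross-entropy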
        self.kl_div = T.mean(
            utils.kl_unit_normal(self.encoder.mu, self.encoder.sigma2))
        self.xent = T.mean(
            T.sum(T.nnet.binary_crossentropy(self.decoder.bern,
                                             self.input_tensor),
                  axis=1))
        self.cost = self.kl_div + self.xent

        # parameters
        self.params = self.encoder.params + self.decoder.params
        self.updates = adam(self.cost, self.params, self.learning_rate)

        # functions
        self.predict = theano.function(inputs=[input_tensor],
                                       outputs=self.predict)
        self.cost_fun = theano.function(
            inputs=[input_tensor], outputs=[self.cost, self.kl_div, self.xent])
        self.train = theano.function(
            inputs=[input_tensor],
            outputs=[self.cost, self.kl_div, self.xent],
            updates=self.updates)
        self.normal_vars = theano.function(
            inputs=[input_tensor],
            outputs=[self.encoder.mu, self.encoder.sigma2])
コード例 #35
0
ファイル: mlp_gauss.py プロジェクト: jiri-hron/masterproject
def prepare_functions(model, X, index, y, X_test, X_train, y_train,
                      batch_size, l_rate):
    n_data_const = T.constant(
        X_train.shape[0].eval(), name='n_data', dtype=floatX
    )

    mean_log_likelihood = model.mean_log_likelihood(y)

    # scaled_kl_W = model.kl_W / n_data_const
    # scaled_kl_b = model.kl_b / n_data_const
    # scaled_kl = scaled_kl_W + scaled_kl_b
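    # Scale the KL terms by the training-set size so the minibatch objective
    # below approximates the per-example (negative) variational bound.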
    effect_scaled_kl_W  = model.effect_kl_W / n_data_const
    effect_scaled_kl_b = model.effect_kl_b / n_data_const
    effect_scaled_kl = effect_scaled_kl_W + effect_scaled_kl_b

    # cost = -(mean_log_likelihood - scaled_kl)
    cost = -(mean_log_likelihood - effect_scaled_kl)

    params = model.params
    updates = adam(cost, params, learning_rate=l_rate)

    print('... compiling functions')

    # monitor cost and the individual components of it
    # outputs = [cost, mean_log_likelihood, scaled_kl_W, scaled_kl_b]
    outputs = (
        [cost, mean_log_likelihood, effect_scaled_kl_W, effect_scaled_kl_b]
    )

    train = theano.function(
        inputs=[index], outputs=outputs, updates=updates,
        givens={
            X: X_train[index * batch_size:(index + 1) * batch_size],
            y: y_train[index * batch_size:(index + 1) * batch_size]
        }
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )
    test_predict = theano.function(
        [index], model.p_y_given_x,
        givens={X: X_test[index * batch_size:(index + 1) * batch_size]}
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )

    return train, test_predict
コード例 #36
0
def loss_iter(segmenter, update_params={}):
    X = T.tensor4()
    y = T.tensor4()
    pixel_weights = T.tensor3()

    all_layers = ll.get_all_layers(segmenter)
    imwrite_architecture(all_layers, './layer_rep.png')
    predicted_mask_train = ll.get_output(segmenter, X)
    predicted_mask_valid = ll.get_output(segmenter, X, deterministic=True)

    accuracy = lambda pred: T.mean(T.eq(T.argmax(pred, axis=1), T.argmax(y, axis=1)))

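    # Per-pixel loss weights, flattened so they line up with the flattened
    # per-pixel cross-entropy computed by crossentropy_flat below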
    pixel_weights_1d = pixel_weights.flatten(ndim=1)
    losses = lambda pred: T.mean(crossentropy_flat(pred + 1e-7, y + 1e-7) * pixel_weights_1d)

    decay = 0.0001
    reg = regularize_network_params(segmenter, l2) * decay
    losses_reg = lambda pred: losses(pred) + reg
    loss_train = losses_reg(predicted_mask_train)
    loss_train.name = 'combined_loss' # for the names
    all_params = ll.get_all_params(segmenter)
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params, **update_params)
    acc_train = accuracy(predicted_mask_train)
    acc_valid = accuracy(predicted_mask_valid)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X, y, pixel_weights], [loss_train, losses(predicted_mask_train), acc_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X, y, pixel_weights], [losses(predicted_mask_valid), acc_valid])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
コード例 #38
0
def prep_train(alpha=0.0002, beta=0.5, nz=200):

    E, D = build_net(nz=nz)

    x = T.tensor5('x')

    # x -> symbolic variable, input to the computational graph

    #Get outputs z=E(x), x_hat=D(z)

    encoding = get_output(E, x)

    decoding = get_output(D, encoding)

    #Get parameters of E and D

    params_e = get_all_params(E, trainable=True)

    params_d = get_all_params(D, trainable=True)

    params = params_e + params_d

    #Calculate cost and updates

    cost = T.mean(squared_error(x, decoding))

    grad = T.grad(cost, params)

    updates = adam(grad, params, learning_rate=alpha, beta1=beta)

    train = theano.function(inputs=[x], outputs=cost, updates=updates)

    rec = theano.function(inputs=[x], outputs=decoding)

    test = theano.function(inputs=[x], outputs=cost)

    #theano.function returns an actual python function used to evaluate our real data

    return train, test, rec, E, D
コード例 #39
0
    def train_network(self, n_epochs=10000, learning_rate=0.001):

        loss = categorical_crossentropy(self.output, self.Y)
        loss = loss.mean()

        params = get_all_params(self.network_probs, trainable=True)

        updates = adam(loss, params, learning_rate=learning_rate)

        train = theano.function(inputs=[self.X, self.Y],
                                outputs=loss,
                                updates=updates,
                                allow_input_downcast=True)

        trX, trY = self.get_data()

        for epoch in range(n_epochs):
            train_loss = train(trX, trY)
            if epoch % 50 == 0:
                print 'epoch: %d, loss: %f' % (epoch, train_loss)

        np.savez(pkg_path + '/models/model.npz',
                 *get_all_param_values(self.network_probs))
コード例 #40
0
ファイル: parsing.py プロジェクト: XuezheMax/NeuroNLP
def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in xrange(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads, params=params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads, params=params, learning_rate=learning_rate, momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)

    return updates
コード例 #41
0
ファイル: pycharm_test.py プロジェクト: youngflyasd/ssvae
    def get_f_train(self):
        network_params = self.get_params()
        for param in network_params:
            print param.get_value().shape, param.name

        x = T.imatrix()
        m = T.matrix()
        y = T.matrix()
        pred = layers.get_output(self.l_y, {
            self.l_x: x,
            self.l_m: m,
        },
                                 deterministic=False)

        cost = objectives.categorical_crossentropy(pred, y).mean()
        acc = T.eq(T.argmax(pred, axis=1), T.argmax(y, axis=1)).mean()
        grads = theano.grad(cost, network_params)
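        # Rescale the gradients to a maximum global norm of 20, then clip each
        # element to [-10, 10] before applying Adam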
        grads = updates.total_norm_constraint(grads, max_norm=20.0)
        grads = [T.clip(g, -10.0, 10.0) for g in grads]
        params_update = updates.adam(grads, network_params, self.lr)
        f_train = theano.function([x, m, y], [cost, acc],
                                  updates=params_update)
        return f_train
コード例 #42
0
output_m_a = lasagne.layers.get_output(l_out_m_a, inputs={l_in_a: x_sym})
output_f_a = lasagne.layers.get_output(l_out_f_a, inputs={l_in_a: x_sym})

loss_all_target_m_a = lasagne.objectives.squared_error(output_m_a, t_sym)

loss_mean_target_m_a = T.mean(loss_all_target_m_a)

loss_all_target_f_a = lasagne.objectives.squared_error(output_f_a, t_sym)

loss_mean_target_f_a = T.mean(loss_all_target_f_a)

all_params_target_m_a = lasagne.layers.get_all_params([l_out_m_a])
all_grads_target_m_a = [T.clip(g, -10, 10) for g in T.grad(loss_mean_target_m_a, all_params_target_m_a)]
all_grads_target_m_a = lasagne.updates.total_norm_constraint(all_grads_target_m_a, 10)
updates_target_m_a = adam(all_grads_target_m_a, all_params_target_m_a)

all_params_target_f_a = lasagne.layers.get_all_params([l_out_f_a])
all_grads_target_f_a = [T.clip(g, -10, 10) for g in T.grad(loss_mean_target_f_a, all_params_target_f_a)]
all_grads_target_f_a = lasagne.updates.total_norm_constraint(all_grads_target_f_a, 10)
updates_target_f_a = adam(all_grads_target_f_a, all_params_target_f_a)

train_model_m_a = theano.function([x_sym, t_sym],
                              [loss_mean_target_m_a, output_m_a],
                              updates=updates_target_m_a)

test_model_m_a = theano.function([x_sym, t_sym],
                             [loss_mean_target_m_a, output_m_a])


train_model_f_a = theano.function([x_sym, t_sym],
コード例 #43
0
# Compute gradient in case of gradient clipping
if run_parameters.clip_gradient[0] is not None:
    grad = T.grad(loss, params)
    if run_parameters.clip_gradient[0] == 0:  # softclip
        grad = [updates.norm_constraint(g, run_parameters.clip_gradient[1], range(g.ndim)) for g in grad]
    elif run_parameters.clip_gradient[0] == 1:
        grad = [T.clip(g, run_parameters.clip_gradient[0], run_parameters.clip_gradient[1]) for g in grad]
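    # lasagne's update functions accept either a scalar loss or a list of
    # gradients as their first argument, so storing the clipped gradients in
    # `loss` makes updates.adam below work from them directly.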
    loss = grad

# Update function to train
# sgd_lr = run_parameters.update_lr
sgd_lr = theano.shared(utils.floatX(run_parameters.update_lr))
sgd_lr_decay = utils.floatX(1.0)
sgd_lr_decay_threshold = utils.floatX(1.0)
updates_function = updates.adam(loss, params, run_parameters.update_lr)

# Compile train function
train_fn = theano.function([input_var, target_var, labeled_var], loss, updates=updates_function,
                           allow_input_downcast=True, on_unused_input='ignore')
# Compile test prediction function
classification = T.argmax(test_prediction, axis=1)
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis=1)),
                  dtype=theano.config.floatX)
test_wrong = T.neq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis=1))
# Compile a second function computing the validation loss and accuracy:
# val_fn = theano.function([input_var, target_var, labeled_var], [loss2*lr[1], test_acc], allow_input_downcast=True)
val_fn = theano.function([input_var, target_var, labeled_var],
                         [test_loss,
                          losses_ratio[0] * test_loss1.mean(),
                          losses_ratio[1] * test_loss2.mean(),
コード例 #44
0
ファイル: vpg_2.py プロジェクト: QuantCollective/maml_rl
# the symbolic log-likelihood. For this example, the corresponding distribution is an instance of the class
# rllab.distributions.DiagonalGaussian
dist = policy.distribution

# Note that we negate the objective, since most optimizers assume a
# minimization problem
surr = - TT.mean(dist.log_likelihood_sym(actions_var, dist_info_vars) * advantages_var)

# Get the list of trainable parameters.
params = policy.get_params(trainable=True)
grads = theano.grad(surr, params)

f_train = theano.function(
    inputs=[observations_var, actions_var, advantages_var],
    outputs=None,
    updates=adam(grads, params, learning_rate=learning_rate),
    allow_input_downcast=True
)

for _ in range(n_itr):

    paths = []

    for _ in range(N):
        observations = []
        actions = []
        rewards = []

        observation = env.reset()

        for _ in range(T):
コード例 #45
0
ファイル: main_cond.py プロジェクト: adbrebs/handwriting
# Initial values of the variables that are transmitted through the recursion
h_ini, k_ini, w_ini = model.create_shared_init_states(batch_size)
loss, updates_ini, monitoring = model.apply(seq_pt, seq_pt_mask, seq_tg,
                                            seq_str, seq_str_mask,
                                            h_ini, k_ini, w_ini)


########################
# GRADIENT AND UPDATES #
########################
params = model.params
grads = T.grad(loss, params)
grads = clip_norm_gradients(grads)

if algo == 'adam':
    updates_params = adam(grads, params, 0.0003)
elif algo == 'sgd':
    updates_params = []
    for p, g in zip(params, grads):
        updates_params.append((p, p - learning_rate * g))
else:
    raise ValueError('Specified algo does not exist')

updates_all = updates_ini + updates_params


#####################
# SAMPLING FUNCTION #
#####################
pt_ini, h_ini_pred, k_ini_pred, w_ini_pred, bias = \
    model.create_sym_init_states()
コード例 #46
0
def Train(options,init_params,build_model,DataHandler):
	load=options['load'];
	loadHis=options['loadHis'];
	saveto=options['saveto'];
	loadfrom=options['loadfrom'];
	dataset=options['dataset'];
	last_n=options['last_n'];
	fsize=options['videosize'];


	print ">>>init params & build graph";
	tparams=init_params(options);
	cost,preds,inner_state,inps,use_noise=build_model(options,tparams);
	print "build done"

	print ">>>compile cost&updates function";
	start=time.time();
	f=theano.function(inps,[cost,preds],allow_input_downcast=True,on_unused_input='ignore');

	print "cost function ready"
	if options['finetune']:
		updates=momentum(cost, itemlist(tparams), options['lrate'], momentum=options['momentum']);
	else:
		updates=adam(cost, itemlist(tparams), learning_rate=options['lrate'], beta1=0.9, beta2=0.999, epsilon=1e-08); 
	print len(itemlist(tparams))
	print "updates ready",len(updates)
	f_update=theano.function(inps,[cost,preds],updates=updates,allow_input_downcast=True,on_unused_input='ignore');
	print "update function ready"
	print "compile finish, use %.1fmin"%((time.time()-start)/60);

	print '>>>Optimization'
	# ready dataset
	dh_train = DataHandler(options['dataset'],datatype=0,fps=options['fps']); dh_train.SetMode('source');
	dh_valid = DataHandler(options['dataset'],datatype=1,fps=options['fps']); dh_valid.SetMode('source');
	
	train_log=np.empty((0,4),dtype='float32');
	min_valid_cost=1e8;
	max_valid_acc=0;
	if loadHis and os.path.exists(loadfrom):
		print "load log history from",loadfrom
		train_log = np.load(loadfrom)['train_log'];
		min_valid_cost=train_log[:,2].min();
	 	max_valid_acc=train_log[:,3].max();

	train_num=dh_train.batch_num;  # should be set to dh_train.batch_num
	for epochidx in xrange(options['max_epochs']):
		use_noise.set_value(1.0);
		dh_train.Reset();
		print 'Epoch ', epochidx
		start=time.time();
		for vidx in xrange(train_num):
			x,mask,y=dh_train.GetSingleVideoFromSource(size=fsize,scale=1);
			x=x.reshape([x.shape[0],x.shape[1],fsize,fsize,3]);
			x=x.transpose([0,1,4,2,3]);
			x=x.reshape([x.shape[0],x.shape[1],-1]);
			cost,preds=f_update(x,mask,y);

			acc=((y.mean(0)).argmax(1)==preds).mean();
			print cost,acc;
			# print tparams['recog/cnn_conv2_w'].get_value().sum(),tparams['recog/cnn_conv3_w'].get_value().sum(),tparams['recog/cnn_conv4_w'].get_value().sum(),tparams['recog/cnn_conv5_w'].get_value().sum(),(tparams['recog/cnn_conv5_w'].get_value()**2).sum()
			if ((vidx+1)%100==0):
				print "%d/%d, use %.1fmin"%(vidx+1,dh_train.batch_num,(time.time()-start)/60.0);
				start=time.time();


		use_noise.set_value(0.0);
		#compute train error
		dh_train.Reset(); 
		print ">>train cost";
		tcost,tacc=Predict(options,f,dh_train,verbose=True,train_num=200);
		print "cost: %.3f, acc: %.3f"%(tcost,tacc);

		#compute valid error
		dh_valid.Reset();
		print ">>valid cost";
		vcost,vacc=Predict(options,f,dh_valid,verbose=True);
		print "cost: %.3f, acc: %.3f"%(vcost,vacc);

		print ">>save point:",options['saveto'];
		train_log=np.append(train_log,np.array([tcost,tacc,vcost,vacc])[None,...],axis=0);
		# train_log.append([tcost,tacc,vcost,vacc]);
		params = unzip(tparams);
		np.savez(saveto, train_log=train_log, options=options, **params);

		if (vcost<min_valid_cost):
			min_valid_cost=vcost;
			max_valid_acc=max(max_valid_acc,vacc);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
		elif (vacc>max_valid_acc):
			max_valid_acc=vacc;
			min_valid_cost=min(min_valid_cost,vcost);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
コード例 #47
0
def get_model():

    dtensor4 = T.TensorType("float32", (False,) * 4)
    input_var = dtensor4("inputs")
    dtensor2 = T.TensorType("float32", (False,) * 2)
    target_var = dtensor2("targets")

    # input layer with unspecified batch size
    layer_input = InputLayer(
        shape=(None, 30, 64, 64), input_var=input_var
    )  # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
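    # Add a singleton channel axis: (batch, 30, 64, 64) -> (batch, 1, 30, 64, 64)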
    layer_0 = DimshuffleLayer(layer_input, (0, "x", 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_1 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_0,
            num_filters=64,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_2 = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_3 = DropoutLayer(layer_2, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_4 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_3,
            num_filters=128,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_5 = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_6 = DropoutLayer(layer_5, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_7 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_6,
            num_filters=256,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_9 = DropoutLayer(layer_8, p=0.25)

    # Recurrent layer
    layer_10 = DimshuffleLayer(layer_9, (0, 2, 1, 3, 4))
    layer_11 = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False)

    # Output Layer
    layer_systole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_diastole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_systole_1 = DropoutLayer(layer_systole, p=0.3)
    layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3)

    layer_systole_2 = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_diastole_2 = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_output = ConcatLayer([layer_systole_2, layer_diastole_2])

    # Loss
    prediction = get_output(layer_output)
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum Or Adam
    params = get_all_params(layer_output, trainable=True)
    updates = adam(loss, params)
    # updates_0            = rmsprop(loss, params)
    # updates              = apply_nesterov_momentum(updates_0, params)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_output, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], test_loss, allow_input_downcast=True)

    # Compile a third function computing the prediction
    predict_fn = theano.function([input_var], test_prediction, allow_input_downcast=True)

    return [layer_output, train_fn, val_fn, predict_fn]
コード例 #48
0
def doit(mode):
    from rllab.envs.box2d.cartpole_env import CartpoleEnv
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from rllab.baselines.zero_baseline import ZeroBaseline
    from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
    from rllab.envs.normalized_env import normalize
    import numpy as np
    import theano
    import theano.tensor as TT
    from lasagne.updates import adam

    # normalize() makes sure that the actions for the environment lies
    # within the range [-1, 1] (only works for environments with continuous actions)
    env = normalize(CartpoleEnv())
    # Initialize a neural network policy with a single hidden layer of 8 hidden units
    policy = GaussianMLPPolicy(env.spec, hidden_sizes=(8,))
    # Initialize a linear baseline estimator using default hand-crafted features
    if "linbaseline" in mode:
        print('linear baseline')
        baseline = LinearFeatureBaseline(env.spec)
    elif "vanilla" in mode:
        print("zero baseline")
        baseline = ZeroBaseline(env.spec)
    elif mode == "batchavg":
        print('batch average baseline')
        # use a zero baseline but subtract the mean of the discounted returns (see below)
        baseline = ZeroBaseline(env.spec)

    if "_ztrans" in mode:
        print('z transform advantages')
    else:
        print('no z transform')


    # We will collect 50 trajectories per iteration
    N = 50
    # Each trajectory will have at most 50 time steps
    T = 50
    # Number of iterations
    n_itr = 50
    # Set the discount factor for the problem
    discount = 0.99
    # Learning rate for the gradient update
    learning_rate = 0.1

    # Construct the computation graph

    # Create a Theano variable for storing the observations
    # We could have simply written `observations_var = TT.matrix('observations')` instead for this example. However,
    # doing it in a slightly more abstract way allows us to delegate to the environment for handling the correct data
    # type for the variable. For instance, for an environment with discrete observations, we might want to use integer
    # types if the observations are represented as one-hot vectors.
    observations_var = env.observation_space.new_tensor_variable(
        'observations',
        # It should have 1 extra dimension since we want to represent a list of observations
        extra_dims=1
    )
    actions_var = env.action_space.new_tensor_variable(
        'actions',
        extra_dims=1
    )
    advantages_var = TT.vector('advantages')

    # policy.dist_info_sym returns a dictionary, whose values are symbolic expressions for quantities related to the
    # distribution of the actions. For a Gaussian policy, it contains the mean and (log) standard deviation.
    dist_info_vars = policy.dist_info_sym(observations_var)

    # policy.distribution returns a distribution object under rllab.distributions. It contains many utilities for computing
    # distribution-related quantities, given the computed dist_info_vars. Below we use dist.log_likelihood_sym to compute
    # the symbolic log-likelihood. For this example, the corresponding distribution is an instance of the class
    # rllab.distributions.DiagonalGaussian
    dist = policy.distribution

    # Note that we negate the objective, since most optimizers assume a
    # minimization problem
    surr = - TT.mean(dist.log_likelihood_sym(actions_var, dist_info_vars) * advantages_var)

    # Get the list of trainable parameters.
    params = policy.get_params(trainable=True)
    grads = theano.grad(surr, params)

    f_train = theano.function(
        inputs=[observations_var, actions_var, advantages_var],
        outputs=None,
        updates=adam(grads, params, learning_rate=learning_rate),
        allow_input_downcast=True
    )

    results = []
    for _ in range(n_itr):

        paths = []

        for _ in range(N):
            observations = []
            actions = []
            rewards = []

            observation = env.reset()

            for _ in range(T):
                # policy.get_action() returns a pair of values. The second one returns a dictionary, whose values contains
                # sufficient statistics for the action distribution. It should at least contain entries that would be
                # returned by calling policy.dist_info(), which is the non-symbolic analog of policy.dist_info_sym().
                # Storing these statistics is useful, e.g., when forming importance sampling ratios. In our case it is
                # not needed.
                action, _ = policy.get_action(observation)
                # Recall that the last entry of the tuple stores diagnostic information about the environment. In our
                # case it is not needed.
                next_observation, reward, terminal, _ = env.step(action)
                observations.append(observation)
                actions.append(action)
                rewards.append(reward)
                observation = next_observation
                if terminal:
                    # Finish rollout if terminal state reached
                    break

            # We need to compute the empirical return for each time step along the
            # trajectory
            path = dict(
                observations=np.array(observations),
                actions=np.array(actions),
                rewards=np.array(rewards),
            )
            path_baseline = baseline.predict(path)
            advantages = []
            returns = []
            return_so_far = 0
            for t in range(len(rewards) - 1, -1, -1):
                return_so_far = rewards[t] + discount * return_so_far
                returns.append(return_so_far)
                advantage = return_so_far - path_baseline[t]
                advantages.append(advantage)
            # The advantages are stored backwards in time, so we need to revert it
            advantages = np.array(advantages[::-1])
            # And we need to do the same thing for the list of returns
            returns = np.array(returns[::-1])

            if "_ztrans" in mode:
                advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8)


            path["advantages"] = advantages
            path["returns"] = returns

            paths.append(path)

        baseline.fit(paths)

        observations = np.concatenate([p["observations"] for p in paths])
        actions = np.concatenate([p["actions"] for p in paths])
        advantages = np.concatenate([p["advantages"] for p in paths])


        if mode == 'batchavg':
            # in this case `advantages` up to here are just our good old returns, without baseline or z transformation.
            # now we subtract their mean across all episodes.
            advantages = advantages - np.mean(advantages)


        f_train(observations, actions, advantages)
        avgr =  np.mean([sum(p["rewards"]) for p in paths])
        print(('Average Return:',avgr))
        results.append(avgr)
    return results
コード例 #49
0
ファイル: 2stream_nodelta.py プロジェクト: lzuwei/ip-avsr
def main():
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('stream2'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')

    # stream 1
    s1_data = load_mat_file(config.get('stream1', 'data'))
    s1_imagesize = tuple([int(d) for d in config.get('stream1', 'imagesize').split(',')])
    s1 = config.get('stream1', 'model')
    s1_inputdim = config.getint('stream1', 'input_dimensions')
    s1_shape = config.get('stream1', 'shape')
    s1_nonlinearities = config.get('stream1', 'nonlinearities')

    # stream 2
    s2_data = load_mat_file(config.get('stream2', 'data'))
    s2_imagesize = tuple([int(d) for d in config.get('stream2', 'imagesize').split(',')])
    s2 = config.get('stream2', 'model')
    s2_inputdim = config.getint('stream2', 'input_dimensions')
    s2_shape = config.get('stream2', 'shape')
    s2_nonlinearities = config.get('stream2', 'nonlinearities')

    # lstm classifier
    fusiontype = config.get('lstm_classifier', 'fusiontype')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('lstm_classifier', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('lstm_classifier',
                                                                                                  'use_peepholes')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    s1_data_matrix = s1_data['dataMatrix'].astype('float32')
    s2_data_matrix = s2_data['dataMatrix'].astype('float32')
    targets_vec = s1_data['targetsVec'].reshape((-1,))
    subjects_vec = s1_data['subjectsVec'].reshape((-1,))
    vidlen_vec = s1_data['videoLengthVec'].reshape((-1,))

    if matlab_target_offset:
        targets_vec -= 1

    s1_data_matrix = presplit_dataprocessing(s1_data_matrix, vidlen_vec, config, 'stream1', imagesize=s1_imagesize)
    s2_data_matrix = presplit_dataprocessing(s2_data_matrix, vidlen_vec, config, 'stream2', imagesize=s2_imagesize)

    s1_train_X, s1_train_y, s1_train_vidlens, s1_train_subjects, \
    s1_val_X, s1_val_y, s1_val_vidlens, s1_val_subjects, \
    s1_test_X, s1_test_y, s1_test_vidlens, s1_test_subjects = split_seq_data(s1_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s2_train_X, s2_train_y, s2_train_vidlens, s2_train_subjects, \
    s2_val_X, s2_val_y, s2_val_vidlens, s2_val_subjects, \
    s2_test_X, s2_test_y, s2_test_vidlens, s2_test_subjects = split_seq_data(s2_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s1_train_X, s1_val_X, s1_test_X = postsplit_datapreprocessing(s1_train_X, s1_val_X, s1_test_X, config, 'stream1')
    s2_train_X, s2_val_X, s2_test_X = postsplit_datapreprocessing(s2_train_X, s2_val_X, s2_test_X, config, 'stream2')

    ae1 = load_decoder(s1, s1_shape, s1_nonlinearities)
    ae2 = load_decoder(s2, s2_shape, s2_nonlinearities)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs1 = T.tensor3('inputs1', dtype='float32')
    inputs2 = T.tensor3('inputs2', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_nodelta.create_model(ae1, ae2, (None, None, s1_inputdim), inputs1,
                                                     (None, None), mask,
                                                     (None, None, s2_inputdim), inputs2,
                                                     lstm_size, output_classes, fusiontype,
                                                     w_init_fn=weight_init_fn,
                                                     use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs1, targets, mask, inputs2],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs1, targets, mask, inputs2],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, targets, mask, inputs2], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs1, mask, inputs2], test_predictions, allow_input_downcast=True)

    # We'll train the network for num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(s1_train_X, s1_train_y, s1_train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(s1_train_vidlens)

    val_datagen = gen_lstm_batch_random(s1_val_X, s1_val_y, s1_val_vidlens, batchsize=len(s1_val_vidlens))
    test_datagen = gen_lstm_batch_random(s1_test_X, s1_test_y, s1_test_vidlens, batchsize=len(s1_test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(s1_val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(s2_val_X, idxs_val, s1_val_vidlens, integral_lens_val, np.max(s1_val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(s1_test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(s2_test_X, idxs_test, s1_test_vidlens, integral_lens_test, np.max(s1_test_vidlens))

    # reshape the targets for validation
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(s2_train_X, batch_idxs,
                                            s1_train_vidlens, integral_lens, np.max(s1_train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff)
            print('\r', end='')
        cost = compute_train_cost(X, y, m, X_diff)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

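        # Early-stopping statistics in the style of Prechelt's GL/PQ criteria:
        # gl is the relative increase of the validation cost over its minimum so
        # far, pk measures recent training progress over the last STRIP_SIZE
        # epochs, and pq is their quotient.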
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val, savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))
コード例 #50
0
l_out_target = lasagne.layers.DenseLayer(l_reshape_target, num_units=n_features, nonlinearity=rectify)
l_out_reshape_target = lasagne.layers.ReshapeLayer(l_out_target, (-1, x_sym.shape[1], n_features))
output_target = lasagne.layers.get_output(l_out_reshape_target, inputs={l_in: x_sym, l_mask: mask_x_sym})

# print lasagne.layers.get_output(l_out_reshape_target, inputs={l_in: x_sym, l_mask: mask_x_sym}).eval({x_sym:test_x,mask_x_sym:mask_test_x}).shape

loss_all_target = lasagne.objectives.squared_error(output_target * mask_t_sym, t_sym)

loss_mean_target = loss_all_target.mean()

# print loss_mean_target.eval({x_sym:test_x,mask_x_sym:mask_test_x, t_sym: target_train, mask_t_sym: mask_target_train})

all_params_target = lasagne.layers.get_all_params([l_out_reshape_target])
all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss_mean_target, all_params_target)]
all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
updates_target = adam(all_grads_target, all_params_target)

train_target = theano.function([x_sym, mask_x_sym, t_sym, mask_t_sym],
                               loss_mean_target,
                               updates=updates_target)

test_target = theano.function([x_sym, mask_x_sym, t_sym, mask_t_sym],
                              [loss_mean_target, output_target])

# for noise
l_decoder_noise = lasagne.layers.GRULayer(l_bottle, num_units=NUM_UNITS_DEC)
l_reshape_noise = lasagne.layers.ReshapeLayer(l_decoder_noise, (-1, NUM_UNITS_DEC))
l_out_noise = lasagne.layers.DenseLayer(l_reshape_noise, num_units=n_features, nonlinearity=rectify)
l_out_reshape_noise = lasagne.layers.ReshapeLayer(l_out_noise, (-1, x_sym.shape[1], n_features))
output_noise = lasagne.layers.get_output(l_out_reshape_noise, inputs={l_in: x_sym, l_mask: mask_x_sym})
コード例 #51
0
    def create_model(n_feat):
        x_sym = T.tensor3()
        m_sym = T.tensor3()
        f_sym = T.tensor3()
        l_in = lasagne.layers.InputLayer(shape=(None, max_len, n_feat))

        l_dec_fwd = lasagne.layers.GRULayer(l_in, num_units=NUM_UNITS_DEC, name='GRUDecoder', backwards=False)
        l_dec_bwd = lasagne.layers.GRULayer(l_in, num_units=NUM_UNITS_DEC, name='GRUDecoder', backwards=True)

        l_concat = lasagne.layers.ConcatLayer([l_dec_fwd, l_dec_bwd], axis=2)

        l_encoder_2_m = lasagne.layers.GRULayer(l_concat, num_units=NUM_UNITS_ENC)
        l_encoder_2_f = lasagne.layers.GRULayer(l_concat, num_units=NUM_UNITS_ENC)

        l_decoder_m = lasagne.layers.GRULayer(l_encoder_2_m, num_units=NUM_UNITS_DEC)
        l_decoder_f = lasagne.layers.GRULayer(l_encoder_2_f, num_units=NUM_UNITS_DEC)

        l_reshape_m = lasagne.layers.ReshapeLayer(l_decoder_m, (-1, NUM_UNITS_DEC))
        l_dense_m = lasagne.layers.DenseLayer(l_reshape_m, num_units=n_feat, nonlinearity=nonlin)
        l_out_m = lasagne.layers.ReshapeLayer(l_dense_m, (-1, max_len, n_feat))

        l_reshape_f = lasagne.layers.ReshapeLayer(l_decoder_f, (-1, NUM_UNITS_DEC))
        l_dense_f = lasagne.layers.DenseLayer(l_reshape_f, num_units=n_feat, nonlinearity=nonlin)
        l_out_f = lasagne.layers.ReshapeLayer(l_dense_f, (-1, max_len, n_feat))

        output_m = lasagne.layers.get_output(l_out_m, inputs={l_in: x_sym})
        output_f = lasagne.layers.get_output(l_out_f, inputs={l_in: x_sym})

        # The three different training variants are handled separately here
        if tpe == 0:
            loss_all_m = lasagne.objectives.squared_error(output_m * x_sym, m_sym)
            loss_all_f = lasagne.objectives.squared_error(output_f * x_sym, f_sym)
            loss_mean_m = T.mean(loss_all_m)
            loss_mean_f = T.mean(loss_all_f)

        if tpe == 1:
            loss_all_m = lasagne.objectives.squared_error(output_m * x_sym, m_sym) + \
                         lasagne.objectives.squared_error((1. - output_m) * x_sym, f_sym)
            loss_mean_m = T.mean(loss_all_m)

        if tpe == 2:
            loss_all_m = lasagne.objectives.squared_error(output_m * x_sym, m_sym) \
                         - 0.05 * lasagne.objectives.squared_error(output_m * x_sym, f_sym)
            loss_all_f = lasagne.objectives.squared_error(output_f * x_sym, f_sym) \
                - 0.05 * lasagne.objectives.squared_error(output_f * x_sym, m_sym)
            loss_mean_m = T.mean(loss_all_m)
            loss_mean_f = T.mean(loss_all_f)

        all_params_target_m = lasagne.layers.get_all_params([l_out_m])
        all_grads_target_m = [T.clip(g, -10, 10) for g in T.grad(loss_mean_m, all_params_target_m)]
        all_grads_target_m = lasagne.updates.total_norm_constraint(all_grads_target_m, 10)
        updates_target_m = adam(all_grads_target_m, all_params_target_m)

        train_model_m = theano.function([x_sym, m_sym, f_sym],
                                        [loss_mean_m, output_m],
                                        updates=updates_target_m,
                                        on_unused_input='ignore')

        test_model_m = theano.function([x_sym, m_sym, f_sym],
                                       [loss_mean_m, output_m],
                                       on_unused_input='ignore')

        if tpe != 1:
            all_params_target_f = lasagne.layers.get_all_params([l_out_f])
            all_grads_target_f = [T.clip(g, -10, 10) for g in T.grad(loss_mean_f, all_params_target_f)]
            all_grads_target_f = lasagne.updates.total_norm_constraint(all_grads_target_f, 10)
            updates_target_f = adam(all_grads_target_f, all_params_target_f)
            train_model_f = theano.function([x_sym, f_sym, m_sym],
                                            [loss_mean_f, output_f],
                                            updates=updates_target_f,
                                            on_unused_input='ignore')

            test_model_f = theano.function([x_sym, f_sym, m_sym],
                                           [loss_mean_f, output_f],
                                           on_unused_input='ignore')
            return train_model_m, test_model_m, train_model_f, test_model_f
        return train_model_m, test_model_m
コード例 #52
0
output = lasagne.layers.get_output(l_decoder, inputs={l_in: x_sym})

# output = T.nnet.sigmoid(T.dot(output, W) + b)
# output = T.nnet.sigmoid(T.dot(output, W2) + b2)
#print lasagne.layers.get_output(l_decoder, inputs={l_in: x_sym}).eval({x_sym:test_x,mask_x_sym:mask_test_x}).shape

loss_all_target = lasagne.objectives.squared_error(output, t_sym).sum()

loss_mean_target = loss_all_target / n_batch

# print loss_mean_target.eval({x_sym:test_x,mask_x_sym:mask_test_x, t_sym: target_train, mask_t_sym: mask_target_train})

all_params_target = lasagne.layers.get_all_params([l_decoder])
all_grads_target = [T.clip(g, -10, 10) for g in T.grad(loss_mean_target, all_params_target)]
all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 10)
updates_target = adam(all_grads_target, all_params_target)

train_model = theano.function([x_sym, t_sym],
                              [loss_mean_target, output],
                              updates=updates_target)

test_model = theano.function([x_sym, t_sym],
                             [loss_mean_target, output])


num_min_batches = 100
n_batch = 100
epochs = 50

for i in range(epochs):
    start_time = time.time()
コード例 #53
0
ファイル: neural_network.py プロジェクト: frw/2048-DRL
    def __init__(self):
        self.learning_rate = 0.001
        self.L1_reg = 0.0000
        self.L2_reg = 0.0001
        self.batch_size = 20
        self.n_hidden = 50
        self.num_inputs = 337
        self.num_outputs = 1
        self.momentum_coeff = 0.9

        # allocate symbolic variables for the data
        x = T.ivector('x')  
        y = T.iscalar('y') 

        rng = np.random.RandomState(None)

        # construct the neural network's Architecture
        architecture = Architecture(
            rng=rng,
            input=[x],
            n_in=self.num_inputs,
            n_hidden=self.n_hidden,
            n_out=self.num_outputs
        )

        cost = (
            architecture.error_function(y)
            + self.L1_reg * architecture.L1
            + self.L2_reg * architecture.L2_sqr
        )

        
        # old version of stochastic gradient descent
        #gparams = [T.grad(cost, wrt=param) for param in architecture.params]
        #updates = [(param, param - self.learning_rate * gparam) for param, gparam in zip(architecture.params, gparams)]

        #stochastic gradient descent with adaptive learning using lasagne--take your pick
        #updates_sgd = sgd(cost, architecture.params, learning_rate=self.learning_rate)
        #updates = apply_momentum(updates_sgd, architecture.params, momentum=self.momentum_coeff)
        #updates = adadelta(cost, architecture.params, learning_rate=self.learning_rate, rho=0.95, epsilon=1e-06)
        updates = adam(cost, architecture.params, learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08)

        # backpropogation that also contains a forward pass
        self.train_model = theano.function(
            inputs=[x, y],
            outputs=[cost, architecture.get_result()],
            updates=updates,
            allow_input_downcast=True
        )

        # forward pass
        self.run_model = theano.function(
            inputs=[x],
            outputs=architecture.get_result(),
            allow_input_downcast=True
        )

        self.grab_weights = theano.function(
            inputs=[],
            outputs=architecture.params,
            allow_input_downcast=True
        )
コード例 #54
0
ファイル: main_cond.py プロジェクト: vyraun/handwriting
                         n_mixt_output)
# Initial values of the variables that are transmitted through the recursion
h_ini, k_ini, w_ini = model.create_shared_init_states(batch_size)
loss, updates_ini, monitoring = model.apply(seq_pt, seq_pt_mask, seq_tg,
                                            seq_str, seq_str_mask, h_ini,
                                            k_ini, w_ini)

########################
# GRADIENT AND UPDATES #
########################
params = model.params
grads = T.grad(loss, params)
grads = clip_norm_gradients(grads)

if algo == 'adam':
    updates_params = adam(grads, params, 0.0003)
elif algo == 'sgd':
    updates_params = []
    for p, g in zip(params, grads):
        updates_params.append((p, p - learning_rate * g))
else:
    raise ValueError('Specified algo does not exist')

updates_all = updates_ini + updates_params

#####################
# SAMPLING FUNCTION #
#####################
pt_ini, h_ini_pred, k_ini_pred, w_ini_pred, bias = \
    model.create_sym_init_states()
create_gen_tag_values(model, pt_ini, h_ini_pred, k_ini_pred, w_ini_pred, bias,
コード例 #55
0
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim

    args.lstmDim = 150

    input = InputLayer((None, seqlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen), input_var=input_mask_var)

    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable')  # uncomment to freeze the pretrained embeddings

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh)

    lstm_back = LSTMLayer(
        emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1) # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1) # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, lstm:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Unsupported optimizer: %s" % args.optimizer)
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))


    train_fn = theano.function([input_var, input_mask_var,target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))

    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
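A rough sketch of how event_span_classifier might be called; the symbolic variables, the random embedding matrix, the argument namespace and the dummy mini-batch below are assumptions chosen only to make the call well-formed.

# Illustrative call only: every name, shape and hyperparameter below is an assumption.
import numpy as np
import theano.tensor as T
from argparse import Namespace

input_var = T.imatrix('inputs')          # (batch, seqlen) word indices
input_mask_var = T.matrix('mask')        # (batch, seqlen) 0/1 padding mask
target_var = T.matrix('targets')         # (batch, 2) one-hot labels
wordEmbeddings = np.random.randn(300, 10000).astype('float32')   # (wordDim, vocab_size)
args = Namespace(hiddenDim=100, optimizer='adam', step=0.001)

train_fn, val_fn, network = event_span_classifier(
    args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen=50)

X = np.random.randint(0, 10000, size=(4, 50)).astype('int32')
M = np.ones((4, 50), dtype='float32')
Y = np.eye(2, dtype='float32')[np.random.randint(0, 2, size=4)]
print(train_fn(X, M, Y))                 # one training step on the dummy mini-batch
print(val_fn(X, M, Y))                   # [test_loss, test_accuracy]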
Code example #56
0
           T.mean(lasagne.objectives.squared_error(masked_f, f_sym))

#print loss_all.eval({x_sym: ftrain_x, m_sym: ftrain_m, f_sym: ftrain_f})
# - gamma * lasagne.objectives.squared_error(masked_f, m_sym)
# - gamma * lasagne.objectives.squared_error(masked_m, f_sym)

all_params_target_m = lasagne.layers.get_all_params([l_out_m])
all_grads_target_m = [T.clip(g, -10, 10) for g in T.grad(loss_all, all_params_target_m)]
all_grads_target_m = lasagne.updates.total_norm_constraint(all_grads_target_m, 10)

all_params_target_f = lasagne.layers.get_all_params([l_out_f])
all_grads_target_f = [T.clip(g, -10, 10) for g in T.grad(loss_all, all_params_target_f)]
all_grads_target_f = lasagne.updates.total_norm_constraint(all_grads_target_f, 10)


# lasagne.updates.adam accepts a list of gradient expressions in place of a scalar loss
updates_target_m = adam(all_grads_target_m, all_params_target_m)
updates_target_f = adam(all_grads_target_f, all_params_target_f)


train_model_m = theano.function([x_sym, m_sym, f_sym],
                                loss_all,
                                updates=updates_target_m)

train_model_f = theano.function([x_sym, f_sym, m_sym],
                                loss_all,
                                updates=updates_target_f)

test_model_m = theano.function([x_sym, m_sym, f_sym],
                               [loss_all, output_m])
test_model_f = theano.function([x_sym, f_sym, m_sym],
                               [loss_all, output_f])
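A rough sketch of how the two paired training functions above might be alternated; ftrain_x, ftrain_m and ftrain_f are the float32 arrays referenced in the commented-out eval call earlier, and the loop itself is an assumption.

# Hypothetical training loop (not in the original script): alternate the two branches.
for epoch in range(100):
    loss_m = train_model_m(ftrain_x, ftrain_m, ftrain_f)   # update the parameters behind l_out_m
    loss_f = train_model_f(ftrain_x, ftrain_f, ftrain_m)   # update the parameters behind l_out_f
    if epoch % 10 == 0:
        print(epoch, float(loss_m), float(loss_f))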
Code example #57
0
File: sdc_3dcnn.py Project: rajisme/deeplearning
    # Fit model 
    dtensor5 = TensorType('float32', (False,)*5)
    input_var = dtensor5('inputs')
    target_var = T.fvector('targets')
    network = build_cnn(input_var)['output']

    # Create loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()

    # Create parameter update expressions (later I will make rates adaptive)
    params = lasagne.layers.get_all_params(network, trainable=True)
    # updates = nesterov_momentum(loss, params, learning_rate=0.01,
    #                                         momentum=0.9)
    updates = adam(loss, params)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()
    # for this regression task, the reported "accuracy" is simply the mean squared error again
    test_acc = T.mean(lasagne.objectives.squared_error(test_prediction, target_var),
                  dtype=theano.config.floatX)

    # Compile training function that updates parameters and returns training loss
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
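    # Sanity-check sketch (not in the original file): dtensor5 inputs are 5-D float32
    # arrays; the (batch, channels, depth, height, width) interpretation and the sizes
    # below are assumptions for illustration only.
    # X_dummy = np.zeros((2, 1, 16, 64, 64), dtype='float32')
    # y_dummy = np.zeros(2, dtype='float32')
    # print(val_fn(X_dummy, y_dummy))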

    num_epochs = 8000 # Will probably not do this many b/c of early stopping 
    best_network_weights_epoch = 0 
    epoch_accuracies = [] 
    # Train network 
    for epoch in range(num_epochs):