Code example #1
def generate_theano_func(args, network, penalty, input_dict, target_var):

    prediction = get_output(network, input_dict)

    # loss = T.mean( target_var * ( T.log(target_var) - prediction ))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(network) )
    # penalty = sum ( T.sum(lstm_param**2) for lstm_param in lstm_params )
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param**2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum (T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1) )

    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean( target_var * ( T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

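    # NOTE: input1_var, input1_mask_var, input2_var and input2_mask_var are not
    # arguments of this function; they are assumed to be Theano variables defined
    # at module scope that the compiled functions below close over.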
    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss,
        updates=updates,
        allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction],
            allow_input_downcast=True,
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc],
            allow_input_downcast=True,
        )

    return train_fn, val_fn
Code example #2
File: test_text.py  Project: honzas83/kitchen
def test_maxpool_layer():
    l_in1 = InputLayer((None, 2))
    l_in2 = InputLayer((None, 20))
    l_hid = DenseLayer(l_in2, num_units=30, nonlinearity=rectify)
    l_pool = MaxpoolLayer([l_in1, l_hid])
    l_out = DenseLayer(l_pool, num_units=1, nonlinearity=sigmoid)

    bounds = theano.tensor.lmatrix('bounds')
    data = theano.tensor.matrix('data')
    targets = theano.tensor.matrix('targets')

    predictions = get_output(l_out, {l_in1: bounds, l_in2: data})
    loss = categorical_crossentropy(predictions, targets)
    loss = aggregate(loss, mode='mean')

    params = get_all_params(l_out)
    updates_sgd = sgd(loss, params, learning_rate=0.0001)

    train_function = theano.function([bounds, data, targets], updates=updates_sgd, allow_input_downcast=True)

    test_bounds = np.array([[0, 3], [3, 5], [5, 7]])
    test_X = np.random.randn(10, 20)
    test_Y = np.array([[0], [1], [0]])

    train_function(test_bounds, test_X, test_Y)
Code example #3
File: neuralnetwork.py  Project: peter-koo/Deepomics
def build_updates(grad, params, optimization, learning_rate):
	""" setup optimization algorithm """

	if optimization['optimizer'] == 'sgd':
		update_op = updates.sgd(grad, params, learning_rate=learning_rate) 
 
	elif optimization['optimizer'] == 'nesterov_momentum':
		if 'momentum' in optimization:
			momentum = optimization['momentum']
		else:
			momentum = 0.9
		update_op = updates.nesterov_momentum(grad, params, learning_rate=learning_rate, momentum=momentum)
	
	elif optimization['optimizer'] == 'adagrad':
		update_op = updates.adagrad(grad, params, learning_rate=learning_rate)
	
	elif optimization['optimizer'] == 'rmsprop':
		if 'rho' in optimization:
			rho = optimization['rho']
		else:
			rho = 0.9
		update_op = updates.rmsprop(grad, params, learning_rate=learning_rate, rho=rho)
	
	elif optimization['optimizer'] == 'adam':
		if 'beta1' in optimization:
			beta1 = optimization['beta1']
		else:
			beta1 = 0.9
		if 'beta2' in optimization:
			beta2 = optimization['beta2']
		else:
			beta2 = 0.999
		update_op = updates.adam(grad, params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
  
	return update_op
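A minimal usage sketch for build_updates() above (hypothetical variable names and values; assumes `from lasagne import updates` and the usual Theano imports at module level, as the function body implies):

import numpy as np
import theano
import theano.tensor as T

# Toy objective: pull a shared weight vector towards an input vector.
w = theano.shared(np.zeros(4), name='w')
x = T.dvector('x')
loss = T.sum((w - x) ** 2)

params = [w]
grad = T.grad(loss, params)

optimization = {'optimizer': 'adam', 'beta1': 0.9, 'beta2': 0.999}
update_op = build_updates(grad, params, optimization, learning_rate=1e-3)
train_step = theano.function([x], loss, updates=update_op)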
Code example #4
 def optimize(grads, params):
     if state['optim_method'] == 'adam':
         updates = adam(grads, params, lrt, state['momentum'])
     elif state['optim_method'] == 'adagrad':
         updates = adagrad(grads, params, lrt)
     elif state['optim_method'] == 'sgd':
         updates = sgd(grads, params, lrt)
     return updates
Code example #5
File: nn_.py  Project: darioizzo/optimal_landing
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction,  targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
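A hypothetical call sketch for get_network() above (the dict keys mirror the ones read inside the function; the values are illustrative, and OUTPUT_LOG is the project's own module-level constant):

model = {
    'batch_size': 32,
    'input_vars': 6,
    'hidden_nonlinearity': 'ReLu',
    'nlayers': 2,
    'units': 64,
    'dropout': False,
    'output_mode': OUTPUT_LOG,
    'maxmin': {'thrust': (0.0, 1.0)},  # only read when output_mode == OUTPUT_BOUNDED
    'control': 'thrust',
    'lr': 0.01,
}
fns = get_network(model)
# fns['train'](X_batch, y_batch); fns['pred'](X_batch); fns['eval'](X_val, y_val)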
Code example #6
def get_updates(nnet, train_obj, trainable_params, solver=None):

    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad", "rmsprop",
                           "adadelta", "adam", "adamax")

    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj,
                                trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj,
                                   trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates
Code example #7
File: optimize.py  Project: starsky/pymlearn
def gradient_descend_theano(fun, x0, args=None, learning_rate=1e-3, tol=1e-3, max_iter=3000, verbose=True):
    funct, trainable_params, non_trainable_params = fun
    updates = sgd(funct, trainable_params, learning_rate=learning_rate)
    train_fun = theano.function(non_trainable_params, funct, updates=updates)
    loss_fn_compiled = theano.function(non_trainable_params, funct)
    old_loss = np.inf
    for i in range(max_iter):
        train_fun(*args)
        curr_loss = loss_fn_compiled(*args)
        if abs(curr_loss - old_loss) < tol:
            break
        old_loss = curr_loss
        _print_optimizer_iteration_info(verbose, i, old_loss)
    _print_optimizer_final_info(verbose, i, old_loss, 'Lasagne Gradient Descend')
    params_optimal = trainable_params[0].get_value()
    return {'x': params_optimal}
Code example #8
File: dA_class.py  Project: digirak/TIFR-code
    def get_cost_updates(self, corrupted_input, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        tilde_x = corrupted_input
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        # L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
        L = categorical_crossentropy(z, self.x)

        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # small L2 penalty on the weight matrix
        reg = 1e-8 * lasagne.regularization.l2(self.params[0])
        cost = cost + reg

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params, add_names=True)

        # SGD with momentum on all dA parameters
        updates_sgd = sgd(cost, self.params, learning_rate)
        updates = apply_momentum(updates_sgd, self.params, momentum=0.9)

        return (cost, updates)
Code example #9
def net_updates(net, loss, lr):

    # Get all trainable parameters (weights) of our net
    params = l.get_all_params(net, trainable=True)

    # We use the adam update, other options are available
    if cfg.OPTIMIZER == 'adam':
        param_updates = updates.adam(loss, params, learning_rate=lr, beta1=0.9)
    elif cfg.OPTIMIZER == 'nesterov':
        param_updates = updates.nesterov_momentum(loss,
                                                  params,
                                                  learning_rate=lr,
                                                  momentum=0.9)
    elif cfg.OPTIMIZER == 'sgd':
        param_updates = updates.sgd(loss, params, learning_rate=lr)

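    # cfg.OPTIMIZER is assumed to be one of 'adam', 'nesterov' or 'sgd';
    # with any other value, param_updates would be undefined at this point.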
    return param_updates
Code example #10
    def __init__(self,
                 weights,
                 neurons_topology,
                 learning_rate=0.1,
                 learning_rate_decay=0.985,
                 collaboration_sigma=1.0,
                 collaboration_sigma_decay=0.95,
                 verbosity=2):

        self._verbosity = verbosity
        self._history = []
        self.neurons_number = weights.shape[0]
        self.W_shar_mat = theano.shared(weights)
        self.D_shar_mat = theano.shared(neurons_topology)

        self.collaboration_sigma = theano.shared(collaboration_sigma)
        self.collaboration_sigma_decay = collaboration_sigma_decay

        self.x_row = T.vector("exemplar")
        self.x_mat = T.matrix("batch")

        self.learning_rate = theano.shared(learning_rate)
        self.learning_rate_decay = learning_rate_decay

        self.distance_from_y_row = ((T.sub(self.W_shar_mat,
                                           self.x_row)**2).sum(axis=1))
        self.closest_neuron_idx = T.argmin(self.distance_from_y_row)
        self.distances_from_closest_neuron = self.D_shar_mat[
            self.closest_neuron_idx]
        self.affinities_to_closest_neuron = T.exp(
            -self.distances_from_closest_neuron /
            (self.collaboration_sigma)**2)

        self.smoothed_distances_from_closest_neuron = T.mul(
            self.distance_from_y_row,
            G.disconnected_grad(self.affinities_to_closest_neuron))
        self.cost_scal = self.smoothed_distances_from_closest_neuron.sum()

        self.updates = sgd(self.cost_scal, [self.W_shar_mat],
                           learning_rate=self.learning_rate)
        self.update_neurons = theano.function([self.x_row],
                                              self.cost_scal,
                                              updates=self.updates)
Code example #11
 def __init__(self,
              weights,
              neurons_topology,
              relaxing_factor=-0.5,
              **kwargs):
     super(WinnerRelaxingSOM, self).__init__(weights, neurons_topology,
                                             **kwargs)
     self.wr_relaxing_factor = relaxing_factor
     self.wr_relaxing_member = (
         self.smoothed_distances_from_closest_neuron.sum() -
         self.smoothed_distances_from_closest_neuron[
             self.closest_neuron_idx])
     self.cost_scal += self.wr_relaxing_factor * self.learning_rate * T.mul(
         self.W_shar_mat[self.closest_neuron_idx],
         G.disconnected_grad(self.wr_relaxing_member)).sum()
     self.updates = sgd(self.cost_scal, [self.W_shar_mat],
                        learning_rate=self.learning_rate)
     self.update_neurons = theano.function([self.x_row],
                                           self.cost_scal,
                                           updates=self.updates)
Code example #12
def train_net(x, y1, y2, num_iter=10000):
    input_var = tensor.tensor4('input_var')
    cls_target = tensor.ivector('cls_target')
    bbox_target = tensor.ivector('bbox_target')
    network = build_model(input_var, roidb=rois)
    cls_score_out, bbox_pred_out = get_output(
        [network['cls_score'], network['bbox_pred']])
    # Computing Loss functions update parameters
    cls_loss = categorical_crossentropy(cls_score_out, cls_target)
    cls_loss = cls_loss.mean()
    bbox_pred_loss = huber_loss(bbox_pred_out, bbox_target)
    bbox_pred_loss = bbox_pred_loss.mean()
    combined_params = get_all_params(
        [network['cls_score'], network['bbox_pred']], trainable=True)
    combined_loss = cls_loss + bbox_pred_loss
    updates = sgd(combined_loss, combined_params, learning_rate=0.001)

    train_net = theano.function([input_var, cls_target, bbox_target],
                                combined_loss,
                                updates=updates)
Code example #13
def build_train_func(rank=0, **kwargs):
    print("rank: {} Building model".format(rank))
    resnet = build_resnet()

    print("Building training function")
    x = T.ftensor4('x')
    y = T.imatrix('y')

    prob = L.get_output(resnet['prob'], x, deterministic=False)
    loss = T.nnet.categorical_crossentropy(prob, y.flatten()).mean()
    params = L.get_all_params(resnet.values(), trainable=True)

    sgd_updates = updates.sgd(loss, params, learning_rate=1e-4)

    # make a function to compute and store the raw gradient
    f_train = theano.function(
        inputs=[x, y],
        outputs=loss,  # (assumes this is an avg)
        updates=sgd_updates)

    return f_train, "original"
Code example #14
def gradient_descend_theano(fun,
                            x0,
                            args=None,
                            learning_rate=1e-3,
                            tol=1e-3,
                            max_iter=3000,
                            verbose=True):
    funct, trainable_params, non_trainable_params = fun
    updates = sgd(funct, trainable_params, learning_rate=learning_rate)
    train_fun = theano.function(non_trainable_params, funct, updates=updates)
    loss_fn_compiled = theano.function(non_trainable_params, funct)
    old_loss = np.inf
    for i in range(max_iter):
        train_fun(*args)
        curr_loss = loss_fn_compiled(*args)
        if abs(curr_loss - old_loss) < tol:
            break
        old_loss = curr_loss
        _print_optimizer_iteration_info(verbose, i, old_loss)
    _print_optimizer_final_info(verbose, i, old_loss,
                                'Lasagne Gradient Descend')
    params_optimal = trainable_params[0].get_value()
    return {'x': params_optimal}
Code example #15
File: main.py  Project: adbrebs/rnn_reader
def main(cf):

    ########
    # DATA #
    ########

    print 'Creating data generators...'
    train_iterator, valid_iterator, test_iterator = create_data_generators(cf)

    ##############################
    # COST, GRADIENT AND UPDATES #
    ##############################

    print 'Building model...'

    cost, accuracy = cf.model.compute_cost(deterministic=False)
    cost_val, accuracy_val = cf.model.compute_cost(deterministic=True)

    params = get_all_params(cf.model.net, trainable=True)

    if cf.algo == 'adam':
        updates = adam(cost, params, cf.learning_rate)
    elif cf.algo == 'sgd':
        updates = sgd(cost, params, cf.learning_rate)
    elif cf.algo == 'momentum':
        updates = momentum(cost, params, cf.learning_rate)
    else:
        raise ValueError('Specified algo does not exist')

    ##############
    # MONITORING #
    ##############

    print 'Creating extensions and compiling functions...',

    train_monitor = TrainMonitor(
        cf.train_freq_print, cf.model.vars, [cost, accuracy], updates)

    monitoring_vars = [cost_val, accuracy_val]
    valid_monitor = ValMonitor(
        'Validation', cf.valid_freq_print, cf.model.vars, monitoring_vars,
        valid_iterator)

    test_monitor = ValMonitor(
        'Test', cf.valid_freq_print, cf.model.vars, monitoring_vars,
        valid_iterator)

    train_saver = VariableSaver(
        train_monitor, cf.dump_every_batches, cf.dump_path, 'train')

    valid_saver = VariableSaver(
        valid_monitor, cf.dump_every_batches, cf.dump_path, 'valid')

    test_saver = VariableSaver(test_monitor, None, cf.dump_path, 'test')

    # Ending conditions
    end_conditions = []
    if hasattr(cf, 'max_iter'):
        end_conditions.append(MaxIteration(cf.max_iter))
    if hasattr(cf, 'max_time'):
        end_conditions.append(MaxTime(cf.max_time))

    extensions = [
        valid_monitor,
        test_monitor,

        train_saver,
        valid_saver,
        test_saver
    ]

    train_m = Trainer(train_monitor, train_iterator,
                      extensions, end_conditions)

    ############
    # TRAINING #
    ############

    train_m.train()
Code example #16
File: ldrhdr.py  Project: soravux/jambokoko
# In[ ]:

input_var = T.tensor4('inputs')
output_var = T.matrix('outputs')

network = layers[0][0](input_var=input_var, **layers[0][1])
for layer in layers[1:]:
    network = layer[0](network, **layer[1])

prediction = get_output(network)
loss = squared_error(prediction, output_var)
loss = loss.mean()

params = get_all_params(network, trainable=True)
#updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
updates = sgd(loss, params, learning_rate=0.01)


test_prediction = get_output(network, deterministic=True)
test_loss = squared_error(test_prediction, output_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
#test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
#                  dtype=theano.config.floatX)

train_fn = theano.function([input_var, output_var], loss, updates=updates)# , mode=theano.compile.MonitorMode(post_func=theano.compile.monitormode.detect_nan))
#val_fn = theano.function([input_var, output_var], [test_loss, test_acc])
val_fn = theano.function([input_var, output_var], test_loss)


Code example #17
def build_network_2dconv(
    args, input1_var, input1_mask_var, input2_var, intut2_mask_var, target_var, wordEmbeddings, maxlen=36
):

    print ("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100

    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    # two conv pool layer
    # filter_size=(10, 100)
    # pool_size=(4,4)

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    # input_1_mask = InputLayer((None, maxlen),input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape_1 = ReshapeLayer(emb_1, (batchsize, 1, maxlen, wordDim))

    conv2d_1 = Conv2DLayer(
        reshape_1,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    filter_size_2=(4, 10)
    pool_size_2=(2,2)
    conv2d_1 = Conv2DLayer(maxpool_1, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size_2) #(None, 100, 1, 1) (None, 100, 1, 20)
    """

    forward_1 = FlattenLayer(maxpool_1)  # (None, 100) #(None, 50400)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    # input_2_mask = InputLayer((None, maxlen),input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")

    reshape_2 = ReshapeLayer(emb_2, (batchsize, 1, maxlen, wordDim))
    conv2d_2 = Conv2DLayer(
        reshape_2,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    conv2d_2 = Conv2DLayer(maxpool_2, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size_2) #(None, 100, 1, 1)
    """

    forward_2 = FlattenLayer(maxpool_2)  # (None, 100)

    # elementwisemerge need fix the sequence length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])

    concat = ConcatLayer([forward_1, forward_2])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)

    # prediction = get_output(network, {input_1:input1_var, input_2:input2_var})
    prediction = get_output(network)

    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    # test_prediction = get_output(network, {input_1:input1_var, input_2:input2_var}, deterministic=True)
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    """
    train_fn = theano.function([input1_var, input1_mask_var, input2_var, intut2_mask_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)
    """
    train_fn = theano.function([input1_var, input2_var, target_var], loss, updates=updates, allow_input_downcast=True)

    if args.task == "sts":
        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, intut2_mask_var, target_var], 
            [test_loss, test_prediction], allow_input_downcast=True)
        """
        val_fn = theano.function(
            [input1_var, input2_var, target_var], [test_loss, test_prediction], allow_input_downcast=True
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))

        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, intut2_mask_var, target_var], 
            [test_loss, test_acc], allow_input_downcast=True)
        """
        val_fn = theano.function([input1_var, input2_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
Code example #18
def main(cf):

    ########
    # DATA #
    ########

    print 'Creating data generators...'
    train_iterator, valid_iterator, test_iterator = create_data_generators(cf)

    ##############################
    # COST, GRADIENT AND UPDATES #
    ##############################

    print 'Building model...'

    cost, accuracy = cf.model.compute_cost(deterministic=False)
    cost_val, accuracy_val = cf.model.compute_cost(deterministic=True)

    params = get_all_params(cf.model.net, trainable=True)

    if cf.algo == 'adam':
        updates = adam(cost, params, cf.learning_rate)
    elif cf.algo == 'sgd':
        updates = sgd(cost, params, cf.learning_rate)
    elif cf.algo == 'momentum':
        updates = momentum(cost, params, cf.learning_rate)
    else:
        raise ValueError('Specified algo does not exist')

    ##############
    # MONITORING #
    ##############

    print 'Creating extensions and compiling functions...',

    train_monitor = TrainMonitor(cf.train_freq_print, cf.model.vars,
                                 [cost, accuracy], updates)

    monitoring_vars = [cost_val, accuracy_val]
    valid_monitor = ValMonitor('Validation', cf.valid_freq_print,
                               cf.model.vars, monitoring_vars, valid_iterator)

    test_monitor = ValMonitor('Test', cf.valid_freq_print, cf.model.vars,
                              monitoring_vars, valid_iterator)

    train_saver = VariableSaver(train_monitor, cf.dump_every_batches,
                                cf.dump_path, 'train')

    valid_saver = VariableSaver(valid_monitor, cf.dump_every_batches,
                                cf.dump_path, 'valid')

    test_saver = VariableSaver(test_monitor, None, cf.dump_path, 'test')

    # Ending conditions
    end_conditions = []
    if hasattr(cf, 'max_iter'):
        end_conditions.append(MaxIteration(cf.max_iter))
    if hasattr(cf, 'max_time'):
        end_conditions.append(MaxTime(cf.max_time))

    extensions = [
        valid_monitor, test_monitor, train_saver, valid_saver, test_saver
    ]

    train_m = Trainer(train_monitor, train_iterator, extensions,
                      end_conditions)

    ############
    # TRAINING #
    ############

    train_m.train()
Code example #19
File: easy.py  Project: mehdidc/lasagnekit
    def _prepare(self, X, y, X_valid=None, y_valid=None, sample_weight=None,
                 whole_dataset_in_device=True):

        self._stats = []
        self._class_label_encoder = LabelEncoder()
        if self.is_classification is True:
            self._class_label_encoder.fit(y)
            self.classes_ = self._class_label_encoder.classes_
            y = self._class_label_encoder.transform(y).astype(y.dtype)
            self.y_train_transformed = y
            if y_valid is not None:
                y_valid_transformed = self._class_label_encoder.transform(
                    y_valid).astype(y_valid.dtype)

        self._l_x_in = layers.InputLayer(shape=(None, X.shape[1]))
        batch_index, X_batch, y_batch, batch_slice = get_theano_batch_variables(
            self.batch_size, y_softmax=self.is_classification)

        if sample_weight is not None:
            t_sample_weight = T.vector('sample_weight')
            sample_weight = sample_weight.astype(theano.config.floatX)
        else:
            t_sample_weight = T.scalar('sample_weight')

        if self.is_classification is True:
            y_dim = len(set(y.flatten().tolist()))
        else:
            y_dim = y.shape[1]

        self._prediction_layer = self._build_model(y_dim)
        self._layers = layers.get_all_layers(self._prediction_layer)
        self._build_prediction_functions(X_batch, self._prediction_layer)

        if self.input_noise_function is None:
            output = layers.get_output(self._prediction_layer, X_batch)

        else:
            X_batch_noisy = self.input_noise_function(X_batch)
            output = layers.get_output(self._prediction_layer, X_batch_noisy)

        if self.is_classification:
            loss = -T.mean(t_sample_weight * T.log(output)
                           [T.arange(y_batch.shape[0]), y_batch])
        else:
            loss = T.mean(
                t_sample_weight * T.sum((output - y_batch) ** 2, axis=1))

        loss_unreg = loss

        all_params = layers.get_all_params(self._prediction_layer)
        if self._output_softener_coefs is not None:
            all_params.append(self._output_softener_coefs)

        W_params = layers.get_all_params(
            self._prediction_layer, regularizable=True)

        # regularization
        if self.L1_factor is not None:
            for L1_factor_layer, W in zip(self.L1_factor, W_params):
                loss = loss + L1_factor_layer * T.sum(abs(W))

        if self.L2_factor is not None:
            for L2_factor_layer, W in zip(self.L2_factor, W_params):
                loss = loss + L2_factor_layer * T.sum(W**2)

        if self.optimization_method == 'nesterov_momentum':
            gradient_updates = updates.nesterov_momentum(loss, all_params, learning_rate=self.learning_rate,
                                                         momentum=self.momentum)
        elif self.optimization_method == 'adadelta':
            # don't need momentum there
            gradient_updates = updates.adadelta(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'adam':
            gradient_updates = updates.adam(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'momentum':
            gradient_updates = updates.momentum(
                loss, all_params, learning_rate=self.learning_rate,
                momentum=self.momentum
            )
        elif self.optimization_method == 'adagrad':
            gradient_updates = updates.adagrad(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'rmsprop':
            gradient_updates = updates.rmsprop(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'sgd':
            gradient_updates = updates.sgd(
                loss, all_params, learning_rate=self.learning_rate,
            )
        else:
            raise Exception("wrong optimization method")

        nb_batches = X.shape[0] // self.batch_size
        if (X.shape[0] % self.batch_size) != 0:
            nb_batches += 1

        X = X.astype(theano.config.floatX)
        if self.is_classification == True:
            y = y.astype(np.int32)
        else:
            y = y.astype(theano.config.floatX)

        if whole_dataset_in_device == True:
            X_shared = theano.shared(X, borrow=True)
            y_shared = theano.shared(y, borrow=True)

            givens = {
                X_batch: X_shared[batch_slice],
                y_batch: y_shared[batch_slice]
            }

            if sample_weight is not None:
                sample_weight_shared = theano.shared(
                    sample_weight, borrow=True)
                givens[t_sample_weight] = sample_weight_shared[batch_slice]
            else:
                givens[t_sample_weight] = T.as_tensor_variable(
                    np.array(1., dtype=theano.config.floatX))

            iter_update_batch = theano.function(
                [batch_index], loss,
                updates=gradient_updates,
                givens=givens,

            )
        else:
            if sample_weight is None:
                iter_update_gradients = theano.function(
                    [X_batch, y_batch],
                    loss,
                    updates=gradient_updates,
                    givens={t_sample_weight: T.as_tensor_variable(
                        np.array(1., dtype=theano.config.floatX))},

                )

                def iter_update_batch(batch_index):
                    sl = slice(batch_index * self.batch_size,
                               (batch_index + 1) * self.batch_size)
                    return iter_update_gradients(X[sl], y[sl])

            else:
                iter_update_gradients = theano.function(
                    [X_batch, y_batch, t_sample_weight],
                    loss,
                    updates=gradient_updates
                )

                def iter_update_batch(batch_index):
                    sl = slice(batch_index * self.batch_size,
                               (batch_index + 1) * self.batch_size)
                    return iter_update_gradients(X[sl], y[sl], sample_weight[sl])
        self._iter_update_batch = iter_update_batch
        self._get_loss = theano.function(
            [X_batch, y_batch, t_sample_weight], loss_unreg, allow_input_downcast=True)

        def iter_update(epoch):
            losses = []
            #self.learning_rate.set_value(self.learning_rate.get_value() * np.array(0.99, dtype=theano.config.floatX))
            for i in xrange(nb_batches):
                losses.append(self._iter_update_batch(i))
                # max norm
                if self.max_norm is not None:
                    for max_norm_layer, layer in zip(self.max_norm, self._layers):
                        layer.W = updates.norm_constraint(
                            layer.W, self.max_norm)

            losses = np.array(losses)

            d = OrderedDict()
            d["epoch"] = epoch
            #d["loss_train_std"] = losses.std()

            #d["loss_train"] = losses.mean()
            d["loss_train"] = self._get_loss(
                self.X_train, self.y_train_transformed, 1.)

            d["accuracy_train"] = (
                self.predict(self.X_train) == self.y_train).mean()

            if X_valid is not None and y_valid is not None:
                d["loss_valid"] = self._get_loss(
                    X_valid, y_valid_transformed, 1.)

                if self.is_classification == True:
                    d["accuracy_valid"] = (
                        self.predict(X_valid) == y_valid).mean()

            if self.verbose > 0:
                if (epoch % self.report_each) == 0:
                    print(tabulate([d], headers="keys"))
            self._stats.append(d)
            return d

        def quitter(update_status):
            cur_epoch = len(self._stats) - 1
            if self.patience_nb_epochs > 0:
                # patience heuristic (for early stopping)
                cur_patience_stat = update_status[self.patience_stat]

                if self.cur_best_patience_stat is None:
                    self.cur_best_patience_stat = cur_patience_stat
                    first_time = True
                else:
                    first_time = False

                thresh = self.patience_progression_rate_threshold
                if cur_patience_stat < self.cur_best_patience_stat * thresh or first_time:

                    if self.verbose >= 2:
                        fmt = "--Early stopping-- good we have a new best value : {0}={1}, last best : epoch {2}, value={3}"
                        print(fmt.format(self.patience_stat, cur_patience_stat,
                                         self.cur_best_epoch, self.cur_best_patience_stat))
                    self.cur_best_epoch = cur_epoch
                    self.cur_best_patience_stat = cur_patience_stat
                    if hasattr(self, "set_state") and hasattr(self, "get_state"):
                        self.cur_best_model = self.get_state()
                    else:
                        self.cur_best_model = pickle.dumps(
                            self.__dict__, protocol=pickle.HIGHEST_PROTOCOL)
                if (cur_epoch - self.cur_best_epoch) >= self.patience_nb_epochs:
                    finish = True
                    if hasattr(self, "set_state") and hasattr(self, "get_state"):
                        self.set_state(self.cur_best_model)
                    else:
                        self.__dict__.update(pickle.loads(self.cur_best_model))

                    self._stats = self._stats[0:self.cur_best_epoch + 1]
                    if self.verbose >= 2:
                        print("out of patience...take the model at epoch {0} and quit".format(
                            self.cur_best_epoch + 1))
                else:
                    finish = False
                return finish
            else:
                return False

        def monitor(update_status):
            pass

        def observer(monitor_output):
            pass

        return (iter_update, quitter, monitor, observer)
Code example #20
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_flters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Code example #21
 def update(all_grads, all_params, learning_rate):
     """ Compute updates from gradients """
     return sgd(all_grads, all_params, learning_rate)
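For context, a self-contained sketch (not from the original project) showing that lasagne.updates.sgd accepts either a scalar loss or a list of precomputed gradients as its first argument, which is what the update() helper above relies on:

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import sgd

w = theano.shared(np.ones(3), name='w')
x = T.dvector('x')
loss = T.sum((w * x) ** 2)

# Variant 1: pass the loss; sgd() differentiates it internally.
updates_from_loss = sgd(loss, [w], learning_rate=0.1)

# Variant 2: pass precomputed gradients, as update() does above.
grads = T.grad(loss, [w])
updates_from_grads = sgd(grads, [w], learning_rate=0.1)

train_step = theano.function([x], loss, updates=updates_from_grads)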
Code example #22
File: nn.py  Project: fagan2888/optimal_landing
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']),
                                input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        W = None
        if model['hidden_nonlinearity'] == 'ReLu':
            W = lasagne.init.GlorotUniform('relu')
        else:
            W = lasagne.init.GlorotUniform(1)

        fc = layers.DenseLayer(prev_layer,
                               model['units'],
                               nonlinearity=nonlin,
                               W=W)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] != OUTPUT_LOG:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    if model['output_mode'] == OUTPUT_NO:
        prediction_unboun = layers.get_output(output_layer)
        loss = objectives.squared_error(prediction_unboun, targets_var)
    else:
        loss = objectives.squared_error(predictions, targets_var)

    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)
    #    test_prediction = layers.get_output(output_layer, deterministic=True)  #fix for dropout
    test_loss = objectives.squared_error(predictions, targets_var)
    test_loss = test_loss.mean()

    if model['hidden_nonlinearity'] == 'ReLu':
        model['lr'] *= 0.5
    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    #    pred_fn = theano.function([input_data], prediction_unboun)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {
        'train': train_fn,
        'eval': val_fn,
        'pred': pred_fn,
        'layers': output_layer
    }
Code example #23
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim

    args.lstmDim = 150

    input = InputLayer((None, seqlen),input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen),input_var=input_mask_var)
    
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh)

    lstm_back = LSTMLayer(
        emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1) # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1) # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, lstm:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))


    train_fn = theano.function([input_var, input_mask_var,target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))

    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Code example #24
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_flters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Code example #25
File: calibration.py  Project: jason790/crayimage
 def optimizer(self, params, learning_rate):
   return updates.sgd(self.loss, params, learning_rate)
Code example #26
    # hidden_layer
    hidden_input = conv_layer.output.flatten(2)
    hidden_input_shape = (conv_layer.output_shape[0], conv_layer.output_shape[1]*conv_layer.output_shape[2]*conv_layer.output_shape[3])
    hidden_layer = FullConectedLayer(rng,hidden_input,hidden_input_shape[1],100)

    # regression_layer
    regession_layer = RegresstionLayer(rng,hidden_layer.output,100,1)

    mse = regession_layer.mse(Y)

    cost = mse + 0.001 * (conv_layer.L2 + hidden_layer.L2 + regession_layer.L2)

    params = conv_layer.params + hidden_layer.params + regession_layer.params

    updates = sgd(cost,params,0.01)

    train_model = theano.function([X,Y],[mse,cost],updates=updates)
    valid_model = theano.function([X,Y],[mse,cost])

    showfunction = theano.function([X,Y],[hidden_input, hidden_layer.output, regession_layer.y_pred, mse, cost])

    # a,b,c,d,e = showfunction(X_train[:100],Y_train_rouge1[:100])
    #
    # print(a,b,c,d,e)

    patience = 0
    best_valid_mse_global = 100
    early_stop = 20
    epoch_i = 0
Code example #27
    x = T.fmatrix()
    t = T.fvector()
    ann = network(x)

    prediction = get_output(ann)[:, 1]
    predict = function([x], outputs=prediction)

    loss = binary_crossentropy(prediction, t).mean()

    # L2 regularization
    if L2_REGULARIZATION:
        l2_penalty = ALPHA * regularize_network_params(ann, l2)
        loss += l2_penalty.mean()

    updates = sgd(loss_or_grads=loss,
                  params=get_all_params(ann, trainable=True),
                  learning_rate=LR)
    train = function([x, t],
                     outputs=loss,
                     updates=updates,
                     allow_input_downcast=True,
                     mode='FAST_COMPILE')

    # Load data
    train_data, test_data = get_data()
    train_data, test_data = np.float32(train_data), np.float32(test_data)

    # Standardize features
    train_data[:, :-1] = (train_data[:, :-1] - np.mean(
        train_data[:, :-1], axis=0)) / np.std(train_data[:, :-1], axis=0)
    test_data[:, :-1] = (test_data[:, :-1] - np.mean(
Code example #28
File: basic_net.py  Project: EdwardBetts/kaggle_otto
                          nonlinearity=softmax,
                          W=Constant())
    # Now, we can generate the symbolic expression of the network's output given an input variable.
    net_input = T.matrix('net_input')
    net_output = l_output.get_output(net_input)

    # As a loss function, we'll use Theano's categorical_crossentropy function.
    # This allows for the network output to be class probabilities,
    # but the target output to be class labels.
    true_output = T.ivector('true_output')
    loss = T.mean(T.nnet.categorical_crossentropy(net_output, true_output))
    # Retrieving all parameters of the network is done using get_all_params,
    # which recursively collects the parameters of all layers connected to the provided layer.
    all_params = get_all_params(l_output)
    # Now, we'll generate updates using Lasagne's SGD function
    updates = sgd(loss, all_params, learning_rate=0.01)
    # Finally, we can compile Theano functions for training and computing the output.
    training = function([net_input, true_output], loss, updates=updates)
    prediction = function([net_input], net_output)

    # Train for 100 epochs
    print 'epoch  logloss'
    for k, n in enumerate(xrange(100)):
        # this is logloss
        res = training(trainT, classT)
        print '{0:3d}  {1:.4f}'.format(k, res)

    # Compute the predicted label of the training data.
    # The argmax converts the class probability output to class label
    probabilities = prediction(testT)  # normalized
    prediction = np.argmax(probabilities, axis=1)
Code example #29
def build_network_2dconv(args, input_var, target_var, wordEmbeddings, maxlen=60):

    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(
        reshape,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100) #(None, 50400)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)

    loss = T.mean(binary_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
Code example #30
File: GPOMDP_V.py  Project: liubo-cs/rllab
grad = theano.grad(surr, params)

eval_grad1 = TT.matrix('eval_grad0', dtype=grad[0].dtype)
eval_grad2 = TT.vector('eval_grad1', dtype=grad[1].dtype)
eval_grad3 = TT.col('eval_grad3', dtype=grad[2].dtype)
eval_grad4 = TT.vector('eval_grad4', dtype=grad[3].dtype)
eval_grad5 = TT.vector('eval_grad5', dtype=grad[4].dtype)

f_train = theano.function(
    inputs=[observations_var, actions_var, d_rewards_var], outputs=grad)
f_update = theano.function(
    inputs=[eval_grad1, eval_grad2, eval_grad3, eval_grad4, eval_grad5],
    outputs=None,
    updates=sgd([eval_grad1, eval_grad2, eval_grad3, eval_grad4, eval_grad5],
                params,
                learning_rate=learning_rate))

alla = []
for i in range(10):
    if (load_policy):
        policy.set_param_values(np.loadtxt('policy.txt'), trainable=True)
    avg_return = np.zeros(n_itr)
    #np.savetxt("policy_novar.txt",snap_policy.get_param_values(trainable=True))
    for j in range(n_itr):
        paths = parallel_sampler.sample_paths_on_trajectories(
            policy.get_param_values(), N, T, show_bar=False)
        #baseline.fit(paths)
        observations = [p["observations"] for p in paths]
        actions = [p["actions"] for p in paths]
        d_rewards = [p["rewards"] for p in paths]
Code example #31
File: try.py  Project: birdyLinch/DDPG
from lasagne.layers import InputLayer, DenseLayer
import lasagne
from lasagne.updates import sgd, total_norm_constraint
import theano.tensor as T

x = T.matrix()
y = T.ivector()
l_in = InputLayer((5, 10))
l1 = DenseLayer(l_in, num_units=7, nonlinearity=T.nnet.softmax)
output = lasagne.layers.get_output(l1, x)
cost = T.mean(T.nnet.categorical_crossentropy(output, y))
all_params = lasagne.layers.get_all_params(l1)
all_grads = T.grad(cost, all_params)
scaled_grads = total_norm_constraint(all_grads, 5)
updates = sgd(scaled_grads, all_params, learning_rate=0.1)