def generate_theano_func(args, network, penalty, input_dict, target_var):
    prediction = get_output(network, input_dict)
    # loss = T.mean( target_var * ( T.log(target_var) - prediction ))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(network))
    # penalty = sum(T.sum(lstm_param ** 2) for lstm_param in lstm_params)
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param ** 2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1))
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        # raising a string literal is invalid Python; raise a proper exception
        raise ValueError("Need to set optimizer correctly")

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean( target_var * ( T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    # input1_var, input1_mask_var, input2_var and input2_mask_var are assumed
    # to be defined at module level
    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss,
        updates=updates,
        allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction],
            allow_input_downcast=True,
        )
    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc],
            allow_input_downcast=True,
        )

    return train_fn, val_fn
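# A minimal, self-contained sketch of the compile pattern the snippets in this
# collection share (the two-layer model and the hyperparameters here are
# illustrative assumptions, not taken from any snippet above):
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer, get_output, get_all_params
from lasagne.objectives import categorical_crossentropy
from lasagne.updates import adam

x = T.matrix('x')
y = T.ivector('y')
l_in = InputLayer((None, 100), input_var=x)
l_out = DenseLayer(l_in, num_units=10,
                   nonlinearity=lasagne.nonlinearities.softmax)

train_pred = get_output(l_out)                     # stochastic pass (dropout active)
train_loss = categorical_crossentropy(train_pred, y).mean()
params = get_all_params(l_out, trainable=True)
train_fn = theano.function([x, y], train_loss,
                           updates=adam(train_loss, params, learning_rate=1e-3),
                           allow_input_downcast=True)

test_pred = get_output(l_out, deterministic=True)  # deterministic pass for evaluation
test_loss = categorical_crossentropy(test_pred, y).mean()
val_fn = theano.function([x, y], test_loss, allow_input_downcast=True)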
def __init__(self, lr, C, momentum):
    self.lr = lr
    self.C = C
    self.momentum = momentum
    self.X = T.tensor4('X')
    self.y = T.ivector('y')
    self.network = self._build()
    self.params = layers.get_all_params(self.network, trainable=True)
    reg = regularization.regularize_network_params(self.network, regularization.l2)
    reg /= layers.helper.count_params(self.network)
    # training set
    yDropProb = layers.get_output(self.network)
    self.trEqs = myUtils.basic.eqs(yDropProb, self.y)
    trCrossentropy = objectives.categorical_crossentropy(yDropProb, self.y)
    self.trCost = trCrossentropy.mean() + C * reg
    # validation / test sets
    yFullProb = layers.get_output(self.network, deterministic=True)
    self.vateEqs = myUtils.basic.eqs(yFullProb, self.y)
    vateCrossentropy = objectives.categorical_crossentropy(yFullProb, self.y)
    self.vateCost = vateCrossentropy.mean() + C * reg
    self.yPred = yFullProb
    # training function: takes the training set, returns training loss and error
    updatesDict = updates.nesterov_momentum(self.trCost, self.params, lr, momentum)
    self.trainfn = myUtils.basic.makeFunc([self.X, self.y],
                                          [self.trCost, self.trEqs], updatesDict)
    # validation/test function: takes the validation or test set, returns loss
    # and error; performs no parameter updates
    self.vatefn = myUtils.basic.makeFunc([self.X, self.y],
                                         [self.vateCost, self.vateEqs], None)
def __init__(self, istrained, name=None, args=None):
    self.istrained = istrained
    self.X = T.tensor4('X')
    self.y = T.ivector('y')
    self.outprob = build_model(self.X)
    if self.istrained:
        params = cPickle.load(open(dataset_path + 'plain_cnn.pkl', 'r'))
        layers.set_all_param_values(self.outprob, params)
        self.yFullProb = layers.get_output(self.outprob, deterministic=True)
        self.predfn = makeFunc([self.X, ], [self.yFullProb, ], None)
    else:
        self.lr, self.C, self.momentum = args
        self.params = layers.get_all_params(self.outprob, trainable=True)
        reg = regularization.regularize_network_params(self.outprob, regularization.l2)
        reg /= layers.helper.count_params(self.outprob)
        # training set
        self.yDropProb = layers.get_output(self.outprob)
        trCrossentropy = objectives.categorical_crossentropy(self.yDropProb, self.y)
        self.trCost = trCrossentropy.mean() + self.C * reg
        # validation / test sets
        self.yFullProb = layers.get_output(self.outprob, deterministic=True)
        vateCrossentropy = objectives.categorical_crossentropy(self.yFullProb, self.y)
        self.vateCost = vateCrossentropy.mean() + self.C * reg
        # training function: takes the training set, returns training loss and error
        updatesDict = updates.nesterov_momentum(self.trCost, self.params,
                                                self.lr, self.momentum)
        self.trainfn = makeFunc([self.X, self.y],
                                [self.trCost, self.yDropProb], updatesDict)
        # validation/test function: takes the validation or test set, returns
        # loss and error; performs no parameter updates
        self.vatefn = makeFunc([self.X, self.y],
                               [self.vateCost, self.yFullProb], None)
def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)
    # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # convolution, then batch normalisation, then activation;
    # 'same' padding keeps the spatial size
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16,
                                      filter_size=(3, 3, 3), stride=(1, 1, 1),
                                      pad='same', nonlinearity=rectify))
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16,
                                      filter_size=(3, 3, 3), stride=(1, 1, 1),
                                      pad='same', nonlinearity=rectify))
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=1,
                                      filter_size=(3, 3, 3), stride=(1, 1, 1),
                                      pad='same', nonlinearity=rectify))
    layer_prediction = layer

    # loss
    prediction = get_output(layer_prediction)
    loss = categorical_crossentropy(prediction.flatten(), target_var.flatten())

    # updates: stochastic gradient descent (SGD) with Nesterov momentum,
    # computed by the caller from these parameters
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)
    test_loss = categorical_crossentropy(test_prediction.flatten(),
                                         target_var.flatten())

    return test_prediction, prediction, loss, params
def tied_neighbours(preds, n_sample_preds, n_classes):
    eps = 1e-8
    # preds = T.clip(preds, eps, 1-eps)
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    earlier_neighbours = preds_per_trial_row[:, :-1]
    later_neighbours = preds_per_trial_row[:, 1:]
    # Have to ensure the first values are larger than zero
    # for numerical stability :/
    # Example of the problem otherwise:
    """
    a = T.fmatrix()
    b = T.fmatrix()
    soft_out_a = softmax(a)
    soft_out_b = softmax(b)
    loss = categorical_crossentropy(soft_out_a[:, 1:], soft_out_b[:, :-1])
    neigh_fn = theano.function([a, b], loss)
    neigh_fn(np.array([[0, 1000, 0]], dtype=np.float32),
             np.array([[0.1, 0.9, 0.3]], dtype=np.float32))
    -> inf
    """
    # renormalize(?)
    earlier_neighbours = (T.gt(earlier_neighbours, eps) * earlier_neighbours +
                          T.le(earlier_neighbours, eps) * earlier_neighbours +
                          eps)
    loss = categorical_crossentropy(earlier_neighbours, later_neighbours)
    return loss
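# Tiny numeric illustration (assumed values) of the instability documented in
# the docstring above: categorical_crossentropy takes log(predictions), so an
# exact zero in the predictions argument yields inf, while an eps floor keeps
# the loss finite.
import numpy as np
import theano.tensor as T
from lasagne.objectives import categorical_crossentropy

p = T.fmatrix('p')
q = T.fmatrix('q')
ce = categorical_crossentropy(p, q)
print(ce.eval({p: np.array([[0.0, 1.0]], dtype=np.float32),
               q: np.array([[0.5, 0.5]], dtype=np.float32)}))  # -> [inf]
print(ce.eval({p: np.array([[1e-8, 1.0]], dtype=np.float32),
               q: np.array([[0.5, 0.5]], dtype=np.float32)}))  # finite (~9.2)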
def get_cost_U(self, image_input):
    print('getting_cost_U')
    prob_ys_given_x = self.classifier.get_output_for(
        self.classifier_helper.get_output_for(image_input))
    '''
    label_input_with = []
    for i in xrange(self.num_classes):
        label_input_with.append(self.convert_onehot(
            T.zeros([image_input.shape[0]], dtype='int64') + i))
    cost_L_with = []
    for i in xrange(self.num_classes):
        cost_L_with.append(self.get_cost_L([image_input, label_input_with[i]]))
    weighted_cost_L = T.zeros([image_input.shape[0], ])
    for i in xrange(self.num_classes):
        weighted_cost_L += prob_ys_given_x[:, i] * cost_L_with[i]
    '''
    weighted_cost_L = T.zeros([image_input.shape[0], ])
    for i in xrange(self.num_classes):
        label_input = T.zeros([image_input.shape[0], self.num_classes])
        label_input = T.set_subtensor(label_input[:, i], 1)
        cost_L = self.get_cost_L([image_input, label_input])
        weighted_cost_L += prob_ys_given_x[:, i] * cost_L
    entropy_y_given_x = objectives.categorical_crossentropy(
        prob_ys_given_x, prob_ys_given_x)
    cost_U = weighted_cost_L - entropy_y_given_x
    return cost_U
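# Note on the entropy term above: categorical_crossentropy(p, p) is exactly
# the Shannon entropy H(p) = -sum_i p_i log p_i of the predicted distribution.
# A quick numeric check with assumed values:
import numpy as np
import theano.tensor as T
from lasagne.objectives import categorical_crossentropy

p = T.fmatrix('p')
entropy = categorical_crossentropy(p, p)
print(entropy.eval({p: np.array([[0.5, 0.5]], dtype=np.float32)}))  # [~0.693] = ln 2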
def get_cost_test(self, inputs):
    image_input, label_input = inputs
    prob_ys_given_x = self.classifier.get_output_for(
        self.classifier_helper.get_output_for(image_input))
    cost_test = objectives.categorical_crossentropy(prob_ys_given_x, label_input)
    cost_acc = T.eq(T.argmax(prob_ys_given_x, axis=1),
                    T.argmax(label_input, axis=1))
    return cost_test.mean(), cost_acc.mean()
def _get_train_fun(self):
    # "long" 2d matrix with the probability distribution
    output_probs = get_output(self.net['l_dist'])
    # cut off the first id of every id sequence: it corresponds to
    # START_TOKEN, which we are not predicting
    target_ids = self.net['l_in_y'].input_var[:, 1:]
    target_ids_flattened = target_ids.flatten()  # "long" vector with target ids

    cost = categorical_crossentropy(
        predictions=output_probs,
        targets=target_ids_flattened
    ).mean()

    all_params = get_all_params(self.net['l_dist'], trainable=True)

    _logger.info("Computing train updates...")
    updates = lasagne.updates.adadelta(
        loss_or_grads=cost,
        params=all_params,
        learning_rate=LEARNING_RATE
    )

    _logger.info("Compiling train function...")
    train_fun = theano.function(
        inputs=[self.net['l_in_x'].input_var, self.net['l_in_y'].input_var],
        outputs=cost,
        updates=updates
    )
    return train_fun
def set_network_predictor(input_data, input_mask, target_data, target_mask,
                          network):
    # get network output data
    predict_data = get_output(network, deterministic=True)

    # get prediction index
    predict_idx = T.argmax(predict_data, axis=-1)

    # get prediction cost, masked so that padded steps do not contribute
    predict_cost = categorical_crossentropy(
        predictions=T.reshape(predict_data, (-1, predict_data.shape[-1])) + eps,
        targets=T.flatten(target_data, 1))
    predict_cost = predict_cost * T.flatten(target_mask, 1)
    predict_cost = predict_cost.sum() / target_mask.sum()

    # get prediction function
    predict_fn = theano.function(
        inputs=[input_data, input_mask, target_data, target_mask],
        outputs=[predict_idx, predict_cost],
        allow_input_downcast=True)

    return predict_fn
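# Sketch of the mask arithmetic used above, on assumed toy values: per-step
# losses are zeroed where the mask is 0, then averaged over real steps only.
import numpy as np

ce = np.array([0.5, 1.0, 2.0, 3.0], dtype=np.float32)    # per-timestep CE
mask = np.array([1.0, 1.0, 0.0, 0.0], dtype=np.float32)  # two real steps, two padded
print((ce * mask).sum() / mask.sum())  # 0.75, not (0.5+1+2+3)/4 = 1.625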
def __build_loss_train__fn__(self):
    # create loss function
    prediction = layers.get_output(self.net)
    loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
    loss = loss.mean() + 1e-4 * regularization.regularize_network_params(
        self.net, regularization.l2)

    val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),
                     dtype=theano.config.floatX)

    # create parameter update expressions
    params = layers.get_all_params(self.net, trainable=True)
    self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
    update_rule = updates.nesterov_momentum(loss, params,
                                            learning_rate=self.eta,
                                            momentum=0.9)

    # compile training function that updates parameters and returns training loss
    self.__train_fn__ = theano.function(
        [self.__input_var__, self.__target_var__], loss, updates=update_rule)
    self.__predict_fn__ = theano.function(
        [self.__input_var__], layers.get_output(self.net, deterministic=True))
    self.__val_fn__ = theano.function(
        [self.__input_var__, self.__target_var__], [loss, val_acc])
def _get_train_fun(self):
    # "long" 2d matrix with the probability distribution
    output_probs = get_output(self.net['l_dist'])
    input_ids = T.imatrix()
    # cut off the first id of every id sequence: it corresponds to
    # START_TOKEN, which we are not predicting
    target_ids = input_ids[:, 1:]
    target_ids_flattened = target_ids.flatten()  # "long" vector with target ids

    cost = categorical_crossentropy(
        predictions=output_probs,
        targets=target_ids_flattened
    ).mean()

    all_params = get_all_params(self.net['l_dist'], trainable=True)

    print("Computing train updates...")
    updates = lasagne.updates.adadelta(
        loss_or_grads=cost,
        params=all_params,
        learning_rate=LEARNING_RATE
    )

    print("Compiling train function...")
    train_fun = theano.function(
        inputs=[self.net['l_in_x'].input_var,
                self.net['l_in_y'].input_var,
                input_ids],
        outputs=cost,
        updates=updates
    )
    return train_fun
def create_theano_loss(d):
    # num_classes, N, batch_size, lr, beta and self._srng are assumed to come
    # from the enclosing scope
    X, t = T.dmatrix('X'), T.dvector('t')
    log_sigma2 = theano.shared(np.ones((num_classes, d)))
    theta = theano.shared(np.random.randn(num_classes, d))

    # change of parametrization
    log_alpha = log_sigma2 - T.log(theta ** 2)
    la, alpha = log_alpha, T.exp(log_alpha)

    # -KL(q || prior)
    mD_KL = -(0.5 * T.log1p(T.exp(-la)) -
              (0.03 + 1.0 / (1.0 + T.exp(-(1.5 * (la + 1.3)))) * 0.64)).sum()

    # NLL through the local reparametrization trick
    mu, si = T.dot(X, theta.T), T.sqrt(T.dot(X * X, (alpha * theta * theta).T))
    activation = mu + self._srng.normal(mu.shape, avg=0, std=1) * si
    predictions = T.nnet.softmax(activation)
    ell = -T.sum(categorical_crossentropy(predictions, one_hot(t, num_classes)))

    # objective: negative SGVLB
    nlb = -(N / batch_size * ell + mD_KL)

    # optimization method and function compiling
    opt = lasagne.updates.adam(nlb, [log_sigma2, theta],
                               learning_rate=lr, beta1=beta)
    lbf = function([X, t], nlb, updates=opt)

    return lbf, theta, log_sigma2
def sensitivityBinaryCrossentropy(self, data, batchSize=128):
    """
    Returns the sensitivity of the categorical crossentropy with respect to
    the input data.

    :param data: Input data.
    :param batchSize: The network iterates through the dataset with batches,
        whose batch size is given by this parameter.
    """
    sens = np.zeros(data.shape)
    labelMatrix = T.ivector('labelVector')

    # compute the number of batches
    numBatches = int(np.ceil(float(sens.shape[0]) / float(batchSize)))
    startBatch = 0

    inputLayer = self.network.layers_[0].input_var
    output = get_output(self.network.layers_[-1], deterministic=True)
    score = categorical_crossentropy(output, labelMatrix).sum()
    calculatedGradients = theano.grad(score, inputLayer)

    for i in range(numBatches):
        endBatch = startBatch + batchSize
        if endBatch >= sens.shape[0]:
            endBatch = sens.shape[0]
            batchSize = endBatch - startBatch
        inputData = data[startBatch:endBatch].reshape(
            batchSize, data.shape[1], data.shape[2], data.shape[3])
        pred = output.eval({inputLayer: inputData}).argmax(axis=1)
        sens[startBatch:endBatch] = calculatedGradients.eval(
            {inputLayer: inputData, labelMatrix: pred.astype('int32')})
        startBatch = endBatch

    return sens
def __init__(self, C, lr):
    self.C = C
    self.X = T.ftensor4()
    self.Y = T.fmatrix()
    self.net = self._forward()
    params = layers.get_all_params(self.net['flatten'], trainable=True)
    netout = layers.get_output(self.net['out'])
    flattenout = layers.get_output(self.net['flatten'])
    reg = regularization.regularize_network_params(self.net['flatten'],
                                                   regularization.l2)
    reg /= layers.helper.count_params(self.net['flatten'])
    self.flattenfn = theano.function([self.X], flattenout,
                                     allow_input_downcast=True)
    self.predictfn = theano.function([self.X], netout,
                                     allow_input_downcast=True)
    accuracy = myUtils.basic.accuracy(netout, self.Y)
    self.scorefn = theano.function([self.X, self.Y], accuracy,
                                   allow_input_downcast=True)
    self.sharedBeta = self.net['out'].get_params()[0]
    crossentropy = objectives.categorical_crossentropy(netout, self.Y)
    cost = T.mean(crossentropy) + C * reg
    updatesDict = updates.nesterov_momentum(cost, params, lr, 0.9)
    # train the randomly initialized parameters
    self.trainfn = theano.function([self.X, self.Y], [cost, accuracy],
                                   updates=updatesDict,
                                   allow_input_downcast=True)
def compileValFunction(self):
    message = 'Compiling the Validation Function'
    self.logger.info(logMessage('+', message))
    startTime = time.time()

    valPrediction = get_output(self.outputLayer,
                               deterministic=True,
                               batch_norm_update_averages=False,
                               batch_norm_use_averages=False)
    # TODO: check whether the flattened layouts of the target var and the
    # output actually match.
    self.flattenedTargetVar = T.flatten(self.targetVar)

    valLoss = categorical_crossentropy(valPrediction,
                                       self.flattenedTargetVar).mean()
    weightNorm = regularize_network_params(self.outputLayer,
                                           lasagne.regularization.l2)
    valLoss += self.weightDecay * weightNorm

    valPredictionLabel = T.argmax(valPrediction, axis=1)
    valACC = T.mean(T.eq(valPredictionLabel, self.flattenedTargetVar),
                    dtype=theano.config.floatX)

    valFunc = theano.function([self.inputVar, self.targetVar],
                              [valLoss, valACC])

    message = 'Compiled the Validation Function, spent {:.2f}s'.format(
        time.time() - startTime)
    self.logger.info(logMessage('+', message))
    return valFunc
def multi_task_loss(y, t):
    cross_entropy = categorical_crossentropy(y[:, :num_class], t)
    regress_predictions = discrete_predict(y[:, -1])
    mse = squared_loss(regress_predictions, t)
    log_loss = cross_entropy.mean()
    reg_loss = mse.mean()
    return log_loss, reg_loss, log_loss + 3 * reg_loss
def compile_train_model(config):
    # build the training model
    train_batch_size = config['train_batch_size']  # number of bags
    bag_size = config['bag_size']
    input_var_train = T.tensor4('input_var_train')
    target_var = T.ivector('targets')
    train_network = build_train_model(train_batch_size, bag_size,
                                      input_var_train)
    learning_rate = theano.shared(np.float32(config['learning_rate']))

    classification_scores = lasagne.layers.get_output(train_network['prob'])
    debug_output = lasagne.layers.get_output(train_network['attention'])

    params = lasagne.layers.get_all_params(train_network['fc'], trainable=True)
    loss = T.mean(categorical_crossentropy(classification_scores, target_var))
    grads = T.grad(loss, params)
    # scale up the gradients of the later parameters; note the list entry must
    # be reassigned, since rebinding the loop variable alone is a no-op
    for index, grad in enumerate(grads):
        if index > 25:
            grads[index] = grad * 10.0

    y_pred = T.argmax(classification_scores, axis=1)
    error = T.mean(T.neq(y_pred, target_var))

    updates = lasagne.updates.nesterov_momentum(grads, params, learning_rate)
    train_model = theano.function([input_var_train, target_var],
                                  [loss, error], updates=updates)
    return train_network, train_model, learning_rate
def compileTrainFunction(self):
    message = 'Compiling the Training Function'
    self.logger.info(logMessage('+', message))
    startTime = time.time()

    trainPrediction = get_output(self.outputLayer,
                                 deterministic=False,
                                 batch_norm_update_averages=False,
                                 batch_norm_use_averages=False)
    # TODO: check whether the flattened layouts of the target var and the
    # output actually match.
    self.flattenedTargetVar = T.flatten(self.targetVar)

    trainLoss = categorical_crossentropy(trainPrediction,
                                         self.flattenedTargetVar).mean()
    weightNorm = regularize_network_params(self.outputLayer,
                                           lasagne.regularization.l2)
    trainLoss += self.weightDecay * weightNorm

    trainPredictionLabel = T.argmax(trainPrediction, axis=1)
    trainACC = T.mean(T.eq(trainPredictionLabel, self.flattenedTargetVar),
                      dtype=theano.config.floatX)

    params = get_all_params(self.outputLayer, trainable=True)
    update = self.optimizer(trainLoss, params,
                            learning_rate=self.learningRate)

    trainFunc = theano.function([self.inputVar, self.targetVar],
                                [trainLoss, trainACC], updates=update)

    message = 'Compiled the Training Function, spent {:.2f}s'.format(
        time.time() - startTime)
    self.logger.info(logMessage('+', message))
    return trainFunc
def compute_loss_tbptt(network, target_data, target_mask, is_first_win,
                       delay, context):
    o = get_output(network, deterministic=False)
    n_batch, n_seq, n_feat = o.shape

    if delay:
        o, target_data, target_mask = delayed_tbptt(o, target_data,
                                                    target_mask,
                                                    is_first_win, delay)
    elif context:
        o, target_data, target_mask = context_tbptt(o, target_data,
                                                    target_mask, context)

    ce = categorical_crossentropy(
        predictions=T.reshape(o, (-1, o.shape[-1]), ndim=2),
        targets=T.flatten(target_data, 1))
    ce = ce * T.flatten(target_mask, 1)
    ce_cost = ce.sum() / n_batch
    ce_frame_sum = ce.sum()

    pred_idx = T.argmax(o, axis=-1)

    return ce_cost, ce_frame_sum, pred_idx
def loss_acc(model, input_var, target_var, deterministic=True):
    """Calculate the loss/error and accuracy of a model.

    Parameters
    ----------
    model : a :class:`Layer` instance
        The model to evaluate.
    input_var : theano symbolic variable
        A variable representing the network input.
    target_var : theano symbolic variable
        A variable representing the desired network output.
    deterministic : boolean (``True``)
        Use deterministic mode (for testing) or not (for training).

    Returns
    -------
    theano symbolic variable (scalar)
        The categorical cross-entropy.
    theano symbolic variable (scalar)
        The accuracy.
    """
    prediction = get_output(model, inputs=input_var,
                            deterministic=deterministic)
    loss = categorical_crossentropy(prediction, target_var)
    acc = tensor.eq(tensor.argmax(prediction, axis=1), target_var)
    return tensor.mean(loss), tensor.mean(acc, dtype=config.floatX)
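# Hypothetical usage of loss_acc (the model and the variables here are
# assumptions, for illustration only):
# loss, acc = loss_acc(model, input_var, target_var, deterministic=True)
# val_fn = theano.function([input_var, target_var], [loss, acc])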
def objective(layers_, target, **kwargs):
    out_a_layer = layers_['output_a']
    out_b_layer = layers_['output_b']

    # get the outputs
    out_a, out_b = get_output([out_a_layer, out_b_layer])

    # get the targets
    gt_a = T.cast(target[:, 0], 'int32')
    gt_b = target[:, 1].reshape((-1, 1))

    # calculate the multi-task loss
    cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
    reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
    loss = cls_loss + reg_loss
    return loss
def test_maxpool_layer():
    l_in1 = InputLayer((None, 2))
    l_in2 = InputLayer((None, 20))
    l_hid = DenseLayer(l_in2, num_units=30, nonlinearity=rectify)
    l_pool = MaxpoolLayer([l_in1, l_hid])
    l_out = DenseLayer(l_pool, num_units=1, nonlinearity=sigmoid)

    bounds = theano.tensor.lmatrix('bounds')
    data = theano.tensor.matrix('data')
    targets = theano.tensor.matrix('targets')

    predictions = get_output(l_out, {l_in1: bounds, l_in2: data})
    loss = categorical_crossentropy(predictions, targets)
    loss = aggregate(loss, mode='mean')

    params = get_all_params(l_out)
    updates_sgd = sgd(loss, params, learning_rate=0.0001)

    train_function = theano.function([bounds, data, targets],
                                     updates=updates_sgd,
                                     allow_input_downcast=True)

    test_bounds = np.array([[0, 3], [3, 5], [5, 7]])
    test_X = np.random.randn(10, 20)
    test_Y = np.array([[0], [1], [0]])
    train_function(test_bounds, test_X, test_Y)
def adversarial_training(model, inputs, labels, epsilon):
    logits = model(inputs)
    fast_grad_perturbation = fast_gradient_perturbation(inputs, logits,
                                                        labels, epsilon)
    logits_adversarial = model(inputs + fast_grad_perturbation)
    loss = categorical_crossentropy(logits_adversarial, labels)
    return loss
def _get_train_fn(self):
    output_probs = get_output(self._net['dist_nolast'])
    mask = get_output(self._net['input_y_mask'])[:, 1:].flatten()
    nonpad_ids = mask.nonzero()
    target_ids = get_output(self._net['target'])

    loss_per_object = categorical_crossentropy(predictions=output_probs,
                                               targets=target_ids)
    loss = loss_per_object[nonpad_ids].mean()

    all_params = get_all_params(self._net['dist'], trainable=True)

    _logger.info('Computing train updates...')
    updates = lasagne.updates.adadelta(loss_or_grads=loss,
                                       params=all_params,
                                       learning_rate=self._learning_rate)

    _logger.info('Compiling train function...')
    train_fn = theano.function(
        inputs=[
            self._net['input_x'].input_var,
            self._net['input_y'].input_var,
            self._net['input_condition_id'].input_var
        ],
        givens={
            self._net['hid_states_decoder'].input_var:
                T.zeros((self._batch_size, self._decoder_depth,
                         self._hidden_layer_dim)),
            self._net['thought_vector'].input_var:
                self._default_thoughts_vector,
            self._net['switch_enc_to_tv']:
                np.cast[np.int32](False)  # doesn't compile without explicit casting here
        },
        outputs=loss,
        updates=updates)
    return train_fn
def compile_train_predict(self, stochastic_train, stochastic_predict):
    # symbolic functions to compute the marginal posterior GP
    input_vars = self.post_gp.data_variables
    gp_hyperparams = self.post_gp.params
    self.gp_hyperparams = gp_hyperparams

    mu = self.post_gp.mean()
    mu = mu.dimshuffle('x', 0)  # make a row out of a 1d vector (N to 1xN)

    self.train_network = self.extend_network(mu, stochastic_train)
    train_predict = lasagne.layers.get_output(self.train_network)
    # compute the expected prediction
    # if stochastic_train and self.n_samples > 1:
    #     train_predict = train_predict.mean(axis=0, keepdims=True)

    label = T.ivector('label')
    # for expected loss
    if stochastic_train:
        label_rep = label.repeat(self.n_samples)
    else:
        label_rep = label
    loss = categorical_crossentropy(train_predict, label_rep).mean()
    # for expected prediction
    # loss = categorical_crossentropy(train_predict, label).mean()

    if self.regularize_weight > 0:
        penalty = (self.regularize_weight *
                   regularize_network_params(self.train_network, l2))
        loss += penalty

    params = lasagne.layers.get_all_params(self.train_network, trainable=True)
    update_params = params
    if self.update_gp:
        update_params += gp_hyperparams
    grad_loss = theano.grad(loss, update_params, consider_constant=input_vars)
    updates = self.optimizer(grad_loss, update_params, **self.optimizer_kwargs)
    self.train_fn = theano.function(input_vars + [label], loss,
                                    updates=updates)

    if stochastic_train == stochastic_predict:
        self.test_network = self.train_network
        self.copy_params = False
    else:
        self.test_network = self.extend_network(mu, stochastic_predict)
        self.copy_params = True
    # deterministic=True disables dropout at prediction time, if it is used
    test_predict = lasagne.layers.get_output(self.test_network,
                                             deterministic=True)
    if stochastic_predict and self.n_samples > 1:
        test_predict = test_predict.mean(axis=0, keepdims=True)
    self.predict_fn = theano.function(input_vars, test_predict)
def train_setup():
    x = T.tensor3('input')
    y = T.lvector('output')

    network = cnn(x, config.input_length, config.output_length)
    print 'Number of Parameters {0}'.format(count_params(network))

    if config.init_model is not None:
        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        # `decoding` looked like a leftover name here; load the saved values
        # into the network built above
        set_all_param_values(network, param_values)

    # training tasks in sequence
    prediction = get_output(network)
    ent = categorical_crossentropy(prediction, y)
    ent = ent.mean()
    l1_norm = config.l1_weight * regularize_network_params(network, l1)
    l2_norm = config.l2_weight * regularize_network_params(network, l2)
    total_error = ent + l1_norm + l2_norm

    params = get_all_params(network, trainable=True)
    updates = adadelta(total_error, params,
                       config.learning_rate, config.rho, config.eps)

    train_fn = function([x, y], [ent, l1_norm, l2_norm, prediction],
                        updates=updates, allow_input_downcast=True)

    val_prediction = get_output(network, deterministic=True)
    val_ent = categorical_crossentropy(val_prediction, y)
    val_ent = val_ent.mean()

    val_fn = function([x, y], [val_ent, val_prediction],
                      allow_input_downcast=True)

    return network, train_fn, val_fn
def set_network_trainer(input_data, input_mask, target_data, target_mask,
                        network, updater, learning_rate,
                        grad_max_norm=10.,
                        l2_lambda=1e-5,
                        load_updater_params=None):
    # get network output data
    predict_data = get_output(network, deterministic=False)
    predict_idx = T.argmax(predict_data, axis=-1)

    # get prediction cost
    train_predict_cost = categorical_crossentropy(
        predictions=T.reshape(predict_data, (-1, predict_data.shape[-1])) + eps,
        targets=T.flatten(target_data, 1))
    train_predict_cost = train_predict_cost * T.flatten(target_mask, 1)
    train_predict_cost = train_predict_cost.sum() / target_mask.sum()

    # get regularizer cost
    train_regularizer_cost = regularize_network_params(network, penalty=l2)

    # get network parameters
    network_params = get_all_params(network, trainable=True)

    # get network gradients with clipping; gradients are taken on the
    # regularized cost (a second, unregularized theano.grad call here would
    # have silently overridden it)
    network_grads = theano.grad(
        cost=train_predict_cost + train_regularizer_cost * l2_lambda,
        wrt=network_params)
    # network_grads = theano.grad(cost=train_predict_cost, wrt=network_params)
    network_grads, network_grads_norm = total_norm_constraint(
        tensor_vars=network_grads,
        max_norm=grad_max_norm,
        return_norm=True)

    # set updater
    train_lr = theano.shared(lasagne.utils.floatX(learning_rate))
    train_updates, trainer_params = updater(
        loss_or_grads=network_grads,
        params=network_params,
        learning_rate=train_lr,
        load_params_dict=load_updater_params)

    # get training (update) function
    training_fn = theano.function(
        inputs=[input_data, input_mask, target_data, target_mask],
        outputs=[predict_data, predict_idx, train_predict_cost,
                 train_regularizer_cost, network_grads_norm],
        updates=train_updates,
        allow_input_downcast=True)
    # return the compiled function together with the updater state
    return training_fn, trainer_params
def compute_cost(self, deterministic=False):
    output = get_output(self.net, deterministic=deterministic)
    cost = categorical_crossentropy(output, self.tg).mean()
    cost.name = 'negll'
    accuracy = categorical_accuracy(output, self.tg).mean()
    accuracy.name = 'accuracy'
    return cost, accuracy
def create_update(nnet):
    """ create an SVM loss for the network given in the argument """

    inputs = T.tensor4('inputs')
    targets = T.ivector('targets')

    C = Cfg.C
    floatX = Cfg.floatX

    svm_layer = nnet.svm_layer
    trainable_params = lasagne.layers.get_all_params(svm_layer, trainable=True)

    prediction = lasagne.layers.get_output(svm_layer, inputs=inputs,
                                           deterministic=False)

    if Cfg.softmax_loss:
        print("Using softmax output")
        out = lasagne.nonlinearities.softmax(prediction)
        train_loss = l_objectives.categorical_crossentropy(out, targets).mean()
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets),
                           dtype='floatX')
    else:
        objective, train_acc = svm_layer.objective(prediction, targets)
        train_loss = T.cast(objective / targets.shape[0], 'floatX')
        train_acc = T.cast(train_acc * 1. / targets.shape[0], 'floatX')

    # NB: biases are included in the L2 regularization
    l2_penalty = 0
    for layer in nnet.trainable_layers:
        l2_penalty = l2_penalty + T.sum(layer.W ** 2) + T.sum(layer.b ** 2)

    train_obj = floatX(0.5) / C * l2_penalty + train_loss

    updates = get_updates(nnet, train_obj, trainable_params)

    nnet.backprop = theano.function([inputs, targets],
                                    [train_obj, train_acc],
                                    updates=updates)
    nnet.hinge_loss = theano.function([inputs, targets],
                                      [train_loss, train_acc])

    prediction = lasagne.layers.get_output(svm_layer, inputs=inputs,
                                           deterministic=True)
    objective, test_acc = svm_layer.objective(prediction, targets)
    test_loss = T.cast(objective / targets.shape[0], 'floatX')
    test_acc = T.cast(test_acc * 1. / targets.shape[0], 'floatX')
    test_obj = floatX(0.5) / C * l2_penalty + test_loss

    nnet.forward = theano.function([inputs, targets], [test_obj, test_acc])
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 28 * 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32 * 32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    penalty = theano.shared(np.array(0.))

    for (k, num) in enumerate(args.MLPlayer):
        # the last layer should use softmax
        if k == len(args.MLPlayer) - 1:
            # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num,
                                            nonlinearity=nonlinearities.softmax))
        else:
            # layers.append(ll.DenseLayer(layers[-1], num))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))

        if layers[-1].W is not None:
            self.params_theta += [layers[-1].W, layers[-1].b]
            self.params_weight += [layers[-1].W]

            # define a new regularization term for the layer
            if args.regL2 is True:
                # Michael: use 10**regularization constants
                tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
                penalty += T.sum(tempL2)
                self.params_lambda += [layers[-1].L2]
            if args.regL1 is True:
                # Michael: use 10**regularization constants
                # (T.abs_ is Theano's elementwise abs; T.abs does not exist)
                tempL1 = layers[-1].L1 * T.abs_(layers[-1].W)
                penalty += T.sum(tempL1)
                self.params_lambda += [layers[-1].L1]

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    self.penalty = penalty
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print("loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty))
def build_model0(input_var, target_var, regularW=0, params_load=None):
    network = layers.InputLayer(shape=(None, 3, 256, 256), input_var=input_var)
    # size 256*256
    network = layers.Pool2DLayer(network, pool_size=(2, 2), stride=2, pad=0,
                                 mode='average_inc_pad')
    # size 128*128
    network = layers.Pool2DLayer(network, pool_size=(2, 2), stride=2, pad=0,
                                 mode='average_inc_pad')
    # size 64*64
    network = layers.Conv2DLayer(network, num_filters=32, filter_size=(5, 5),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = layers.DropoutLayer(network, p=0.15)
    # size 32*32
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = layers.DropoutLayer(network, p=0.2)
    # size 16*16
    network = layers.Conv2DLayer(network, num_filters=128, filter_size=(5, 5),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = layers.DropoutLayer(network, p=0.3)
    # size 8*8
    network = layers.Conv2DLayer(network, num_filters=256, filter_size=(5, 5),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = layers.DropoutLayer(network, p=0.4)
    # size 4*4
    network = layers.GlobalPoolLayer(network)
    network = layers.DenseLayer(network, num_units=1000,
                                nonlinearity=nonLinear.leaky_rectify,
                                W=init.GlorotUniform(gain='relu'))
    network = layers.DenseLayer(network, num_units=2,
                                nonlinearity=nonLinear.softmax)

    prediction = layers.get_output(network)
    loss = objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = layers.get_all_params(network, trainable=True)
    if params_load is not None:
        [p.set_value(pval) for (p, pval) in zip(params, params_load)]

    return network, loss, params
def compute_cost(rnn_outputs, forward_probabilities, backward_pointers,
                 x_end, y_end, label):
    def backward_step(backlinks, position):
        new_position = backlinks[position]
        return new_position, position

    initial_state = (T.argmax(forward_probabilities[x_end - 1, y_end - 2:y_end])
                     + y_end - 2)
    results, _ = theano.scan(fn=backward_step,
                             sequences=backward_pointers[0:x_end, :],
                             outputs_info=[initial_state, None],
                             go_backwards=True)
    alignment = label[results[1][::-1]]
    return aggregate(categorical_crossentropy(rnn_outputs[0:x_end], alignment),
                     mode='sum')
def tied_losses(preds, n_sample_preds, n_classes, n_pairs):
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    _srng = RandomStreams(get_rng().randint(1, 2147462579))
    rand_inds = _srng.choice([n_pairs * 2], n_sample_preds, replace=False)
    part_1 = preds_per_trial_row[:, rand_inds[:n_pairs]]
    part_2 = preds_per_trial_row[:, rand_inds[n_pairs:]]
    # Have to ensure the first values are larger than zero
    # for numerical stability :/
    eps = 1e-4
    part_1 = T.maximum(part_1, eps)
    loss = categorical_crossentropy(part_1, part_2)
    return loss
def create_test_function(self):
    """ Create Test Function """
    test_prediction = lasagne.layers.get_output(self.network,
                                                deterministic=True)
    test_loss = categorical_crossentropy(test_prediction,
                                         self.target_var).mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), self.target_var),
                      dtype=theano.config.floatX)
    self.test = theano.function([self.input_var, self.target_var],
                                [test_loss, test_acc])
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))

    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    layers.append(ll.DropoutLayer(layers[-1]))  # Michael: add dropout
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.DropoutLayer(layers[-1]))  # Michael
    # softmax
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10,
                                    nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])
    # no dropout on the output layer

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print("loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty))
    # Michael: wide resnet: https://gist.github.com/FlorianMuellerklein/3d9ba175038a3f2e7de3794fa303f1ee
    # https://github.com/FlorianMuellerklein/Identity-Mapping-ResNet-Lasagne/blob/master/models.py
def create_spotlight_fn(final_layer, blur_axes, free_axes, weight_axes,
                        trials_shape):
    ones_shape = [trials_shape[i_ax] if i_ax in blur_axes + free_axes else 1
                  for i_ax in xrange(len(trials_shape))]
    means_stds_shape = [trials_shape[i_ax] if i_ax in free_axes else 1
                        for i_ax in xrange(len(trials_shape))]
    means_stds_shape = [len(blur_axes)] + means_stds_shape
    # to add: mixture of gaussians
    full_mask = T.ones(ones_shape, dtype=np.float32)

    broadcast_pattern = [True if ax not in free_axes else False
                         for ax in xrange(len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern
    means = theano.shared((np.ones(means_stds_shape) * 0.5).astype(np.float32),
                          broadcastable=broadcast_pattern)
    stds = theano.shared((np.ones(means_stds_shape) * 1).astype(np.float32),
                         broadcastable=broadcast_pattern)

    for i_blur_axis, axis in enumerate(blur_axes):
        ax_mask = T.constant(np.linspace(0, 1, trials_shape[axis],
                                         dtype=np.float32))
        dimshuffle_pattern = [0 if ax == axis else 'x'
                              for ax in xrange(len(trials_shape))]
        ax_mask = ax_mask.dimshuffle(*dimshuffle_pattern)
        # todo: maybe have to fix this here?
        ax_gaussian = T.exp(-T.square((ax_mask - means[i_blur_axis]) /
                                      stds[i_blur_axis]) * 0.5)
        full_mask = full_mask * ax_gaussian

    weights_shape = [trials_shape[i_ax] if i_ax in weight_axes else 1
                     for i_ax in xrange(1, len(trials_shape))]
    weights_shape = [trials_shape[0]] + weights_shape
    broadcast_pattern = [True if ax not in weight_axes else False
                         for ax in xrange(1, len(trials_shape))]
    broadcast_pattern = [False] + broadcast_pattern
    weights = theano.shared(np.ones(weights_shape).astype(np.float32),
                            broadcastable=broadcast_pattern)
    full_mask = full_mask * (T.maximum(weights, 0) /
                             T.mean(T.maximum(weights, 0), axis=0,
                                    keepdims=True))

    trials_var = T.ftensor4()
    scaled_trials = trials_var * full_mask
    targets = T.ivector()
    outputs = lasagne.layers.get_output(final_layer, inputs=scaled_trials,
                                        input_var=scaled_trials)
    loss = categorical_crossentropy(outputs, targets).sum()
    loss += T.mean(T.sqr(stds)) * 0.1
    loss -= T.mean(T.abs_(weights - T.mean(weights, axis=0,
                                           keepdims=True))) * 10

    adam_updates = adam(loss, [means, stds, weights], learning_rate=0.01)
    adam_grad_fn = theano.function(
        [trials_var, targets],
        [loss, outputs, scaled_trials, full_mask, weights],
        updates=adam_updates)
    return adam_grad_fn
def test_categorical_crossentropy():
    # symbolic version
    from lasagne.objectives import categorical_crossentropy
    p, t = theano.tensor.matrices('pt')
    c = categorical_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    predictions /= predictions.sum(axis=1, keepdims=True)
    targets = np.random.rand(10, 20).astype(floatX)
    targets /= targets.sum(axis=1, keepdims=True)
    crossent = -(targets * np.log(predictions)).sum(axis=-1)
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
def test_categorical_crossentropy_onehot():
    # symbolic version
    from lasagne.objectives import categorical_crossentropy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')  # correct class per item
    c = categorical_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    predictions /= predictions.sum(axis=1, keepdims=True)
    targets = np.random.randint(20, size=10).astype(np.uint8)
    crossent = -np.log(predictions[np.arange(10), targets])
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
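# The two tests above exercise the two target encodings that
# categorical_crossentropy accepts: a matrix of per-class probabilities and a
# vector of integer class ids. A short sketch (with assumed shapes) showing
# they agree when the matrix is one-hot:
import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import categorical_crossentropy

p = T.matrix('p')
t_int = T.ivector('t_int')
t_hot = T.matrix('t_hot')

preds = np.random.rand(4, 3).astype(theano.config.floatX)
preds /= preds.sum(axis=1, keepdims=True)
ids = np.array([0, 2, 1, 2], dtype=np.int32)
hot = np.eye(3, dtype=theano.config.floatX)[ids]  # one-hot rows for the same ids

ce_int = categorical_crossentropy(p, t_int).eval({p: preds, t_int: ids})
ce_hot = categorical_crossentropy(p, t_hot).eval({p: preds, t_hot: hot})
assert np.allclose(ce_int, ce_hot)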
def build_loss(targets, prediction, optimization):
    """ setup loss function with weight decay regularization """

    if optimization["objective"] == 'categorical':
        loss = objectives.categorical_crossentropy(prediction, targets)

    elif optimization["objective"] == 'binary':
        prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
        loss = -(targets * T.log(prediction) +
                 (1.0 - targets) * T.log(1.0 - prediction))
        # loss = objectives.binary_crossentropy(prediction[:, loss_index], targets[:, loss_index])

    elif optimization["objective"] == 'squared_error':
        loss = objectives.squared_error(prediction, targets)

    loss = objectives.aggregate(loss, mode='mean')

    return loss
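# Sanity sketch for the 'binary' branch above (assumed shapes): after the same
# clipping, the hand-written expression matches
# lasagne.objectives.binary_crossentropy elementwise.
import numpy as np
import theano.tensor as T
from lasagne.objectives import binary_crossentropy

pred = T.matrix('pred')
targ = T.matrix('targ')
clipped = T.clip(pred, 1e-7, 1 - 1e-7)
manual = -(targ * T.log(clipped) + (1.0 - targ) * T.log(1.0 - clipped))
library = binary_crossentropy(clipped, targ)

x = np.random.rand(5, 3).astype('float32')
y = (np.random.rand(5, 3) > 0.5).astype('float32')
assert np.allclose(manual.eval({pred: x, targ: y}),
                   library.eval({pred: x, targ: y}), atol=1e-5)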
def get_cost_updates(self, corrupted_input, learning_rate):
    """ This function computes the cost and the updates for one training
    step of the dA """
    tilde_x = corrupted_input
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    # z = corrupted_input
    # note: we sum over the size of a datapoint; if we are using minibatches,
    # L will be a vector, with one entry per example in the minibatch
    # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    L = categorical_crossentropy(z, self.x)
    # L = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # cost = L.mean()
    # temp = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # L = -T.sum(temp)
    # note: L is now a vector, where each element is the cross-entropy cost
    # of the reconstruction of the corresponding example of the minibatch;
    # we need to compute the average of all these to get the cost of the
    # minibatch
    cost = T.mean(L)
    # print cost
    reg = 1e-8 * lasagne.regularization.l2(self.params[0])
    cost = cost + reg
    # compute the gradients of the cost of the `dA` with respect to its
    # parameters (unused here: the updates below are built by sgd + momentum)
    gparams = T.grad(cost, self.params, add_names=True)
    updates_sgd = sgd(cost, self.params, learning_rate)
    updates_dic = apply_momentum(updates_sgd, self.params, momentum=0.9)
    updates = updates_dic.items()  # generate the list of updates
    # updates = [
    #     (param, param - learning_rate * gparam)
    #     for param, gparam in zip(self.params, gparams)
    # ]
    return (cost, updates)
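# The sgd + apply_momentum composition above is the expansion of
# lasagne.updates.momentum; the following one-liner (with the same assumed
# cost and params) would produce an equivalent update dictionary:
# updates_dic = lasagne.updates.momentum(cost, self.params, learning_rate,
#                                        momentum=0.9)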
def get_functions():
    input_layer = layers.InputLayer(shape=(BATCH_SIZE, INPUT_LENGTH))
    print "input_layer size: " + str(input_layer.shape[0]) + "," + str(input_layer.shape[1])
    layer = input_layer
    for layer_num in range(len(NUM_UNITS_HIDDEN_LAYER)):
        print "layer_num-" + str(layer_num)
        layer = layers.DenseLayer(layer,
                                  num_units=NUM_UNITS_HIDDEN_LAYER[layer_num],
                                  W=lasagne.init.Normal(0.01),
                                  nonlinearity=nonlinearities.tanh)
    output_layer = layers.DenseLayer(layer, num_units=OUTPUT_SIZE,
                                     nonlinearity=nonlinearities.softmax)

    network_output = get_output(output_layer)
    expected_output = T.ivector()

    loss_train = aggregate(categorical_crossentropy(network_output,
                                                    expected_output),
                           mode='mean')
    all_weights = layers.get_all_params(output_layer)
    update_rule = lasagne.updates.nesterov_momentum(loss_train, all_weights,
                                                    learning_rate=LEARNING_RATE)

    print "input_layer_end size: " + str(input_layer.shape[0]) + "," + str(input_layer.shape[1])

    train_function = theano.function(inputs=[input_layer.input_var,
                                             expected_output],
                                     outputs=loss_train,
                                     updates=update_rule,
                                     allow_input_downcast=True)

    prediction = T.argmax(network_output, axis=1)
    accuracy = T.mean(T.eq(prediction, expected_output),
                      dtype=theano.config.floatX)  # @UndefinedVariable
    test_function = theano.function(inputs=[input_layer.input_var,
                                            expected_output],
                                    outputs=[loss_train, accuracy, prediction],
                                    allow_input_downcast=True)

    output_function = theano.function([input_layer.input_var],
                                      get_output(output_layer),
                                      allow_input_downcast=True)

    return train_function, test_function, output_function
def compile_val(self):
    if self.verbose:
        print('compiling validation function...')

    import theano
    from lasagne.layers import get_output
    from lasagne.objectives import categorical_accuracy, categorical_crossentropy

    output_val = lasagne.layers.get_output(self.output_layer, self.x,
                                           deterministic=True)

    cost = categorical_crossentropy(output_val, self.y).mean()
    error = 1 - categorical_accuracy(output_val, self.y, top_k=1).mean()
    error_top_5 = 1 - categorical_accuracy(output_val, self.y, top_k=5).mean()

    self.val_fn = theano.function([self.subb_ind],
                                  [cost, error, error_top_5],
                                  updates=[],
                                  givens=[(self.x, self.shared_x_slice),
                                          (self.y, self.shared_y_slice)])
def grad_supervised(l_ram, labels):
    """
    return:
        loss = 1 / M * sum_i_{1..M} cross_entropy_loss(groundtruth, a_T)
        grads = theano.grad(loss, params)
    inputs:
        labels = (n_batch,) [theano tensor variable]
    """
    loc_mean_t, loc_t, h_t, prob, pred = lasagne.layers.get_output(l_ram)
    params = lasagne.layers.get_all_params(l_ram, trainable=True)

    # loss estimation (cross-entropy loss)
    loss = categorical_crossentropy(prob, labels)
    loss = aggregate(loss, mode='mean')

    # gradient estimation
    grads = theano.grad(loss, params, disconnected_inputs='ignore')

    return loss, grads
def main():
    print "Building network ..."
    l_out = build_network(N_BATCH)
    read_model_data(l_out, 'lstm_iter_60000')
    print "Done building network"

    target_values = T.tensor3('target_output')
    input_values = T.tensor3('input')

    network_output = lasagne.layers.get_output(l_out, input_values)
    # categorical crossentropy loss because it's the proper way
    cost = T.mean(categorical_crossentropy(
        T.reshape(network_output, (N_BATCH * MAX_LENGTH, N_FEAT_DIM)),
        T.reshape(target_values, (N_BATCH * MAX_LENGTH, N_FEAT_DIM))))

    all_params = lasagne.layers.get_all_params(l_out)
    print "Computing updates..."
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    print "Compiling functions..."
    train = theano.function([input_values, target_values], cost,
                            updates=updates)
    compute_cost = theano.function([input_values, target_values], cost)

    train_f = open('chatlog.txt', 'r')
    f_data = train_f.read()

    print "Training ..."
    try:
        for n in xrange(N_ITERATIONS):
            X, Y = gen_data(f_data, n, N_BATCH, MAX_LENGTH)
            train(X, Y)
            if not n % CHECK_FREQUENCY:
                cost_val = compute_cost(X, Y)
                print "Iteration {} training cost = {}".format(n, cost_val)
            if n % CHECKPOINT_FREQUENCY == 0 and n > 0:
                print "Saving checkpoint..."
                fname = "lstm_iter_%d" % (n)
                write_model_data(l_out, fname)
    except KeyboardInterrupt:
        pass
params = layers.get_all_params(unsupervised_graph, trainable=True) + \
         layers.get_all_params(supervised_graph, trainable=True)
# params = layers.get_all_params(supervised_graph)[-2:]
params = utils.unique(params)

# get regularizable params
regularization_params = layers.get_all_params(unsupervised_graph, regularizable=True) + \
                        layers.get_all_params(supervised_graph, regularizable=True)
regularization_params = utils.unique(regularization_params)

# creating loss functions
# the train loss has to take into account whether an image is labeled or not
if run_parameters.unsupervised_cost_fun == 'squared_error':
    loss1 = objectives.squared_error(reconstruction, input_var)
elif run_parameters.unsupervised_cost_fun == 'categorical_crossentropy':
    loss1 = objectives.categorical_crossentropy(reconstruction, input_var)
if supervised_cost_fun == 'squared_error':
    loss2 = objectives.squared_error(prediction, target_var) * repeat_col(labeled_var, 10)
elif supervised_cost_fun == 'categorical_crossentropy':
    loss2 = objectives.categorical_crossentropy(prediction, target_var) * labeled_var.T

l2_penalties = regularization.apply_penalty(regularization_params,
                                            regularization.l2)

sparse_layers = get_all_sparse_layers(unsupervised_graph)
sparse_layers_output = layers.get_output(sparse_layers, deterministic=True)
if run_parameters.sparse_regularizer_type == 0:
    sparse_regularizer = reduce(
        lambda x, y: x + T.clip(
            (T.mean(abs(y)) - run_parameters.sparse_regularize_factor) * y.size,
            0, float('inf')),
        sparse_layers_output, 0)
elif run_parameters.sparse_regularizer_type == 1:
    sparse_regularizer = reduce(
        lambda x, y: x + T.clip(
            T.mean(abs(y), axis=1) - run_parameters.sparse_regularize_factor,
            0, float('inf')).sum() * y.shape[1],
def categorical_test(build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
                     get_eventids_hits_and_targets_fn, get_list_of_hits_fn):
    """
    Run tests on the reserved test sample ("training" examples with true
    values to check that were not used for learning or validation); read the
    data files in chunks into memory.

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice a
    tuple of (eventids, [inputs], targets), where `[inputs]` might hold a
    single view or all three, etc.

    `get_list_of_hits_fn` needs to extract from a data slice a list of
    `[inputs]` that might hold a single view or all three, etc.
    """
    logger.info("Loading data for testing...")
    tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data'])

    # prepare Theano variables for inputs and targets
    inputlist = networkstr['input_list']
    target_var = T.ivector('targets')

    # build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'],
                           imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # create a loss expression for testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * networkstr['l2_penalty_scale']
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()

    # also create an expression for the classification accuracy
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # look at the classifications
    test_prediction_values = T.argmax(test_prediction, axis=1)

    # compute the actual predictions - also instructive is to look at
    # `test_prediction` as an output (array of softmax probabilities)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    # compile a function computing the validation loss and accuracy
    inputlist.append(target_var)
    val_fn = theano.function(inputlist, [test_loss, test_acc],
                             allow_input_downcast=True)

    logger.info("Starting testing...")
    # compute and print the test error and...
    test_err = 0
    test_acc = 0
    test_batches = 0

    # look at some concrete predictions
    num_poss_segs = networkstr['noutputs']
    pred_target = np.zeros(num_poss_segs, dtype='float32')
    true_target = np.zeros(num_poss_segs, dtype='float32')
    targs_mat = np.zeros(num_poss_segs * num_poss_segs,
                         dtype='float32').reshape(num_poss_segs,
                                                  num_poss_segs)

    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    verbose_evt_print_freq = 1
    evtcounter = 0
    for i, data_file in enumerate(runopts['data_file_list']):
        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set, 1,
                                                     shuffle=False)
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            logger.debug("  dset sources: {}".format(
                test_set.provides_sources))

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, inputlist, targets = \
                    get_eventids_hits_and_targets_fn(data)
                inputlist.append(targets)
                err, acc = val_fn(*inputlist)
                test_err += err
                test_acc += acc
                test_batches += 1
                hits_list = get_list_of_hits_fn(data)
                probs, pred = pred_fn(*hits_list)
                pred_targ = zip(pred, targets)
                evtcounter += 1
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info(
                            "{}/{} - {}: (prediction, true target): {}, {}".format(
                                evtcounter, used_data_size, eventids[0],
                                pred_targ, probs))
                for p, t in pred_targ:
                    targs_mat[t][p] += 1
                    true_target[t] += 1
                    if p == t:
                        pred_target[p] += 1
            t1 = time.time()
            logger.info("  -Iterating over the slice took {:.3f}s.".format(
                t1 - t0))

            del test_set
            del test_dstream

    acc_target = 100.0 * pred_target / true_target.astype('float32')
    perf_file = 'perfmat' + tstamp + '.npy'
    np.save(perf_file, targs_mat)
    logger.info(
        "\nFinal results:"
        "\n  test loss:\t\t\t{:.6f}"
        "\n  test accuracy:\t\t{:.2f} %".format(
            test_err / test_batches, test_acc / test_batches * 100))
    for i, v in enumerate(acc_target):
        logger.info("   target {} accuracy:\t\t\t{:.3f} %".format(
            i, acc_target[i]))
def categorical_learn_and_validate(build_cnn_fn, hyperpars, imgdat, runopts,
                                   networkstr,
                                   get_list_of_hits_and_targets_fn):
    """
    Run learning and validation for triamese networks using AdaGrad for
    learning rate evolution, with Nesterov momentum; read the data files in
    chunks into memory.

    `get_list_of_hits_and_targets_fn` should extract a list
    `[inputs, targets]` from a data slice, where `inputs` could be one item
    or 3 depending on the views studied (so the total length is 2 or 4, most
    likely).
    """
    logger.info("Loading data...")
    train_sizes, valid_sizes, _ = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])

    # prepare Theano variables for inputs and targets
    target_var = T.ivector('targets')
    inputlist = networkstr['input_list']

    # build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'],
                           imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    logger.info(network_repr.get_network_str(
        lasagne.layers.get_all_layers(network),
        get_network=False, incomings=True, outgoings=True))
    if runopts['start_with_saved_params'] and \
            os.path.isfile(runopts['save_model_file']):
        logger.info(" Loading parameters file: %s" %
                    runopts['save_model_file'])
        with np.load(runopts['save_model_file']) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    else:
        # dump the current network weights to file in case we want to study
        # initialization trends, etc.
        np.savez('./initial_parameters.npz',
                 *lasagne.layers.get_all_param_values(network))

    # create a loss expression for training
    prediction = lasagne.layers.get_output(network)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * networkstr['l2_penalty_scale']
    loss = categorical_crossentropy(prediction, target_var) + l2_penalty
    loss = loss.mean()

    # create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    logger.info(
        """
////
Use the AdaGrad update schedule for the learning rate, see Duchi, Hazan, and
Singer (2011) "Adaptive subgradient methods for online learning and
stochastic optimization." JMLR, 12:2121-2159
////
""")
    updates_adagrad = lasagne.updates.adagrad(
        loss, params, learning_rate=hyperpars['learning_rate'],
        epsilon=1e-06)
    logger.info(
        """
////
Apply Nesterov momentum using Lisa Lab's modifications.
////
""")
    updates = lasagne.updates.apply_nesterov_momentum(
        updates_adagrad, params, momentum=hyperpars['momentum'])

    # Create a loss expression for validation/testing. Note we do a
    # deterministic forward pass through the network, disabling dropout.
test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = categorical_crossentropy(test_prediction, target_var) + \ l2_penalty test_loss = test_loss.mean() # Also create an expression for the classification accuracy: test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: inputlist.append(target_var) train_fn = theano.function(inputlist, loss, updates=updates, allow_input_downcast=True) # Compile a second function computing the validation loss and accuracy: val_fn = theano.function(inputlist, [test_loss, test_acc], allow_input_downcast=True) logger.info("Starting training...") # # TODO: early stopping logic goes here... # train_slices = [] for tsize in train_sizes: train_slices.append(slices_maker(tsize, slice_size=50000)) valid_slices = [] for vsize in valid_sizes: valid_slices.append(slices_maker(vsize, slice_size=50000)) train_set = None valid_set = None epoch = 0 for epoch in range(hyperpars['num_epochs']): start_time = time.time() for slicelist in train_slices: shuffle(slicelist) logger.info("Train slices for epoch %d: %s" % (epoch, train_slices)) train_err = 0 train_batches = 0 for i, data_file in enumerate(runopts['data_file_list']): # In each epoch, we do a full pass over the training data: for tslice in train_slices[i]: t0 = time.time() train_set = load_datasubset(data_file, 'train', tslice) _, train_dstream = make_scheme_and_stream( train_set, hyperpars['batchsize'] ) t1 = time.time() logger.info( " Loading slice {} from {} took {:.3f}s.".format( tslice, data_file, t1 - t0) ) logger.debug( " dset sources: {}".format(train_set.provides_sources) ) t0 = time.time() for data in train_dstream.get_epoch_iterator(): inputs = get_list_of_hits_and_targets_fn(data) train_err += train_fn(*inputs) train_batches += 1 t1 = time.time() logger.info( " -Iterating over the slice took {:.3f}s.".format(t1 - t0) ) del train_set # hint to garbage collector del train_dstream # hint to garbage collector # Dump the current network weights to file at end of slice np.savez(runopts['save_model_file'], *lasagne.layers.get_all_param_values(network)) if runopts['do_validation_pass']: # And a full pass over the validation data t0 = time.time() val_err = 0 val_acc = 0 val_batches = 0 for i, data_file in enumerate(runopts['data_file_list']): for vslice in valid_slices[i]: valid_set = load_datasubset(data_file, 'valid', vslice) _, valid_dstream = make_scheme_and_stream( valid_set, hyperpars['batchsize'] ) for data in valid_dstream.get_epoch_iterator(): inputs = get_list_of_hits_and_targets_fn(data) err, acc = val_fn(*inputs) val_err += err val_acc += acc val_batches += 1 del valid_set del valid_dstream t1 = time.time() logger.info(" The validation pass took {:.3f}s.".format(t1 - t0)) # Print the results for this epoch: logger.info( "\nEpoch {} of {} took {:.3f}s" "\n training loss:\t\t{:.6f}".format( epoch + 1, hyperpars['num_epochs'], time.time() - start_time, train_err / train_batches ) ) if runopts['do_validation_pass']: logger.info( "\n validation loss:\t\t{:.6f}" "\n validation accuracy:\t\t{:.2f} %".format( val_err / val_batches, val_acc / val_batches * 100 ) ) logger.info("---") logger.info("Finished {} epochs.".format(epoch + 1))
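# A toy, self-contained sketch of the update composition used above:
# AdaGrad supplies the per-parameter learning-rate schedule, and
# lasagne.updates.apply_nesterov_momentum layers Nesterov momentum on top
# of the resulting updates dictionary. The logistic-regression graph here
# is illustrative only.
import numpy as np
import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(np.zeros((5, 3), dtype=theano.config.floatX))
probs = T.nnet.softmax(T.dot(x, W))
toy_loss = lasagne.objectives.categorical_crossentropy(probs, y).mean()

toy_updates = lasagne.updates.adagrad(
    toy_loss, [W], learning_rate=0.01, epsilon=1e-06)
toy_updates = lasagne.updates.apply_nesterov_momentum(
    toy_updates, [W], momentum=0.9)
train_step = theano.function([x, y], toy_loss, updates=toy_updates,
                             allow_input_downcast=True)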
def build_network():
    from lasagne.layers import (InputLayer, LSTMLayer, ConcatLayer,
                                ReshapeLayer, DenseLayer, get_output,
                                get_all_params)
    from lasagne.objectives import categorical_crossentropy

    print("Building network ...")

    # inputs ###############################################
    l_in_x = InputLayer(shape=(BATCH_SIZE, None, vocab_size))
    l_in_y = InputLayer(shape=(BATCH_SIZE, None, vocab_size))

    # encoder ###############################################
    l_enc = LSTMLayer(
        l_in_x, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True)

    # decoder ###############################################
    l_repeated_enc = Repeat(l_enc, SEQ_LENGTH)
    l_conc = ConcatLayer([l_in_y, l_repeated_enc], axis=2)
    l_dec = LSTMLayer(
        l_conc, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)

    # output ###############################################
    l_dec_long = ReshapeLayer(l_dec, shape=(-1, N_HIDDEN))
    l_dist = DenseLayer(
        l_dec_long, num_units=vocab_size,
        nonlinearity=lasagne.nonlinearities.softmax)
    l_out = ReshapeLayer(l_dist, shape=(BATCH_SIZE, -1, vocab_size))
    # print(lasagne.layers.get_output_shape(l_out))

    # compilations ###############################################
    target_values = T.btensor3('target_output')
    network_output = get_output(l_out)
    cost = categorical_crossentropy(network_output, target_values).mean()
    all_params = get_all_params(l_out, trainable=True)

    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function(
        inputs=[l_in_x.input_var, l_in_y.input_var, target_values],
        outputs=cost, updates=updates, allow_input_downcast=True)
    # network_output depends on both l_in_x and l_in_y (the decoder
    # consumes l_in_y through l_conc), so l_in_y.input_var must be
    # supplied here too; omitting it raises MissingInputError at
    # compile time.
    compute_cost = theano.function(
        inputs=[l_in_x.input_var, l_in_y.input_var, target_values],
        outputs=cost, allow_input_downcast=True)
    predict = theano.function(
        inputs=[l_in_x.input_var, l_in_y.input_var],
        outputs=network_output, allow_input_downcast=True)

    return train, predict, compute_cost
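# `Repeat` is not a stock Lasagne layer. A minimal sketch of one
# plausible implementation, assuming it tiles the encoder's final state
# (batch, features) n times along a new time axis to feed the decoder:
import theano.tensor as T
from lasagne.layers import Layer

class Repeat(Layer):
    def __init__(self, incoming, n, **kwargs):
        super(Repeat, self).__init__(incoming, **kwargs)
        self.n = n

    def get_output_shape_for(self, input_shape):
        # (batch, features) -> (batch, n, features)
        return (input_shape[0], self.n, input_shape[1])

    def get_output_for(self, input, **kwargs):
        # Insert a broadcastable time axis, then tile along it.
        return T.tile(input.dimshuffle(0, 'x', 1), (1, self.n, 1))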
def multi_task_classifier(args, input_var, target_var, wordEmbeddings,
                          seqlen, num_feats, lambda_val=0.5 * 1e-4):
    print("Building multi-task model with 1D convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))

    # All eight task heads share `emb`/`reshape`; each gets its own
    # conv -> pool -> hidden -> softmax stack. The original spelled these
    # out eight times; a helper builds the identical stack without the
    # duplication. Note that get_all_params(network) still includes the
    # shared embedding, so every task's updates also move `emb`.
    def build_task(num_units, loss_fn, acc_fn):
        conv1d = DimshuffleLayer(
            Conv1DLayer(reshape, num_filters=num_filters,
                        filter_size=wordDim, stride=1,
                        nonlinearity=tanh, W=GlorotUniform()),
            (0, 2, 1))
        maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)
        hid = DenseLayer(maxpool, num_units=args.hiddenDim,
                         nonlinearity=sigmoid)
        network = DenseLayer(hid, num_units=num_units, nonlinearity=softmax)

        penalty = regularize_layer_params_weighted(
            {emb: lambda_val, conv1d: lambda_val,
             hid: lambda_val, network: lambda_val}, l2)
        loss = T.mean(loss_fn(get_output(network), target_var)) + penalty
        updates = adagrad(loss, get_all_params(network, trainable=True),
                          learning_rate=args.step)
        train_fn = theano.function([input_var, target_var], loss,
                                   updates=updates,
                                   allow_input_downcast=True)
        val_acc = T.mean(acc_fn(get_output(network, deterministic=True),
                                target_var))
        val_fn = theano.function([input_var, target_var], val_acc,
                                 allow_input_downcast=True)
        return train_fn, val_fn, network

    # NB: tasks 1 and 5 score a 2-unit softmax with the binary objectives;
    # target_var must be shaped to broadcast against the network output
    # for those two graphs to compile.
    train_fn_1, val_fn_1, network_1 = build_task(
        2, binary_crossentropy, binary_accuracy)
    train_fn_2, val_fn_2, network_2 = build_task(
        4, categorical_crossentropy, categorical_accuracy)
    train_fn_3, val_fn_3, network_3 = build_task(
        3, categorical_crossentropy, categorical_accuracy)
    train_fn_4, val_fn_4, network_4 = build_task(
        3, categorical_crossentropy, categorical_accuracy)
    train_fn_5, val_fn_5, network_5 = build_task(
        2, binary_crossentropy, binary_accuracy)
    train_fn_6, val_fn_6, network_6 = build_task(
        4, categorical_crossentropy, categorical_accuracy)
    train_fn_7, val_fn_7, network_7 = build_task(
        3, categorical_crossentropy, categorical_accuracy)
    train_fn_8, val_fn_8, network_8 = build_task(
        3, categorical_crossentropy, categorical_accuracy)

    return (train_fn_1, val_fn_1, network_1,
            train_fn_2, val_fn_2, network_2,
            train_fn_3, val_fn_3, network_3,
            train_fn_4, val_fn_4, network_4,
            train_fn_5, val_fn_5, network_5,
            train_fn_6, val_fn_6, network_6,
            train_fn_7, val_fn_7, network_7,
            train_fn_8, val_fn_8, network_8)
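# Hypothetical round-robin training driver for the eight task heads;
# `task_batches` and `args.epochs` are illustrative names, not from the
# original code. The returned tuple repeats (train_fn, val_fn, network)
# per task, so strided slices recover the function lists.
fns = multi_task_classifier(args, input_var, target_var, wordEmbeddings,
                            seqlen, num_feats)
train_fns, val_fns = fns[0::3], fns[1::3]
for epoch in range(args.epochs):
    for X, ys in task_batches():  # ys: one label vector per task
        for task_id, train_fn in enumerate(train_fns):
            train_fn(X, ys[task_id])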
def loss(x, t):
    # Clip predictions away from exact 0/1 so the log inside the
    # cross-entropy stays finite.
    return LO.aggregate(
        LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
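# Why the clip matters: categorical_crossentropy takes log(x), so an
# overconfident prediction containing an exact 0 yields an infinite loss
# and a useless gradient. A quick NumPy check (illustrative only):
import numpy as np

p = np.array([[1.0, 0.0]])  # overconfident prediction
eps = 1e-6
print(-np.log(p[0, 1]))                            # inf
print(-np.log(np.clip(p, eps, 1.0 - eps))[0, 1])   # ~13.8, finite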
input_data = bows_count
batch_size = 32
hidden_units = [256, 128]

input_var = T.fmatrix('inputs')
target_var = T.ivector('targets')

input_layer = InputLayer(shape=(batch_size, input_data.shape[1]),
                         name='input_layer', input_var=input_var)
hidden = [DenseLayer(input_layer, hidden_units[0])]
for ne in hidden_units[1:]:
    hidden.append(DenseLayer(hidden[-1], ne))
output_layer = DenseLayer(hidden[-1], len(unique_tags),
                          nonlinearity=softmax)

prediction = get_output(output_layer)
loss = categorical_crossentropy(prediction, target_var)  # + 0.0001 * regularize_network_params(hidden[0], l1)
loss = loss.mean()
params = lasagne.layers.get_all_params(output_layer, trainable=True)
updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
# updates = adam(loss, params, learning_rate=0.001)

train_fn = theano.function([input_var, target_var], [loss, prediction],
                           updates=updates)
test_fn = theano.function([input_var, target_var], [loss, prediction])

print_interval = 100
test_interval = 1000
test_size = 100

iter_idx = 0
epoch_idx = 0
stats_accum_train = dict(loss=0.0, acc=0.0, count=0.0)
while True: