def get_network(model):
    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network
    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction, targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer_both_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_1 = batch_norm(Conv2DLayer(layer_both_0, 64, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_2 = batch_norm(Conv2DLayer(layer_both_1, 64, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_3 = MaxPool2DLayer(layer_both_2, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_4 = DropoutLayer(layer_both_3, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_5 = batch_norm(Conv2DLayer(layer_both_4, 128, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_6 = batch_norm(Conv2DLayer(layer_both_5, 128, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_7 = MaxPool2DLayer(layer_both_6, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_8 = DropoutLayer(layer_both_7, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_9 = batch_norm(Conv2DLayer(layer_both_8, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_10 = batch_norm(Conv2DLayer(layer_both_9, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_11 = batch_norm(Conv2DLayer(layer_both_10, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_12 = MaxPool2DLayer(layer_both_11, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_13 = DropoutLayer(layer_both_12, p=0.25)

    # Flatten
    layer_flatten = FlattenLayer(layer_both_13)

    # Prediction
    layer_hidden = DenseLayer(layer_flatten, 500, nonlinearity=sigmoid)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction) / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True) / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # crps estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer_input = InputLayer(shape=(None, 12, 64, 64), input_var=input_var)  # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0 = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_1 = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_2 = batch_norm(Conv3DDNNLayer(incoming=layer_1, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_3 = MaxPool3DDNNLayer(layer_2, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_4 = DropoutLayer(layer_3, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_5 = batch_norm(Conv3DDNNLayer(incoming=layer_4, num_filters=32, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_6 = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=32, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_7 = MaxPool3DDNNLayer(layer_6, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_8 = DropoutLayer(layer_7, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    # (note: the original code reuses the layer_5..layer_8 names for this third block)
    layer_5 = batch_norm(Conv3DDNNLayer(incoming=layer_8, num_filters=64, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_6 = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=64, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_7 = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=64, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=leaky_rectify))
    layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_9 = DropoutLayer(layer_8, p=0.25)

    layer_flatten = FlattenLayer(layer_9)

    # Output Layer
    layer_hidden = DenseLayer(layer_flatten, 500, nonlinearity=linear)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction)  # / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)  # / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # crps estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
def build_instrument_model(self, n_vars, **kwargs):
    targets = TT.vector()
    instrument_vars = TT.matrix()

    instruments = layers.InputLayer((None, n_vars), instrument_vars)
    instruments = layers.DropoutLayer(instruments, p=0.2)

    dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
    dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

    for _ in xrange(kwargs['n_dense_layers'] - 1):
        dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

    self.instrument_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
    init_params = layers.get_all_param_values(self.instrument_output)

    prediction = layers.get_output(self.instrument_output, deterministic=False)
    test_prediction = layers.get_output(self.instrument_output, deterministic=True)

    # flexible here, endog variable can be categorical, continuous, etc.
    l2_cost = regularization.regularize_network_params(self.instrument_output, regularization.l2)
    loss = objectives.squared_error(prediction.flatten(), targets.flatten()).mean() + 1e-4 * l2_cost
    loss_total = objectives.squared_error(prediction.flatten(), targets.flatten()).mean()

    params = layers.get_all_params(self.instrument_output, trainable=True)
    param_updates = updates.adadelta(loss, params)

    self._instrument_train_fn = theano.function(
        [
            targets,
            instrument_vars,
        ],
        loss,
        updates=param_updates
    )

    self._instrument_loss_fn = theano.function(
        [
            targets,
            instrument_vars,
        ],
        loss_total
    )

    self._instrument_output_fn = theano.function([instrument_vars], test_prediction)

    return init_params
def prepare():
    X = T.fmatrix('X')
    y = T.ivector('y')

    assert not ("regression" in args and "logistic" in args)

    if "regression" in args:
        output_layer = squared_error_net_adaptive()
    else:
        output_layer = logistic()

    all_params = lasagne.layers.get_all_params(output_layer)

    if "regression" in args:
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = squared_error(prob_vector, y).mean()
        pred = T.maximum(0, T.minimum(T.round(prob_vector), args["num_classes"] - 1))
        accuracy = T.mean(T.eq(pred, y))
    else:
        a = args["a"]
        b = args["b"]
        loss_fn = get_hybrid_loss(a, b)
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = loss_fn(prob_vector, y).mean()
        pred = T.argmax(prob_vector, axis=1)
        accuracy = T.mean(T.eq(pred, y))

    return Container({
        "X": X,
        "y": y,
        "output_layer": output_layer,
        "all_params": all_params,
        "loss": loss,
        "pred": pred,
        "accuracy": accuracy,
        "prob_vector": prob_vector
    })
def build_validate_fn(self):
    prediction = get_output(self.network, deterministic=True)
    loss = squared_error(prediction, self.target_var)
    loss = loss.mean()

    self.val_fn = theano.function([self.input_var, self.target_var], loss)
def _create_nnet(input_dims, output_dims, learning_rate, num_hidden_units=15, batch_size=32,
                 max_train_epochs=1, hidden_nonlinearity=nonlinearities.rectify,
                 output_nonlinearity=None, update_method=updates.sgd):
    """
    A subclass may override this if a different sort of network is desired.
    """
    nnlayers = []
    nnlayers.append(layers.InputLayer(shape=(None, input_dims)))
    nnlayers.append(layers.DenseLayer(nnlayers[-1], num_hidden_units, nonlinearity=hidden_nonlinearity))
    nnlayers.append(layers.DenseLayer(nnlayers[-1], output_dims, nonlinearity=output_nonlinearity))

    prediction = layers.get_output(nnlayers[-1])

    input_var = nnlayers[0].input_var
    target = T.matrix(name="target", dtype=floatX)

    loss = objectives.squared_error(prediction, target).mean()

    params = layers.get_all_params(nnlayers[-1], trainable=True)

    updates = update_method(loss, params, learning_rate)

    fit = theano.function([input_var, target], loss, updates=updates)

    predict = theano.function([input_var], prediction)

    nnet = Mock(
        fit=fit,
        predict=predict,
    )
    return nnet
def test_squared_error(colvect):
    # symbolic version
    from lasagne.objectives import squared_error
    if not colvect:
        a, b = theano.tensor.matrices('ab')
        c = squared_error(a, b)
    else:
        a, b = theano.tensor.vectors('ab')
        c = squared_error(a.dimshuffle(0, 'x'), b)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    shape = (10, 20) if not colvect else (10,)
    x = np.random.rand(*shape).astype(floatX)
    y = np.random.rand(*shape).astype(floatX)
    z = (x - y)**2
    # compare
    assert np.allclose(z, c.eval({a: x, b: y}))
def create_network(available_actions_num):
    # Creates the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    # Creates the input layer of the network.
    dqn = InputLayer(shape=[None, 1, downsampled_y, downsampled_x], input_var=s1)

    # Adds 3 convolutional layers, each followed by a max pooling layer.
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])

    # Adds a single fully connected layer.
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify, W=GlorotUniform("relu"),
                     b=Constant(.1))

    # Adds a single fully connected layer which is the output layer.
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    # Theano stuff
    q = get_output(dqn)
    # Only q for the chosen actions is updated more or less according to following formula:
    # target Q(s,a,t) = r + gamma * max Q(s2,_,t+1)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    # Updates the parameters according to the computed gradient using rmsprop.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compiles theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, nonterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print "Network compiled."

    # Returns Theano objects for the net and functions.
    # We wouldn't need the net anymore but it is nice to save your model.
    return dqn, function_learn, function_get_q_values, function_get_best_action
def build_train_fn(self):
    prediction = get_output(self.network, deterministic=False)
    loss = squared_error(prediction, self.target_var)
    loss = loss.mean()

    params = get_all_params(self.network, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=self.learning_rate, momentum=self.momentum)

    self.train_fn = theano.function([self.input_var, self.target_var], loss, updates=updates)
def test_squared_error():
    # symbolic version
    from lasagne.objectives import squared_error
    a, b = theano.tensor.matrices('ab')
    c = squared_error(a, b)
    # numeric version
    floatX = theano.config.floatX
    x = np.random.randn(10, 20).astype(floatX)
    y = np.random.randn(10, 20).astype(floatX)
    z = (x - y)**2
    # compare
    assert np.allclose(z, c.eval({a: x, b: y}))
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1)

    # Add 2 convolutional layers with ReLu activation
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=3)
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)

    # Add a single fully-connected layer.
    dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify, W=HeUniform("relu"),
                     b=Constant(.1))

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function
    q = get_output(dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, isterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print "Network compiled."

    def simple_get_best_action(state):
        return function_get_best_action(state.reshape([1, 1, resolution[0], resolution[1]]))

    # Returns Theano objects for the net and functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
def run(get_model, model_name):
    train_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10)
    valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10, port=5558)

    ftensor5 = tensor.TensorType('float32', (False,) * 5)
    input_var = ftensor5('sax_features')
    target_var = tensor.matrix('targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(input_var, multiply_var)

    # load parameters
    cg = ComputationGraph(test_pred_mid)
    params_val = numpy.load('sunnybrook/best_weights.npz')
    for p, value in zip(cg.shared_variables, params_val['arr_0']):
        p.set_value(value)

    crps = tensor.abs_(test_prediction - target_var).mean()

    loss = squared_error(prediction, target_var).mean()
    loss.name = 'loss'
    crps.name = 'crps'

    algorithm = GradientDescent(
        cost=loss,
        parameters=params_top,
        step_rule=Adam(),
        on_unused_sources='ignore'
    )

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[crps, loss], data_stream=valid_stream, prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_crps']], after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train'),
        FinishAfter(after_n_epochs=20)
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)
    main_loop.run()
def __init__(self, conf):
    self.conf = conf

    if self.conf.act == "linear":
        self.conf.act = linear
    elif self.conf.act == "sigmoid":
        self.conf.act = sigmoid
    elif self.conf.act == "relu":
        self.conf.act = rectify
    elif self.conf.act == "tanh":
        self.conf.act = tanh
    else:
        raise ValueError("Unknown activation function", self.conf.act)

    input_var_first = T.matrix('inputs1')
    input_var_second = T.matrix('inputs2')
    target_var = T.matrix('targets')

    # create network
    self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)

    self.out = get_output(self.autoencoder)

    loss = squared_error(self.out, target_var)
    loss = loss.mean()

    params = get_all_params(self.autoencoder, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)

    # training function
    self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)

    # function to reconstruct
    test_reconstruction = get_output(self.autoencoder, deterministic=True)
    self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)

    # encoding function
    test_encode = get_output([encoder_first, encoder_second], deterministic=True)
    self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

    # utils
    blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
    self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
    self.blas_scal = blas('scal', np.array([], dtype=float))

    # load weights if necessary
    if self.conf.load_model is not None:
        self.load_model()
def build_loss(targets, prediction, optimization):
    """ setup loss function with weight decay regularization """

    if optimization["objective"] == 'categorical':
        loss = objectives.categorical_crossentropy(prediction, targets)

    elif optimization["objective"] == 'binary':
        prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
        loss = -(targets * T.log(prediction) + (1.0 - targets) * T.log(1.0 - prediction))
        # loss = objectives.binary_crossentropy(prediction[:,loss_index], targets[:,loss_index])

    elif optimization["objective"] == 'squared_error':
        loss = objectives.squared_error(prediction, targets)

    loss = objectives.aggregate(loss, mode='mean')

    return loss
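# --- Hedged usage sketch (added; not part of the original source) ---
# build_loss() above selects the objective from an `optimization` dict and returns the
# mean-aggregated loss. Assuming `layer_output` is a network's output layer and `targets`
# a matching symbolic variable (both hypothetical names), a squared-error loss could be
# built roughly like this:
#
#   prediction = layers.get_output(layer_output)
#   loss = build_loss(targets, prediction, {"objective": "squared_error"})
#
# The "objective" key is the only field build_loss() reads from the dict.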
def compute_cost_fake(df):
    """
    Pass a pandas dataframe df and then generate signal.
    Call the entropy error and return the averaged cost
    """
    import numpy as np
    import cPickle
    from nn import nn_param
    from matplotlib import pyplot as plt
    from theano import tensor as T

    f = file("nnparams.sav")
    update = cPickle.load(f)

    sig = np.asarray(df.l0)
    sig_noise = np.asarray(df.l0 + df.noise)
    sig /= np.max(sig)
    sig_noise /= np.max(sig_noise)

    yval = nn_param(update, sig_noise)
    return T.mean(squared_error(yval, sig))
def _get_objective(policy, state_values, actions, reference_state_values,
                   is_alive="always", min_log_proba=-1e6):
    """returns a2v loss sum"""
    if is_alive == "always":
        is_alive = T.ones_like(actions, dtype=theano.config.floatX)

    action_probas = get_action_Qvalues(policy, actions)

    reference_state_values = consider_constant(reference_state_values)

    log_probas = T.maximum(T.log(action_probas), min_log_proba)

    policy_loss_elwise = -log_probas * (reference_state_values - consider_constant(state_values))

    V_err_elwise = squared_error(reference_state_values, state_values)

    return (policy_loss_elwise + V_err_elwise) * is_alive
def __init__(self, nnet_x_to_z, nnet_z_to_x,
             batch_optimizer=None, rng=None,
             noise_function=None,
             loss_function=None,
             loss_function_y=None,
             loss_function_z=None,
             nnet_x_to_y=None,
             X_type=None,
             walkback=1):
    self.nnet_x_to_z = nnet_x_to_z
    self.nnet_z_to_x = nnet_z_to_x
    self.nnet_x_to_y = nnet_x_to_y

    if batch_optimizer is None:
        batch_optimizer = easy.BatchOptimizer()
    self.batch_optimizer = batch_optimizer
    self.batch_optimizer.model = self

    if rng is None:
        rng = RandomStreams(seed=10001)
    self.rng = rng

    self.encode_function = None  # only available after fit
    self.decode_function = None  # only available after fit
    self.predict_function = None  # only available after fit
    self.iter_update_batch = None
    self.iter_update = None
    self.get_loss = None

    if loss_function is None:
        loss_function = lambda x, x_hat: objectives.squared_error(x, x_hat).sum(axis=1)
    self.loss_function = loss_function
    self.loss_function_y = loss_function_y
    self.loss_function_z = loss_function_z

    self.noise_function = noise_function
    self.walkback = walkback

    if X_type is None:
        X_type = T.matrix
    self.X_type = X_type
def main():
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    dqn = InputLayer(shape=[None, 1, 2000], input_var=s1)  # reduced two dimensions to one - is that right?
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    q = get_output(dqn)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    function_learn = theano.function([s1, q2, a, r, nonterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")

    # Creates and initializes the environment.
    print "Initializing doom..."
    game = DoomGame()
    game.load_config("../../examples/config/learning.cfg")
    game.init()
    print "Doom initialized."

    # Creates all possible actions.
    n = game.get_available_buttons_size()
    actions = []
    for perm in it.product([0, 1], repeat=n):
        actions.append(list(perm))
def _create_network(self):
    logger.info("Building network ...")
    net, input_var = self._build_network()
    target_values = T.matrix('target_output')
    actions = T.icol('actions')

    # Create masks
    # mask = theano.shared(np.zeros((self.batch_size, self.num_actions)).astype(np.int32))
    mask = T.zeros_like(target_values)
    mask = T.set_subtensor(mask[T.arange(self.batch_size), actions.reshape((-1,))], 1)

    # feed-forward path
    network_output = lasagne.layers.get_output(net, input_var / 255.0)

    # Add regularization penalty
    loss = squared_error(network_output * mask, target_values).mean()
    if self.weight_decay > 0.0:
        loss += regularize_network_params(net, l2) * self.weight_decay

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(net, trainable=True)

    # Compute updates for training
    if self.clip_error:
        grads = theano.gradient.grad(loss, all_params)
        grads = [lasagne.updates.norm_constraint(grad, self.clip_error, range(grad.ndim)) for grad in grads]
        updates = self.optimizer(grads, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)
    else:
        updates = self.optimizer(loss, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)

    # Theano functions for training and computing cost
    logger.info("Compiling functions ...")
    train = theano.function([input_var, target_values, actions],
                            [loss, network_output, target_values, mask],
                            updates=updates)
    predict = theano.function([input_var], network_output)

    return net, train, predict
def learn_params(mst_data, voxels, w_params,
                 batches=(1, 1, 1), holdout_size=100, lr=1e-4, l2=0.0, num_epochs=1,
                 output_val_scores=-1, output_val_every=1, verbose=False, dry_run=False):
    ''' batches dims are (samples, voxels, candidates) '''
    assert len(mst_data) == len(voxels), "data/target length mismatch"
    n, nf, _, nt = mst_data.shape
    _, nv = voxels.shape
    bn, bv, bt = batches
    nbv, nbt = nv // bv, nt // bt
    rbv, rbt = nv - nbv * bv, nt - nbt * bt
    assert rbt == 0, "the model batch size must be a divisor of the total number of models"
    if verbose:
        print "Grad. Desc. planned in %d batch with batch size %d and residual %d" % \
            (int(np.ceil(float(n - holdout_size) / bn)), bn, (n - holdout_size) % bn)
        print "%d voxel batches of size %d with residual %d" % (nbv, bv, rbv)
        print "%d candidate batches of size %d with residual %d" % (nbt, bt, rbt)
        print "for %d voxelmodel fits." % (nv * nt)
        sys.stdout.flush()

    print 'CREATING SYMBOLS\n'
    _V = T.matrix()
    __V = _V.dimshuffle((0, 1, 'x'))
    __lr = theano.shared(fpX(lr))
    __l2 = theano.shared(fpX(l2))
    ### request shared memory
    __mst_sdata = theano.shared(np.zeros(shape=(n, nf, 1, bt), dtype=fpX))
    __vox_sdata = theano.shared(np.zeros(shape=(n, bv), dtype=fpX))
    __range = T.ivector()

    _smst_batch = __mst_sdata[__range[0]:__range[1]]
    _fwrf_o = svFWRF(_smst_batch, nf, bv, bt, add_bias=len(w_params) == 2)
    if verbose:
        plu.print_lasagne_network(_fwrf_o, skipnoparam=False)

    ### define and compile the training expressions.
    _fwrf_o_reg = __l2 * R.regularize_layer_params(_fwrf_o, R.l2)
    fwrf_o_params = L.get_all_params(_fwrf_o, trainable=True)
    _sV = __vox_sdata[__range[0]:__range[1]].dimshuffle((0, 1, 'x'))
    _fwrf_o_trn_pred = L.get_output(_fwrf_o, deterministic=False)
    _fwrf_o_trn_preloss = O.squared_error(_fwrf_o_trn_pred, _sV).mean(axis=0)
    _fwrf_o_trn_loss = _fwrf_o_trn_preloss.sum() + _fwrf_o_reg

    _fwrf_o_val_pred = L.get_output(_fwrf_o, deterministic=True)
    _fwrf_o_val_preloss = O.squared_error(_fwrf_o_val_pred, _sV).mean(axis=0)  # average across the batch elements
    ###
    __fwrf_o_updates = lasagne.updates.sgd(_fwrf_o_trn_loss, fwrf_o_params, learning_rate=__lr)
    # __fwrf_o_updates = lasagne.updates.adam(_fwrf_o_trn_loss, fwrf_o_params, learning_rate=self.__lr, beta1=0.5, epsilon=1e-12)

    print 'COMPILING...'
    sys.stdout.flush()
    comp_t = time.time()
    fwrf_o_trn_fn = theano.function([__range], updates=__fwrf_o_updates)
    fwrf_o_val_fn = theano.function([__range], _fwrf_o_val_preloss)
    print '%.2f seconds to compile theano functions' % (time.time() - comp_t)

    ### shuffle the time series of voxels and mst_data
    order = np.arange(n, dtype=int)
    np.random.shuffle(order)
    mst_data = mst_data[order]
    voxels = voxels[order]

    ### THIS IS WHERE THE MODEL OPTIMIZATION IS PERFORMED ###
    print "\nVoxel-Candidates model optimization..."
    start_time = time.time()
    val_batch_scores = np.zeros((bv, bt), dtype=fpX)
    best_epochs = np.zeros(shape=(nv), dtype=int)
    best_scores = np.full(shape=(nv), fill_value=np.inf, dtype=fpX)
    best_models = np.zeros(shape=(nv), dtype=int)
    # W, b = fwrf_o_params #!!!!!
    best_w_params = [np.zeros(p.shape, dtype=fpX) for p in w_params]

    ### save score history
    num_outputs = int(num_epochs / output_val_every) + int(num_epochs % output_val_every > 0)
    val_scores = []
    if output_val_scores == -1:
        val_scores = np.zeros(shape=(num_outputs, nv, nt), dtype=fpX)
    elif output_val_scores > 0:
        outv = output_val_scores
        val_scores = np.zeros(shape=(num_outputs, bv * outv, nt), dtype=fpX)
    ###
    if dry_run:
        # free vram
        set_shared_parameters([__mst_sdata, __vox_sdata, ] + fwrf_o_params)
        return val_scores, best_scores, best_epochs, best_models, best_w_params

    ### VOXEL LOOP
    for v, (rv, lv) in tqdm(enumerate(iterate_range(0, nv, bv))):
        voxelSlice = voxels[:, rv]
        best_epochs_slice = best_epochs[rv]
        best_scores_slice = best_scores[rv]
        best_models_slice = best_models[rv]
        params_init = [p[rv] for p in w_params]
        # rW, rb = w_params[0][rv,:], w_params[1][rv]
        if lv < bv:  # PATCH UP MISSING DATA FOR THE FIXED VOXEL BATCH SIZE
            voxelSlice = np.concatenate((voxelSlice, np.zeros(shape=(n, bv - lv), dtype=fpX)), axis=1)
            for i, p in enumerate(params_init):
                params_init[i] = np.concatenate((p, np.zeros(shape=(bv - lv, ) + p.shape[1:], dtype=fpX)), axis=0)
            # rW = np.concatenate((rW, np.zeros(shape=(bv-lv, nf), dtype=fpX)), axis=0)
            # rb = np.concatenate((rb, np.zeros(shape=(bv-lv), dtype=fpX)), axis=0)
        for i, p in enumerate(params_init):
            if len(p.shape) == 2:
                params_init[i] = np.repeat(p.T, repeats=bt)
            else:
                params_init[i] = np.repeat(p, repeats=bt)
        # pW = np.repeat(rW.T, repeats=bt).reshape((nf,bv,bt)) # ALL CANDIDATE MODELS GET THE SAME INITIAL PARAMETER VALUES
        # pb = np.repeat(rb, repeats=bt).reshape((1, bv,bt))

        set_shared_parameters([__vox_sdata], [voxelSlice])

        ### CANDIDATE LOOP
        for t in range(nbt):  ## CANDIDATE BATCH LOOP
            # need to recompile to reset the solver!!! (depending on the solver used)
            fwrf_o_trn_fn = theano.function([__range], updates=__fwrf_o_updates)
            # set the shared parameter values for these candidates. Every candidate restarts at the same point.
            set_shared_parameters([__mst_sdata, ] + fwrf_o_params,
                                  [mst_data[:, :, :, t * bt:(t + 1) * bt], ] + params_init)

            print "\n Voxel %d:%d of %d, Candidate %d:%d of %d" % (rv[0], rv[-1] + 1, nv, t * bt, (t + 1) * bt, nt)

            ### EPOCH LOOP
            epoch_start = time.time()
            for epoch in range(num_epochs):
                ######## ONE EPOCH OF TRAINING ###########
                val_batch_scores.fill(0)
                # In each epoch, we do a full pass over the training data:
                for rb, lb in iterate_bounds(0, n - holdout_size, bn):
                    fwrf_o_trn_fn(rb)
                # and one pass over the validation set.
                val_batches = 0
                for rb, lb in iterate_bounds(n - holdout_size, holdout_size, bn):
                    loss = fwrf_o_val_fn(rb)
                    val_batch_scores += loss
                    val_batches += lb
                val_batch_scores /= val_batches
                if verbose:
                    print " validation <loss>: %.6f" % (val_batch_scores.mean())

                ### RECORD TIME SERIES ###
                if epoch % output_val_every == 0:
                    if output_val_scores == -1:
                        val_scores[int(epoch / output_val_every), rv, t * bt:(t + 1) * bt] = val_batch_scores[:lv, :]
                    elif output_val_scores > 0:
                        val_scores[int(epoch / output_val_every), v * outv:(v + 1) * outv, t * bt:(t + 1) * bt] = \
                            val_batch_scores[:min(outv, lv), :]

                ##### RECORD MINIMUM SCORE AND MODELS #####
                best_models_for_this_epoch = np.argmin(val_batch_scores[:lv, :], axis=1)
                best_scores_for_this_epoch = np.amin(val_batch_scores[:lv, :], axis=1)
                # This updates the BEST RELATIVE MODELS, along with their associated scores
                best_scores_mask = (best_scores_for_this_epoch < best_scores_slice)  # all the voxels that show an improvement
                best_epochs_slice[best_scores_mask] = epoch
                np.copyto(best_scores_slice, best_scores_for_this_epoch, casting='same_kind', where=best_scores_mask)
                np.copyto(best_models_slice, best_models_for_this_epoch + t * bt, casting='same_kind',
                          where=best_scores_mask)  # notice the +t*bt to return the best model across all models, not just the batch's

                # to select the weight slices we need, we need to specify the voxels that showed improvement
                # AND the models that correspond to these improvements.
                update_vm_pos = np.zeros((bv, bt), dtype=bool)
                update_vm_pos[np.arange(lv)[best_scores_mask], best_models_for_this_epoch[best_scores_mask]] = True
                update_vm_idx = np.arange(bv * bt)[update_vm_pos.flatten()]

                # update the best parameter values based on the voxelmodel validation scores.
                for bwp, p in zip(best_w_params, fwrf_o_params):
                    pv = p.get_value()
                    if len(bwp.shape) == 2:
                        bwp[np.asarray(rv)[best_scores_mask]] = (pv.reshape((pv.shape[0], -1)).T)[update_vm_idx]
                    else:
                        bwp[np.asarray(rv)[best_scores_mask]] = (pv.reshape((-1)))[update_vm_idx]
                # best_w_params[0][np.asarray(rv)[best_scores_mask], :] = (W.get_value().reshape((nf,-1))[:,update_vm_idx]).T
                # best_w_params[1][np.asarray(rv)[best_scores_mask]] = b.get_value().reshape((-1))[update_vm_idx]

            batch_time = time.time() - epoch_start
            print " %d Epoch for %d voxelmodels took %.3fs @ %.3f voxelmodels/s" % \
                (num_epochs, lv * bt, batch_time, fpX(lv * bt) / batch_time)
            sys.stdout.flush()
        # end candidate loop

        best_epochs[rv] = np.copy(best_epochs_slice)
        best_scores[rv] = np.copy(best_scores_slice)  ## NECESSARY TO COPY BACK
        best_models[rv] = np.copy(best_models_slice)
    # end voxel loop

    # free shared vram
    set_shared_parameters([__mst_sdata, __vox_sdata, ] + fwrf_o_params)

    full_time = time.time() - start_time
    print "\n---------------------------------------------------------------------"
    print "%d Epoch for %d voxelmodels took %.3fs @ %.3f voxelmodels/s" % \
        (num_epochs, nv * nt, full_time, fpX(nv * nt) / full_time)

    return val_scores, best_scores, best_epochs, best_models, best_w_params
def get_model():
    dtensor4 = T.TensorType("float32", (False,) * 4)
    input_var = dtensor4("inputs")
    dtensor2 = T.TensorType("float32", (False,) * 2)
    target_var = dtensor2("targets")

    # input layer with unspecified batch size
    layer_input = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)  # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0 = DimshuffleLayer(layer_input, (0, "x", 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_1 = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=64, filter_size=(3, 3, 3),
                                        stride=(1, 3, 3), pad="same", nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_2 = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_3 = DropoutLayer(layer_2, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_4 = batch_norm(Conv3DDNNLayer(incoming=layer_3, num_filters=128, filter_size=(3, 3, 3),
                                        stride=(1, 3, 3), pad="same", nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_5 = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_6 = DropoutLayer(layer_5, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_7 = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=256, filter_size=(3, 3, 3),
                                        stride=(1, 3, 3), pad="same", nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_9 = DropoutLayer(layer_8, p=0.25)

    # Recurrent layer
    layer_10 = DimshuffleLayer(layer_9, (0, 2, 1, 3, 4))
    layer_11 = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False)

    # Output Layer
    layer_systole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_diastole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_systole_1 = DropoutLayer(layer_systole, p=0.3)
    layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3)
    layer_systole_2 = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_diastole_2 = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_output = ConcatLayer([layer_systole_2, layer_diastole_2])

    # Loss
    prediction = get_output(layer_output)
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum Or Adam
    params = get_all_params(layer_output, trainable=True)
    updates = adam(loss, params)
    # updates_0 = rmsprop(loss, params)
    # updates = apply_nesterov_momentum(updates_0, params)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_output, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], test_loss, allow_input_downcast=True)

    # Compile a third function computing the prediction
    predict_fn = theano.function([input_var], test_prediction, allow_input_downcast=True)

    return [layer_output, train_fn, val_fn, predict_fn]
    (Conv2DLayerFast, {'num_filters': 1, 'filter_size': filter_size, 'pad': pad_out}),
    (ReshapeLayer, {'shape': (([0], -1))}),
]

# In[ ]:

input_var = T.tensor4('inputs')
output_var = T.matrix('outputs')

network = layers[0][0](input_var=input_var, **layers[0][1])
for layer in layers[1:]:
    network = layer[0](network, **layer[1])

prediction = get_output(network)
loss = squared_error(prediction, output_var)
loss = loss.mean()

params = get_all_params(network, trainable=True)
# updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
updates = sgd(loss, params, learning_rate=0.01)

test_prediction = get_output(network, deterministic=True)
test_loss = squared_error(test_prediction, output_var)
test_loss = test_loss.mean()

# As a bonus, also create an expression for the classification accuracy:
# test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
#                   dtype=theano.config.floatX)

train_fn = theano.function([input_var, output_var], loss, updates=updates)  # , mode=theano.compile.MonitorMode(post_func=theano.compile.monitormode.detect_nan))
def test_squared_error_preserve_dtype():
    from lasagne.objectives import squared_error
    for dtype in 'float64', 'float32', 'float16':
        a = theano.tensor.matrix('a', dtype=dtype)
        b = theano.tensor.matrix('b', dtype=dtype)
        assert squared_error(a, b).dtype == dtype
def get_elementwise_objective(Qvalues, actions, rewards,
                              is_alive="always",
                              Qvalues_target=None,
                              gamma_or_gammas=0.95,
                              crop_last=True,
                              force_qvalues_after_end=True,
                              qvalues_after_end="zeros",
                              consider_reference_constant=True,
                              ):
    """
    Returns squared error between predicted and reference Qvalues according to Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma * Q(next_state,next_action)
        loss = mean over (Qvalues - Qreference)**2

    :param Qvalues: [batch,tick,action_id] - predicted qvalues
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active.
        Default value of is_alive implies a simplified computation algorithm for Q-learning loss
    :param Qvalues_target: Older snapshot Qvalues (e.g. from a target network). If None, uses current Qvalues
    :param gamma_or_gammas: a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts
    :param crop_last: if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end
    :param force_qvalues_after_end: if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end
    :param qvalues_after_end: [batch,1,n_actions] - symbolic expression for "next state q-values" for last tick
        used for reference only. Defaults at T.zeros_like(Qvalues[:,0,None,:])
        If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )
    :param consider_reference_constant: whether or not to zero-out gradient flow through reference_Qvalues
        (True is highly recommended unless you know what you're doing)
    :return: tensor [batch, tick] of squared errors over Qvalues (using formula above for loss)
    """
    if Qvalues_target is None:
        Qvalues_target = Qvalues

    # get reference Qvalues via Q-learning algorithm
    reference_Qvalues = get_reference_Qvalues(Qvalues_target, actions, rewards,
                                              gamma_or_gammas=gamma_or_gammas,
                                              qvalues_after_end=qvalues_after_end,
                                              )

    if consider_reference_constant:
        # do not pass gradient through reference Q-values (since they DO depend on Q-values by default)
        reference_Qvalues = consider_constant(reference_Qvalues)

    # get predicted qvalues for committed actions (to compare with reference Q-values)
    action_Qvalues = get_action_Qvalues(Qvalues, actions)

    # if agent is always alive, return the simplified loss
    if is_alive == "always":
        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)
    else:
        # we are given an is_alive matrix : uint8[batch,tick]

        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless
        if force_qvalues_after_end:
            # set future rewards at session end to rewards + qvalues_after_end
            end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

            if qvalues_after_end == "zeros":
                # "set reference Q-values at end action ids to just the immediate rewards"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids])
            else:
                last_optimal_rewards = T.zeros_like(rewards[:, 0])
                # "set reference Q-values at end action ids to the immediate rewards + qvalues after end"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids],
                                                    rewards[end_ids] + gamma_or_gammas * last_optimal_rewards[end_ids[0], 0]
                                                    )

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

        # zero-out loss after session ended
        elwise_squared_error = elwise_squared_error * is_alive

    if crop_last:
        elwise_squared_error = T.set_subtensor(elwise_squared_error[:, -1], 0)

    return elwise_squared_error
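# --- Hedged usage sketch (added; not part of the original source) ---
# The docstring above defines the scalar loss as the mean over (Qvalues - Qreference)**2.
# Assuming symbolic tensors shaped as documented (Qvalues: [batch, tick, n_actions];
# actions, rewards, is_alive: [batch, tick]), the elementwise output could be reduced to
# a training objective roughly like this:
#
#   elwise_loss = get_elementwise_objective(Qvalues, actions, rewards, is_alive=is_alive)
#   loss = elwise_loss.sum() / is_alive.sum()   # mean over alive ticks only (an assumption)
#
# Taking elwise_loss.mean() over all elements is the simpler alternative when every
# session has the same length.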
def create_nnet(input_dims, action_dims, observation_dims, value_dims, learning_rate,
                grad_clip=None, l1_weight=None, l2_weight=None,
                num_hidden_units=20, num_hidden_action_units=None,
                num_hidden_observ_units=None, num_hidden_value_units=None,
                batch_size=32, max_train_epochs=1,
                hidden_nonlinearity=nonlinearities.rectify,
                output_nonlinearity=None,
                update_method=updates.sgd):
    commonlayers = []
    commonlayers.append(layers.InputLayer(shape=(None, input_dims)))
    commonlayers.append(DenseLayer(commonlayers[-1], num_hidden_units, nonlinearity=hidden_nonlinearity))

    if num_hidden_action_units is None:
        actionlayers = [DenseLayer(commonlayers[-1], action_dims, nonlinearity=output_nonlinearity)]
    else:
        actionlayers = [DenseLayer(commonlayers[-1], num_hidden_action_units, nonlinearity=output_nonlinearity)]
        actionlayers.append(DenseLayer(actionlayers[-1], action_dims, nonlinearity=output_nonlinearity))

    if num_hidden_observ_units is None:
        observlayers = [DenseLayer(commonlayers[-1], observation_dims, nonlinearity=output_nonlinearity)]
    else:
        observlayers = [DenseLayer(commonlayers[-1], num_hidden_observ_units, nonlinearity=output_nonlinearity)]
        observlayers.append(DenseLayer(observlayers[-1], observation_dims, nonlinearity=output_nonlinearity))

    if num_hidden_value_units is None:
        dvaluelayers = [DenseLayer(commonlayers[-1], value_dims, nonlinearity=output_nonlinearity)]
    else:
        dvaluelayers = [DenseLayer(commonlayers[-1], num_hidden_value_units, nonlinearity=output_nonlinearity)]
        dvaluelayers.append(DenseLayer(dvaluelayers[-1], value_dims, nonlinearity=output_nonlinearity))

    actvallayers = [layers.ConcatLayer([actionlayers[-1], dvaluelayers[-1]])]
    obsvallayers = [layers.ConcatLayer([observlayers[-1], dvaluelayers[-1]])]
    concatlayers = [layers.ConcatLayer([actionlayers[-1], observlayers[-1], dvaluelayers[-1]])]

    action_prediction = layers.get_output(actionlayers[-1])
    dvalue_prediction = layers.get_output(dvaluelayers[-1])
    actval_prediction = layers.get_output(actvallayers[-1])
    obsval_prediction = layers.get_output(obsvallayers[-1])
    concat_prediction = layers.get_output(concatlayers[-1])

    input_var = commonlayers[0].input_var
    action_target = T.matrix(name="action_target", dtype=floatX)
    dvalue_target = T.matrix(name="value_target", dtype=floatX)
    actval_target = T.matrix(name="actval_target", dtype=floatX)
    obsval_target = T.matrix(name="obsval_target", dtype=floatX)
    concat_target = T.matrix(name="concat_target", dtype=floatX)

    action_loss = objectives.squared_error(action_prediction, action_target).mean()
    obsval_loss = objectives.squared_error(obsval_prediction, obsval_target).mean()
    dvalue_loss = objectives.squared_error(dvalue_prediction, dvalue_target).mean()
    actval_loss = objectives.squared_error(actval_prediction, actval_target).mean()
    concat_loss = objectives.squared_error(concat_prediction, concat_target).mean()

    if l1_weight is not None:
        action_l1penalty = regularize_layer_params(commonlayers + actionlayers, l1) * l1_weight
        obsval_l1penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l1) * l1_weight
        dvalue_l1penalty = regularize_layer_params(commonlayers + dvaluelayers, l1) * l1_weight
        actval_l1penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l1) * l1_weight
        concat_l1penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l1) * l1_weight
        action_loss += action_l1penalty
        obsval_loss += obsval_l1penalty
        dvalue_loss += dvalue_l1penalty
        actval_loss += actval_l1penalty
        concat_loss += concat_l1penalty

    if l2_weight is not None:
        action_l2penalty = regularize_layer_params(commonlayers + actionlayers, l2) * l2_weight
        obsval_l2penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l2) * l2_weight
        dvalue_l2penalty = regularize_layer_params(commonlayers + dvaluelayers, l2) * l2_weight
        actval_l2penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l2) * l2_weight
        concat_l2penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l2) * l2_weight
        action_loss += action_l2penalty
        obsval_loss += obsval_l2penalty
        dvalue_loss += dvalue_l2penalty
        actval_loss += actval_l2penalty
        concat_loss += concat_l2penalty

    action_params = layers.get_all_params(actionlayers[-1], trainable=True)
    obsval_params = layers.get_all_params(obsvallayers[-1], trainable=True)
    dvalue_params = layers.get_all_params(dvaluelayers[-1], trainable=True)
    actval_params = layers.get_all_params(actvallayers[-1], trainable=True)
    concat_params = layers.get_all_params(concatlayers[-1], trainable=True)

    if grad_clip is not None:
        action_grads = theano.grad(action_loss, action_params)
        obsval_grads = theano.grad(obsval_loss, obsval_params)
        dvalue_grads = theano.grad(dvalue_loss, dvalue_params)
        actval_grads = theano.grad(actval_loss, actval_params)
        concat_grads = theano.grad(concat_loss, concat_params)
        action_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in action_grads]
        obsval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in obsval_grads]
        dvalue_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in dvalue_grads]
        actval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in actval_grads]
        concat_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in concat_grads]
        action_updates = update_method(action_grads, action_params, learning_rate)
        obsval_updates = update_method(obsval_grads, obsval_params, learning_rate)
        dvalue_updates = update_method(dvalue_grads, dvalue_params, learning_rate)
        actval_updates = update_method(actval_grads, actval_params, learning_rate)
        concat_updates = update_method(concat_grads, concat_params, learning_rate)
    else:
        action_updates = update_method(action_loss, action_params, learning_rate)
        obsval_updates = update_method(obsval_loss, obsval_params, learning_rate)
        dvalue_updates = update_method(dvalue_loss, dvalue_params, learning_rate)
        actval_updates = update_method(actval_loss, actval_params, learning_rate)
        concat_updates = update_method(concat_loss, concat_params, learning_rate)

    fit_action = theano.function([input_var, action_target], action_loss, updates=action_updates)
    fit_obsval = theano.function([input_var, obsval_target], obsval_loss, updates=obsval_updates)
    fit_dvalue = theano.function([input_var, dvalue_target], dvalue_loss, updates=dvalue_updates)
    fit_actval = theano.function([input_var, actval_target], actval_loss, updates=actval_updates)
    fit_concat = theano.function([input_var, concat_target], concat_loss, updates=concat_updates)

    predict_action = theano.function([input_var], action_prediction)
    predict_obsval = theano.function([input_var], obsval_prediction)
    predict_dvalue = theano.function([input_var], dvalue_prediction)
    predict_actval = theano.function([input_var], actval_prediction)
    predict_concat = theano.function([input_var], concat_prediction)

    nnet = Mock(
        fit_action=fit_action,
        fit_obsval=fit_obsval,
        fit_value=fit_dvalue,
        fit_actval=fit_actval,
        fit_both=fit_concat,
        predict_action=predict_action,
        predict_obsval=predict_obsval,
        predict_value=predict_dvalue,
        predict_actval=predict_actval,
        predict_both=predict_concat,
    )
    return nnet
def build_model(self, train_set, test_set, validation_set=None):
    super(FCAE, self).build_model(train_set, test_set, validation_set)

    y_train = get_output(self.model, self.sym_x)
    loss = aggregate(squared_error(y_train, self.sym_x), mode='mean')
    # loss += + 1e-4 * lasagne.regularization.regularize_network_params(self.model, lasagne.regularization.l2)

    y_test = get_output(self.model, self.sym_x, deterministic=True)
    loss_test = aggregate(squared_error(y_test, self.sym_x), mode='mean')

    all_params = get_all_params(self.model, trainable=True)
    sym_beta1 = T.scalar('beta1')
    sym_beta2 = T.scalar('beta2')
    grads = T.grad(loss, all_params)
    ngrads = lasagne.updates.total_norm_constraint(grads, 5)
    cgrads = [T.clip(g, -5, 5) for g in ngrads]
    updates = rmsprop(cgrads, all_params, self.sym_lr, sym_beta1, sym_beta2)

    inputs = [self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1, sym_beta2]

    f_train = theano.function(
        inputs, [loss],
        updates=updates,
        givens={
            self.sym_x: self.sh_train_x[self.batch_slice],
        },
    )

    f_test = theano.function(
        [self.sym_index, self.sym_batchsize], [loss_test],
        givens={
            self.sym_x: self.sh_test_x[self.batch_slice],
        },
        on_unused_input='ignore',
    )

    f_ae = None
    # f_ae = theano.function(
    #     [self.sym_batchsize], [y_test],
    #     givens={
    #         self.sym_x: self.sh_valid_x,
    #     },
    #     on_unused_input='ignore',
    # )

    self.train_args['inputs']['batchsize'] = 128
    self.train_args['inputs']['learningrate'] = 1e-3
    self.train_args['inputs']['beta1'] = 0.9
    self.train_args['inputs']['beta2'] = 1e-6
    self.train_args['outputs']['loss'] = '%0.6f'

    self.test_args['inputs']['batchsize'] = 128
    self.test_args['outputs']['loss_test'] = '%0.6f'

    # self.validate_args['inputs']['batchsize'] = 128
    # self.validate_args['outputs']['loss_eval'] = '%0.6f'
    # self.validate_args['outputs']['loss_acc'] = '%0.6f'

    return f_train, f_test, f_ae, self.train_args, self.test_args, self.validate_args
def get_elementwise_objective(policy, state_values, actions, rewards, is_alive="always", state_values_target=None, n_steps=None, gamma_or_gammas=0.99, crop_last=True, force_values_after_end=True, state_values_after_end="zeros", consider_value_reference_constant=True, consider_predicted_value_constant=True, scan_dependencies=(), scan_strict=True, min_proba=1e-30): """ returns cross-entropy-like objective function for Actor-Critic method L_policy = - log(policy) * (V_reference - const(V)) L_V = (V - Vreference)^2 :param policy: [batch,tick,action_id] - predicted action probabilities :param state_values: [batch,tick] - predicted state values :param actions: [batch,tick] - committed actions :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active. Default value of is_alive implies a simplified computation algorithm for Qlearning loss :param state_values_target: there should be state values used to compute reference (e.g. older network snapshot) If None (defualt), uses current Qvalues to compute reference :param n_steps: if an integer is given, the references are computed in loops of 3 states. Defaults to None: propagating rewards throughout the whole session. If n_steps equals 1, this works exactly as Q-learning (though less efficient one) If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies. :param gamma_or_gammas: a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts :param crop_last: if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end :param force_values_after_end: if true, sets reference policy at session end to rewards[end] + qvalues_after_end :param state_values_after_end: [batch,1,n_actions] - "next state values" for last tick used for reference only. Defaults at T.zeros_like(state_values[:,0,None,:]) If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] ) :param consider_value_reference_constant: whether or not to zero-out gradients through the "reference state values" term :param consider_predicted_value_constant: whether or not to consider predicted state value constant in the POLICY LOSS COMPONENT :param scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True) :param scan_strict: whether to evaluate values using strict theano scan or non-strict one :param min_proba: minimum value for policy term. Used to prevent -inf when policy(action) ~ 0. 
:return: elementwise sum of policy_loss + state_value_loss [batch,tick] """ if state_values_target is None: state_values_target = state_values # get reference values via Q-learning algorithm reference_state_values = get_n_step_value_reference(state_values_target, rewards, is_alive, n_steps=n_steps, optimal_state_values_after_end=state_values_after_end, gamma_or_gammas=gamma_or_gammas, dependencies=scan_dependencies, strict=scan_strict ) # if we have to set after_end values if is_alive != "always" and force_values_after_end: # if asked to force reference_Q[end_tick+1,a] = 0, do it # note: if agent is always alive, this is meaningless # set future rewards at session end to rewards+qvalues_after_end end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero() if state_values_after_end == "zeros": # "set reference state values at end action ids to just the immediate rewards" reference_state_values = T.set_subtensor(reference_state_values[end_ids], rewards[end_ids]) else: # "set reference state values at end action ids to the immediate rewards + qvalues after end" new_state_values = rewards[end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0], 0] reference_state_values = T.set_subtensor(reference_state_values[end_ids], new_state_values) # now compute the loss if is_alive == "always": is_alive = T.ones_like(actions, dtype=theano.config.floatX) # actor loss action_probas = get_action_Qvalues(policy, actions) if crop_last: reference_state_values = T.set_subtensor(reference_state_values[:,-1], state_values[:,-1]) if consider_value_reference_constant: reference_state_values = consider_constant(reference_state_values) log_probas = T.log(action_probas) #set min proba in a way that does not zero-out the derivatives # idea: # log(p) = log(p) if p != 0 else log(p+min_proba) if min_proba != 0: log_probas = T.switch(T.eq(action_probas,0), T.log(action_probas+min_proba), log_probas ) observed_state_values = consider_constant(state_values) if consider_predicted_value_constant else state_values policy_loss_elwise = - log_probas * (reference_state_values - observed_state_values) # critic loss V_err_elwise = squared_error(reference_state_values, state_values) return (policy_loss_elwise + V_err_elwise) * is_alive
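A hedged usage sketch for the objective above: symbolic placeholders with the documented [batch, tick, ...] shapes are declared, the element-wise loss is averaged into a scalar, and that scalar can then be fed to any Lasagne update rule.

import theano.tensor as T

# assumed symbolic inputs with the shapes documented in the docstring
policy = T.tensor3('policy')      # [batch, tick, action_id]
state_values = T.matrix('V')      # [batch, tick]
actions = T.imatrix('actions')    # [batch, tick]
rewards = T.matrix('rewards')     # [batch, tick]

elwise_loss = get_elementwise_objective(policy, state_values, actions, rewards,
                                        gamma_or_gammas=0.99, n_steps=None)
scalar_loss = elwise_loss.mean()  # average over batch and time
# scalar_loss can now be passed to e.g. lasagne.updates.adam(scalar_loss, params)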
def get_model(): dtensor4 = T.TensorType('float32', (False,)*4) input_var = dtensor4('inputs') dtensor2 = T.TensorType('float32', (False,)*2) target_var = dtensor2('targets') # input layer with unspecified batch size layer_input = InputLayer(shape=(None, 30, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var) layer_0 = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3)) # Z-score? # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_1 = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=64, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal())) layer_2 = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1)) layer_3 = DropoutLayer(layer_2, p=0.25) # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_4 = batch_norm(Conv3DDNNLayer(incoming=layer_3, num_filters=128, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal())) layer_5 = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1)) layer_6 = DropoutLayer(layer_5, p=0.25) # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_7 = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=256, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal())) layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1)) layer_9 = DropoutLayer(layer_8, p=0.25) # Recurrent layer layer_10 = DimshuffleLayer(layer_9, (0,2,1,3,4)) layer_11 = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False) # Output Layer layer_systole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal()) layer_diastole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal()) layer_systole_1 = DropoutLayer(layer_systole, p=0.3) layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3) layer_systole_2 = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal()) layer_diastole_2 = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal()) layer_output = ConcatLayer([layer_systole_2, layer_diastole_2]) # Loss prediction = get_output(layer_output) loss = squared_error(prediction, target_var) loss = loss.mean() #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum Or Adam params = get_all_params(layer_output, trainable=True) updates = adam(loss, params) #updates_0 = rmsprop(loss, params) #updates = apply_nesterov_momentum(updates_0, params) # Create a loss expression for validation/testing. The crucial difference # here is that we do a deterministic forward pass through the network, disabling dropout layers. 
test_prediction = get_output(layer_output, deterministic=True) test_loss = squared_error(test_prediction, target_var) test_loss = test_loss.mean() # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True) # Compile a second function computing the validation loss val_fn = theano.function([input_var, target_var], test_loss, allow_input_downcast=True) # Compile a third function computing the prediction predict_fn = theano.function([input_var], test_prediction, allow_input_downcast=True) return [layer_output, train_fn, val_fn, predict_fn]
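A minimal training-loop sketch (not from the source) for the objects returned above; the arrays are random placeholders and running it requires the cuDNN-backed 3D layers used in the model.

import numpy as np

layer_output, train_fn, val_fn, predict_fn = get_model()

# placeholder data: (n_samples, 30, 64, 64) inputs, (n_samples, 2) targets
X = np.random.rand(40, 30, 64, 64).astype('float32')
y = np.random.rand(40, 2).astype('float32')
batch_size = 8

for epoch in range(5):
    batch_losses = []
    for start in range(0, len(X), batch_size):
        batch_losses.append(train_fn(X[start:start + batch_size],
                                     y[start:start + batch_size]))
    print('epoch %d: train %.4f  val %.4f'
          % (epoch + 1, np.mean(batch_losses), val_fn(X, y)))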
def get_elementwise_objective_components( policy, rewards, policy_values, action_values='same', is_alive="always", n_steps=None, gamma_or_gammas=0.99, crop_last=True, force_values_after_end=True, state_values_after_end="zeros", consider_value_reference_constant=True, consider_predicted_value_constant=True, scan_dependencies=tuple(), scan_strict=True, ): """ N-step Deterministic Policy Gradient (A2c) implementation. Works with continuous action space (real value or vector of such) Requires action policy(mu) and state values. Based on http://arxiv.org/abs/1509.02971 http://jmlr.org/proceedings/papers/v32/silver14.pdf This particular implementation also allows N-step reinforcement learning The code mostly relies on the same architecture as advantage actor-critic a2c_n_step returns deterministic policy gradient components for actor and critic L_policy = -critic(state,policy) = -action_values L_V = (V - Vreference)^2 You will have to independently compute updates for actor and critic and then add them up. parameters: policy [batch,tick,action_id] - predicted "optimal policy" (mu) rewards [batch,tick] - immediate rewards for taking actions at given time ticks policy_values [batch,tick] - predicted state values given OPTIMAL policy action_values [batch,tick] - predicted Q_values for commited actions INCLUDING EXPLORATION if any Default value implies action_values = state_values if we have no exploration is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active. Default value of is_alive implies a simplified computation algorithm for Qlearning loss n_steps: if an integer is given, the references are computed in loops of 3 states. Defaults to None: propagating rewards throughout the whole session. If n_steps equals 1, this works exactly as Q-learning (though less efficient one) If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies. gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts crop_last - if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end force_values_after_end - if true, sets reference policy at session end to rewards[end] + qvalues_after_end state_values_after_end[batch,1,n_actions] - "next state values" for last tick used for reference only. 
Defaults at T.zeros_like(state_values[:,0,None,:]) If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] ) scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True) scan_strict: whether to evaluate values using strict theano scan or non-strict one Returns: Element-wise sum of policy_loss + state_value_loss """ if action_values == 'same': action_values = policy_values # get reference values via DPG algorithm reference_action_values = get_n_step_value_reference( action_values, rewards, is_alive, n_steps=n_steps, optimal_state_values_after_end=state_values_after_end, gamma_or_gammas=gamma_or_gammas, dependencies=scan_dependencies, strict=scan_strict) if is_alive != "always" and force_values_after_end: # if asked to force reference_Q[end_tick+1,a] = 0, do it # note: if agent is always alive, this is meaningless # set future rewards at session end to rewards+qvalues_after_end end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero() if state_values_after_end == "zeros": # "set reference state values at end action ids to just the immediate rewards" reference_action_values = T.set_subtensor( reference_action_values[end_ids], rewards[end_ids]) else: # "set reference state values at end action ids to the immediate rewards + qvalues after end" new_subtensor_values = rewards[ end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0], 0] reference_action_values = T.set_subtensor( reference_action_values[end_ids], new_subtensor_values) # now compute the loss components if is_alive == "always": is_alive = T.ones_like(action_values, dtype=theano.config.floatX) # actor loss # here we rely on fact that state_values = critic(state,optimal_policy) # using chain rule, # grad(state_values,actor_weights) = grad(state_values, optimal_policy)*grad(optimal_policy,actor_weights) policy_loss_elwise = -policy_values # critic loss reference_action_values = consider_constant(reference_action_values) v_err_elementwise = squared_error(reference_action_values, action_values) if crop_last: v_err_elementwise = T.set_subtensor(v_err_elementwise[:, -1], 0) return policy_loss_elwise * is_alive, v_err_elementwise * is_alive
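Sketch of how the two element-wise components returned above could be aggregated and given separate update rules; the symbolic placeholders and the actor_params / critic_params names are assumptions for illustration, not part of the source.

import theano.tensor as T

# assumed symbolic inputs with the documented [batch, tick] shapes
policy = T.tensor3('mu')            # [batch, tick, action_dim]
rewards = T.matrix('rewards')       # [batch, tick]
policy_values = T.matrix('V_mu')    # [batch, tick]
action_values = T.matrix('Q')       # [batch, tick]

policy_loss_elwise, v_err_elwise = get_elementwise_objective_components(
    policy, rewards, policy_values, action_values, n_steps=10)

actor_loss = policy_loss_elwise.mean()
critic_loss = v_err_elwise.mean()

# actor_params / critic_params would be the trainable parameters of the actor
# and critic networks, e.g. lasagne.layers.get_all_params(net, trainable=True):
# actor_updates = lasagne.updates.adam(actor_loss, actor_params, learning_rate=1e-4)
# critic_updates = lasagne.updates.adam(critic_loss, critic_params, learning_rate=1e-3)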
def train(options): # -------- setup options and data ------------------ np.random.seed(options['seed']) # Load options host = socket.gethostname() # get computer hostname start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M") model = importlib.import_module(options['model_file']) # ---------- build model and compile --------------- input_batch = T.tensor4() # input image sequences target = T.tensor4() # target image print('Build model...') model = model.Model(**options['modelOptions']) print('Compile ...') net, outputs, filters = model.build_model(input_batch) # compute loss outputs = get_output(outputs + [filters]) output_frames = outputs[:-1] output_filter = outputs[-1] train_losses = [] for i in range(options['modelOptions']['target_seqlen']): output_frame = output_frames[i] if options['loss'] == 'squared_error': frame_loss = squared_error(output_frame, target[:, [i], :, :]) elif options['loss'] == 'binary_crossentropy': # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436 output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps) frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:]) else: assert False train_losses.append(aggregate(frame_loss)) train_loss = sum(train_losses) / options['modelOptions']['target_seqlen'] # update sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate layers = get_all_layers(net) all_params = get_all_params(layers, trainable = True) updates = adam(train_loss, all_params, learning_rate=sh_lr) _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True) _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True) # ------------ data setup ---------------- print('Prepare data...') dataset = importlib.import_module(options['dataset_file']) dh = dataset.DataHandler(**options['datasetOptions']) # ------------ training setup ---------------- if options['pretrained_model_path'] is not None: checkpoint = pickle.load(open(options['pretrained_model_path'], 'rb')) model_values = checkpoint['model_values'] # overwrite the values of model parameters lasagne.layers.set_all_param_values(layers, model_values) history_train = checkpoint['history_train'] start_epoch = checkpoint['epoch'] + 1 options['batch_size'] = checkpoint['options']['batch_size'] sh_lr.set_value(floatX(checkpoint['options']['learning_rate'])) else: start_epoch = 0 history_train = [] # ------------ actual training ---------------- print 'Start training ...' 
input_seqlen = options['modelOptions']['input_seqlen'] for epoch in range(start_epoch, start_epoch + options['num_epochs']): epoch_start_time = time.time() history_batch = [] for batch_index in range(0, options['batches_per_epoch']): batch = dh.GetBatch() # generate data on the fly if options['dataset_file'] == 'datasets.stereoCarsColor': batch_input = batch[..., :input_seqlen].squeeze(axis=4) # first frames batch_target = batch[..., input_seqlen:].squeeze(axis=4) # last frame else: batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames batch_target = batch[..., input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame # train loss_train = _train(batch_input, batch_target) history_batch.append(loss_train) print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], batch_index+1, options['batches_per_epoch'], time.time() - epoch_start_time)) print(" training loss:\t{:.6f}".format(loss_train.item())) # clear the screen display.clear_output(wait=True) # print statistics history_train.append(np.mean(history_batch)) history_batch = [] print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time)) print(" training loss:\t{:.6f}".format(history_train[epoch].item())) # set new learning rate (maybe this is unnecessary with adam updates) if (epoch+1) % options['decay_after'] == 0: options['learning_rate'] = sh_lr.get_value() * 0.5 print "New LR:", options['learning_rate'] sh_lr.set_value(floatX(options['learning_rate'])) # save the model if (epoch+1) % options['save_after'] == 0: save_model(layers, epoch, history_train, start_time, host, options) print("Model saved")
W=lasagne.init.Normal(0.01))) l_output = batch_norm( Conv2DLayer(l_deconv1_2, num_filters=21, filter_size=(1, 1), pad=0, stride=1)) ### # net_l_deconv2_2 = lasagne.layers.get_output(l_output); # l_deconv2_2_func = theano.function([l_in.input_var], [net_l_deconv2_2]); # l_deconv2_2_func_val = l_deconv2_2_func(X_train); # print(l_deconv2_2_func_val[0].shape); print('start training 1') true_output = T.ftensor4('true_output') ### loss = squared_error(lasagne.layers.get_output(l_output), true_output).mean() loss_train = squared_error( lasagne.layers.get_output(l_output, deterministic=False), true_output).mean() loss_eval = squared_error( lasagne.layers.get_output(l_output, deterministic=True), true_output).mean() all_params = lasagne.layers.get_all_params(l_output, trainable=True) updates = lasagne.updates.nesterov_momentum(loss_train, all_params, learning_rate=0.001, momentum=0.985) train = theano.function([l_in.input_var, true_output], loss_train,
def train_model(learning_rate_dis=0.0004, learning_rate_model=0.0004, n_epochs=36, batch_size=20, nb_caption='max'): ''' Function that compute the training of the model ''' ####################### # Loading the dataset # ####################### print ('... Loading data') # Load the dataset on the CPU data_path = get_path() train_input_path = 'train_input_' train_target_path = 'train_target_' train_caption_path = 'train_caption_' valid_input_path = 'valid_input_' valid_target_path = 'valid_target_' valid_caption_path = 'valid_caption_' nb_train_batch = 8 ###################### # Building the model # ###################### # Symbolic variables # Shape = (_, 3, 64, 64) x = T.tensor4('x', dtype=theano.config.floatX) # Shape = (_, 3, 32, 32) y = T.tensor4('y', dtype=theano.config.floatX) # Shape = (_, 3, 32, 32) z = T.tensor4('x', dtype=theano.config.floatX) # Shape = (_, seq_length) w = T.imatrix('captions') # Creation of the model model = build_context_encoder(input_var1=x, input_var2=w) discriminator = build_discriminator(input_var=None) fake_image = layers.get_output(model) fake_image_det = layers.get_output(model, deterministic=True) prob_real = layers.get_output(discriminator, inputs=y) prob_fake = layers.get_output(discriminator, inputs=fake_image) params_model = layers.get_all_params(model, trainable=True) params_dis = layers.get_all_params(discriminator, trainable=True) loss_real = -T.mean(T.log(prob_real)) loss_fake = -T.mean(T.log(1 - prob_fake)) loss_dis = 0.005 * (loss_real + loss_fake) loss_gen = -T.mean(T.log(prob_fake)) recons_error = T.mean(objectives.squared_error(fake_image, z)) loss_model = 0.005 * loss_gen + 0.995 * recons_error updates_dis = lasagne.updates.adam(loss_dis, params_dis, learning_rate=learning_rate_dis, beta1=0.5) updates_model = lasagne.updates.adam(loss_model, params_model, learning_rate=learning_rate_model, beta1=0.5) # Creation of theano functions train_dis = theano.function([x, y, w], loss_dis, updates=updates_dis, allow_input_downcast=True) train_model = theano.function([x, z, w], loss_model, updates=updates_model, allow_input_downcast=True) predict_image = theano.function([x, w], fake_image_det, allow_input_downcast=True) ################### # Train the model # ################### print('... 
Training') epoch = 0 nb_train_dis = 25 nb_train_gen = 10 nb_batch = 10000 // batch_size nb_block = nb_batch // nb_train_dis loss_dis = [] loss_model = [] idx = [0, 1, 2, 4, 5] #start_time = timeit.default_timer() while (epoch < n_epochs): epoch = epoch + 1 for i in range(nb_train_batch): #print (i) # Shape = (10000, 3, 64, 64) & Shape = (10000, 3, 32, 32) contour, center = get_image(data_path, train_input_path, train_target_path, str(i)) # List of captions of different sequence length caption = get_caption(data_path, train_caption_path, str(i), str(nb_caption)) # List of size nb_train_dis list = [k % len(caption) for k in range(nb_train_dis)] for j in range(nb_block): #print (j) for index in range(nb_train_dis * j, nb_train_dis * (j + 1)): #print (index) train_caption = caption[list[index % nb_train_dis]] if train_caption.shape[0] >= batch_size: random_idx = random.sample(range(0, train_caption.shape[0]), batch_size) else: random_idx = random.sample(range(0, train_caption.shape[0]), train_caption.shape[0]) input = contour[train_caption[random_idx, -1] - i * 10000] target = center[train_caption[random_idx, -1] - i * 10000] train_caption = train_caption[random_idx, :-1] loss = train_dis(input, target, train_caption) loss_dis.append(loss) for index in range(nb_train_gen * j, nb_train_gen * (j + 1)): #print (index) rand_nb = random.randint(0, len(list) - 1) train_caption = caption[rand_nb] if train_caption.shape[0] >= batch_size: random_idx = random.sample(range(0, train_caption.shape[0]), batch_size) else: random_idx = random.sample(range(0, train_caption.shape[0]), train_caption.shape[0]) input = contour[train_caption[random_idx, -1] - i * 10000] target = center[train_caption[random_idx, -1] - i * 10000] train_caption = train_caption[random_idx, :-1] loss = train_model(input, target, train_caption) loss_model.append(loss) if epoch % 4 == 0: # save the model and a bunch of generated pictures print ('... saving model and generated images') np.savez('discriminator_epoch' + str(epoch) + '.npz', *layers.get_all_param_values(discriminator)) np.savez('context_encoder_epoch' + str(epoch) + '.npz', *layers.get_all_param_values(model)) np.save('loss_dis', loss_dis) np.save('loss_gen', loss_model) contour, center = get_image(data_path, valid_input_path, valid_target_path, str(0)) caption = get_caption(data_path, valid_caption_path, str(0), str(nb_caption)) valid_caption = caption[4][idx] input = contour[valid_caption[:, -1]] generated_centers = predict_image(input, valid_caption[:, :-1]) generated_images = assemble(input, generated_centers) for k in range(len(idx)): plt.subplot(1, len(idx), (k + 1)) plt.axis('off') plt.imshow(generated_images[k, :, :, :].transpose(1, 2, 0)) plt.savefig('generated_images_epoch' + str(epoch) + '.png', bbox_inches='tight') #end_time = timeit.default_timer() # Plot the learning curve ax1 = host_subplot(111, axes_class=AA.Axes) plt.subplots_adjust(right=0.75) ax2 = ax1.twiny() x1 = range(1, len(loss_dis) + 1) ax1.set_xlim([x1[0], x1[-1]]) x2 = range(1, len(loss_model) + 1) ax2.set_xlim([x2[0], x2[-1]]) ax1.set_xlabel('training iteration (Discriminator)', color='g') ax2.set_xlabel('training iteration (Context encoder)', color='b') ax1.set_ylabel('Loss') ax1.plot(x1, rolling_average(loss_dis), 'g', label='Discriminator loss') ax2.plot(x2, rolling_average(loss_model), 'b', label='Context encoder Loss') ax1.grid(True) ax1.legend() plt.savefig('Learning_curve') print('Optimization complete.')
def create_network(available_actions_count): # Create the input variables s1 = tensor.tensor4("State") a = tensor.vector("Action", dtype="int32") q2 = tensor.vector("Q2") r = tensor.vector("Reward") isterminal = tensor.vector("IsTerminal", dtype="int8") # Create the input layer of the network dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1) # Add 2 convolutional layers with ReLu activation dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=3) dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=2) # Add a single fully-connected layer. dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1)) # Add the output layer (fully-connected). dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None) # Define the loss function q = get_output(dqn) # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r target_q = tensor.set_subtensor( q[tensor.arange(q.shape[0]), a], r + discount_factor * (1 - isterminal) * q2) loss = squared_error(q, target_q).mean() # Update the parameters according to the gradient computed with RMSProp. params = get_all_params(dqn, trainable=True) updates = rmsprop(loss, params, learning_rate) # Compile the theano functions print("Compiling the network ...") function_learn = theano.function([s1, q2, a, r, isterminal], loss, updates=updates, name="learn_fn") function_get_q_values = theano.function([s1], q, name="eval_fn") function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn") print("Network compiled.") def simple_get_best_action(state): return function_get_best_action( state.reshape([1, 1, resolution[0], resolution[1]])) # Return the Theano objects for the network and the functions. return dqn, function_learn, function_get_q_values, simple_get_best_action
def net_dict_rnn(seq_length): if seq_length <= 300: learning_rate = 1e-2 learning_rate_changes_by_iteration = {1000: 1e-3, 10000: 1e-4} elif seq_length < 1500: learning_rate = 1e-4 learning_rate_changes_by_iteration = {5000: 1e-5, 9000: 1e-6} else: learning_rate = 1e-5 learning_rate_changes_by_iteration = {5000: 1e-6, 9000: 1e-7} return dict( epochs=10000, save_plot_interval=1000, loss_function=lambda x, t: squared_error(x, t).mean(), updates_func=nesterov_momentum, learning_rate=learning_rate, learning_rate_changes_by_iteration=learning_rate_changes_by_iteration, do_save_activations=True, auto_reshape=True, plotter=Plotter(n_seq_to_plot=32, n_training_examples_to_plot=16), layers_config=[ { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) # (batch, features, time) }, { 'type': Conv1DLayer, # convolve over the time axis 'num_filters': 16, 'filter_size': 4, 'stride': 1, 'nonlinearity': None, 'border_mode': 'same' }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1), # back to (batch, time, features) 'label': 'dimshuffle3' }, { 'type': BLSTMLayer, 'num_units': 128, 'merge_mode': 'concatenate', 'grad_clipping': 10.0, 'gradient_steps': 500 }, { 'type': BLSTMLayer, 'num_units': 256, 'merge_mode': 'concatenate', 'grad_clipping': 10.0, 'gradient_steps': 500 }, { 'type': DenseLayer, 'num_units': 128, 'nonlinearity': tanh }, { 'type': DenseLayer, 'num_units': 1, 'nonlinearity': None } ])
def get_elementwise_objective(policy, state_values, actions, rewards, is_alive="always", n_steps=None, gamma_or_gammas=0.99, crop_last=True, force_values_after_end=True, state_values_after_end="zeros", consider_value_reference_constant=True, consider_predicted_value_constant=True, scan_dependencies=[], scan_strict=True, min_log_proba=-1e50): """ returns cross-entropy-like objective function for Actor-Critic method L_policy = - log(policy) * (V_reference - const(V)) L_V = (V - Vreference)^2 parameters: policy [batch,tick,action_id] - predicted action probabilities state_values [batch,tick] - predicted state values actions [batch,tick] - committed actions rewards [batch,tick] - immediate rewards for taking actions at given time ticks is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active. Default value of is_alive implies a simplified computation algorithm for Qlearning loss n_steps: if an integer is given, the references are computed in loops of 3 states. Defaults to None: propagating rewards throughout the whole session. If n_steps equals 1, this works exactly as Q-learning (though less efficient one) If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies. gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts crop_last - if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end force_values_after_end - if true, sets reference policy at session end to rewards[end] + qvalues_after_end state_values_after_end[batch,1,n_actions] - "next state values" for last tick used for reference only. Defaults at T.zeros_like(state_values[:,0,None,:]) If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] ) scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True) scan_strict: whether to evaluate values using strict theano scan or non-strict one Returns: elementwise sum of policy_loss + state_value_loss """ # get reference values via Q-learning algorithm reference_state_values = get_n_step_value_reference( state_values, rewards, is_alive, n_steps=n_steps, optimal_state_values_after_end=state_values_after_end, gamma_or_gammas=gamma_or_gammas, dependencies=scan_dependencies, strict=scan_strict) # if we have to set after_end values if is_alive != "always" and force_values_after_end: # if asked to force reference_Q[end_tick+1,a] = 0, do it # note: if agent is always alive, this is meaningless # set future rewards at session end to rewards+qvalues_after_end end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero() if state_values_after_end == "zeros": # "set reference state values at end action ids to just the immediate rewards" reference_state_values = T.set_subtensor( reference_state_values[end_ids], rewards[end_ids]) else: # "set reference state values at end action ids to the immediate rewards + qvalues after end" new_state_values = rewards[ end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0], 0] reference_state_values = T.set_subtensor( reference_state_values[end_ids], new_state_values) # now compute the loss if is_alive == "always": is_alive = T.ones_like(actions, dtype=theano.config.floatX) # actor loss action_probas = get_action_Qvalues(policy, actions) reference_state_values = consider_constant(reference_state_values) if crop_last: reference_state_values = T.set_subtensor( reference_state_values[:, -1], 
consider_constant(state_values[:, -1])) log_probas = T.maximum(T.log(action_probas), min_log_proba) policy_loss_elwise = -log_probas * (reference_state_values - consider_constant(state_values)) # critic loss V_err_elwise = squared_error(reference_state_values, state_values) return (policy_loss_elwise + V_err_elwise) * is_alive
INPUT_STATS = { 'mean': np.array([297.87216187], dtype=np.float32), 'std': np.array([374.43884277], dtype=np.float32) } def only_train_on_real_data(net, iteration): net.logger.info( "Iteration {}: Now only training on real data.".format(iteration)) net.source.sources[0]['train_probability'] = 0.0 net.source.sources[1]['train_probability'] = 1.0 net_dict = dict(save_plot_interval=SAVE_PLOT_INTERVAL, loss_function=lambda x, t: squared_error(x, t).mean(), updates_func=nesterov_momentum, learning_rate=1e-1, learning_rate_changes_by_iteration={ 1000: 1e-2, 10000: 1e-3 }, epoch_callbacks={350000: only_train_on_real_data}, do_save_activations=True, auto_reshape=True, layers_config=[{ 'type': DenseLayer, 'num_units': 10, 'nonlinearity': tanh }, { 'type': BLSTMLayer,
def main(): def signal_handler(signal, frame): global terminate terminate = True print('terminating...'.format(terminate)) signal.signal(signal.SIGINT, signal_handler) configure_theano() options = parse_options() X, X_val = generate_data() # X = np.reshape(X, (-1, 1, 30, 40))[:-5] print('X type and shape:', X.dtype, X.shape) print('X.min():', X.min()) print('X.max():', X.max()) # X_val = np.reshape(X_val, (-1, 1, 30, 40))[:-1] print('X_val type and shape:', X_val.dtype, X_val.shape) print('X_val.min():', X_val.min()) print('X_val.max():', X_val.max()) # we need our target to be 1 dimensional X_out = X.reshape((X.shape[0], -1)) X_val_out = X_val.reshape((X_val.shape[0], -1)) print('X_out:', X_out.dtype, X_out.shape) print('X_val_out', X_val_out.dtype, X_val_out.shape) # X_noisy = apply_gaussian_noise(X_out) # visualize_reconstruction(X_noisy[0:25], X_out[0:25], shape=(28, 28)) # X = np.reshape(X_noisy, (-1, 1, 28, 28)) print('constructing and compiling model...') # input_var = T.tensor4('input', dtype='float32') input_var = T.tensor3('input', dtype='float32') target_var = T.matrix('output', dtype='float32') lr = theano.shared(np.array(0.8, dtype=theano.config.floatX), name='learning_rate') lr_decay = np.array(0.9, dtype=theano.config.floatX) # try building a reshaping layer # network = create_model(input_var, (None, 1, 30, 40), options) l_input = InputLayer((None, None, 1200), input_var, name='input') l_input = ReshapeLayer(l_input, (-1, 1, 30, 40), name='reshape_input') # l_input = InputLayer((None, 1, 30, 40), input_var, name='input') if options['MODEL'] == 'normal': network, encoder = avletters_convae.create_model(l_input, options) if options['MODEL'] == 'batchnorm': network, encoder = avletters_convae_bn.create_model(l_input, options) if options['MODEL'] == 'dropout': network, encoder = avletters_convae_drop.create_model(l_input, options) if options['MODEL'] == 'bn+dropout': network, encoder = avletters_convae_bndrop.create_model( l_input, options) print('AE Network architecture: {}'.format(options['MODEL'])) print_network(network) recon = las.layers.get_output(network, deterministic=False) all_params = las.layers.get_all_params(network, trainable=True) cost = T.mean(squared_error(recon, target_var)) updates = adadelta(cost, all_params, lr) # updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=0.90) use_max_constraint = False print('apply max norm constraint: {}'.format(use_max_constraint)) if use_max_constraint: MAX_NORM = 4 for param in las.layers.get_all_params(network, regularizable=True): if param.ndim > 1: # only apply to dimensions larger than 1, exclude biases # updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean()) updates[param] = norm_constraint(param, MAX_NORM) train = theano.function([input_var, target_var], recon, updates=updates, allow_input_downcast=True) train_cost_fn = theano.function([input_var, target_var], cost, allow_input_downcast=True) eval_recon = las.layers.get_output(network, deterministic=True) eval_cost = T.mean(las.objectives.squared_error(eval_recon, target_var)) eval_cost_fn = theano.function([input_var, target_var], eval_cost, allow_input_downcast=True) recon_fn = theano.function([input_var], eval_recon, allow_input_downcast=True) if terminate: exit() NUM_EPOCHS = options['NUM_EPOCHS'] EPOCH_SIZE = options['EPOCH_SIZE'] NO_STRIDES = options['NO_STRIDES'] VAL_NO_STRIDES = options['VAL_NO_STRIDES'] print('begin training for {} epochs...'.format(NUM_EPOCHS)) datagen = batch_iterator(X, 
X_out, 128) costs = [] val_costs = [] for epoch in range(NUM_EPOCHS): time_start = time.time() for i in range(EPOCH_SIZE): batch_X, batch_y = next(datagen) print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format( epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), lr.get_value()) print(print_str, end='') sys.stdout.flush() batch_X = batch_X.reshape((-1, 1, 1200)) train(batch_X, batch_y) print('\r', end='') if terminate: break if terminate: break cost = batch_compute_cost(X, X_out, NO_STRIDES, train_cost_fn) val_cost = batch_compute_cost(X_val, X_val_out, VAL_NO_STRIDES, eval_cost_fn) costs.append(cost) val_costs.append(val_cost) print("Epoch {} train cost = {}, validation cost = {} ({:.1f}sec) ". format(epoch + 1, cost, val_cost, time.time() - time_start)) if epoch > 10: lr.set_value(lr.get_value() * lr_decay) X_val_recon = recon_fn(X_val) visualize_reconstruction(X_val_out[450:550], X_val_recon[450:550], shape=(30, 40), savefilename='avletters') plot_validation_cost(costs, val_costs, None, savefilename='valid_cost') conv2d1 = las.layers.get_all_layers(network)[2] visualize.plot_conv_weights(conv2d1, (15, 14)).savefig('conv2d1.png') print('saving encoder...') save_model(encoder, 'models/conv_encoder.dat') save_model(network, 'models/conv_ae.dat')
def net_dict_ae_rnn(seq_length): NUM_FILTERS = 8 return dict( epochs=None, save_plot_interval=5000, loss_function=lambda x, t: squared_error(x, t).mean(), updates_func=nesterov_momentum, learning_rate=1e-2, learning_rate_changes_by_iteration={110000: 1e-3}, do_save_activations=True, auto_reshape=False, plotter=Plotter(n_seq_to_plot=32, n_training_examples_to_plot=16), layers_config=[ { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) # (batch, features, time) }, { 'label': 'conv0', 'type': Conv1DLayer, # convolve over the time axis 'num_filters': NUM_FILTERS, 'filter_size': 4, 'stride': 1, 'nonlinearity': None, 'pad': 'valid' }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) # back to (batch, time, features) }, { 'type': DenseLayer, 'num_units': (seq_length - 3) * NUM_FILTERS, 'nonlinearity': rectify }, { 'type': DenseLayer, 'num_units': 128, 'nonlinearity': rectify }, { 'type': DenseLayer, 'num_units': (seq_length - 3) * NUM_FILTERS, 'nonlinearity': rectify }, { 'type': ReshapeLayer, 'shape': (-1, (seq_length - 3), NUM_FILTERS) }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1) # (batch, features, time) }, { # DeConv 'type': Conv1DLayer, 'num_filters': 1, 'filter_size': 4, 'stride': 1, 'nonlinearity': None, 'pad': 'full' }, { 'type': DimshuffleLayer, 'pattern': (0, 2, 1), # back to (batch, time, features) 'label': 'AE_output' } ], layer_changes={ 100001: { 'new_layers': [{ 'type': ConcatLayer, 'axis': 2, 'incomings': ['input', 'AE_output'] }, { 'type': ReshapeLayer, 'shape': (64 * seq_length, 2) }, { 'type': DenseLayer, 'num_units': 16, 'nonlinearity': tanh }, { 'type': ReshapeLayer, 'shape': (64, seq_length, 16) }, { 'type': BLSTMLayer, 'num_units': 128, 'merge_mode': 'concatenate', 'grad_clipping': 10.0, 'gradient_steps': 500 }, { 'type': BLSTMLayer, 'num_units': 256, 'merge_mode': 'concatenate', 'grad_clipping': 10.0, 'gradient_steps': 500 }, { 'type': ReshapeLayer, 'shape': (64 * seq_length, 512) }, { 'type': DenseLayer, 'num_units': 128, 'nonlinearity': tanh }, { 'type': DenseLayer, 'num_units': 1, 'nonlinearity': None }] } })
def get_elementwise_objective( Qvalues, actions, rewards, is_alive="always", Qvalues_target=None, n_steps=None, gamma_or_gammas=0.95, crop_last=True, force_qvalues_after_end=True, optimal_qvalues_after_end="zeros", consider_reference_constant=True, aggregation_function=lambda qv: T.max(qv, axis=-1), return_reference=False, scan_dependencies=[], scan_strict=True): """ Returns squared error between predicted and reference Q-values according to n-step Q-learning algorithm Qreference(state,action) = reward(state,action) + gamma*reward(state_1,action_1) + ... + gamma^n * max[action_n]( Q(state_n,action_n) loss = mean over (Qvalues - Qreference)**2 :param Qvalues: [batch,tick,action_id] - predicted qvalues :param actions: [batch,tick] - commited actions :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active. Default value of is_alive implies a simplified computation algorithm for Qlearning loss :param Qvalues_target: Older snapshot Qvalues (e.g. from a target network). If None, uses current Qvalues :param n_steps: if an integer is given, the references are computed in loops of 3 states. Defaults to None: propagating rewards throughout the whole session. If n_steps equals 1, this works exactly as Q-learning (though less efficient one) If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies. :param gamma_or_gammas: delayed reward discounts: a single value or array[batch,tick](can broadcast dimensions). :param crop_last: if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end :param force_qvalues_after_end: if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end :param optimal_qvalues_after_end: [batch,1] - symbolic expression for "best next state q-values" for last tick used when computing reference Q-values only. Defaults at T.zeros_like(Q-values[:,0,None,0]) If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] ) :param consider_reference_constant: whether or not zero-out gradient flow through reference_Qvalues (True is highly recommended) :param aggregation_function: a function that takes all Qvalues for "next state Q-values" term and returns what is the "best next Q-value". Normally you should not touch it. Defaults to max over actions. Normally you shouldn't touch this Takes input of [batch,n_actions] Q-values :param return_reference: if True, returns reference Qvalues. 
If False, returns squared_error(action_Qvalues, reference_Qvalues) :param scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True) :param scan_strict: whether to evaluate Qvalues using strict theano scan or non-strict one :return: mean squared error over Q-values (using formula above for loss) """ if Qvalues_target is None: Qvalues_target = Qvalues # get Qvalues of best actions (used every K steps for reference Q-value computation optimal_Qvalues_target = aggregation_function(Qvalues_target) # get predicted Q-values for committed actions by both current and target networks # (to compare with reference Q-values and use for recurrent reference computation) action_Qvalues = get_action_Qvalues(Qvalues, actions) action_Qvalues_target = get_action_Qvalues(Qvalues_target, actions) # get reference Q-values via Q-learning algorithm reference_Qvalues = get_n_step_value_reference( state_values=action_Qvalues_target, rewards=rewards, is_alive=is_alive, n_steps=n_steps, gamma_or_gammas=gamma_or_gammas, optimal_state_values=optimal_Qvalues_target, optimal_state_values_after_end=optimal_qvalues_after_end, dependencies=scan_dependencies, strict=scan_strict) if consider_reference_constant: # do not pass gradient through reference Qvalues (since they DO depend on Qvalues by default) reference_Qvalues = consider_constant(reference_Qvalues) if force_qvalues_after_end and is_alive != "always": # if asked to force reference_Q[end_tick+1,a] = 0, do it # note: if agent is always alive, this is meaningless # set future rewards at session end to rewards+qvalues_after_end end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero() if optimal_qvalues_after_end == "zeros": # "set reference Q-values at end action ids to just the immediate rewards" reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids]) else: # "set reference Q-values at end action ids to the immediate rewards + qvalues after end" new_reference_values = rewards[ end_ids] + gamma_or_gammas * optimal_qvalues_after_end reference_Qvalues = T.set_subtensor( reference_Qvalues[end_ids], new_reference_values[end_ids[0], 0]) #If asked, make sure loss equals 0 for the last time-tick. if crop_last: reference_Qvalues = T.set_subtensor(reference_Qvalues[:, -1], action_Qvalues[:, -1]) if return_reference: return reference_Qvalues else: # tensor of elementwise squared errors elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues) return elwise_squared_error * is_alive
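A plain-numpy illustration (an assumption about the formula in the docstring, not the library's scan-based implementation) of the n-step reference Qreference = r_t + gamma*r_{t+1} + ... + gamma^n * max_a Q(s_{t+n}, a) for a single session:

import numpy as np

def n_step_reference(rewards, optimal_q, gamma=0.95, n_steps=3):
    """Discounted sum of the next n_steps rewards plus a discounted bootstrap
    from the optimal Q-value n_steps ticks later (illustrative only)."""
    length = len(rewards)
    ref = np.zeros(length)
    for t in range(length):
        horizon = min(t + n_steps, length)
        ret = sum((gamma ** (k - t)) * rewards[k] for k in range(t, horizon))
        if horizon < length:
            ret += (gamma ** (horizon - t)) * optimal_q[horizon]  # bootstrap
        ref[t] = ret
    return ref

rewards = np.array([1.0, 0.0, 0.0, 2.0])
optimal_q = np.array([0.5, 0.7, 0.9, 0.3])  # max_a Q(s_t, a), placeholder values
print(n_step_reference(rewards, optimal_q))  # approx [1.257, 1.805, 1.9, 2.0]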
def __init__(self, load_weights=True, is_training=True, model_name='dronet_weights.npz'): self.model_name = os.path.join( os.path.dirname(os.path.realpath(__file__)), model_name) def network(image): input_image = InputLayer(input_var=image, shape=(None, 1, 120, 160)) conv1 = Conv2DLayer(input_image, num_filters=32, filter_size=(5, 5), stride=(2, 2), nonlinearity=rectify, pad='same') pool1 = MaxPool2DLayer(conv1, pool_size=(3, 3), stride=(2, 2), pad=1) conv2 = batch_norm( Conv2DLayer(pool1, num_filters=32, filter_size=(3, 3), stride=(2, 2), nonlinearity=rectify, pad='same')) conv2 = batch_norm( Conv2DLayer(conv2, num_filters=32, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify, pad='same')) downsample1 = Conv2DLayer(pool1, num_filters=32, filter_size=(1, 1), stride=(2, 2), nonlinearity=rectify, pad='same') input3 = ElemwiseSumLayer([downsample1, conv2]) conv3 = batch_norm( Conv2DLayer(input3, num_filters=64, filter_size=(3, 3), stride=(2, 2), nonlinearity=rectify, pad='same')) conv3 = batch_norm( Conv2DLayer(conv3, num_filters=64, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify, pad='same')) downsample2 = Conv2DLayer(input3, num_filters=64, filter_size=(1, 1), stride=(2, 2), nonlinearity=rectify, pad='same') input4 = ElemwiseSumLayer([downsample2, conv3]) conv4 = batch_norm( Conv2DLayer(input4, num_filters=128, filter_size=(3, 3), stride=(2, 2), nonlinearity=rectify, pad='same')) conv4 = batch_norm( Conv2DLayer(conv4, num_filters=128, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify, pad='same')) downsample3 = Conv2DLayer(input4, num_filters=128, filter_size=(1, 1), stride=(2, 2), nonlinearity=rectify, pad='same') input5 = ElemwiseSumLayer([downsample3, conv4]) flatten = DropoutLayer(FlattenLayer(input5), 0.5) prob_out = DenseLayer(flatten, num_units=1, nonlinearity=sigmoid) turn_angle = DenseLayer(flatten, num_units=1, nonlinearity=tanh) return prob_out, turn_angle # declare the variables used in the network self.X = T.ftensor4() self.Y = T.fmatrix() self.Z = T.fmatrix() # Lasagne object for the network self.CollisionProbability, self.TurnAngle = network(self.X) if is_training: # collision probability for training # and testing. Output is a theano object self.collision_prob = get_output(self.CollisionProbability) self.collision_prob_test = get_output(self.CollisionProbability, deterministic=True) # turn angle for training anf testing. # Output is a theano object. self.turn_angle = get_output(self.TurnAngle) self.turn_angle_test = get_output(self.TurnAngle, deterministic=True) # Loss for the network. self.collision_loss = binary_crossentropy(self.collision_prob, self.Y).mean() self.turn_loss = squared_error(self.turn_angle, self.Z).mean() # Loss to call for testing and validation. self.test_collision_loss = binary_crossentropy( self.collision_prob_test, self.Y).mean() self.test_turn_loss = squared_error(self.turn_angle_test, self.Z).mean() # network parameters for training. 
self.collision_params = get_all_params(self.CollisionProbability, trainable=True) self.turn_params = get_all_params(self.TurnAngle, trainable=True) # network updates self.collision_updates = adam(self.collision_loss, self.collision_params, learning_rate=0.001) self.turn_updates = adam(self.turn_loss, self.turn_params, learning_rate=0.00005) # get test loss self.test_collision = theano.function( inputs=[self.X, self.Y], outputs=self.test_collision_loss, allow_input_downcast=True) self.test_turn = theano.function(inputs=[self.X, self.Z], outputs=self.test_turn_loss, allow_input_downcast=True) # training functions self.train_collision = theano.function( inputs=[self.X, self.Y], outputs=self.collision_loss, updates=self.collision_updates, allow_input_downcast=True) self.train_turn = theano.function(inputs=[self.X, self.Z], outputs=self.turn_loss, updates=self.turn_updates, allow_input_downcast=True) else: # collision probability for # testing. Output is a theano object self.collision_prob_test = get_output(self.CollisionProbability, deterministic=True) # turn angle for testing. # Output is a theano object. self.turn_angle_test = get_output(self.TurnAngle, deterministic=True) # run the network to calculate collision probability # and turn angle given an input. self.dronet = theano.function( inputs=[self.X], outputs=[self.turn_angle_test, self.collision_prob_test], allow_input_downcast=True) def load(): with np.load(self.model_name) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] set_all_param_values([self.CollisionProbability, self.TurnAngle], param_values) if load_weights: load()
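A hedged inference sketch: only __init__ is shown above, so the enclosing class name (DroNet here) is an assumption; in non-training mode the compiled self.dronet function maps a grayscale frame batch to a turn angle and a collision probability.

import numpy as np

net = DroNet(load_weights=True, is_training=False)  # hypothetical class name

# one grayscale frame: (batch, channels, height, width) = (1, 1, 120, 160)
frame = np.zeros((1, 1, 120, 160), dtype=np.float32)
turn_angle, collision_prob = net.dronet(frame)
print('steer: %.3f  collision p: %.3f' % (float(turn_angle), float(collision_prob)))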
def create_network(available_actions_count): # Create the input variables s1 = tensor.tensor4("State") a = tensor.vector("Action", dtype="int32") q2 = tensor.vector("Q2") r = tensor.vector("Reward") isterminal = tensor.vector("IsTerminal", dtype="int8") # Create the input layer of the network. dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1) # Add 2 convolutional layers with ReLu activation dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=3) dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=2) # Add a single fully-connected layer. dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1)) # Add the output layer (also fully-connected). # (no nonlinearity as it is for approximating an arbitrary real function) dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None) # Define the loss function q = get_output(dqn) # target differs from q only for the selected action. The following means: # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r target_q = tensor.set_subtensor( q[tensor.arange(q.shape[0]), a], r + discount_factor * (1 - isterminal) * q2) loss = squared_error(q, target_q).mean() # Update the parameters according to the computed gradient using RMSProp. params = get_all_params(dqn, trainable=True) updates = rmsprop(loss, params, learning_rate) # Compile the theano functions print("Compiling the network ...") function_learn = theano.function([s1, q2, a, r, isterminal], loss, updates=updates, name="learn_fn") function_get_q_values = theano.function([s1], q, name="eval_fn") function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn") print("Network compiled.") def simple_get_best_action(state): return function_get_best_action( state.reshape([1, 1, resolution[0], resolution[1]])) # Returns Theano objects for the net and functions. return dqn, function_learn, function_get_q_values, simple_get_best_action
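A small numpy illustration (not from the source) of the target rule described in the comment above: only the entry for the taken action is moved towards r + gamma * max Q(s2, .), and terminal transitions use the reward alone.

import numpy as np

discount_factor = 0.99
q = np.array([[0.2, 0.5, 0.1],      # current Q(s, .) for a batch of two states
              [0.4, 0.3, 0.8]])
a = np.array([1, 2])                # actions taken
r = np.array([1.0, -1.0])           # rewards received
q2 = np.array([0.7, 0.9])           # max_a' Q(s2, a') of the next states
isterminal = np.array([0, 1])

target_q = q.copy()
target_q[np.arange(len(a)), a] = r + discount_factor * (1 - isterminal) * q2
print(target_q)
# row 0: the action-1 entry becomes 1.0 + 0.99 * 0.7 = 1.693
# row 1: the episode ended, so the action-2 entry becomes just -1.0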
X_action = T.bvector() X_reward = T.fvector() X_done = T.bvector() X_action_hot = to_one_hot(X_action, n_action) q_ = q_network(X_state); q = get_output(q_) q_target_ = q_network(X_next_state); q_target = get_output(q_target_) q_max = T.max(q_target, axis=1) action = T.argmax(q, axis=1) mu = theano.function(inputs = [X_state], outputs = action, allow_input_downcast = True) loss = squared_error(X_reward + gamma * q_max * (1.0 - X_done), T.batched_dot(q, X_action_hot)) loss = loss.mean() params = get_all_params(q_) grads = T.grad(loss, params) normed_grads = total_norm_constraint(grads, 1.0) updates = adam(normed_grads, params, learning_rate = learning_rate) update_network = theano.function(inputs = [X_state, X_action,
def get_elementwise_objective(Qvalues, actions, rewards, is_alive="always", gamma_or_gammas=0.95, force_qvalues_after_end=True, qvalues_after_end="zeros", consider_reference_constant=True, ): """ Returns squared error between predicted and reference Qvalues according to Q-learning algorithm Qreference(state,action) = reward(state,action) + gamma* Q(next_state,next_action) loss = mean over (Qvalues - Qreference)**2 parameters: Qvalues [batch,tick,action_id] - predicted qvalues actions [batch,tick] - commited actions rewards [batch,tick] - immediate rewards for taking actions at given time ticks is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active. Default value of is_alive implies a simplified computation algorithm for Qlearning loss gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts force_qvalues_after_end - if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end qvalues_after_end [batch,1,n_actions] - symbolic expression for "next state q-values" for last tick used for reference only. Defaults at T.zeros_like(Qvalues[:,0,None,:]) If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] ) consider_reference_constant - whether or not zero-out gradient flow through reference_Qvalues (True is highly recommended) Returns: tensor [batch, tick] of squared errors over Qvalues (using formula above for loss) """ # get reference Qvalues via Q-learning algorithm reference_Qvalues = get_reference_Qvalues(Qvalues, actions, rewards, gamma_or_gammas=gamma_or_gammas, qvalues_after_end=qvalues_after_end, ) if consider_reference_constant: # do not pass gradient through reference Q-values (since they DO depend on Q-values by default) reference_Qvalues = consider_constant(reference_Qvalues) # get predicted qvalues for committed actions (to compare with reference Q-values) action_Qvalues = get_action_Qvalues(Qvalues, actions) # if agent is always alive, return the simplified loss if is_alive == "always": # tensor of element-wise squared errors elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues) else: # we are given an is_alive matrix : uint8[batch,tick] # if asked to force reference_Q[end_tick+1,a] = 0, do it # note: if agent is always alive, this is meaningless if force_qvalues_after_end: # set future rewards at session end to rewards + qvalues_after_end end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero() if qvalues_after_end == "zeros": # "set reference Q-values at end action ids to just the immediate rewards" reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids]) else: last_optimal_rewards = T.zeros_like(rewards[:, 0]) # "set reference Q-values at end action ids to the immediate rewards + qvalues after end" reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids] + gamma_or_gammas * last_optimal_rewards[ end_ids[0], 0] ) # tensor of element-wise squared errors elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues) # zero-out loss after session ended elwise_squared_error = elwise_squared_error * is_alive return elwise_squared_error
l_input = InputLayer((None, chan, width, height), input_var=input_var) l_conv1 = Conv2DLayer(l_input, num_filters=32, filter_size=(3, 3), nonlinearity=rectify, W=GlorotUniform()) l_pool1 = MaxPool2DLayer(l_conv1, pool_size=(2, 2)) l_conv2 = Conv2DLayer(l_pool1, num_filters=32, filter_size=(1, 1), nonlinearity=rectify, W=GlorotUniform()) l_depool1 = Unpool2DLayer(l_pool1, (2, 2)) l_deconv1 = TransposeConv2DLayer(l_depool1, num_filters=chan, filter_size=(3, 3), W=GlorotUniform(), nonlinearity=linear) l_out = l_deconv1 prediction = get_output(l_out) train_loss = squared_error(prediction, target_var) train_loss = train_loss.mean() valid_prediction = get_output(l_out, deterministic=True) valid_loss = squared_error(valid_prediction, target_var) valid_loss = valid_loss.mean() params = get_all_params(l_out, trainable=True) updates = adam(train_loss, params, learning_rate=1E-4) train_function = theano.function([input_var, target_var], train_loss, updates=updates) valid_function = theano.function([input_var, target_var], valid_loss) n_epochs = 1000 for e in range(n_epochs):
def __init__(self, source, layers_config, updates_func=nesterov_momentum, updates_kwargs=None, learning_rate=0.1, learning_rate_changes_by_iteration=None, experiment_name="", validation_interval=10, save_plot_interval=100, loss_function=lambda x, t: squared_error(x, t).mean(), layer_changes=None, seed=42, epoch_callbacks=None, do_save_activations=True, plotter=Plotter(), auto_reshape=True, logger=None): """ Parameters ---------- layers_config : list of dicts. Keys are: 'type' : BLSTMLayer or a subclass of lasagne.layers.Layer 'num_units' : int """ if logger is None: self.logger = logging.getLogger(experiment_name) else: self.logger = logger self.logger.info("Initialising network...") if seed is not None: np.random.seed(seed) self.source = source self.updates_func = updates_func self._learning_rate = theano.shared( sfloatX(learning_rate), name='learning_rate') self.logger.info( "Learning rate initialised to {:.1E}".format(learning_rate)) self.learning_rate_changes_by_iteration = none_to_dict( learning_rate_changes_by_iteration) self.updates_kwargs = none_to_dict(updates_kwargs) self.experiment_name = experiment_name self.validation_interval = validation_interval self.save_plot_interval = save_plot_interval self.loss_function = loss_function self.layer_changes = none_to_dict(layer_changes) self.epoch_callbacks = none_to_dict(epoch_callbacks) self.do_save_activations = do_save_activations self.plotter = plotter self.plotter.net = self self.auto_reshape = auto_reshape self.set_csv_filenames() self.generate_validation_data_and_set_shapes() self.validation_costs = [] self.training_costs = [] self.training_costs_metadata = [] self.layers = [] self.layer_labels = {} # Shape is (number of examples per batch, # maximum number of time steps per example, # number of features per example) input_layer = InputLayer(shape=self.input_shape) self.layer_labels['input'] = input_layer self.layers.append(input_layer) self.add_layers(layers_config) self.logger.info( "Done initialising network for " + self.experiment_name)
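The real add_layers() is not shown above, so the following is only a guess at how a layers_config list of dicts (with 'type', optional 'label', and constructor keyword arguments) could be turned into a Lasagne stack:

from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import tanh

def build_from_config(input_shape, layers_config):
    # each dict is consumed for 'type' (and optional 'label'); the remaining
    # keys are passed to the layer constructor as keyword arguments
    layers = [InputLayer(shape=input_shape)]
    labels = {'input': layers[0]}
    for config in layers_config:
        config = dict(config)
        layer_type = config.pop('type')
        label = config.pop('label', None)
        layer = layer_type(layers[-1], **config)
        layers.append(layer)
        if label is not None:
            labels[label] = layer
    return layers, labels

layers, labels = build_from_config(
    input_shape=(64, 128, 1),
    layers_config=[{'type': DenseLayer, 'num_units': 16, 'nonlinearity': tanh},
                   {'type': DenseLayer, 'num_units': 1, 'nonlinearity': None}])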
nonlinearity=rectify, W=lasagne.init.Normal(0.01)) l_deconv1_2 = Deconv2DLayer(l_deconv1_1, num_filters=64, filter_size=(3, 3), stride=1, crop='same', nonlinearity=rectify, W=lasagne.init.Normal(0.01)) l_output = Conv2DLayer(l_deconv1_2, num_filters=21, filter_size=(1, 1), pad=0) ### #l_output = DenseLayer(l_hidden1_dropout, num_units=10, nonlinearity=softmax) prediction = get_output(l_output) # target_var = T.ftensor4('true_output') ### loss = squared_error(prediction, target_var) loss = loss.mean() # loss.mean() # loss_train = squared_error(lasagne.layers.get_output(l_output, deterministic=False), true_output).mean() # loss_eval = squared_error(lasagne.layers.get_output(l_output, deterministic=True), true_output).mean() all_params = lasagne.layers.get_all_params(l_output, trainable=True) updates = lasagne.updates.nesterov_momentum(loss, all_params, learning_rate=0.001, momentum=0.985) train_fn = theano.function([input_var, target_var], loss, updates=updates) # get_output = theano.function([l_in.input_var], lasagne.layers.get_output(l_output, deterministic=True)) BATCH_SIZE = 10
def __init__(self, dim, mode, l2, l1, batch_norm, dropout, batch_size, input_dim=76, **kwargs): print "==> not used params in network class:", kwargs.keys() self.dim = dim self.mode = mode self.l2 = l2 self.l1 = l1 self.batch_norm = batch_norm self.dropout = dropout self.batch_size = batch_size self.input_var = T.tensor3('X') self.input_lens = T.ivector('L') self.target_var = T.vector('y') print "==> Building neural network" network = layers.InputLayer((None, None, input_dim), input_var=self.input_var) network = layers.LSTMLayer( incoming=network, num_units=dim, only_return_final=False, grad_clipping=10, ingate=lasagne.layers.Gate(W_in=Orthogonal(), W_hid=Orthogonal(), W_cell=Normal(0.1)), forgetgate=lasagne.layers.Gate(W_in=Orthogonal(), W_hid=Orthogonal(), W_cell=Normal(0.1)), cell=lasagne.layers.Gate(W_cell=None, nonlinearity=lasagne.nonlinearities.tanh, W_in=Orthogonal(), W_hid=Orthogonal()), outgate=lasagne.layers.Gate(W_in=Orthogonal(), W_hid=Orthogonal(), W_cell=Normal(0.1))) lstm_output = layers.get_output(network) self.params = layers.get_all_params(network, trainable=True) self.reg_params = layers.get_all_params(network, regularizable=True) # for each example in minibatch take the last output last_outputs = [] for index in range(self.batch_size): last_outputs.append(lstm_output[index, self.input_lens[index] - 1, :]) last_outputs = T.stack(last_outputs) network = layers.InputLayer(shape=(self.batch_size, self.dim), input_var=last_outputs) network = layers.DenseLayer(incoming=network, num_units=1, nonlinearity=rectify) self.prediction = layers.get_output(network) self.params += layers.get_all_params(network, trainable=True) self.reg_params += layers.get_all_params(network, regularizable=True) self.loss_mse = squared_error(self.prediction, self.target_var).mean() if self.l2 > 0: self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params) else: self.loss_l2 = T.constant(0) if self.l1 > 0: self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params) else: self.loss_l1 = T.constant(0) self.loss_reg = self.loss_l1 + self.loss_l2 self.loss = self.loss_mse + self.loss_reg #updates = lasagne.updates.adadelta(self.loss, self.params, # learning_rate=0.001) #updates = lasagne.updates.momentum(self.loss, self.params, # learning_rate=0.00003) #updates = lasagne.updates.adam(self.loss, self.params) updates = lasagne.updates.adam( self.loss, self.params, beta1=0.5, learning_rate=0.0001) # from DCGAN paper #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9, # learning_rate=0.001, ## compiling theano functions if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function( inputs=[self.input_var, self.input_lens, self.target_var], outputs=[self.prediction, self.loss, self.loss_reg], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function( inputs=[self.input_var, self.input_lens, self.target_var], outputs=[self.prediction, self.loss, self.loss_reg])
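nn_utils.l2_reg / l1_reg above are external helpers; assuming they compute plain sum-of-squares and sum-of-absolute-values penalties over the regularizable parameters, equivalent terms can be written with Lasagne's own regularization helpers:

import numpy as np
import theano
from lasagne.regularization import apply_penalty, l1, l2

W = theano.shared(np.ones((3, 2), dtype='float32'))  # stand-in parameter
reg_params = [W]
l2_penalty = apply_penalty(reg_params, l2)   # sum(p ** 2) over all listed params
l1_penalty = apply_penalty(reg_params, l1)   # sum(|p|) over all listed params
# total loss would then be: loss_mse + weight_l2 * l2_penalty + weight_l1 * l1_penalty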
def net_dict_ae_rnn(seq_length):
    NUM_FILTERS = 8
    return dict(
        epochs=None,
        save_plot_interval=5000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=1e-2,
        learning_rate_changes_by_iteration={
            110000: 1e-3
        },
        do_save_activations=True,
        auto_reshape=False,
        plotter=Plotter(
            n_seq_to_plot=32,
            n_training_examples_to_plot=16
        ),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'label': 'conv0',
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': NUM_FILTERS,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'valid'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # back to (batch, time, features)
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 128,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': ReshapeLayer,
                'shape': (-1, (seq_length - 3), NUM_FILTERS)
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {   # DeConv
                'type': Conv1DLayer,
                'num_filters': 1,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'full'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1),  # back to (batch, time, features)
                'label': 'AE_output'
            }
        ],
        layer_changes={
            100001: {
                'new_layers': [
                    {
                        'type': ConcatLayer,
                        'axis': 2,
                        'incomings': ['input', 'AE_output']
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64 * seq_length, 2)
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 16,
                        'nonlinearity': tanh
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64, seq_length, 16)
                    },
                    {
                        'type': BLSTMLayer,
                        'num_units': 128,
                        'merge_mode': 'concatenate',
                        'grad_clipping': 10.0,
                        'gradient_steps': 500
                    },
                    {
                        'type': BLSTMLayer,
                        'num_units': 256,
                        'merge_mode': 'concatenate',
                        'grad_clipping': 10.0,
                        'gradient_steps': 500
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64 * seq_length, 512)
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 128,
                        'nonlinearity': tanh
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 1,
                        'nonlinearity': None
                    }
                ]
            }
        }
    )
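# A quick sanity check of the dense-layer sizing used in net_dict_ae_rnn above
# (a sketch; seq_length=512 is an arbitrary example value, not from the original):
# a 'valid' Conv1DLayer with filter_size=4 and stride=1 shortens the time axis by
# filter_size - 1 = 3 samples, which is where the (seq_length - 3) * NUM_FILTERS
# unit counts come from.
seq_length = 512
NUM_FILTERS = 8
conv_time_steps = seq_length - 4 + 1            # 509 output time steps
flat_units = conv_time_steps * NUM_FILTERS      # 4072
assert flat_units == (seq_length - 3) * NUM_FILTERS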
INPUT_STATS = {
    'mean': np.array([297.87216187], dtype=np.float32),
    'std': np.array([374.43884277], dtype=np.float32)
}


def only_train_on_real_data(net, iteration):
    net.logger.info(
        "Iteration {}: Now only training on real data.".format(iteration))
    net.source.sources[0]['train_probability'] = 0.0
    net.source.sources[1]['train_probability'] = 1.0


net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
    loss_function=lambda x, t: squared_error(x, t).mean(),
    updates_func=nesterov_momentum,
    learning_rate=1e-4,
    learning_rate_changes_by_iteration={
        400000: 1e-5,
        500000: 1e-6
    },
    epoch_callbacks={
        350000: only_train_on_real_data
    },
    do_save_activations=True,
    auto_reshape=False,
    layers_config=[
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)  # (batch, features, time)
def get_model():
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')

    # input layer with unspecified batch size
    layer_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)  # Z-score?

    # Two convolution + batch normalisation + activation blocks,
    # then a padded max-pooling layer followed by dropout
    layer_1 = batch_norm(
        Conv2DLayer(layer_0, 64, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_2 = batch_norm(
        Conv2DLayer(layer_1, 64, (3, 3), pad='valid', nonlinearity=leaky_rectify))
    layer_3 = MaxPool2DLayer(layer_2, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_4 = DropoutLayer(layer_3, p=0.25)

    # Two convolution + batch normalisation + activation blocks,
    # then a padded max-pooling layer followed by dropout
    layer_5 = batch_norm(
        Conv2DLayer(layer_4, 96, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_6 = batch_norm(
        Conv2DLayer(layer_5, 96, (3, 3), pad='valid', nonlinearity=leaky_rectify))
    layer_7 = MaxPool2DLayer(layer_6, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_8 = DropoutLayer(layer_7, p=0.25)

    # Two convolution + batch normalisation + activation blocks,
    # then a padded max-pooling layer followed by dropout
    layer_9 = batch_norm(
        Conv2DLayer(layer_8, 128, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_10 = batch_norm(
        Conv2DLayer(layer_9, 128, (3, 3), pad='valid', nonlinearity=leaky_rectify))
    layer_11 = MaxPool2DLayer(layer_10, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_12 = DropoutLayer(layer_11, p=0.25)

    # Last layers
    layer_13 = FlattenLayer(layer_12)
    layer_14 = DenseLayer(layer_13, 1024, nonlinearity=leaky_rectify)
    layer_15 = DropoutLayer(layer_14, p=0.5)
    layer_16 = DenseLayer(layer_15, 600, nonlinearity=softmax)

    # Loss
    prediction = get_output(layer_16)
    loss = squared_error(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(layer_14, l2)

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_16, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling the dropout layers.
    test_prediction = get_output(layer_16, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss:
    val_fn = theano.function([input_var, target_var], test_loss)

    # Compile a third function computing the prediction:
    predict_fn = theano.function([input_var], test_prediction)

    return [layer_16, train_fn, val_fn, predict_fn]
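# A minimal smoke test of the functions compiled by get_model() above. This is
# only a sketch: the batch size of 8 and the random arrays are placeholders,
# not part of the original script.
import numpy as np

layer_16, train_fn, val_fn, predict_fn = get_model()

inputs = np.random.rand(8, 30, 64, 64).astype(np.float32)
targets = np.random.rand(8, 600).astype(np.float32)

print(train_fn(inputs, targets))   # one SGD step, returns the training loss
print(val_fn(inputs, targets))     # deterministic validation loss
print(predict_fn(inputs).shape)    # (8, 600) softmax outputs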
def run_network(data=None, num_epochs=10, ratio=0.5):
    try:
        global_start_time = time()
        sequence_length = 50
        batchsize = 512
        path_to_dataset = 'household_power_consumption.txt'

        # Loading the data
        if data is None:
            print 'Loading data... '
            X_train, y_train, X_test, y_test = data_power_consumption(
                path_to_dataset, sequence_length, ratio)
        else:
            X_train, y_train, X_test, y_test = data

        # Split off a small validation set from the front of the training data
        val_ratio = 0.005
        val_rows = int(round(val_ratio * X_train.shape[0]))
        X_val = X_train[:val_rows]
        y_val = y_train[:val_rows]
        y_val = np.reshape(y_val, (y_val.shape[0], 1))
        X_train = X_train[val_rows:]
        y_train = y_train[val_rows:]

        # Creating the Theano variables
        input_var = T.tensor3('inputs')
        target_var = T.matrix('targets')

        # Building the Theano expressions on these variables
        network = build_model(input_var)

        prediction = lasagne.layers.get_output(network)
        loss = squared_error(prediction, target_var)
        loss = aggregate(loss)

        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = rmsprop(loss, params, learning_rate=0.001)

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = squared_error(test_prediction, target_var)
        test_loss = aggregate(test_loss)

        # Compiling the graph by declaring the Theano functions
        compile_time = time()

        print 'Data:'
        print 'X_train ', X_train.shape, ' y_train ', y_train.shape
        print 'X_val   ', X_val.shape, ' y_val   ', y_val.shape
        print 'X_test  ', X_test.shape, ' y_test  ', y_test.shape

        print "Compiling..."
        train_fn = theano.function([input_var, target_var],
                                   loss, updates=updates)
        val_fn = theano.function([input_var, target_var], test_loss)
        get_pred_fn = theano.function([input_var], prediction)
        print "Compiling time : ", time() - compile_time

        # Loop that goes through the whole training and validation data
        # once per epoch
        #
        # T R A I N I N G
        # - - - - - - - -
        print "Starting training...\n"
        for epoch in range(num_epochs):

            # Going over the training data
            train_err = 0
            train_batches = 0
            start_time = time()
            nb_batches = X_train.shape[0] / batchsize
            time_line = np.zeros(nb_batches)
            for batch in iterate_minibatches(X_train, y_train,
                                             batchsize, shuffle=True):
                current_time = time()
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1
                str_out = "\rTrain Batch " + str(train_batches)
                str_out += "/" + str(nb_batches)
                str_out += " | Loss : " + str(train_err / train_batches)[:7]
                str_out += " | Remaining time (s) : "
                remaining_seconds = time() - current_time
                remaining_seconds *= (nb_batches - train_batches)
                time_line[train_batches - 1] = round(remaining_seconds)
                if (train_batches - 1) % 5 == 0:
                    durations = time_line[train_batches - 1: train_batches + 50]
                    durations = np.mean([t for t in durations if t > 0])
                    str_out += str(durations)
                sys.stdout.write(str_out)
                sys.stdout.flush()

            print "\nGoing through validation data"

            # Going over the validation data
            val_err = 0
            val_batches = 0
            for batch in iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                inputs, targets = batch
                err = val_fn(inputs, targets)
                val_err += err
                val_batches += 1

            # Then we print the results for this epoch:
            print "training loss:\t\t\t" + str(train_err / train_batches)
            print "validation loss:\t\t" + str(val_err / val_batches)
            print("Epoch {} of {} took {:.3f}s \n\n".format(
                epoch + 1, num_epochs, time() - start_time))

        # Now that the training is over, let's test the network:
        test_err = 0
        test_batches = 0
        for batch in iterate_minibatches(
                X_test, y_test, batchsize, shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            test_err += err
            test_batches += 1
        print "\nFinal results in {0} seconds:".format(
            time() - global_start_time)
        print "Test loss:\t\t\t{:.6f}".format(test_err / test_batches)

        # Plot the first predictions against the ground truth
        prediction_size = 200
        predicted = get_pred_fn(X_test[:prediction_size])

        try:
            plt.plot(predicted)
            plt.plot(y_test[:prediction_size])
            plt.show(block=False)
        except Exception as e:
            print str(e)
            print "predicted = ", repr(np.reshape(predicted[:prediction_size],
                                                  (prediction_size,)))
            print '\n'
            print "y = ", repr(np.reshape(y_test[:prediction_size],
                                          (prediction_size,)))
        return network

    except KeyboardInterrupt:
        return network
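# iterate_minibatches() is called above but not defined in this snippet. A
# minimal sketch consistent with how it is used here (and with the standard
# Lasagne tutorial helper); note that it drops the last incomplete batch, which
# matches how nb_batches is computed above.
import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]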
def get_elementwise_objective_components(policy,
                                         rewards,
                                         policy_values,
                                         action_values='same',
                                         is_alive="always",
                                         n_steps=None,
                                         gamma_or_gammas=0.99,
                                         crop_last=True,
                                         force_values_after_end=True,
                                         state_values_after_end="zeros",
                                         consider_value_reference_constant=True,
                                         consider_predicted_value_constant=True,
                                         scan_dependencies=tuple(),
                                         scan_strict=True,
                                         ):
    """
    Returns deterministic policy gradient components for actor and critic:

        L_policy = -critic(state, policy) = -action_values
        L_V = (V - V_reference)^2

    You will have to independently compute updates for actor and critic and
    then add them up.

    Parameters:

        policy [batch, tick, action_id] - predicted "optimal policy" (mu)
        rewards [batch, tick] - immediate rewards for taking actions at given time ticks
        policy_values [batch, tick] - predicted state values given the OPTIMAL policy
        action_values [batch, tick] - predicted Q-values for committed actions,
            INCLUDING EXPLORATION if any.
            The default value implies action_values = state_values if there is no exploration.
        is_alive [batch, tick] - whether a given session is still active at a given tick.
            Defaults to always active. The default value implies a simplified
            computation algorithm for the Q-learning loss.
        n_steps - if an integer is given, the references are computed in loops of
            n_steps states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly like Q-learning (though as a
            less efficient implementation).
            If you provide a symbolic integer here AND strict == True, make sure
            you added the variable to dependencies.
        gamma_or_gammas - a single value or an array [batch, tick] (can broadcast
            dimensions) of delayed reward discounts
        crop_last - if True, zeros out the loss at the final tick; if False,
            computes the loss against Qvalues_after_end
        force_values_after_end - if True, sets the reference policy at session end
            to rewards[end] + qvalues_after_end
        state_values_after_end [batch, 1, n_actions] - "next state values" for the
            last tick, used for reference only.
            Defaults to T.zeros_like(state_values[:, 0, None, :]).
            If you wish to simply ignore the last tick, use the defaults and crop
            the output's last tick (qref[:, :-1]).
        scan_dependencies - everything you need to evaluate the first 3 parameters
            (only if strict == True)
        scan_strict - whether to evaluate values using strict theano scan or a
            non-strict one

    Returns:

        Element-wise sum of policy_loss + state_value_loss
    """

    if action_values == 'same':
        action_values = policy_values

    # get reference values via the DPG algorithm
    reference_action_values = get_n_step_value_reference(
        action_values,
        rewards,
        is_alive,
        n_steps=n_steps,
        optimal_state_values_after_end=state_values_after_end,
        gamma_or_gammas=gamma_or_gammas,
        dependencies=scan_dependencies,
        strict=scan_strict
    )

    if is_alive != "always" and force_values_after_end:
        # if asked to force reference_Q[end_tick + 1, a] = 0, do it
        # note: if the agent is always alive, this is meaningless

        # set future rewards at session end to rewards + qvalues_after_end
        end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

        if state_values_after_end == "zeros":
            # set reference state values at end action ids to just the immediate rewards
            reference_action_values = T.set_subtensor(
                reference_action_values[end_ids], rewards[end_ids])
        else:
            # set reference state values at end action ids to the immediate
            # rewards + q-values after the end
            new_subtensor_values = rewards[end_ids] + \
                gamma_or_gammas * state_values_after_end[end_ids[0], 0]
            reference_action_values = T.set_subtensor(
                reference_action_values[end_ids], new_subtensor_values)

    # now compute the loss components
    if is_alive == "always":
        is_alive = T.ones_like(action_values, dtype=theano.config.floatX)

    # actor loss
    # here we rely on the fact that state_values = critic(state, optimal_policy);
    # using the chain rule,
    # grad(state_values, actor_weights) =
    #     grad(state_values, optimal_policy) * grad(optimal_policy, actor_weights)
    policy_loss_elwise = -policy_values

    # critic loss
    reference_action_values = consider_constant(reference_action_values)
    V_err_elementwise = squared_error(reference_action_values, action_values)

    if crop_last:
        V_err_elementwise = T.set_subtensor(V_err_elementwise[:, -1], 0)

    return policy_loss_elwise * is_alive, V_err_elementwise * is_alive
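# A sketch of how the two returned components might be turned into the separate
# actor and critic updates that the docstring asks for. Everything here is a
# placeholder: policy, rewards, policy_values and action_values are symbolic
# variables built elsewhere, actor_params / critic_params are the corresponding
# parameter lists, and the learning rates are illustrative only.
import lasagne

policy_loss_elwise, v_err_elwise = get_elementwise_objective_components(
    policy, rewards, policy_values, action_values)

actor_loss = policy_loss_elwise.sum(axis=1).mean()
critic_loss = v_err_elwise.sum(axis=1).mean()

# compute updates independently for each parameter set ...
actor_updates = lasagne.updates.adam(actor_loss, actor_params, learning_rate=1e-4)
critic_updates = lasagne.updates.adam(critic_loss, critic_params, learning_rate=1e-3)

# ... then add them up into a single updates dictionary for one training function
all_updates = dict(actor_updates)
all_updates.update(critic_updates)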
# Get all trainable params
params = layers.get_all_params(unsupervised_graph, trainable=True) + \
         layers.get_all_params(supervised_graph, trainable=True)
# params = layers.get_all_params(supervised_graph)[-2:]
params = utils.unique(params)

# Get regularizable params
regularization_params = layers.get_all_params(unsupervised_graph, regularizable=True) + \
                        layers.get_all_params(supervised_graph, regularizable=True)
regularization_params = utils.unique(regularization_params)

# Creating loss functions
# The training loss has to take into account whether an image is labeled or not
if run_parameters.unsupervised_cost_fun == 'squared_error':
    loss1 = objectives.squared_error(reconstruction, input_var)
elif run_parameters.unsupervised_cost_fun == 'categorical_crossentropy':
    loss1 = objectives.categorical_crossentropy(reconstruction, input_var)

if supervised_cost_fun == 'squared_error':
    loss2 = objectives.squared_error(prediction, target_var) * repeat_col(labeled_var, 10)
elif supervised_cost_fun == 'categorical_crossentropy':
    loss2 = objectives.categorical_crossentropy(prediction, target_var) * labeled_var.T

l2_penalties = regularization.apply_penalty(regularization_params, regularization.l2)

sparse_layers = get_all_sparse_layers(unsupervised_graph)
sparse_layers_output = layers.get_output(sparse_layers, deterministic=True)
if run_parameters.sparse_regularizer_type == 0:
    sparse_regularizer = reduce(
        lambda x, y: x + T.clip(
            (T.mean(abs(y)) - run_parameters.sparse_regularize_factor) * y.size,
            0, float('inf')),
        sparse_layers_output, 0)
elif run_parameters.sparse_regularizer_type == 1:
    sparse_regularizer = reduce(
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_input = InputLayer(shape=(None, 30, 80, 80), input_var=input_var)
    # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0 = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batch normalisation then activation,
    # then a padded max-pooling layer followed by dropout
    layer_1 = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=64,
                                        filter_size=(3, 3, 3), stride=(1, 3, 3),
                                        pad='same', nonlinearity=leaky_rectify))
    layer_2 = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2),
                                pad=(0, 1, 1))
    layer_3 = DropoutLayer(layer_2, p=0.25)

    # Convolution then batch normalisation then activation,
    # then a padded max-pooling layer followed by dropout
    layer_4 = batch_norm(Conv3DDNNLayer(incoming=layer_3, num_filters=128,
                                        filter_size=(3, 3, 3), stride=(1, 3, 3),
                                        pad='same', nonlinearity=leaky_rectify))
    layer_5 = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2),
                                pad=(0, 1, 1))
    layer_6 = DropoutLayer(layer_5, p=0.25)

    # Recurrent layer
    layer_7 = DimshuffleLayer(layer_6, (0, 2, 1, 3, 4))
    layer_8 = LSTMLayer(layer_7, num_units=612, only_return_final=True)
    layer_9 = DropoutLayer(layer_8, p=0.25)

    # Output layers
    layer_hidden = DenseLayer(layer_9, 500, nonlinearity=sigmoid)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction) / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling the dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True) / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # CRPS estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
def __init__(self, atari_env, state_dimension, action_dimension,
             monitor_env=False, learning_rate=0.001, critic_update=10,
             train_step=1, gamma=0.95, eps_max=1.0, eps_min=0.1,
             eps_decay=10000, n_epochs=10000, batch_size=32,
             buffer_size=50000):
    self.env = gym.make(atari_env)
    if monitor_env:
        pass  # monitoring not set up
    self.state_dimension = state_dimension
    self.action_dimension = action_dimension
    self.learning_rate = learning_rate
    self.critic_update = critic_update
    self.train_step = train_step
    self.gamma = gamma
    self.eps_max = eps_max
    self.eps_min = eps_min
    self.eps_decay = eps_decay
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.buffer_size = buffer_size
    self.experience_replay = []

    def q_network(state):
        input_state = InputLayer(input_var=state,
                                 shape=(None,
                                        self.state_dimension[0],
                                        self.state_dimension[1],
                                        self.state_dimension[2]))
        # (batch, height, width, channels) -> (batch, channels, height, width)
        input_state = DimshuffleLayer(input_state, pattern=(0, 3, 1, 2))
        conv = Conv2DLayer(input_state, num_filters=32, filter_size=(8, 8),
                           stride=(4, 4), nonlinearity=rectify)
        conv = Conv2DLayer(conv, num_filters=64, filter_size=(4, 4),
                           stride=(2, 2), nonlinearity=rectify)
        conv = Conv2DLayer(conv, num_filters=64, filter_size=(3, 3),
                           stride=(1, 1), nonlinearity=rectify)
        flatten = FlattenLayer(conv)
        dense = DenseLayer(flatten, num_units=512, nonlinearity=rectify)
        q_values = DenseLayer(dense, num_units=self.action_dimension,
                              nonlinearity=linear)
        return q_values

    self.X_state = T.ftensor4()
    self.X_action = T.bvector()
    self.X_reward = T.fvector()
    self.X_next_state = T.ftensor4()
    self.X_done = T.bvector()

    self.X_action_hot = to_one_hot(self.X_action, self.action_dimension)

    self.q_ = q_network(self.X_state)
    self.q = get_output(self.q_)
    self.q_target_ = q_network(self.X_next_state)
    self.q_target = get_output(self.q_target_)
    self.q_max = T.max(self.q_target, axis=1)
    self.action = T.argmax(self.q, axis=1)

    # greedy policy: the action with the highest predicted Q-value
    self.mu = theano.function(inputs=[self.X_state],
                              outputs=self.action,
                              allow_input_downcast=True)

    # squared TD error between the bootstrapped target and the Q-value
    # of the action that was actually taken
    self.loss = squared_error(
        self.X_reward + self.gamma * self.q_max * (1.0 - self.X_done),
        T.batched_dot(self.q, self.X_action_hot))
    self.loss = self.loss.mean()

    self.params = get_all_params(self.q_)
    self.grads = T.grad(self.loss, self.params)
    self.normed_grads = total_norm_constraint(self.grads, 1.0)
    self.updates = rmsprop(self.normed_grads, self.params,
                           learning_rate=self.learning_rate)

    self.update_network = theano.function(
        inputs=[self.X_state, self.X_action, self.X_reward,
                self.X_next_state, self.X_done],
        outputs=self.loss,
        updates=self.updates,
        allow_input_downcast=True)
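# A sketch of how the compiled functions above might be used in an interaction
# loop. The `agent` instance, the linear epsilon schedule, and the assumption
# that each replay entry is a (state, action, reward, next_state, done) tuple
# are placeholders, not part of the original class.
import random
import numpy as np

def epsilon_greedy_action(agent, state, step):
    eps = max(agent.eps_min,
              agent.eps_max - (agent.eps_max - agent.eps_min) * step / float(agent.eps_decay))
    if random.random() < eps:
        return random.randrange(agent.action_dimension)
    # agent.mu expects a batch of states, so add a leading batch axis
    return int(agent.mu(state[np.newaxis])[0])

def replay_update(agent):
    batch = random.sample(agent.experience_replay, agent.batch_size)
    states, actions, rewards, next_states, dones = map(np.array, zip(*batch))
    return agent.update_network(states, actions, rewards, next_states, dones)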
def net_dict_rectangles(seq_length):
    return dict(
        epochs=300000,
        save_plot_interval=25000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=1e-4,
        learning_rate_changes_by_iteration={
            200000: 1e-5,
            250000: 1e-6
        },
        epoch_callbacks={350000: only_train_on_real_data},
        do_save_activations=True,
        auto_reshape=False,
        plotter=StartEndMeanPlotter(
            n_seq_to_plot=32, n_training_examples_to_plot=16),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'type': PadLayer,
                'width': 4
            },
            {
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': 16,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'border_mode': 'valid'
            },
            {
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': 16,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'border_mode': 'valid'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # back to (batch, time, features)
            },
            {
                'type': DenseLayer,
                'num_units': 512 * 8,
                'nonlinearity': rectify
            },
            # {
            #     'type': DenseLayer,
            #     'num_units': 512 * 6,
            #     'nonlinearity': rectify
            # },
            {
                'type': DenseLayer,
                'num_units': 512 * 4,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 512,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 3,
                'nonlinearity': None
            }
        ])