def create_network(available_actions_count):
    # Create the input variables
    s1 = T.matrix("State")
    a = T.vector("Action", dtype="int32")
    q2 = T.vector("Q2")
    r = T.vector("Reward")
    isterminal = T.vector("IsTerminal", dtype="int8")

    input_state = s1  # Input(shape=(4096,))

    # Add a single fully-connected layer.
    dense_1 = layers.FCLayer(input=input_state, fan_in=4096, num_hidden=128)

    # Add the output layer.
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dense_2 = layers.FCLayer(input=dense_1.out, fan_in=128,
                             num_hidden=available_actions_count,
                             activation=None)
    q = dense_2.out

    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = T.set_subtensor(q[T.arange(q.shape[0]), a],
                               r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = dense_1.params + dense_2.params
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn",
                                     allow_input_downcast=True)
    function_get_q_values = theano.function([s1], q, name="eval_fn",
                                            allow_input_downcast=True)
    function_get_best_action = theano.function([s1], T.argmax(q),
                                               name="test_fn",
                                               allow_input_downcast=True)
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(state)

    # Returns Theano objects for the net and functions.
    return params, function_learn, function_get_q_values, simple_get_best_action
def generate_theano_func(args, network, penalty, input_dict, target_var):
    prediction = get_output(network, input_dict)

    # loss = T.mean(target_var * (T.log(target_var) - prediction))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # loss += 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(network))
    # penalty = sum(T.sum(lstm_param ** 2) for lstm_param in lstm_params)
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param ** 2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1))
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean(target_var * (T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    # input1_var, input1_mask_var, input2_var and input2_mask_var are expected
    # to be defined in the enclosing scope.
    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss, updates=updates, allow_input_downcast=True,
    )

    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction], allow_input_downcast=True,
        )
    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
        #                   dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc], allow_input_downcast=True,
        )

    return train_fn, val_fn
def build_updates(grad, params, optimization, learning_rate):
    """Set up the optimization algorithm."""
    if optimization['optimizer'] == 'sgd':
        update_op = updates.sgd(grad, params, learning_rate=learning_rate)

    elif optimization['optimizer'] == 'nesterov_momentum':
        momentum = optimization.get('momentum', 0.9)
        update_op = updates.nesterov_momentum(grad, params,
                                              learning_rate=learning_rate,
                                              momentum=momentum)

    elif optimization['optimizer'] == 'adagrad':
        update_op = updates.adagrad(grad, params, learning_rate=learning_rate)

    elif optimization['optimizer'] == 'rmsprop':
        rho = optimization.get('rho', 0.9)
        update_op = updates.rmsprop(grad, params, learning_rate=learning_rate,
                                    rho=rho)

    elif optimization['optimizer'] == 'adam':
        beta1 = optimization.get('beta1', 0.9)
        beta2 = optimization.get('beta2', 0.999)
        update_op = updates.adam(grad, params, learning_rate=learning_rate,
                                 beta1=beta1, beta2=beta2)

    return update_op
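# Usage sketch for build_updates above (the tiny model is made up for
# illustration; `updates` is assumed to be lasagne.updates, as in the
# snippet). The caller computes the gradient expressions itself and passes
# them in, which is why the first argument is `grad` rather than a loss.
import theano
import theano.tensor as T
import lasagne
from lasagne import updates

x = T.matrix('x')
y = T.ivector('y')
net = lasagne.layers.InputLayer((None, 20), input_var=x)
net = lasagne.layers.DenseLayer(net, num_units=3,
                                nonlinearity=lasagne.nonlinearities.softmax)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.categorical_crossentropy(prediction, y).mean()
params = lasagne.layers.get_all_params(net, trainable=True)
grad = T.grad(loss, params)  # list of gradient expressions, one per param

optimization = {'optimizer': 'rmsprop', 'rho': 0.95}
update_op = build_updates(grad, params, optimization, learning_rate=1e-3)
train_fn = theano.function([x, y], loss, updates=update_op)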
def create_dqn(available_actions_num, input_shape, n_variables,
               learning_rate=0.00025, discount_factor=0.99):
    # Creates the input variables
    state = tensor.tensor4("State")
    state_after_action = tensor.tensor4("Next state")
    variables = tensor.matrix("Variables")
    variables_after_action = tensor.matrix("Next variables")
    action = tensor.vector("Actions", dtype="int32")
    reward = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    network = _create_network(available_actions_num, input_shape, state,
                              n_variables, variables)
    target_network = _create_network(available_actions_num, input_shape,
                                     state_after_action, n_variables,
                                     variables_after_action)

    q_values = get_output(network)
    next_q_values = get_output(target_network)

    target_action_q_value = tensor.clip(
        reward + discount_factor * nonterminal * tensor.max(next_q_values,
                                                            axis=1,
                                                            keepdims=False),
        -1, 1)
    target_q_values = tensor.set_subtensor(
        q_values[tensor.arange(q_values.shape[0]), action],
        target_action_q_value)

    loss = squared_error(q_values, target_q_values).mean()

    params = get_all_params(network, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    function_learn = theano.function(
        [state, variables, action, state_after_action,
         variables_after_action, reward, nonterminal],
        loss, updates=updates, name="learn_fn", on_unused_input="ignore")
    function_get_q_values = theano.function([state, variables], q_values,
                                            name="eval_fn",
                                            on_unused_input="ignore")
    function_get_best_action = theano.function([state, variables],
                                               tensor.argmax(q_values),
                                               name="best_action_fn",
                                               on_unused_input="ignore")
    function_get_max_q_value = theano.function([state, variables],
                                               tensor.max(q_values, axis=1),
                                               name="max_q_fn",
                                               on_unused_input="ignore")

    return (network, target_network, function_learn, function_get_q_values,
            function_get_best_action, function_get_max_q_value)
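# Usage sketch for create_dqn above: the target network is a second copy of
# the model with its own parameters, so its weights must periodically be
# copied over from the online network. With Lasagne this is a get/set of the
# parameter values; the sync interval below is a made-up hyperparameter.
from lasagne.layers import get_all_param_values, set_all_param_values

def sync_target_network(network, target_network):
    set_all_param_values(target_network, get_all_param_values(network))

# e.g. inside the training loop:
# if step % 1000 == 0:
#     sync_target_network(network, target_network)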
def create_network(available_actions_count):
    # Create the input variables
    s1 = T.tensor4("State")
    a = T.vector("Action", dtype="int32")
    q2 = T.vector("Q2")
    r = T.vector("Reward")
    isterminal = T.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    inputLayer = s1
    new_w = resolution[0]
    new_h = resolution[1]

    # Add 2 convolutional layers with ReLU activation.
    # filter_shape = [num_filters, num_input_feature_maps, filter_height, filter_width]
    input_shape_1 = [batch_size, 1, resolution[0], resolution[1]]
    filter_shape_1 = [8, 1, 6, 6]
    layer1 = layers.ConvLayer(input=inputLayer, filter_shape=filter_shape_1,
                              input_shape=input_shape_1, pool_size=None)
    new_w = (new_w - filter_shape_1[2] + 1) / 1  # No pooling
    new_h = (new_h - filter_shape_1[3] + 1) / 1  # No pooling

    input_shape_2 = [batch_size, 8, new_w, new_h]
    filter_shape_2 = [8, 8, 3, 3]
    layer2 = layers.ConvLayer(input=layer1.out, filter_shape=filter_shape_2,
                              input_shape=input_shape_2, pool_size=None)
    new_w = (new_w - filter_shape_2[2] + 1) / 1  # No pooling
    new_h = (new_h - filter_shape_2[3] + 1) / 1  # No pooling

    # Add a single fully-connected layer.
    layer3 = layers.FCLayer(input=layer2.out.flatten(2),
                            fan_in=filter_shape_2[0] * new_w * new_h,
                            num_hidden=128)

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    layer4 = layers.FCLayer(input=layer3.out, fan_in=128,
                            num_hidden=available_actions_count,
                            activation=None)
    q = layer4.out

    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = T.set_subtensor(q[T.arange(q.shape[0]), a],
                               r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = layer4.params + layer3.params + layer2.params + layer1.params
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], T.argmax(q, axis=1),
                                               name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        # The compiled function expects a full batch, so tile the single state.
        state = np.expand_dims(state, axis=0)
        state = np.expand_dims(state, axis=0)
        state = np.repeat(state, batch_size, axis=0)
        return function_get_best_action(state)

    # Returns Theano objects for the net and functions.
    return params, function_learn, function_get_q_values, simple_get_best_action
def optimizer(loss, params):
    # use sgd optimization
    # opt = sgd(loss, params, learning_rate=0.001)

    # use rmsprop optimization
    opt = rmsprop(loss, params, learning_rate=0.001, rho=0.9, epsilon=1e-6)
    # opt = adadelta(loss, params, learning_rate=1)

    # use deepmind rmsprop optimization
    # opt = deepmind_rmsprop(loss, params, learning_rate=0.00025, rho=0.95,
    #                        epsilon=1e-2)
    return opt
def create_network(available_actions_num):
    # Creates the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    # Creates the input layer of the network.
    dqn = InputLayer(shape=[None, 1, downsampled_y, downsampled_x],
                     input_var=s1)

    # Adds 3 convolutional layers, each followed by a max pooling layer.
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])

    # Adds a single fully connected layer.
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))

    # Adds a single fully connected layer which is the output layer.
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    # Theano stuff
    q = get_output(dqn)
    # Only q for the chosen actions is updated, more or less according to:
    # target Q(s,a,t) = r + gamma * max Q(s2,_,t+1)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    # Updates the parameters according to the computed gradient using rmsprop.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compiles theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, nonterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")
    print "Network compiled."

    # Returns Theano objects for the net and functions.
    # We wouldn't need the net anymore, but it is nice to be able to save the model.
    return dqn, function_learn, function_get_q_values, function_get_best_action
def compile(self, lr=1e-4, loss_function='squared_error'):
    self.lr = lr
    print('[ConvAE: compile]')
    self.loss_function = select_loss(loss_function)

    Y_pred_ = get_output(self.model_)
    self.loss_ = self.loss_function(Y_pred_, self.Y_).mean()

    params_ = lasagne.layers.get_all_params(self.model_, trainable=True)
    if self.is_enc_fixed:
        # Keep the encoder fixed: only update the parameters after index 10.
        updates_ = rmsprop(self.loss_, params_[10:len(params_)],
                           learning_rate=self.lr)
    else:
        updates_ = rmsprop(self.loss_, params_, learning_rate=self.lr)

    self.train_fn = theano.function([self.X_, self.Y_], self.loss_,
                                    updates=updates_)

    Y_test_pred_ = get_output(self.model_, deterministic=True)
    self.pred_fn = theano.function([self.X_], Y_test_pred_)
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]],
                     input_var=s1)

    # Add 2 convolutional layers with ReLU activation.
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=3)
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)

    # Add a single fully-connected layer.
    dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify,
                     W=HeUniform("relu"), b=Constant(.1))

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function.
    q = get_output(dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")
    print "Network compiled."

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    # Returns Theano objects for the net and functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
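# Usage sketch for the functions returned by create_network above: one
# Q-learning step on a minibatch drawn from a hypothetical replay memory
# (`memory.get_sample` and its field layout are assumptions for illustration).
# Note that q2, the best next-state Q-value, is computed with the ordinary
# forward pass and fed back in as data, so no separate target network is
# needed in this variant.
def perform_learning_step(memory, batch_size,
                          function_learn, function_get_q_values):
    s1, a, s2, isterminal, r = memory.get_sample(batch_size)
    # Max Q-value of each next state, evaluated with the current weights.
    q2 = function_get_q_values(s2).max(axis=1)
    # Gradient step toward r + gamma * q2 for the chosen actions.
    return function_learn(s1, q2, a, r, isterminal)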
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
    batch_index = T.iscalar('batch_index')
    X_batch = X_tensor_type('x')
    y_batch = T.ivector('y')
    batch_slice = slice(batch_index * self.batch_size,
                        (batch_index + 1) * self.batch_size)

    objective = Objective(output_layer, loss_function=categorical_crossentropy)
    loss_train = objective.get_loss(X_batch, target=y_batch)
    loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

    pred = T.argmax(output_layer.get_output(X_batch, deterministic=True),
                    axis=1)
    proba = output_layer.get_output(X_batch, deterministic=True)
    accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

    all_params = get_all_params(output_layer)
    updates = rmsprop(loss_train, all_params, self.lr, self.rho)

    iter_train = theano.function(
        [batch_index], loss_train,
        updates=updates,
        givens={
            X_batch: dataset['X_train'][batch_slice],
            y_batch: dataset['y_train'][batch_slice],
        },
        on_unused_input='ignore',
    )

    iter_valid = None
    if self.use_valid:
        iter_valid = theano.function(
            [batch_index], [loss_eval, accuracy, proba],
            givens={
                X_batch: dataset['X_valid'][batch_slice],
                y_batch: dataset['y_valid'][batch_slice],
            },
        )

    return dict(train=iter_train, valid=iter_valid)
def get_cost_updates(self, corrupted_input, learning_rate):
    """This function computes the cost and the updates for one training
    step of the dA."""
    tilde_x = corrupted_input
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    # z = corrupted_input

    # note: we sum over the size of a datapoint; if we are using minibatches,
    # L will be a vector, with one entry per example in the minibatch
    L = -T.mean(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # L = categorical_crossentropy(z, self.x)
    # L = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # cost = L.mean()
    # temp = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # L = -T.sum(temp)

    # note: L is now a vector, where each element is the cross-entropy cost
    # of the reconstruction of the corresponding example of the minibatch.
    # We need to compute the average of all these to get the cost of the
    # minibatch.
    cost = T.sum(L)
    # print cost.eval()
    # reg = 0.01 * lasagne.regularization.l2(self.params[0])
    # cost = cost + reg

    # compute the gradients of the cost of the `dA` with respect to its
    # parameters
    # gparams = T.grad(cost, self.params, add_names='True')
    # updates_sgd = sgd(cost, self.params, learning_rate)
    updates_rmsprop = rmsprop(cost, self.params, learning_rate, rho=0.75)
    # updates_dic = apply_momentum(updates_rmsprop, self.params, momentum=0.8)
    # updates_dic = apply_momentum(updates_sgd, self.params, momentum=0.8)
    # updates = updates_sgd.items()
    updates = updates_rmsprop.items()
    # generate the list of updates
    # updates = [
    #     (param, param - learning_rate * gparam)
    #     for param, gparam in zip(self.params, gparams)
    # ]

    return (cost, updates)
def get_updates(nnet, train_obj, trainable_params, solver=None):
    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad",
                           "rmsprop", "adadelta", "adam", "adamax")

    # Fall back to Adam if an unknown solver is requested.
    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj, trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj, trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj, trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj, trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj, trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj, trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj, trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj, trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]],
                     input_var=s1)
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=4)
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify,
                     W=HeUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    q = get_output(dqn)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")
    print "Network compiled."

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    return dqn, function_learn, function_get_q_values, simple_get_best_action
def compile(self, lr=1e-4, loss_function='categorical_crossentropy'):
    self.lr = lr
    print('[ConvNet: compile]')
    self.loss_function = select_loss(loss_function)

    Y_pred_ = get_output(self.model_)
    self.loss_ = self.loss_function(Y_pred_, self.Y_).mean()
    self.acc_ = T.mean(T.eq(T.argmax(Y_pred_, axis=1), self.Y_),
                       dtype=theano.config.floatX)

    params_ = lasagne.layers.get_all_params(self.model_, trainable=True)
    updates_ = rmsprop(self.loss_, params_, learning_rate=self.lr)
    self.train_fn = theano.function([self.X_, self.Y_],
                                    [self.loss_, self.acc_],
                                    updates=updates_)

    Y_test_pred_ = get_output(self.model_, deterministic=True)
    self.test_loss_ = self.loss_function(Y_test_pred_, self.Y_).mean()
    self.test_acc_ = T.mean(T.eq(T.argmax(Y_test_pred_, axis=1), self.Y_),
                            dtype=theano.config.floatX)
    self.pred_fn = theano.function([self.X_], Y_test_pred_)
    self.test_fn = theano.function([self.X_, self.Y_],
                                   [self.test_loss_, self.test_acc_])
def main():
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    # I reduced two dimensions to one here - is that right?
    dqn = InputLayer(shape=[None, 1, 2000], input_var=s1)
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    q = get_output(dqn)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    function_learn = theano.function([s1, q2, a, r, nonterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")

    # Creates and initializes the environment.
    print "Initializing doom..."
    game = DoomGame()
    game.load_config("../../examples/config/learning.cfg")
    game.init()
    print "Doom initialized."

    # Creates all possible actions.
    n = game.get_available_buttons_size()
    actions = []
    for perm in it.product([0, 1], repeat=n):
        actions.append(list(perm))
def create_th(image_shape, output_dim, layers_conf):
    from lasagne.init import GlorotUniform, Constant
    from lasagne.layers import Conv2DLayer, InputLayer, DenseLayer, \
        get_output, get_all_params, set_all_param_values
    from lasagne.nonlinearities import rectify
    from lasagne.objectives import squared_error
    from lasagne.updates import rmsprop

    x = th.tensor.tensor4("input")
    t = th.tensor.matrix("target")

    net = InputLayer(shape=[None, 1, image_shape[0], image_shape[1]],
                     input_var=x)
    for num_filters, kernel_size, stride in layers_conf[:-1]:
        net = Conv2DLayer(net, num_filters=num_filters,
                          filter_size=[kernel_size, kernel_size],
                          nonlinearity=rectify, W=GlorotUniform(),
                          b=Constant(.1), stride=stride)
    net = DenseLayer(net, num_units=layers_conf[-1], nonlinearity=rectify,
                     W=GlorotUniform(), b=Constant(.1))
    net = DenseLayer(net, num_units=output_dim, nonlinearity=None)

    q = get_output(net)
    loss = squared_error(q, t).mean()

    params = get_all_params(net, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    backprop = th.function([x, t], loss, updates=updates, name="bprop")
    fwd_pass = th.function([x], q, name="fwd")

    return fwd_pass, backprop
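# Usage sketch for create_th above, assuming a 30x45 grayscale input, three
# outputs, and a module-level learning_rate (which the snippet reads as a
# global). The layer configuration and the random batch are made up for
# illustration: each entry of layers_conf[:-1] is (num_filters, kernel, stride)
# and the last entry is the fully-connected layer size.
import numpy as np

learning_rate = 0.00025
layers_conf = [(32, 8, 4), (64, 4, 2), 128]
fwd_pass, backprop = create_th((30, 45), output_dim=3, layers_conf=layers_conf)

states = np.random.rand(16, 1, 30, 45).astype(np.float32)
targets = np.zeros((16, 3), dtype=np.float32)
loss = backprop(states, targets)  # one RMSProp step toward the targets
q_values = fwd_pass(states)       # shape (16, 3)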
def __init__(self, rng, n_in, n_per_base, n_out, n_layer=1,
             basefuncs1=None, basefuncs2=None, gradient=None,
             with_shortcuts=False):
    """Initialize the parameters for the multilayer function graph

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_layer: int
    :param n_layer: number of hidden layers

    :type n_per_base: int
    :param n_per_base: number of nodes per basis function, see FGLayer

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :type basefuncs1: [int]
    :param basefuncs1: see FGLayer

    :type basefuncs2: [int]
    :param basefuncs2: see FGLayer

    :type gradient: string
    :param gradient: type of gradient descent algorithm
                     (None=="sgd+", "adagrad", "adadelta", "nag")

    :type with_shortcuts: bool
    :param with_shortcuts: whether to use shortcut connections
                           (output is connected to all units)
    """
    self.input = T.matrix('input')    # the data is presented as vector input
    self.labels = T.matrix('labels')  # the labels are presented as a vector
                                      # of continuous values
    self.rng = rng
    self.n_layers = n_layer
    self.hidden_layers = []
    self.params = []
    self.n_in = n_in
    self.n_out = n_out
    self.with_shortcuts = with_shortcuts
    self.fixL0 = False

    for l in xrange(n_layer):
        if l == 0:
            layer_input = self.input
            n_input = n_in
        else:
            layer_input = self.hidden_layers[l - 1].output
            n_input = self.hidden_layers[l - 1].n_out
        hiddenLayer = FGLayer(
            rng=rng,
            inp=layer_input,
            n_in=n_input,
            n_per_base=n_per_base,
            basefuncs1=basefuncs1,
            basefuncs2=basefuncs2,
            layer_idx=l,
        )
        self.hidden_layers.append(hiddenLayer)
        self.params.extend(hiddenLayer.params)

    div_thresh = T.scalar("div_thresh")

    # The linear output layer: either it gets the output of ALL previous
    # layers as input (shortcut connections) ...
    if self.with_shortcuts:
        output_layer_inp = T.concatenate(
            [l.output for l in reversed(self.hidden_layers)], axis=1)
        output_layer_n_in = sum([l.n_out for l in self.hidden_layers])
    else:  # ... or just that of the last hidden layer
        output_layer_inp = self.hidden_layers[-1].output
        output_layer_n_in = self.hidden_layers[-1].n_out

    self.output_layer = DivisionRegression(rng=rng, inp=output_layer_inp,
                                           n_in=output_layer_n_in,
                                           n_out=n_out,
                                           div_thresh=div_thresh)
    self.params.extend(self.output_layer.params)

    self.evalfun = theano.function(
        inputs=[self.input, In(div_thresh, value=0.0001)],
        outputs=self.output_layer.output)

    L1_reg = T.scalar('L1_reg')
    L2_reg = T.scalar('L2_reg')
    fixL0 = T.bscalar('fixL0')

    self.L1 = self.output_layer.L1 + sum([l.L1 for l in self.hidden_layers])
    self.L2_sqr = self.output_layer.L2_sqr + sum(
        [l.L2_sqr for l in self.hidden_layers])
    self.penalty = self.output_layer.penalty
    self.loss = self.output_layer.loss
    self.errors = self.loss
    self.cost = (self.loss(self.labels) + L1_reg * self.L1 +
                 L2_reg * self.L2_sqr + self.penalty)
    # Extrapolation penalty
    self.extrapol_cost = self.output_layer.extrapol_loss

    learning_rate = T.scalar('learning_rate')

    def process_updates(par, newp):
        # print par.name
        if par.name == "W":
            # if fixL0 is True, then keep small weights at 0
            return par, ifelse(fixL0,
                               T.switch(T.abs_(par) < 0.001, par * 0, newp),
                               newp)
        return par, newp

    print "Gradient:", gradient
    update = None
    if gradient == 'sgd+' or gradient == 'sgd' or gradient is None:
        gparams = [T.grad(self.cost, param) for param in self.params]
        update = OrderedDict([
            (param, param - (learning_rate * gparam).clip(-1.0, 1.0))
            for param, gparam in zip(self.params, gparams)
        ])
    elif gradient == 'adam':
        update = Lupdates.adam(self.cost, self.params, learning_rate,
                               epsilon=1e-04)
    elif gradient == 'adadelta':
        update = Lupdates.adadelta(self.cost, self.params, learning_rate)
    elif gradient == 'rmsprop':
        update = Lupdates.rmsprop(self.cost, self.params, learning_rate)
    elif gradient == 'nag':
        update = Lupdates.nesterov_momentum(self.cost, self.params,
                                            learning_rate)
    else:
        raise ValueError("unknown gradient " + str(gradient))

    # Extrapolation sanity gradient computation:
    extrapol_updates = Lupdates.adam(self.extrapol_cost, self.params,
                                     learning_rate, epsilon=1e-04)

    updates = [process_updates(*up) for up in update.items()]

    self.train_model = theano.function(
        inputs=[self.input, self.labels, L1_reg, L2_reg, fixL0,
                learning_rate, div_thresh],
        outputs=self.cost,
        updates=updates,
    )
    # avoid too large outputs in the extrapolation domain
    self.remove_extrapol_error = theano.function(
        inputs=[self.input, learning_rate, div_thresh],
        outputs=self.extrapol_cost,
        updates=extrapol_updates,
    )
    self.test_model = theano.function(
        inputs=[self.input, self.labels, In(div_thresh, value=0.0001)],
        outputs=self.errors(self.labels),
    )
    self.validate_model = theano.function(
        inputs=[self.input, self.labels, In(div_thresh, value=0.0001)],
        outputs=self.errors(self.labels),
    )
    self.L1_loss = theano.function(
        inputs=[],
        outputs=self.L1,
    )
    self.MSE = theano.function(
        inputs=[self.input, self.labels, In(div_thresh, value=0.0001)],
        outputs=self.errors(self.labels),
    )
def optimizer(loss, params):
    return rmsprop(loss, params, learning_rate=0.00025, rho=0.9, epsilon=1e-8)
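# Usage sketch for the optimizer helper above: it returns a Lasagne updates
# dictionary that is passed straight to theano.function. The tiny regression
# model here is made up for illustration.
import theano
import theano.tensor as T
import lasagne
from lasagne.updates import rmsprop

X = T.matrix('X')
y = T.matrix('y')
net = lasagne.layers.InputLayer((None, 8), input_var=X)
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=None)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.squared_error(prediction, y).mean()
params = lasagne.layers.get_all_params(net, trainable=True)

train_fn = theano.function([X, y], loss, updates=optimizer(loss, params))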
def build_network_2dconv(args, input_var, target_var, wordEmbeddings,
                         maxlen=60):
    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # keep the embeddings fixed

    # (batchsize, maxlen, wordDim)
    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(reshape, num_filters=num_filters,
                         filter_size=filter_size, stride=stride,
                         nonlinearity=rectify,
                         W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    return train_fn, val_fn
def run_network(data=None, num_epochs=20):
    try:
        # Loading the data
        global_start_time = time.time()
        print('Loading data...')
        if data is None:
            X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = data

        # Creating the Theano variables
        input_var = T.tensor4('inputs')
        target_var = T.ivector('targets')

        # Building the Theano expressions on these variables
        network = build_mlp(input_var)

        prediction = lasagne.layers.get_output(network)
        loss = categorical_crossentropy(prediction, target_var)
        loss = loss.mean()

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = categorical_crossentropy(test_prediction, target_var)
        test_loss = test_loss.mean()
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = rmsprop(loss, params, learning_rate=0.001)

        # Compiling the graph by declaring the Theano functions
        train_fn = theano.function([input_var, target_var], loss,
                                   updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 [test_loss, test_acc])

        # For loop that goes each time through the whole training
        # and validation data
        print("Starting training...")
        for epoch in range(num_epochs):
            # Going over the training data
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, 500,
                                             shuffle=True):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # Going over the validation data
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print("training loss:\t\t{:.6f}".format(train_err / train_batches))
            print("validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))

        # Now that the training is over, let's test the network:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            test_err += err
            test_acc += acc
            test_batches += 1
        print("Final results in {0} seconds:".format(
            time.time() - global_start_time))
        print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:\t\t{:.2f} %".format(
            test_acc / test_batches * 100))
        return network

    except KeyboardInterrupt:
        return network
def build_update_functions(train_set_x, train_set_y, valid_set_x, valid_set_y,
                           network, y, X, train_MASK, val_MASK,
                           batch_size=32, l2_reg=.0001):
    # build update functions
    # extract tensor representing the network predictions
    prediction = get_output(network)

    # Aggregate the element-wise error into a scalar value using a mask.
    # Note that y should not contain NaN; replace NaNs with 0 or -1. The value
    # does not matter - it is not used to calculate the aggregated error and
    # update of the network.
    # MASK should be a matrix of size(y), with 0s in place of NaN values and
    # 1s everywhere else.

    # build tensor variable for mask
    trainMASK = T.matrix('trainMASK')
    # collect squared error
    loss_RMSE = squared_error(prediction, y)
    # drop the NaN positions and average over the remaining values
    loss_RMSE = aggregate(loss_RMSE, weights=trainMASK, mode='normalized_sum')
    # compute the square root
    loss_RMSE = loss_RMSE.sqrt()

    # add l2 regularization
    # l2_penalty = regularize_layer_params(network, l2)
    # regc = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
    # layers = get_all_layers(network)
    # reg_weights = {key: value for (key, value) in zip(layers, regc)}
    # l2_penalty = regularize_layer_params_weighted(reg_weights, l2)
    loss = loss_RMSE  # (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty

    # get network params
    params = get_all_params(network)
    # subset_params = params  # subset network params to extract the ones
    #                         # that you want to train
    # print 'length of params', len(params), '\n'
    subset_params = [params[0], params[1], params[10], params[11],
                     params[12], params[13]]
    # print('RMSPROP \n')
    updates = rmsprop(loss, subset_params, learning_rate=1e-4)

    # create validation/test loss expression
    # the loss represents the loss for all the labels
    test_prediction = get_output(network, deterministic=True)

    # Aggregate the element-wise error into a scalar value using a mask,
    # exactly as for the training loss above.

    # build tensor variable for mask
    valMASK = T.matrix('valMASK')
    # collect squared error
    test_loss = squared_error(test_prediction, y)
    # drop the NaN positions and average over the remaining values
    test_loss = aggregate(test_loss, weights=valMASK, mode='normalized_sum')
    # compute the square root
    test_loss = test_loss.sqrt()

    # index for mini-batch slicing
    index = T.lscalar()

    # training function
    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    train_fn = theano.function(
        inputs=[index],
        outputs=[loss, loss_RMSE],
        updates=updates,
        givens={X: train_set_x[index * batch_size:
                               T.minimum((index + 1) * batch_size,
                                         train_set_x_size)],
                y: train_set_y[index * batch_size:
                               T.minimum((index + 1) * batch_size,
                                         train_set_x_size)],
                trainMASK: train_MASK[index * batch_size:
                                      T.minimum((index + 1) * batch_size,
                                                train_set_x_size)]})
    # validation function
    val_fn = theano.function(
        inputs=[index],
        outputs=[test_loss, prediction],
        givens={X: valid_set_x[index * batch_size:
                               T.minimum((index + 1) * batch_size,
                                         val_set_x_size)],
                y: valid_set_y[index * batch_size:
                               T.minimum((index + 1) * batch_size,
                                         val_set_x_size)],
                valMASK: val_MASK[index * batch_size:
                                  T.minimum((index + 1) * batch_size,
                                            val_set_x_size)]})
    return train_fn, val_fn
def build_update_functions(train_set_x, train_set_y, valid_set_x, valid_set_y,
                           network, y, X, batch_size=32, l2_reg=.01,
                           learning_rate=.005, momentum=.9):
    # build update functions
    # extract tensor representing the network predictions
    prediction = get_output(network)
    # collect squared error
    loss_RMSE = squared_error(prediction, y)
    # compute the root mean squared error
    loss_RMSE = loss_RMSE.mean().sqrt()
    # add l2 regularization
    l2_penalty = regularize_network_params(network, l2)
    loss = (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty
    # get network params
    params = get_all_params(network)

    # create update criterion
    # print('nesterov')
    # updates = nesterov_momentum(loss, params, learning_rate=learning_rate,
    #                             momentum=momentum)
    # print('AdaGrad')
    # updates = adagrad(loss, params, learning_rate=1e-3)
    print('RMSPROP')
    updates = rmsprop(loss, params, learning_rate=1e-3)

    # create validation/test loss expression
    # the loss represents the loss for all the labels
    test_prediction = get_output(network, deterministic=True)
    # collect squared error
    test_loss = squared_error(test_prediction, y)
    # compute the root mean squared error
    test_loss = test_loss.mean().sqrt()
    # test_loss_withl2 = (1 - l2_reg) * test_loss + l2_reg * l2_penalty

    # index for minibatch slicing
    index = T.lscalar()

    # training function
    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    train_fn = theano.function(
        inputs=[index],
        outputs=[loss, loss_RMSE],
        updates=updates,
        givens={
            X: train_set_x[index * batch_size:
                           T.minimum((index + 1) * batch_size,
                                     train_set_x_size)],
            y: train_set_y[index * batch_size:
                           T.minimum((index + 1) * batch_size,
                                     train_set_x_size)]
        })
    # validation function
    val_fn = theano.function(
        inputs=[index],
        outputs=[test_loss, prediction],
        givens={
            X: valid_set_x[index * batch_size:
                           T.minimum((index + 1) * batch_size,
                                     val_set_x_size)],
            y: valid_set_y[index * batch_size:
                           T.minimum((index + 1) * batch_size,
                                     val_set_x_size)]
        })
    return train_fn, val_fn
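# Usage sketch for build_update_functions above: because the data live in
# Theano shared variables and are sliced via `givens`, the compiled functions
# take only a minibatch index. The shared-variable setup and the tiny network
# here are hypothetical.
import numpy as np
import theano
import theano.tensor as T
import lasagne

batch_size = 32
train_set_x = theano.shared(np.random.rand(320, 10).astype(theano.config.floatX))
train_set_y = theano.shared(np.random.rand(320, 1).astype(theano.config.floatX))
valid_set_x = theano.shared(np.random.rand(64, 10).astype(theano.config.floatX))
valid_set_y = theano.shared(np.random.rand(64, 1).astype(theano.config.floatX))

X = T.matrix('X')
y = T.matrix('y')
network = lasagne.layers.InputLayer((None, 10), input_var=X)
network = lasagne.layers.DenseLayer(network, num_units=1, nonlinearity=None)

train_fn, val_fn = build_update_functions(train_set_x, train_set_y,
                                          valid_set_x, valid_set_y,
                                          network, y, X,
                                          batch_size=batch_size)

n_train_batches = train_set_x.get_value().shape[0] // batch_size
for minibatch_index in range(n_train_batches):
    loss, rmse = train_fn(minibatch_index)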
def run_network(data=None, num_epochs=10, ratio=0.5):
    try:
        global_start_time = time()
        sequence_length = 50
        batchsize = 512
        path_to_dataset = 'household_power_consumption.txt'

        # Loading the data
        if data is None:
            print 'Loading data... '
            X_train, y_train, X_test, y_test = data_power_consumption(
                path_to_dataset, sequence_length, ratio)
        else:
            X_train, y_train, X_test, y_test = data

        val_ratio = 0.005
        val_rows = int(round(val_ratio * X_train.shape[0]))
        X_val = X_train[:val_rows]
        y_val = y_train[:val_rows]
        y_val = np.reshape(y_val, (y_val.shape[0], 1))
        X_train = X_train[val_rows:]
        y_train = y_train[val_rows:]

        # Creating the Theano variables
        input_var = T.tensor3('inputs')
        target_var = T.matrix('targets')

        # Building the Theano expressions on these variables
        network = build_model(input_var)
        prediction = lasagne.layers.get_output(network)
        loss = squared_error(prediction, target_var)
        loss = aggregate(loss)
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = rmsprop(loss, params, learning_rate=0.001)
        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = squared_error(test_prediction, target_var)
        test_loss = aggregate(test_loss)

        # Compiling the graph by declaring the Theano functions
        compile_time = time()
        print 'Data:'
        print 'X_train ', X_train.shape, ' y_train ', y_train.shape
        print 'X_val   ', X_val.shape, ' y_val   ', y_val.shape
        print 'X_test  ', X_test.shape, ' y_test  ', y_test.shape
        print "Compiling..."
        train_fn = theano.function([input_var, target_var], loss,
                                   updates=updates)
        val_fn = theano.function([input_var, target_var], test_loss)
        get_pred_fn = theano.function([input_var], prediction)
        print "Compiling time : ", time() - compile_time

        # For loop that goes each time through the whole training
        # and validation data
        #
        # T R A I N I N G
        # - - - - - - - -
        print "Starting training...\n"
        for epoch in range(num_epochs):
            # Going over the training data
            train_err = 0
            train_batches = 0
            start_time = time()
            nb_batches = X_train.shape[0] / batchsize
            time_line = np.zeros(nb_batches)
            for batch in iterate_minibatches(X_train, y_train, batchsize,
                                             shuffle=True):
                current_time = time()
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1
                str_out = "\rTrain Batch " + str(train_batches)
                str_out += "/" + str(nb_batches)
                str_out += " | Loss : " + str(train_err / train_batches)[:7]
                str_out += " | Remaining time (s) : "
                remaining_seconds = time() - current_time
                remaining_seconds *= (nb_batches - train_batches)
                time_line[train_batches - 1] = round(remaining_seconds)
                if (train_batches - 1) % 5 == 0:
                    durations = time_line[train_batches - 1:
                                          train_batches + 50]
                    durations = np.mean([t for t in durations if t > 0])
                    str_out += str(durations)
                sys.stdout.write(str_out)
                sys.stdout.flush()

            print "\nGoing through validation data"
            # Going over the validation data
            val_err = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batchsize,
                                             shuffle=False):
                inputs, targets = batch
                err = val_fn(inputs, targets)
                val_err += err
                val_batches += 1

            # Then we print the results for this epoch:
            print "training loss:\t\t\t" + str(train_err / train_batches)
            print "validation loss:\t\t" + str(val_err / val_batches)
            print("Epoch {} of {} took {:.3f}s \n\n".format(
                epoch + 1, num_epochs, time() - start_time))

        # Now that the training is over, let's test the network:
        test_err = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, batchsize,
                                         shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            test_err += err
            test_batches += 1
        print "\nFinal results in {0} seconds:".format(
            time() - global_start_time)
        print "Test loss:\t\t\t{:.6f}".format(test_err / test_batches)

        prediction_size = 200
        predicted = get_pred_fn(X_test[:prediction_size])
        try:
            plt.plot(predicted)
            plt.plot(y_test[:prediction_size])
            plt.show(block=False)
        except Exception as e:
            print str(e)
            print "predicted = ", repr(
                np.reshape(predicted[:prediction_size], (prediction_size,)))
            print '\n'
            print "y = ", repr(
                np.reshape(y_test[:prediction_size], (prediction_size,)))
        return network
    except KeyboardInterrupt:
        return network
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("State")
    a = tensor.vector("Action", dtype="int32")
    q2 = tensor.vector("Q2")
    r = tensor.vector("Reward")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]],
                     input_var=s1)

    # Add 2 convolutional layers with ReLU activation
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=3)
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)

    # Add 1 fully connected layer
    dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify,
                     W=HeUniform("relu"), b=Constant(.1))

    # Add the output layer (fully connected)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function
    q = get_output(dqn)
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = tensor.set_subtensor(
        q[tensor.arange(q.shape[0]), a],
        r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the gradient computed with RMSProp
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    # Return the Theano objects for the network and the functions
    return dqn, function_learn, function_get_q_values, simple_get_best_action
def build_network_2dconv(args, input1_var, input1_mask_var, input2_var,
                         input2_mask_var, target_var, wordEmbeddings,
                         maxlen=36):
    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    # two conv-pool layer variant:
    # filter_size = (10, 100)
    # pool_size = (4, 4)

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    # input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size,
                           output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")  # (batchsize, maxlen, wordDim)
    reshape_1 = ReshapeLayer(emb_1, (batchsize, 1, maxlen, wordDim))

    conv2d_1 = Conv2DLayer(reshape_1, num_filters=num_filters,
                           filter_size=filter_size, stride=stride,
                           nonlinearity=rectify,
                           W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 100, 1, 1)
    # second conv-pool block (disabled):
    # filter_size_2 = (4, 10)
    # pool_size_2 = (2, 2)
    # conv2d_1 = Conv2DLayer(maxpool_1, num_filters=num_filters,
    #                        filter_size=filter_size_2, stride=stride,
    #                        nonlinearity=rectify, W=GlorotUniform())
    # maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size_2)
    forward_1 = FlattenLayer(maxpool_1)  # (None, 100)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    # input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size,
                           output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")
    reshape_2 = ReshapeLayer(emb_2, (batchsize, 1, maxlen, wordDim))
    conv2d_2 = Conv2DLayer(reshape_2, num_filters=num_filters,
                           filter_size=filter_size, stride=stride,
                           nonlinearity=rectify,
                           W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)  # (None, 100, 1, 1)
    # conv2d_2 = Conv2DLayer(maxpool_2, num_filters=num_filters,
    #                        filter_size=filter_size_2, stride=stride,
    #                        nonlinearity=rectify, W=GlorotUniform())
    # maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size_2)
    forward_2 = FlattenLayer(maxpool_2)  # (None, 100)

    # elementwise merge needs a fixed sequence length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])
    # note: the next line overrides the mul/sub features with a plain
    # concatenation of the two sentence vectors
    concat = ConcatLayer([forward_1, forward_2])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)
    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)

    # prediction = get_output(network, {input_1: input1_var, input_2: input2_var})
    prediction = get_output(network)

    loss = T.mean(categorical_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")

    # test_prediction = get_output(network, {input_1: input1_var,
    #                                        input_2: input2_var},
    #                              deterministic=True)
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    # train_fn = theano.function([input1_var, input1_mask_var, input2_var,
    #                             input2_mask_var, target_var],
    #                            loss, updates=updates,
    #                            allow_input_downcast=True)
    train_fn = theano.function([input1_var, input2_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)

    if args.task == "sts":
        # val_fn = theano.function([input1_var, input1_mask_var, input2_var,
        #                           input2_mask_var, target_var],
        #                          [test_loss, test_prediction],
        #                          allow_input_downcast=True)
        val_fn = theano.function([input1_var, input2_var, target_var],
                                 [test_loss, test_prediction],
                                 allow_input_downcast=True)
    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
        #                   dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        # val_fn = theano.function([input1_var, input1_mask_var, input2_var,
        #                           input2_mask_var, target_var],
        #                          [test_loss, test_acc],
        #                          allow_input_downcast=True)
        val_fn = theano.function([input1_var, input2_var, target_var],
                                 [test_loss, test_acc],
                                 allow_input_downcast=True)

    return train_fn, val_fn
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen,
                          num_feats):
    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1

    # important context words as channels

    # CNN_sentence config
    filter_size = wordDim
    pool_size = seqlen - filter_size + 1

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    # emb.params[emb.W].remove('trainable')  # (batchsize, seqlen, wordDim)
    # print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))
    # print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters,
                         filter_size=wordDim, stride=1,
                         nonlinearity=tanh, W=GlorotUniform())
    # nOutputFrame = num_filters
    # nOutputFrameSize = (num_feats * wordDim - filter_size) / stride + 1
    # print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0, 2, 1))
    # print get_output_shape(conv1d)

    pool_size = num_filters
    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)
    # print get_output_shape(maxpool)
    # forward = FlattenLayer(maxpool)
    # print get_output_shape(forward)

    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, conv1d: lambda_val, hid: lambda_val,
              network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    return train_fn, val_fn, network
def build_update_functions(train_set_x, train_set_y,
                           valid_set_x, valid_set_y,
                           network,
                           y, X,
                           train_MASK, val_MASK,
                           batch_size=32,
                           l2_reg=.0001,
                           learning_rate=.005,
                           momentum=.9):
    # Build update functions.
    # Extract the tensor representing the network predictions.
    prediction = get_output(network)

    # Aggregate the element-wise error into a scalar using a mask.
    # Note that y should not contain NaN; replace NaNs with 0 or -1 beforehand.
    # The placeholder value does not matter: it is excluded from the aggregated
    # error and from the network update. Each mask must be a matrix of size(y),
    # with 0s in place of NaN values and 1s everywhere else.

    # Build the tensor variable for the training mask.
    trainMASK = T.matrix('trainMASK')
    # Collect the squared error.
    loss_RMSE = squared_error(prediction, y)
    # Drop the NaN positions and average over the remaining values.
    loss_RMSE = aggregate(loss_RMSE, weights=trainMASK, mode='normalized_sum')
    # Compute the square root.
    loss_RMSE = loss_RMSE.sqrt()

    # Add L2 regularization.
    l2_penalty = regularize_network_params(network, l2)
    loss = (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty

    # Get the network params and create the RMSProp update rule.
    params = get_all_params(network, trainable=True)
    updates = rmsprop(loss, params, learning_rate=learning_rate)

    # Create the validation/test loss expression over all labels,
    # with deterministic=True to disable stochastic layers such as dropout.
    test_prediction = get_output(network, deterministic=True)

    # Build the tensor variable for the validation mask.
    valMASK = T.matrix('valMASK')
    # Collect the squared error.
    test_loss = squared_error(test_prediction, y)
    # Drop the NaN positions and average over the remaining values.
    test_loss = aggregate(test_loss, weights=valMASK, mode='normalized_sum')
    # Compute the square root.
    test_loss = test_loss.sqrt()

    # Index for mini-batch slicing.
    index = T.lscalar()

    # Training and validation functions.
    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    train_fn = theano.function(
        inputs=[index],
        outputs=[loss, loss_RMSE],
        updates=updates,
        givens={X: train_set_x[index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                y: train_set_y[index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                trainMASK: train_MASK[index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)]})

    val_fn = theano.function(
        inputs=[index],
        outputs=[test_loss, prediction],
        givens={X: valid_set_x[index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                y: valid_set_y[index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                valMASK: val_MASK[index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)]})

    return train_fn, val_fn
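The masking scheme above expects NaN-free targets plus a 0/1 weight matrix; a small NumPy sketch of that preprocessing (array names are illustrative, not from the original code):

import numpy as np

y_raw = np.array([[1.2, np.nan], [0.3, 2.1]], dtype=np.float32)
mask = (~np.isnan(y_raw)).astype(np.float32)     # 1 where observed, 0 where NaN
y_clean = np.where(np.isnan(y_raw), 0.0, y_raw)  # placeholder never enters the loss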
def __init__(self, atari_env, state_dimension, action_dimension,
             monitor_env=False, learning_rate=0.001, critic_update=10,
             train_step=1, gamma=0.95, eps_max=1.0, eps_min=0.1,
             eps_decay=10000, n_epochs=10000, batch_size=32,
             buffer_size=50000):
    self.env = gym.make(atari_env)
    if monitor_env:
        pass  # monitoring hook not implemented

    self.state_dimension = state_dimension
    self.action_dimension = action_dimension
    self.learning_rate = learning_rate
    self.critic_update = critic_update
    self.train_step = train_step
    self.gamma = gamma
    self.eps_max = eps_max
    self.eps_min = eps_min
    self.eps_decay = eps_decay
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.buffer_size = buffer_size

    self.experience_replay = []

    def q_network(state):
        input_state = InputLayer(input_var=state,
                                 shape=(None,
                                        self.state_dimension[0],
                                        self.state_dimension[1],
                                        self.state_dimension[2]))
        # Reorder from NHWC to NCHW for the convolutions.
        input_state = DimshuffleLayer(input_state, pattern=(0, 3, 1, 2))
        conv = Conv2DLayer(input_state, num_filters=32, filter_size=(8, 8),
                           stride=(4, 4), nonlinearity=rectify)
        conv = Conv2DLayer(conv, num_filters=64, filter_size=(4, 4),
                           stride=(2, 2), nonlinearity=rectify)
        conv = Conv2DLayer(conv, num_filters=64, filter_size=(3, 3),
                           stride=(1, 1), nonlinearity=rectify)
        flatten = FlattenLayer(conv)
        dense = DenseLayer(flatten, num_units=512, nonlinearity=rectify)
        q_values = DenseLayer(dense, num_units=self.action_dimension,
                              nonlinearity=linear)
        return q_values

    self.X_state = T.ftensor4()
    self.X_action = T.bvector()
    self.X_reward = T.fvector()
    self.X_next_state = T.ftensor4()
    self.X_done = T.bvector()

    self.X_action_hot = to_one_hot(self.X_action, self.action_dimension)

    self.q_ = q_network(self.X_state)
    self.q = get_output(self.q_)
    self.q_target_ = q_network(self.X_next_state)
    self.q_target = get_output(self.q_target_)
    self.q_max = T.max(self.q_target, axis=1)
    self.action = T.argmax(self.q, axis=1)

    self.mu = theano.function(inputs=[self.X_state],
                              outputs=self.action,
                              allow_input_downcast=True)

    # TD error between the bootstrap target and the Q-value of the taken action.
    self.loss = squared_error(
        self.X_reward + self.gamma * self.q_max * (1.0 - self.X_done),
        T.batched_dot(self.q, self.X_action_hot))
    self.loss = self.loss.mean()

    self.params = get_all_params(self.q_)
    self.grads = T.grad(self.loss, self.params)
    self.normed_grads = total_norm_constraint(self.grads, 1.0)
    # rmsprop also accepts a list of gradients in place of a loss expression.
    self.updates = rmsprop(self.normed_grads, self.params,
                           learning_rate=self.learning_rate)
    self.update_network = theano.function(
        inputs=[self.X_state, self.X_action, self.X_reward,
                self.X_next_state, self.X_done],
        outputs=self.loss,
        updates=self.updates,
        allow_input_downcast=True)
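Because q_network is called twice, the online and target networks hold separate parameter sets that this constructor never synchronizes; a plausible sync method using standard Lasagne helpers (a sketch, not part of the original class) would be:

from lasagne.layers import get_all_param_values, set_all_param_values

def sync_target_network(self):
    # Copy the online network's weights into the target network.
    set_all_param_values(self.q_target_, get_all_param_values(self.q_))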
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1)

    # Add 2 convolutional layers with ReLU activation.
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=3)
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)

    # Add a single fully-connected layer.
    dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify,
                     W=HeUniform("relu"), b=Constant(.1))

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function.
    q = get_output(dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = tensor.set_subtensor(
        q[tensor.arange(q.shape[0]), a],
        r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions.
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q),
                                               name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    # Returns Theano objects for the net and functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
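A sketch of how the compiled functions are typically driven in an epsilon-greedy learning step; get_state, memory, and batch_size are assumptions layered on top of the snippet, not names it defines:

import random
import numpy as np

def perform_learning_step(epsilon):
    s = get_state()  # hypothetical helper returning the current preprocessed frame
    if random.random() <= epsilon:
        action = random.randint(0, available_actions_count - 1)
    else:
        action = simple_get_best_action(s)
    # After acting and storing (s1, a, s2, isterminal, r) in a replay memory:
    s1, a, s2, isterminal, r = memory.get_sample(batch_size)
    q2 = np.max(function_get_q_values(s2), axis=1)
    function_learn(s1, q2, a, r, isterminal)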
# ... (snippet truncated; the surrounding code defines network, state_batch,
# target_batch, and action_batch)
evaluate_model_batch = theano.function([state_batch], network.output,
                                       givens={network.input: state_batch})

# Mean squared TD error on the Q-values of the actions actually taken.
cost = T.mean(T.sqr(network.output[T.arange(target_batch.shape[0]), action_batch]
                    - target_batch))

alpha = 0.001
rho = 0.9
epsilon = 1e-6
updates = rmsprop(cost, network.params, alpha, rho, epsilon)
train_model = theano.function([state_batch, target_batch, action_batch], cost,
                              updates=updates,
                              givens={network.input: state_batch})

print("Running episodes...")

epsilon_q = 0.1
last_save = time.clock()
last_snapshot = time.clock()
show_plots()
try:
    for i in range(numEpisodes):
        # ... (snippet truncated)
# ... (snippet truncated; the surrounding code defines network, state_batch,
# target_batch, and action_batch)
evaluate_model_batch = theano.function(
    [state_batch],
    network.output,
    givens={
        network.input: state_batch,
    }
)

cost = T.mean(T.sqr(network.output[T.arange(target_batch.shape[0]), action_batch]
                    - target_batch))

alpha = 0.001
rho = 0.9
epsilon = 1e-6
updates = rmsprop(cost, network.params, alpha, rho, epsilon)
train_model = theano.function(
    [state_batch, target_batch, action_batch],
    cost,
    updates=updates,
    givens={
        network.input: state_batch,
    }
)

actor_net = Actor()

print("Running episodes...")

epsilon_q = 0.1
last_save = time.clock()
# ... (snippet truncated)
def rmsprop_nesterov(loss, params, eta=1e-3, alpha=0.9, **kwargs):
    # Compose RMSProp with Nesterov momentum by chaining Lasagne update dicts.
    rms = updt.rmsprop(loss, params, learning_rate=eta, **kwargs)
    return updt.apply_nesterov_momentum(rms, params, momentum=alpha)
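Usage mirrors any built-in Lasagne update rule; extra keyword arguments such as rho and epsilon pass straight through to rmsprop (loss, params, input_var, and target_var are assumed to be defined as in the other snippets):

updates = rmsprop_nesterov(loss, params, eta=1e-3, alpha=0.9, rho=0.9, epsilon=1e-6)
train_fn = theano.function([input_var, target_var], loss, updates=updates)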
# ... (snippet truncated; the line below closes a DenseLayer call that defines lnet_dense4)
                      300, W=GlorotUniform(), nonlinearity=rectify)
lnet_dense4_drop = DropoutLayer(lnet_dense4, p=confnet['dropout_rate'])
convnet = DenseLayer(lnet_dense4_drop, 10, nonlinearity=softmax)

print('[ConvNet] define loss, optimizer, and compile')
Ynet_train_pred_ = get_output(convnet)
loss_ = categorical_crossentropy(Ynet_train_pred_, Ynet_)
loss_ = loss_.mean()
acc_ = T.mean(T.eq(T.argmax(Ynet_train_pred_, axis=1), Ynet_),
              dtype=theano.config.floatX)
params_ = lasagne.layers.get_all_params(convnet, trainable=True)
updates_ = rmsprop(loss_, params_, learning_rate=confnet['lr'])
train_net_fn = theano.function([Xnet_, Ynet_], [loss_, acc_], updates=updates_)

# test loss
Ynet_test_pred_ = get_output(convnet, deterministic=True)
test_net_loss_ = categorical_crossentropy(Ynet_test_pred_, Ynet_)
test_net_loss_ = test_net_loss_.mean()
# test accuracy
test_net_acc_ = T.mean(T.eq(T.argmax(Ynet_test_pred_, axis=1), Ynet_),
                       dtype=theano.config.floatX)
test_net_fn = theano.function([Xnet_, Ynet_], [test_net_loss_, test_net_acc_])

###############
##### CONVEA ####
get_policy_batch = theano.function(
    [state_batch],
    policy_network.output,
    givens={
        policy_network.input: state_batch,
    }
)

# Q-network regression loss and policy-network surrogate objective.
Q_cost = T.mean(T.sqr(network.output[T.arange(target_batch.shape[0]), action_batch]
                      - target_batch))
P_cost = T.mean(T.sum(policy_network.output * score_batch, 1))

alpha = 0.001
rho = 0.9
epsilon = 1e-6
Q_updates = rmsprop(Q_cost, network.params, alpha, rho, epsilon)
P_updates = rmsprop(P_cost, policy_network.params, alpha, rho, epsilon)
train_Q_model = theano.function(
    [state_batch, target_batch, action_batch],
    [Q_cost, network.output],
    updates=Q_updates,
    givens={
        network.input: state_batch,
    }
)
train_P_model = theano.function(
    [state_batch, score_batch],
    P_cost,
    updates=P_updates,
    # ... (snippet truncated)
# inps.insert(1, model['latent'].input_var)
if args.dataset == 'MNIST':
    cost = lasagne.objectives.binary_crossentropy(output_train, Y)
elif args.dataset == 'CIFAR10':
    cost = lasagne.objectives.categorical_crossentropy(
        output_train.reshape(-1, 256), Y.flatten())
cost = lasagne.objectives.aggregate(cost, weights=None, mode='mean')

if args.l2_penalty is not None:
    l2_penalty = regularize_network_params(output_layer, l2) * L2
    cost += l2_penalty

sh_lr = theano.shared(lasagne.utils.np.float32(LEARNING_RATE))
# updates = adam(cost, model_pars, learning_rate=sh_lr)
updates = rmsprop(cost, model_pars, learning_rate=sh_lr)

print('Compiling functions ...')
train_fn = theano.function(inps, cost, updates=updates)
val_fn = theano.function(inps, cost)
generate = theano.function(inps[:-1], output_val)

network_dump = {'model': model,
                'output_layer': output_layer}

print('Loading data ...')
dataset = load_data(False, dataset)

if args.small_dataset:
    ones_idx = dataset['y_train'] == 1
    six_idx = dataset['y_train'] == 6
    X_train = [
        # ... (snippet truncated)
def update_params(self, loss, all_params):
    return rmsprop(loss, all_params, self.learning_rate)
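A sketch of how such a method is typically consumed when compiling a training function; agent, network, input_var, and target_var are illustrative names, not taken from the original class:

loss = squared_error(get_output(network), target_var).mean()
params = get_all_params(network, trainable=True)
updates = agent.update_params(loss, params)  # agent is an instance of the class above
train_fn = theano.function([input_var, target_var], loss,
                           updates=updates, allow_input_downcast=True)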
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    GRAD_CLIP = wordDim
    args.lstmDim = 150

    input = InputLayer((None, seqlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen), input_var=input_mask_var)

    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    # emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask,
                     grad_clipping=GRAD_CLIP, nonlinearity=tanh)
    lstm_back = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask,
                          grad_clipping=GRAD_CLIP, nonlinearity=tanh,
                          backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1)       # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1)  # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, lstm: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer was not set correctly")

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, input_mask_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)
    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, input_mask_var, target_var],
                             [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
def __init__(self, env, colors=True, scale=1, discount_factor=0.99,
             learning_rate=0.00025, replay_memory_size=100000, batch_size=64,
             cropping=(0, 0, 0, 0)):
    # Create the input variables
    s1 = T.tensor4("States")
    a = T.vector("Actions", dtype="int32")
    q2 = T.vector("Next State's best Q-Value")
    r = T.vector("Rewards")
    isterminal = T.vector("IsTerminal", dtype="int8")

    # Set field values
    if colors:
        self.channels = 3
    else:
        self.channels = 1
    self.resolution = ((env.observation_space.shape[0] - cropping[0] - cropping[1]) * scale,
                       (env.observation_space.shape[1] - cropping[2] - cropping[3]) * scale)
    self.learning_rate = learning_rate
    self.discount_factor = discount_factor
    self.batch_size = batch_size
    self.actions = env.action_space
    self.scale = scale
    self.cropping = cropping
    print("Resolution = " + str(self.resolution))
    print("Channels = " + str(self.channels))

    # Create the replay memory which will store the transitions.
    self.memory = ReplayMemory(capacity=replay_memory_size,
                               resolution=self.resolution,
                               channels=self.channels)

    # Policy network
    l_in = InputLayer(shape=(None, self.channels, self.resolution[0], self.resolution[1]),
                      input_var=s1)
    l_conv1 = Conv2DLayer(l_in, num_filters=32, filter_size=[6, 6],
                          nonlinearity=rectify, W=HeUniform("relu"),
                          b=Constant(.1), stride=2)
    l_conv2 = Conv2DLayer(l_conv1, num_filters=64, filter_size=[3, 3],
                          nonlinearity=rectify, W=HeUniform("relu"),
                          b=Constant(.1), stride=1)
    # l_conv3 = Conv2DLayer(l_conv2, num_filters=64, filter_size=[3, 3],
    #                       nonlinearity=rectify, W=HeUniform("relu"),
    #                       b=Constant(.1), stride=1)
    l_hid1 = DenseLayer(l_conv2, num_units=128, nonlinearity=rectify,
                        W=HeUniform("relu"), b=Constant(.1))
    self.dqn = DenseLayer(l_hid1, num_units=self.actions.n, nonlinearity=None)

    # Define the loss function
    q = get_output(self.dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = T.set_subtensor(q[T.arange(q.shape[0]), a],
                               r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(self.dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print("Compiling the network ...")
    self.fn_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                    updates=updates, name="learn_fn")
    self.fn_get_q_values = theano.function([s1], q, name="eval_fn")
    self.fn_get_best_action = theano.function([s1], T.argmax(q), name="test_fn")
    print("Network compiled.")
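A sketch of how fn_learn might be fed from the replay memory; the get_sample and size members of ReplayMemory are assumptions here and are not shown in the snippet:

import numpy as np

def learn_from_memory(self):
    # Sample a transition batch and take one RMSProp step on the DQN.
    if self.memory.size > self.batch_size:
        s1, a, s2, isterminal, r = self.memory.get_sample(self.batch_size)
        q2 = np.max(self.fn_get_q_values(s2), axis=1)
        self.fn_learn(s1, q2, a, r, isterminal)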