import gzip
import cPickle

import numpy as np
import theano
import theano.tensor as T

import layers  # project-local layer definitions


def test_convnet():
    batch_size = 500  # size of the minibatch
    learning_rate = 0.1
    n_epochs = 200

    # Load the MNIST dataset.
    f = gzip.open('data/mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    train_set_x = np.asarray(train_set_x, dtype=theano.config.floatX)
    train_set_y = np.asarray(train_set_y, dtype=theano.config.floatX)
    valid_set_x = np.asarray(valid_set_x, dtype=theano.config.floatX)
    valid_set_y = np.asarray(valid_set_y, dtype=theano.config.floatX)
    test_set_x = np.asarray(test_set_x, dtype=theano.config.floatX)
    test_set_y = np.asarray(test_set_y, dtype=theano.config.floatX)

    # Reshape the flat 784-pixel vectors to (batch, channel, row, column).
    train_set_x = train_set_x.reshape((train_set_x.shape[0], 1, 28, 28))
    test_set_x = test_set_x.reshape((test_set_x.shape[0], 1, 28, 28))
    valid_set_x = valid_set_x.reshape((valid_set_x.shape[0], 1, 28, 28))

    nn_layers = []
    nkerns = [20, 50]

    # Alternative architecture using unstrided convolutions with explicit
    # 2x2 pooling layers, kept for reference:
    # nn_layers.append(layers.Input2DLayer(batch_size, 1, 28, 28, scale=255))
    # nn_layers.append(layers.Conv2DLayer(nn_layers[-1], nkerns[0], 5, 5, .01, .01))
    # nn_layers.append(layers.Pooling2DLayer(nn_layers[-1], pool_size=(2, 2)))
    # nn_layers.append(layers.Conv2DLayer(nn_layers[-1], nkerns[1], 5, 5, .01, .01))
    # nn_layers.append(layers.Pooling2DLayer(nn_layers[-1], pool_size=(2, 2)))
    # #nn_layers.append(layers.FlattenLayer(nn_layers[-1]))
    # nn_layers.append(layers.DenseLayer(nn_layers[-1], 500, 0.1, 0, nonlinearity=layers.tanh))
    # nn_layers.append(layers.SoftmaxLayer(nn_layers[-1], 10, 0.1, 0, nonlinearity=layers.tanh))
    # #nn_layers.append(layers.OutputLayer(nn_layers[-1]))

    # Strided convolutions replace the conv + pool pairs above.
    nn_layers.append(layers.Input2DLayer(batch_size, 1, 28, 28))
    nn_layers.append(
        layers.StridedConv2DLayer(nn_layers[-1], n_filters=nkerns[0],
                                  filter_width=5, filter_height=5,
                                  stride_x=2, stride_y=2,
                                  weights_std=.01, init_bias_value=0.01,
                                  nonlinearity=T.tanh))
    nn_layers.append(
        layers.StridedConv2DLayer(nn_layers[-1], n_filters=nkerns[1],
                                  filter_width=5, filter_height=5,
                                  stride_x=2, stride_y=2,
                                  weights_std=.01, init_bias_value=0.01,
                                  nonlinearity=T.tanh))
    nn_layers.append(
        layers.DenseLayer(nn_layers[-1], 500, 0.1, 0,
                          nonlinearity=layers.tanh))
    nn_layers.append(
        layers.SoftmaxLayer(nn_layers[-1], 10, 0.1, 0,
                            nonlinearity=layers.tanh))

    mlp = NN(nn_layers, learning_rate=learning_rate, batch_size=batch_size,
             discrete_target=True)
    mlp.train_model_batch_patience(train_set_x, train_set_y,
                                   valid_set_x, valid_set_y,
                                   test_set_x, test_set_y, n_epochs=n_epochs)

    # Timing and test-set evaluation, kept for reference:
    # start_time = time.clock()
    # train_losses = mlp.train_model_batch(train_set_x, train_set_y, n_epochs)
    # end_time = time.clock()
    # print >> sys.stderr, ('The code ran for %.2fm' % ((end_time - start_time) / 60.))
    # print 'train losses'
    # print train_losses
    # print 'mean train loss'
    # print np.mean(train_losses)
    # print 'testing'
    # #test_mb_size = test_set_x.shape[0]
    # #nn_layers[0].mb_size = test_mb_size
    # #mlp_test = NN(nn_layers, batch_size=test_mb_size)
    # predicted_classes = mlp.output_model_batch(test_set_x)
    # miss = predicted_classes != test_set_y
    # test_error_rate = float(len(miss[miss])) / len(miss)
    # print test_error_rate

    print 'done'
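
# A minimal sketch (not part of the original file) of the feature-map sizes
# the two strided layers above produce, assuming StridedConv2DLayer performs
# a 'valid' convolution, i.e. out = (in - filter) // stride + 1:
def strided_conv_output_size(in_size, filter_size, stride):
    """Spatial output size of a 'valid' strided convolution."""
    return (in_size - filter_size) // stride + 1

s1 = strided_conv_output_size(28, 5, 2)   # 12
s2 = strided_conv_output_size(s1, 5, 2)   # 4
# So the 500-unit dense layer above sees nkerns[1] * 4 * 4 = 50 * 16 = 800 inputs.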
num_train = load_data.num_train  # restored: num_train must be set before the split (assumed to come from load_data, like num_test below)
num_valid = num_train // 10  # integer division
num_train -= num_valid
num_test = load_data.num_test

valid_ids = load_data.train_ids[num_train:]
train_ids = load_data.train_ids[:num_train]
test_ids = load_data.test_ids

train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

y_valid = np.load("data/solutions_train.npy")[num_train:]

print("Build model")
l0 = layers.Input2DLayer(BATCH_SIZE, NUM_INPUT_FEATURES,
                         input_sizes[0][0], input_sizes[0][1])
l0_45 = layers.Input2DLayer(BATCH_SIZE, NUM_INPUT_FEATURES,
                            input_sizes[1][0], input_sizes[1][1])
l0r = layers.MultiRotSliceLayer([l0, l0_45], part_size=45, include_flip=True)

# cuda-convnet kernels expect the batch dimension last: BC01 -> C01B.
l0s = cc_layers.ShuffleBC01ToC01BLayer(l0r)

l1a = cc_layers.CudaConvnetConv2DLayer(l0s, n_filters=32, filter_size=6,
                                       weights_std=0.01, init_bias_value=0.1,
                                       dropout=0.0, partial_sum=1,
                                       untie_biases=True)
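
# A minimal numpy sketch (not in the original file) of what the
# ShuffleBC01ToC01BLayer above does: cuda-convnet wants the channel/row/column
# axes first and the batch axis last. Shapes here are illustrative only.
import numpy as np

bc01 = np.zeros((128, 3, 45, 45), dtype='float32')  # (batch, channel, 0, 1)
c01b = bc01.transpose(1, 2, 3, 0)                   # (channel, 0, 1, batch)
assert c01b.shape == (3, 45, 45, 128)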
def __init__(self, num_actions, phi_length, width, height, discount=.9,
             learning_rate=.01, batch_size=32, approximator='none'):
    self._batch_size = batch_size
    self._num_input_features = phi_length
    self._phi_length = phi_length
    self._img_width = width
    self._img_height = height
    self._discount = discount
    self.num_actions = num_actions
    self.learning_rate = learning_rate
    self.scale_input_by = 255.0

    print "neural net initialization, lr is: ", self.learning_rate, approximator

    # CONSTRUCT THE LAYERS
    self.q_layers = []
    self.q_layers.append(layers.Input2DLayer(self._batch_size,
                                             self._num_input_features,
                                             self._img_height,
                                             self._img_width,
                                             self.scale_input_by))

    if approximator == 'cuda_conv':
        self.q_layers.append(
            cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=16, filter_size=8,
                                             stride=4, weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=32, filter_size=4,
                                             stride=2, weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

    elif approximator == 'conv':
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1], n_filters=16,
                                      filter_width=8, filter_height=8,
                                      stride_x=4, stride_y=4,
                                      weights_std=.01, init_bias_value=0.01))
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1], n_filters=32,
                                      filter_width=4, filter_height=4,
                                      stride_x=2, stride_y=2,
                                      weights_std=.01, init_bias_value=0.01))

    if approximator == 'cuda_conv' or approximator == 'conv':
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1], n_outputs=256,
                              weights_std=0.01, init_bias_value=0.1,
                              dropout=0, nonlinearity=layers.rectify))
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1], n_outputs=num_actions,
                              weights_std=0.01, init_bias_value=0.1,
                              dropout=0, nonlinearity=layers.identity))

    if approximator == 'none':
        self.q_layers.append(
            layers.DenseLayerNoBias(self.q_layers[-1],
                                    n_outputs=num_actions,
                                    weights_std=0.00, dropout=0,
                                    nonlinearity=layers.identity))

    self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

    for i in range(len(self.q_layers) - 1):
        print self.q_layers[i].get_output_shape()

    # Now create a network (using the same weights)
    # for next state q values
    self.next_layers = copy_layers(self.q_layers)
    # (height, width) argument order fixed to match q_layers[0] above.
    self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                              self._num_input_features,
                                              self._img_height,
                                              self._img_width,
                                              self.scale_input_by)
    self.next_layers[1].input_layer = self.next_layers[0]

    self.rewards = T.col()
    self.actions = T.icol()

    # Build the loss function ...
    print "building loss function"
    q_vals = self.q_layers[-1].predictions()
    next_q_vals = self.next_layers[-1].predictions()
    next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
    target = self.rewards + discount * next_maxes
    target = theano.gradient.consider_constant(target)
    diff = target - q_vals

    # Zero out all entries for actions that were not chosen...
    mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
    diff_masked = diff * mask
    error = T.mean(diff_masked**2)
    # Only one entry per row is nonzero, so scaling the mean by the number
    # of actions gives the mean per-transition squared TD error.
    self._loss = error * diff_masked.shape[1]

    # Restored: self._parameters is required by the update rule below.
    self._parameters = layers.all_parameters(self.q_layers[-1])

    self._idx = T.lscalar('idx')

    # CREATE VARIABLES FOR INPUT AND OUTPUT
    self.states_shared = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.states_shared_next = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        np.zeros((1, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((1, 1), dtype='int32'),
        broadcastable=(False, True))

    # Each call trains on minibatch idx of the shared arrays.
    self._givens = {
        self.q_layers[0].input_var:
            self.states_shared[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size, :, :, :],
        self.next_layers[0].input_var:
            self.states_shared_next[self._idx * self._batch_size:
                                    (self._idx + 1) * self._batch_size, :, :, :],
        self.rewards:
            self.rewards_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
        self.actions:
            self.actions_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :]
    }

    self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
        self._loss, self._parameters, learning_rate=self.learning_rate,
        rho=0.9, momentum=0.9, epsilon=1e-6)

    self._train = theano.function([self._idx], self._loss,
                                  givens=self._givens,
                                  updates=self._updates)
    self._compute_loss = theano.function([self._idx], self._loss,
                                         givens=self._givens)
    self._compute_q_vals = theano.function([self.q_layers[0].input_var],
                                           self.q_layers[-1].predictions(),
                                           on_unused_input='ignore')
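
# A minimal numpy sketch (not part of the original class) of the masked
# Q-learning loss built above, with hypothetical values: the target is
# r + discount * max_a' Q(s', a'), and the TD error is kept only for the
# action actually taken.
import numpy as np

discount = 0.9
q_vals = np.array([[1.0, 2.0], [0.5, 0.3]])        # Q(s, a) per action
next_q_vals = np.array([[0.0, 3.0], [1.0, 0.0]])   # Q(s', a') per action
rewards = np.array([[1.0], [0.0]])
actions = np.array([[1], [0]])                     # chosen action indices

next_maxes = next_q_vals.max(axis=1, keepdims=True)
target = rewards + discount * next_maxes           # treated as constant
diff = target - q_vals

mask = np.zeros_like(diff)
mask[np.arange(len(actions)), actions.ravel()] = 1.0  # one-hot chosen actions
diff_masked = diff * mask
loss = (diff_masked ** 2).mean() * diff_masked.shape[1]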