예제 #1
0
def test_convnet():
    """Train a small strided-convolution network on the pickled MNIST data.

    Loads the (train, valid, test) split from ``data/mnist.pkl.gz``, casts
    every array to ``theano.config.floatX``, reshapes the images to
    ``(n_examples, 1, 28, 28)``, stacks an input layer, two strided
    convolutions, a dense layer and a softmax layer, and hands the stack to
    ``NN.train_model_batch_patience``.  Prints ``done`` once training
    returns.
    """
    batch_size = 500  # size of the minibatch
    learning_rate = 0.1
    n_epochs = 200

    # Load the dataset.  The ``with`` block closes the file even when
    # unpickling raises, which a manual open/load/close sequence does not.
    # NOTE: unpickling can execute arbitrary code; only point this at a
    # trusted dataset file.
    with gzip.open('data/mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)

    def as_float(values):
        # Cast to the float type Theano is configured to use.
        return np.asarray(values, dtype=theano.config.floatX)

    def as_images(values):
        # Same trailing dimensions as the Input2DLayer built below.
        images = as_float(values)
        return images.reshape((images.shape[0], 1, 28, 28))

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    train_set_x, train_set_y = as_images(train_set_x), as_float(train_set_y)
    valid_set_x, valid_set_y = as_images(valid_set_x), as_float(valid_set_y)
    test_set_x, test_set_y = as_images(test_set_x), as_float(test_set_y)

    # Number of filters in each of the two convolutional layers.
    nkerns = [20, 50]

    nn_layers = [layers.Input2DLayer(batch_size, 1, 28, 28)]

    # The two convolutional layers differ only in their filter count.
    for n_filters in nkerns:
        nn_layers.append(
            layers.StridedConv2DLayer(nn_layers[-1],
                                      n_filters=n_filters,
                                      filter_width=5,
                                      filter_height=5,
                                      stride_x=2,
                                      stride_y=2,
                                      weights_std=.01,
                                      init_bias_value=0.01,
                                      nonlinearity=T.tanh))

    nn_layers.append(
        layers.DenseLayer(nn_layers[-1], 500, 0.1, 0,
                          nonlinearity=layers.tanh))

    nn_layers.append(
        layers.SoftmaxLayer(nn_layers[-1], 10, 0.1, 0,
                            nonlinearity=layers.tanh))

    mlp = NN(nn_layers,
             learning_rate=learning_rate,
             batch_size=batch_size,
             discrete_target=True)
    mlp.train_model_batch_patience(train_set_x,
                                   train_set_y,
                                   valid_set_x,
                                   valid_set_y,
                                   test_set_x,
                                   test_set_y,
                                   n_epochs=n_epochs)

    # Single-argument call form: prints the same text whether ``print`` is
    # the Python 2 statement or the print function.
    print('done')
# Hold out the last tenth (rounded down) of the training examples for
# validation.  num_train is defined earlier in the script, outside this view.
num_valid = num_train // 10  # integer division
num_train -= num_valid  # from here on num_train counts only the examples kept for training
num_test = load_data.num_test

# Split the id list at the same boundary: head for training, tail for
# validation.
valid_ids = load_data.train_ids[num_train:]
train_ids = load_data.train_ids[:num_train]
test_ids = load_data.test_ids

# Index ranges for each subset; the validation indices continue directly
# after the training indices.
train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

# Rows of the training solutions file from num_train onward, i.e. the rows
# belonging to the held-out examples.
y_valid = np.load("data/solutions_train.npy")[num_train:]

print("Build model")
# Two input layers with the same batch size and feature count; their last two
# arguments come from input_sizes[0] and input_sizes[1] respectively.
l0 = layers.Input2DLayer(BATCH_SIZE, NUM_INPUT_FEATURES, input_sizes[0][0],
                         input_sizes[0][1])
l0_45 = layers.Input2DLayer(BATCH_SIZE, NUM_INPUT_FEATURES, input_sizes[1][0],
                            input_sizes[1][1])

# Combines both input layers into one layer.
# NOTE(review): presumably cuts rotated (and flipped) 45-pixel parts out of
# each input -- confirm against MultiRotSliceLayer.
l0r = layers.MultiRotSliceLayer([l0, l0_45], part_size=45, include_flip=True)

# NOTE(review): judging by the class name this reorders the axes from
# (batch, channel, 0, 1) to (channel, 0, 1, batch) for the cuda-convnet
# layers -- confirm.
l0s = cc_layers.ShuffleBC01ToC01BLayer(l0r)

# First convolutional layer: 32 filters of size 6, dropout disabled.
l1a = cc_layers.CudaConvnetConv2DLayer(l0s,
                                       n_filters=32,
                                       filter_size=6,
                                       weights_std=0.01,
                                       init_bias_value=0.1,
                                       dropout=0.0,
                                       partial_sum=1,
                                       untie_biases=True)
예제 #3
0
    def __init__(self,
                 num_actions,
                 phi_length,
                 width,
                 height,
                 discount=.9,
                 learning_rate=.01,
                 batch_size=32,
                 approximator='none'):
        """Build the Q-network, its next-state twin and the Theano functions.

        Args:
            num_actions: number of outputs of the final Q-value layer.
            phi_length: stored as ``_phi_length`` and also used as the
                number of input features of the input layer.
            width: image width handed to the input layers.
            height: image height handed to the input layers.
            discount: factor applied to the next-state maximum when the
                training target is formed.
            learning_rate: step size passed to the update rule.
            batch_size: minibatch size of the input layers; also the length
                of the slice taken from the shared variables per step.
            approximator: ``'cuda_conv'``, ``'conv'`` or ``'none'``.  Any
                other value adds nothing between the input layer and the
                output layer.
        """
        self._batch_size = batch_size
        self._num_input_features = phi_length
        self._phi_length = phi_length
        self._img_width = width
        self._img_height = height
        self._discount = discount
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.scale_input_by = 255.0

        # One pre-formatted argument, so the output is the same whether
        # ``print`` is the Python 2 statement or the print function.
        print("neural net initialization, lr is:  %s %s" %
              (self.learning_rate, approximator))

        # CONSTRUCT THE LAYERS
        self.q_layers = []
        self.q_layers.append(
            layers.Input2DLayer(self._batch_size, self._num_input_features,
                                self._img_height, self._img_width,
                                self.scale_input_by))

        if approximator == 'cuda_conv':
            self.q_layers.append(
                cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=16,
                                                 filter_size=8,
                                                 stride=4,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=32,
                                                 filter_size=4,
                                                 stride=2,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

        elif approximator == 'conv':
            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=16,
                                          filter_width=8,
                                          filter_height=8,
                                          stride_x=4,
                                          stride_y=4,
                                          weights_std=.01,
                                          init_bias_value=0.01))

            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=32,
                                          filter_width=4,
                                          filter_height=4,
                                          stride_x=2,
                                          stride_y=2,
                                          weights_std=.01,
                                          init_bias_value=0.01))

        # Both convolutional variants share the same dense head.
        if approximator in ('cuda_conv', 'conv'):
            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=256,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.rectify))

            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=num_actions,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.identity))

        # 'none': a single bias-free dense layer straight from the input.
        if approximator == 'none':
            self.q_layers.append(
                layers.DenseLayerNoBias(self.q_layers[-1],
                                        n_outputs=num_actions,
                                        weights_std=0.00,
                                        dropout=0,
                                        nonlinearity=layers.identity))

        self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

        # Report the output shape of every layer except the output layer.
        for layer in self.q_layers[:-1]:
            print(layer.get_output_shape())

        # Now create a network (using the same weights)
        # for next state q values
        self.next_layers = copy_layers(self.q_layers)
        # Pass height and width in the same order as q_layers[0] above.
        # This call used to pass (width, height), so for non-square images
        # the two input layers described different shapes.
        # NOTE(review): confirm against Input2DLayer's signature that
        # (height, width) is the intended order for both calls.
        self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                                  self._num_input_features,
                                                  self._img_height,
                                                  self._img_width,
                                                  self.scale_input_by)
        self.next_layers[1].input_layer = self.next_layers[0]

        self.rewards = T.col()
        self.actions = T.icol()

        # Build the loss function ...
        print("building loss funtion")
        q_vals = self.q_layers[-1].predictions()
        next_q_vals = self.next_layers[-1].predictions()
        next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
        target = self.rewards + discount * next_maxes
        # Treat the target as a constant so no gradient flows through it.
        target = theano.gradient.consider_constant(target)
        diff = target - q_vals
        # Zero out all entries for actions that were not chosen...
        mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
        diff_masked = diff * mask
        error = T.mean(diff_masked**2)
        # T.mean averages over every entry, so scaling by the number of
        # columns turns it into the per-row sum of squared entries averaged
        # over rows (assuming diff_masked is 2-D -- TODO confirm).
        self._loss = error * diff_masked.shape[1]

        self._parameters = layers.all_parameters(self.q_layers[-1])

        self._idx = T.lscalar('idx')

        # CREATE VARIABLES FOR INPUT AND OUTPUT
        # The shared variables start out as placeholders of minimal size.
        self.states_shared = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.states_shared_next = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(
            np.zeros((1, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))
        self.actions_shared = theano.shared(
            np.zeros((1, 1), dtype='int32'),
            broadcastable=(False, True))

        # Minibatch number ``idx`` covers rows [start, stop) of each shared
        # variable.
        start = self._idx * self._batch_size
        stop = (self._idx + 1) * self._batch_size
        self._givens = {
            self.q_layers[0].input_var:
                self.states_shared[start:stop, :, :, :],
            self.next_layers[0].input_var:
                self.states_shared_next[start:stop, :, :, :],
            self.rewards:
                self.rewards_shared[start:stop, :],
            self.actions:
                self.actions_shared[start:stop, :],
        }

        self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
            self._loss, self._parameters, learning_rate=self.learning_rate,
            rho=0.9, momentum=0.9, epsilon=1e-6)

        # Same loss expression twice: with parameter updates for training,
        # and without for evaluation only.
        self._train = theano.function([self._idx],
                                      self._loss,
                                      givens=self._givens,
                                      updates=self._updates)
        self._compute_loss = theano.function([self._idx],
                                             self._loss,
                                             givens=self._givens)
        # Q-values computed directly from a states array passed as input.
        self._compute_q_vals = theano.function(
            [self.q_layers[0].input_var],
            self.q_layers[-1].predictions(),
            on_unused_input='ignore')