Example #1
    def _testSolver(self, solver):
        # We are going to test whether the solver correctly deals with the
        # MPI case where multiple nodes host different data. To this end we
        # create a dummy regression problem which, when run under MPI with
        # more than one node, produces a different result from a
        # single-node run.
        np.random.seed(1701)
        X = base.Blob((10, 1),
                      filler=fillers.GaussianRandFiller(mean=mpi.RANK,
                                                        std=0.01))
        Y = base.Blob((10, 1),
                      filler=fillers.ConstantFiller(value=mpi.RANK + 1.))
        decaf_net = base.Net()
        decaf_net.add_layer(core_layers.InnerProductLayer(name='ip',
                                                          num_output=1),
                            needs='X',
                            provides='pred')
        decaf_net.add_layer(core_layers.SquaredLossLayer(name='loss'),
                            needs=['pred', 'Y'])
        decaf_net.finish()
        solver.solve(decaf_net, previous_net={'X': X, 'Y': Y})
        w, b = decaf_net.layers['ip'].param()
        print(w.data(), b.data())
        if mpi.SIZE == 1:
            # If the world size is 1, we are fitting y = 0 * x + 1.
            np.testing.assert_array_almost_equal(w.data(), 0., 2)
            np.testing.assert_array_almost_equal(b.data(), 1., 2)
        else:
            # If the world size is larger than 1, we are fitting y = x + 1.
            np.testing.assert_array_almost_equal(w.data(), 1., 2)
            np.testing.assert_array_almost_equal(b.data(), 1., 2)
        self.assertTrue(True)
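
The underscore-prefixed method above is a helper rather than a test that unittest collects on its own; concrete test cases drive it, one per solver. A minimal sketch of such drivers, reusing the solver constructors shown in the MNIST example below (the class name and the core_solvers import path are assumptions):

import unittest

from decaf.opt import core_solvers  # assumed module path

class TestMpiSolver(unittest.TestCase):

    # ... _testSolver as defined above ...

    def testSGDSolver(self):
        solver = core_solvers.SGDSolver(base_lr=0.1,
                                        lr_policy='inv',
                                        gamma=0.001,
                                        power=0.75,
                                        momentum=0.9,
                                        max_iter=1000)
        self._testSolver(solver)

    def testAdagradSolver(self):
        solver = core_solvers.AdagradSolver(base_lr=0.02,
                                            base_accum=1.e-6,
                                            max_iter=1000)
        self._testSolver(solver)

To exercise the multi-node branch of the assertions, the test file would be launched under MPI, e.g. mpirun -np 2 python the_test_file.py, so that mpi.SIZE > 1.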
Example #2
def imagenet_layers():
    return [
        core_layers.ConvolutionLayer(
            name='conv-220-3-to-55-96', num_kernels=96, ksize=11,
            stride=4, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-55-96'),
        core_layers.LocalResponseNormalizeLayer(
            name='lrn-55-96', k=2., alpha=0.0001, beta=0.75, size=5),
        core_layers.PoolingLayer(
            name='pool-55-to-27', psize=3, stride=2, mode='max'),
        core_layers.GroupConvolutionLayer(
            name='conv-27-256', num_kernels=128, group=2, ksize=5,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-27-256'),
        core_layers.LocalResponseNormalizeLayer(
            name='lrn-27-256', k=2., alpha=0.0001, beta=0.75, size=5),
        core_layers.PoolingLayer(
            name='pool-27-to-13', psize=3, stride=2, mode='max'),
        core_layers.ConvolutionLayer(
            name='conv-13-384', num_kernels=384, ksize=3,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-384'),
        core_layers.GroupConvolutionLayer(
            name='conv-13-384-second', num_kernels=192, group=2, ksize=3,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-384-second'),
        core_layers.GroupConvolutionLayer(
            name='conv-13-256', num_kernels=128, group=2, ksize=3,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-256'),
        core_layers.PoolingLayer(
            name='pool-13-to-6', psize=3, stride=2, mode='max'),
        core_layers.FlattenLayer(name='flatten'),
        core_layers.InnerProductLayer(
            name='fully-1', num_output=4096,
            filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-full1'),
        core_layers.InnerProductLayer(
            name='fully-2', num_output=4096,
            filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-full2'),
        core_layers.InnerProductLayer(
            name='predict', num_output=1000,
            filler=fillers.XavierFiller()),
    ]
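
For reference, a minimal sketch of wiring this layer list into a network, following the same add_layers pattern as the MNIST example below; the 'image' and 'label' blob names are illustrative, and a data layer providing those blobs is assumed:

from decaf import base

decaf_net = base.Net()
decaf_net.add_layers(imagenet_layers(),
                     needs='image',
                     provides='prediction')
decaf_net.add_layer(
    core_layers.MultinomialLogisticLossLayer(name='loss'),
    needs=['prediction', 'label'])
decaf_net.finish()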
Example #3
    def testInnerproductGrad(self):
        np.random.seed(1701)
        input_blob = base.Blob((4, 3), filler=fillers.GaussianRandFiller())
        output_blob = base.Blob()
        checker = gradcheck.GradChecker(1e-5)

        ip_layer = core_layers.InnerProductLayer(
            name='ip',
            num_output=5,
            bias=True,
            filler=fillers.GaussianRandFiller(),
            bias_filler=fillers.GaussianRandFiller(),
            reg=None)
        result = checker.check(ip_layer, [input_blob], [output_blob])
        print(result)
        self.assertTrue(result[0])

        ip_layer = core_layers.InnerProductLayer(
            name='ip',
            num_output=5,
            bias=False,
            filler=fillers.GaussianRandFiller(),
            reg=None)
        result = checker.check(ip_layer, [input_blob], [output_blob])
        print(result)
        self.assertTrue(result[0])

        ip_layer = core_layers.InnerProductLayer(
            name='ip',
            num_output=5,
            bias=True,
            filler=fillers.GaussianRandFiller(),
            bias_filler=fillers.GaussianRandFiller(),
            reg=regularization.L2Regularizer(weight=0.1))
        result = checker.check(ip_layer, [input_blob], [output_blob])
        print(result)
        self.assertTrue(result[0])
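
The three checks above cover the bias/no-bias and L2-regularized configurations; result[0] is the flag asserted truthy when the analytic and numerical gradients agree. As a reminder of the numerical side that GradChecker automates (this helper is illustrative, not decaf API), a central-difference gradient of a scalar-valued function looks like:

import numpy as np

def numeric_gradient(func, x, eps=1e-5):
    """Central-difference gradient of scalar func at x.

    Perturbs x in place, then restores it.
    """
    grad = np.zeros_like(x)
    flat_x = x.ravel()  # view into x for element-wise perturbation
    flat_g = grad.ravel()
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + eps
        f_plus = func(x)
        flat_x[i] = orig - eps
        f_minus = func(x)
        flat_x[i] = orig  # restore
        flat_g[i] = (f_plus - f_minus) / (2. * eps)
    return grad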
Example #4
from functools import reduce  # reduce is not a builtin in Python 3
from operator import mul

import numpy as np

def translator_fc(cuda_layer, output_shapes):
    """The translator for the fc layer."""
    input_shape = output_shapes[cuda_layer['inputLayers'][0]['name']]
    input_size = reduce(mul, input_shape)
    num_output = cuda_layer['outputs']
    output_shapes[cuda_layer['name']] = (num_output,)
    decaf_layer = core_layers.InnerProductLayer(
        name=cuda_layer['name'],
        num_output=num_output)
    # put the parameters
    params = decaf_layer.param()
    # weight
    weight = cuda_layer['weights'][0]
    if weight.shape[0] != input_size or weight.shape[1] != num_output:
        raise ValueError('Incorrect shapes: weight shape %s, input shape %s,'
                         ' num_output %d' %
                         (weight.shape, input_shape, num_output))
    if len(input_shape) == 3:
        # The original input is an image, so we will need to reshape it
        weight = weight.reshape(
            (input_shape[2], input_shape[0], input_shape[1], num_output))
        converted_weight = np.empty(input_shape + (num_output,),
                                    weight.dtype)
        for i in range(input_shape[2]):
            converted_weight[:, :, i, :] = weight[i, :, :, :]
        converted_weight.resize(input_size, num_output)
    else:
        converted_weight = weight
    params[0].mirror(converted_weight)
    bias = cuda_layer['biases'][0]
    params[1].mirror(bias)
    if len(input_shape) == 1:
        return decaf_layer
    else:
        # If the input is not a vector, we need to have a flatten layer first.
        return [core_layers.FlattenLayer(name=cuda_layer['name'] + '_flatten'),
                decaf_layer]
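
A minimal sketch of calling the translator; the dictionary layout below is inferred from the keys the function reads ('name', 'inputLayers', 'outputs', 'weights', 'biases'), and the layer names and sizes are hypothetical:

output_shapes = {'pool6': (6, 6, 32)}  # (height, width, channels), illustrative
cuda_layer = {
    'name': 'fc64',
    'inputLayers': [{'name': 'pool6'}],
    'outputs': 64,
    'weights': [np.random.randn(6 * 6 * 32, 64)],
    'biases': [np.random.randn(64)],
}
# The 3-D input shape takes the reshape path, so this returns
# [FlattenLayer, InnerProductLayer].
layers = translator_fc(cuda_layer, output_shapes)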
Example #5
def main():
    logging.getLogger().setLevel(logging.INFO)
    ######################################
    # First, let's create the decaf layer.
    ######################################
    logging.info('Loading data and creating the network...')
    decaf_net = base.Net()
    # add data layer
    dataset = mnist.MNISTDataLayer(name='mnist',
                                   rootfolder=ROOT_FOLDER,
                                   is_training=True)
    decaf_net.add_layer(dataset, provides=['image-all', 'label-all'])
    # add minibatch layer for stochastic optimization
    minibatch_layer = core_layers.BasicMinibatchLayer(name='batch',
                                                      minibatch=MINIBATCH)
    decaf_net.add_layer(minibatch_layer,
                        needs=['image-all', 'label-all'],
                        provides=['image', 'label'])
    # add the two_layer network
    decaf_net.add_layers([
        core_layers.FlattenLayer(name='flatten'),
        core_layers.InnerProductLayer(
            name='ip1',
            num_output=NUM_NEURONS,
            filler=fillers.GaussianRandFiller(std=0.1),
            bias_filler=fillers.ConstantFiller(value=0.1)),
        core_layers.ReLULayer(name='relu1'),
        core_layers.InnerProductLayer(
            name='ip2',
            num_output=NUM_CLASS,
            filler=fillers.GaussianRandFiller(std=0.3))
    ],
                         needs='image',
                         provides='prediction')
    # add loss layer
    loss_layer = core_layers.MultinomialLogisticLossLayer(name='loss')
    decaf_net.add_layer(loss_layer, needs=['prediction', 'label'])
    # finish.
    decaf_net.finish()
    ####################################
    # Decaf layer finished construction!
    ####################################

    # now, try to solve it
    if METHOD == 'adagrad':
        # The Adagrad Solver
        solver = core_solvers.AdagradSolver(base_lr=0.02,
                                            base_accum=1.e-6,
                                            max_iter=1000)
    elif METHOD == 'sgd':
        solver = core_solvers.SGDSolver(base_lr=0.1,
                                        lr_policy='inv',
                                        gamma=0.001,
                                        power=0.75,
                                        momentum=0.9,
                                        max_iter=1000)
    solver.solve(decaf_net)
    visualize.draw_net_to_file(decaf_net, 'mnist.png')
    decaf_net.save('mnist_2layers.decafnet')

    ##############################################
    # Now, let's load the net and run predictions
    ##############################################
    prediction_net = base.Net.load('mnist_2layers.decafnet')
    visualize.draw_net_to_file(prediction_net, 'mnist_test.png')
    # obtain the test data.
    dataset_test = mnist.MNISTDataLayer(name='mnist',
                                        rootfolder=ROOT_FOLDER,
                                        is_training=False)
    test_image = base.Blob()
    test_label = base.Blob()
    dataset_test.forward([], [test_image, test_label])
    # Run the net.
    pred = prediction_net.predict(image=test_image)['prediction']
    accuracy = (pred.argmax(1) == test_label.data()).sum() / float(
        test_label.data().size)
    print('Testing accuracy:', accuracy)
    print('Done.')
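
main() references imports and module-level constants that the snippet elides. A hypothetical preamble that would make it runnable; the decaf module paths and every constant value here are assumptions, not taken from the original:

import logging

from decaf import base
from decaf.layers import core_layers, fillers   # assumed module paths
from decaf.layers.data import mnist             # assumed module path
from decaf.opt import core_solvers              # assumed module path
from decaf.util import visualize                # assumed module path

ROOT_FOLDER = 'data/mnist'  # folder holding the MNIST files; illustrative
MINIBATCH = 64              # minibatch size; illustrative
NUM_NEURONS = 100           # hidden layer width; illustrative
NUM_CLASS = 10              # MNIST has ten digit classes
METHOD = 'sgd'              # 'sgd' or 'adagrad'

if __name__ == '__main__':
    main()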