def _testSolver(self, solver):
    # We are going to test if the solver correctly deals with the mpi case
    # where multiple nodes host different data. To this end we will
    # create a dummy regression problem which, when run under mpi with
    # >1 nodes, will create a different result from a single-node run.
    np.random.seed(1701)
    X = base.Blob((10, 1),
                  filler=fillers.GaussianRandFiller(mean=mpi.RANK, std=0.01))
    Y = base.Blob((10, 1),
                  filler=fillers.ConstantFiller(value=mpi.RANK + 1.))
    decaf_net = base.Net()
    decaf_net.add_layer(
        core_layers.InnerProductLayer(name='ip', num_output=1),
        needs='X', provides='pred')
    decaf_net.add_layer(
        core_layers.SquaredLossLayer(name='loss'),
        needs=['pred', 'Y'])
    decaf_net.finish()
    solver.solve(decaf_net, previous_net={'X': X, 'Y': Y})
    w, b = decaf_net.layers['ip'].param()
    print(w.data(), b.data())
    if mpi.SIZE == 1:
        # If size is 1, we are fitting y = 0 * x + 1.
        np.testing.assert_array_almost_equal(w.data(), 0., 2)
        np.testing.assert_array_almost_equal(b.data(), 1., 2)
    else:
        # If size is not 1, we are fitting y = x + 1.
        np.testing.assert_array_almost_equal(w.data(), 1., 2)
        np.testing.assert_array_almost_equal(b.data(), 1., 2)
    self.assertTrue(True)
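
# Sanity sketch (not part of the test): pooling the per-rank data and
# solving the least-squares problem directly recovers y = x + 1, which is
# what the multi-node mpi run above is expected to converge to. The helper
# and its four hypothetical ranks are illustrative, numpy-only assumptions.
def _pooled_fit_sketch(num_ranks=4):
    np.random.seed(1701)
    # Rank r hosts x ~ N(r, 0.01) with constant targets y = r + 1.
    x = np.concatenate(
        [np.random.normal(r, 0.01, 10) for r in range(num_ranks)])
    y = np.concatenate([np.full(10, r + 1.) for r in range(num_ranks)])
    design = np.vstack([x, np.ones_like(x)]).T
    w, b = np.linalg.lstsq(design, y, rcond=None)[0]
    return w, b  # both come out close to 1.0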
def imagenet_layers():
    return [
        core_layers.ConvolutionLayer(
            name='conv-220-3-to-55-96', num_kernels=96, ksize=11,
            stride=4, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-55-96'),
        core_layers.LocalResponseNormalizeLayer(
            name='lrn-55-96', k=2., alpha=0.0001, beta=0.75, size=5),
        core_layers.PoolingLayer(
            name='pool-55-to-27', psize=3, stride=2, mode='max'),
        core_layers.GroupConvolutionLayer(
            name='conv-27-256', num_kernels=128, group=2, ksize=5,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-27-256'),
        core_layers.LocalResponseNormalizeLayer(
            name='lrn-27-256', k=2., alpha=0.0001, beta=0.75, size=5),
        core_layers.PoolingLayer(
            name='pool-27-to-13', psize=3, stride=2, mode='max'),
        core_layers.ConvolutionLayer(
            name='conv-13-384', num_kernels=384, ksize=3, stride=1,
            mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-384'),
        core_layers.GroupConvolutionLayer(
            name='conv-13-384-second', num_kernels=192, group=2, ksize=3,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-384-second'),
        core_layers.GroupConvolutionLayer(
            name='conv-13-256', num_kernels=128, group=2, ksize=3,
            stride=1, mode='same', filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-13-256'),
        core_layers.PoolingLayer(
            name='pool-13-to-6', psize=3, stride=2, mode='max'),
        core_layers.FlattenLayer(name='flatten'),
        core_layers.InnerProductLayer(
            name='fully-1', num_output=4096, filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-full1'),
        core_layers.InnerProductLayer(
            name='fully-2', num_output=4096, filler=fillers.XavierFiller()),
        core_layers.ReLULayer(name='relu-full2'),
        core_layers.InnerProductLayer(
            name='predict', num_output=1000, filler=fillers.XavierFiller()),
    ]
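
# A minimal usage sketch for imagenet_layers(): wiring the stack into a
# decaf net. The blob names 'image'/'label' and the loss layer here are
# illustrative assumptions mirroring the mnist example below, not part of
# the original code.
def imagenet_net_sketch():
    decaf_net = base.Net()
    decaf_net.add_layers(imagenet_layers(),
                         needs='image', provides='prediction')
    decaf_net.add_layer(
        core_layers.MultinomialLogisticLossLayer(name='loss'),
        needs=['prediction', 'label'])
    decaf_net.finish()
    return decaf_net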
def testInnerproductGrad(self):
    np.random.seed(1701)
    input_blob = base.Blob((4, 3), filler=fillers.GaussianRandFiller())
    output_blob = base.Blob()
    checker = gradcheck.GradChecker(1e-5)
    # Case 1: with bias, no regularization.
    ip_layer = core_layers.InnerProductLayer(
        name='ip', num_output=5, bias=True,
        filler=fillers.GaussianRandFiller(),
        bias_filler=fillers.GaussianRandFiller(), reg=None)
    result = checker.check(ip_layer, [input_blob], [output_blob])
    print(result)
    self.assertTrue(result[0])
    # Case 2: without bias.
    ip_layer = core_layers.InnerProductLayer(
        name='ip', num_output=5, bias=False,
        filler=fillers.GaussianRandFiller(), reg=None)
    result = checker.check(ip_layer, [input_blob], [output_blob])
    print(result)
    self.assertTrue(result[0])
    # Case 3: with bias and L2 regularization.
    ip_layer = core_layers.InnerProductLayer(
        name='ip', num_output=5, bias=True,
        filler=fillers.GaussianRandFiller(),
        bias_filler=fillers.GaussianRandFiller(),
        reg=regularization.L2Regularizer(weight=0.1))
    result = checker.check(ip_layer, [input_blob], [output_blob])
    print(result)
    self.assertTrue(result[0])
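
# For reference, the idea behind GradChecker as a standalone sketch (this is
# not decaf's implementation): compare the layer's analytical gradient
# against a central finite difference of a scalar loss.
def numeric_grad_sketch(func, x, eps=1e-5):
    """Central-difference gradient of a scalar function func at array x."""
    grad = np.zeros_like(x)
    for i in range(x.size):
        orig = x.flat[i]
        x.flat[i] = orig + eps
        f_plus = func(x)
        x.flat[i] = orig - eps
        f_minus = func(x)
        x.flat[i] = orig
        grad.flat[i] = (f_plus - f_minus) / (2. * eps)
    return grad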
def translator_fc(cuda_layer, output_shapes):
    """The translator for the fc layer."""
    # (reduce and mul come from the enclosing module's functools/operator
    # imports.)
    input_shape = output_shapes[cuda_layer['inputLayers'][0]['name']]
    input_size = reduce(mul, input_shape)
    num_output = cuda_layer['outputs']
    output_shapes[cuda_layer['name']] = (num_output,)
    decaf_layer = core_layers.InnerProductLayer(
        name=cuda_layer['name'], num_output=num_output)
    # put the parameters
    params = decaf_layer.param()
    # weight
    weight = cuda_layer['weights'][0]
    if weight.shape[0] != input_size or weight.shape[1] != num_output:
        raise ValueError('Incorrect shapes: weight shape %s, input shape %s,'
                         ' num_output %d' %
                         (weight.shape, input_shape, num_output))
    if len(input_shape) == 3:
        # The original input is an image, so we will need to reshape it.
        weight = weight.reshape(
            (input_shape[2], input_shape[0], input_shape[1], num_output))
        converted_weight = np.empty(input_shape + (num_output,), weight.dtype)
        for i in range(input_shape[2]):
            converted_weight[:, :, i, :] = weight[i, :, :, :]
        converted_weight.resize(input_size, num_output)
    else:
        converted_weight = weight
    params[0].mirror(converted_weight)
    bias = cuda_layer['biases'][0]
    params[1].mirror(bias)
    if len(input_shape) == 1:
        return decaf_layer
    else:
        # If the input is not a vector, we need a flatten layer first.
        return [core_layers.FlattenLayer(name=cuda_layer['name'] + '_flatten'),
                decaf_layer]
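
# A vectorized equivalent of the channel-reordering loop in translator_fc,
# written as a standalone sketch (a hypothetical helper, not in the source):
# cuda-convnet stores the image-input fc weight channel-major, while decaf
# expects the flattened (height, width, channels) ordering.
def convert_fc_weight_sketch(weight, input_shape, num_output):
    """weight: (input_size, num_output) array in channel-major layout;
    input_shape: (height, width, channels)."""
    h, w, c = input_shape
    reordered = weight.reshape(c, h, w, num_output).transpose(1, 2, 0, 3)
    return reordered.reshape(h * w * c, num_output)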
def main():
    logging.getLogger().setLevel(logging.INFO)
    ####################################
    # First, let's create the decaf net.
    ####################################
    logging.info('Loading data and creating the network...')
    decaf_net = base.Net()
    # add data layer
    dataset = mnist.MNISTDataLayer(
        name='mnist', rootfolder=ROOT_FOLDER, is_training=True)
    decaf_net.add_layer(dataset, provides=['image-all', 'label-all'])
    # add minibatch layer for stochastic optimization
    minibatch_layer = core_layers.BasicMinibatchLayer(
        name='batch', minibatch=MINIBATCH)
    decaf_net.add_layer(minibatch_layer,
                        needs=['image-all', 'label-all'],
                        provides=['image', 'label'])
    # add the two-layer network
    decaf_net.add_layers([
        core_layers.FlattenLayer(name='flatten'),
        core_layers.InnerProductLayer(
            name='ip1', num_output=NUM_NEURONS,
            filler=fillers.GaussianRandFiller(std=0.1),
            bias_filler=fillers.ConstantFiller(value=0.1)),
        core_layers.ReLULayer(name='relu1'),
        core_layers.InnerProductLayer(
            name='ip2', num_output=NUM_CLASS,
            filler=fillers.GaussianRandFiller(std=0.3))
    ], needs='image', provides='prediction')
    # add loss layer
    loss_layer = core_layers.MultinomialLogisticLossLayer(name='loss')
    decaf_net.add_layer(loss_layer, needs=['prediction', 'label'])
    # finish.
    decaf_net.finish()
    ##################################
    # Decaf net finished construction!
    ##################################
    # now, try to solve it
    if METHOD == 'adagrad':
        # The Adagrad solver
        solver = core_solvers.AdagradSolver(
            base_lr=0.02, base_accum=1.e-6, max_iter=1000)
    elif METHOD == 'sgd':
        solver = core_solvers.SGDSolver(
            base_lr=0.1, lr_policy='inv', gamma=0.001, power=0.75,
            momentum=0.9, max_iter=1000)
    else:
        raise ValueError('Unknown solver method: %s' % METHOD)
    solver.solve(decaf_net)
    visualize.draw_net_to_file(decaf_net, 'mnist.png')
    decaf_net.save('mnist_2layers.decafnet')
    ##############################################
    # Now, let's load the net and run predictions.
    ##############################################
    prediction_net = base.Net.load('mnist_2layers.decafnet')
    visualize.draw_net_to_file(prediction_net, 'mnist_test.png')
    # obtain the test data.
    dataset_test = mnist.MNISTDataLayer(
        name='mnist', rootfolder=ROOT_FOLDER, is_training=False)
    test_image = base.Blob()
    test_label = base.Blob()
    dataset_test.forward([], [test_image, test_label])
    # Run the net.
    pred = prediction_net.predict(image=test_image)['prediction']
    accuracy = (pred.argmax(1) == test_label.data()).sum() / float(
        test_label.data().size)
    print('Testing accuracy:', accuracy)
    print('Done.')
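
# For reference, a sketch of the 'inv' learning-rate policy passed to the
# SGDSolver above, assuming it follows the common definition
# lr(t) = base_lr * (1 + gamma * t) ** (-power); this is an assumption
# about decaf's internals, stated here only for illustration.
def inv_lr_sketch(base_lr, gamma, power, iter_):
    return base_lr * (1. + gamma * iter_) ** (-power)
# e.g. inv_lr_sketch(0.1, 0.001, 0.75, 1000) is approximately 0.059.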