def test_last_n_window_ops(self):
    collect_net = core.Net("collect_net")
    collect_net.GivenTensorFill(
        [],
        "input",
        shape=[3, 2],
        values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    )
    input_array = np.array(list(range(1, 7)), dtype=np.float32).reshape(3, 2)

    workspace.CreateBlob("output")
    workspace.FeedBlob("next", np.array(0, dtype=np.int32))
    collect_net.LastNWindowCollector(
        ["output", "next", "input"],
        ["output", "next"],
        num_to_collect=7,
    )

    plan = core.Plan("collect_data")
    plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=1))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob("output")
    npt.assert_array_equal(input_array, reference_result)

    plan = core.Plan("collect_data")
    plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=2))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob("output")
    npt.assert_array_equal(input_array[[1, 2, 2, 0, 1, 2, 0]], reference_result)

    plan = core.Plan("collect_data")
    plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=3))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob("output")
    npt.assert_array_equal(input_array[[2, 0, 1, 2, 2, 0, 1]], reference_result)
def test_pair_wise_loss_predictions(self, X, label, gc, dc):
    workspace.FeedBlob('X', X)
    workspace.FeedBlob('label', label)
    new_label = np.array([label[1], label[0]])
    new_x = np.array([X[1], X[0]])
    workspace.FeedBlob('new_x', new_x)
    workspace.FeedBlob('new_label', new_label)
    net = core.Net('net')
    net.PairWiseLoss(['X', 'label'], ['output'])
    net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
    plan = core.Plan('predict_data')
    plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
    workspace.RunPlan(plan)
    output = workspace.FetchBlob('output')
    new_output = workspace.FetchBlob('new_output')
    sign = 1 if label[0] > label[1] else -1
    if label[0] == label[1]:
        self.assertEqual(np.asscalar(output), 0)
        return

    self.assertAlmostEqual(
        np.asscalar(output),
        np.asscalar(np.log(1 + np.exp(sign * (X[1] - X[0])))),
        delta=1e-4)
    # check that swapping row order doesn't alter the overall loss
    self.assertAlmostEqual(output, new_output)
def test_create_plan_from_proto_correctly(self):
    from caffe2.python.net_builder import ops
    with Node('trainer'), Task(name='my_task', num_instances=2) as task:
        with ops.task_init():
            globl = ops.Const(0)
        with ops.task_instance_init():
            local = ops.Const(0)
        with ops.loop(100):
            ops.Copy(globl, local)
        with ops.task_instance_exit():
            ops.Add([globl, local], [globl])
        with ops.task_exit():
            ops.Mul([globl, globl], [globl])

    plan = core.Plan(task.get_step())
    test_plan = core.Plan.create_from_proto(plan.Proto())
    self.assertEqual(len(plan.Steps()), 1)
    self.assertEqual(len(test_plan.Steps()), 1)
    self.assertEqual(len(plan.Proto().network), 9)
    self.assertEqual(len(test_plan.Proto().network), 9)
    self.assertEqual(len(plan.Proto().execution_step), 1)
    self.assertEqual(len(test_plan.Proto().execution_step), 1)
    self.assertEqual(plan.Steps()[0].Name(), test_plan.Steps()[0].Name())
    self.assertEqual(len(plan.Nets()), len(test_plan.Nets()))
    for idx in range(0, len(plan.Nets())):
        # When the Nets for test_plan are created, each Net name gets a
        # suffix appended, so compare only the original prefix.
        net_1 = plan.Nets()[idx]
        net_2 = test_plan.Nets()[idx]
        trim_size = len(net_1.Name())
        self.assertEqual(net_1.Name(), net_2.Name()[:trim_size])
def test_atomic_ops(self):
    """
    Test that both countdown and checksum are updated atomically, by having
    the countdown count from 20k to 0 across parallel workers while the
    checksum accumulates each fetched value. If the operations are truly
    atomic, each value from 1 to 20k is fetched exactly once from the
    countdown and fed exactly once into the checksum, so that at the end
    the checksum must contain exactly sum[i=0..20000](i).
    """
    init_net = core.Net('init')
    mutex_countdown = init_net.CreateMutex([])
    mutex_checksum = init_net.CreateMutex([])
    countdown = init_net.ConstantIntFill([], shape=[], value=20000.)
    checksum = init_net.ConstantIntFill([], shape=[], value=0.)
    minus_one = init_net.ConstantIntFill([], shape=[], value=-1.)
    steps = []
    for i in range(0, 100):
        net = core.Net('net:%d' % i)
        _, fetched_count = net.AtomicFetchAdd(
            [mutex_countdown, countdown, minus_one],
            [countdown, 'fetched_count:%d' % i])
        net.AtomicFetchAdd(
            [mutex_checksum, checksum, fetched_count],
            [checksum, 'not_used'])
        steps.append(
            core.execution_step('worker:%d' % i, net, num_iter=200))
    super_step = core.execution_step(
        'parent', steps, concurrent_substeps=True)
    plan = core.Plan('plan')
    plan.AddStep(core.execution_step('init', init_net))
    plan.AddStep(super_step)
    workspace.RunPlan(plan)
    # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000
    self.assertEqual(workspace.FetchBlob(checksum), 200010000)
def testRunPlan(self):
    plan = core.Plan("test-plan")
    plan.AddNets([self.net])
    plan.AddStep(core.ExecutionStep("test-step", self.net))
    self.assertEqual(
        workspace.RunPlan(plan.Proto().SerializeToString()), True)
    self.assertEqual(workspace.HasBlob("testblob"), True)
def test_collect_tensor_ops(self):
    init_net = core.Net('init_net')
    blobs = ['blob_1', 'blob_2', 'blob_3']
    bvec_map = {}
    ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
    for b in blobs:
        init_net.ConstantFill([], [b], shape=[1, 2], value=0)
        bvec_map[b] = b + '_vec'
        init_net.CreateTensorVector([], [bvec_map[b]])

    reader_net = core.Net('reader_net')
    for b in blobs:
        reader_net.Add([b, ONE], [b])

    collect_net = core.Net('collect_net')
    num_to_collect = 1000
    max_example_to_cover = 100000
    for i, b in enumerate(blobs):
        if i == 0:
            bvec_map[b], position = collect_net.CollectTensor(
                [bvec_map[b], b], [bvec_map[b], 'position'],
                num_to_collect=num_to_collect)
        else:
            # sample in the same way as the first blob
            bvec_map[b], position = collect_net.CollectTensor(
                [bvec_map[b], b, position], [bvec_map[b], position],
                num_to_collect=num_to_collect)

    print('Collect Net Proto: {}'.format(collect_net.Proto()))

    plan = core.Plan('collect_data')
    plan.AddStep(core.execution_step('collect_init', init_net))
    plan.AddStep(
        core.execution_step('collect_data', [reader_net, collect_net],
                            num_iter=max_example_to_cover))
    workspace.RunPlan(plan)

    # concat the collected tensors
    concat_net = core.Net('concat_net')
    bconcated_map = {}
    for b in blobs:
        bconcated_map[b] = b + '_concated'
        concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]])

    workspace.RunNetOnce(concat_net)

    # check data
    reference_result = workspace.FetchBlob(bconcated_map[blobs[0]])
    self.assertEqual(reference_result.shape,
                     (min(num_to_collect, max_example_to_cover), 2))

    hist, _ = np.histogram(reference_result[:, 0], bins=10,
                           range=(1, max_example_to_cover))
    print('Sample histogram: {}'.format(hist))

    self.assertTrue(all(hist > 0.7 * (num_to_collect / 10)))
    for i in range(1, len(blobs)):
        result = workspace.FetchBlob(bconcated_map[blobs[i]])
        self.assertEqual(reference_result.tolist(), result.tolist())
def testRunPlanInBackground(self):
    plan = core.Plan("test-plan")
    plan.AddStep(core.ExecutionStep("test-step", self.net))
    background_plan = workspace.RunPlanInBackground(plan)
    while not background_plan.is_done():
        pass
    self.assertEqual(background_plan.is_succeeded(), True)
    self.assertEqual(workspace.HasBlob("testblob"), True)
def testToyRegression(self):
    """Tests a toy regression end to end.

    The test code carries a simple toy regression in the form
        y = 2.0 x1 + 1.5 x2 + 0.5
    by randomly generating gaussian inputs and calculating the ground truth
    outputs in the net as well. It then uses a standard SGD to train the
    parameters.
    """
    workspace.ResetWorkspace()
    init_net = core.Net("init")
    W = init_net.UniformFill([], "W", shape=[1, 2], min=-1., max=1.)
    B = init_net.ConstantFill([], "B", shape=[1], value=0.0)
    W_gt = init_net.GivenTensorFill(
        [], "W_gt", shape=[1, 2], values=[2.0, 1.5])
    B_gt = init_net.GivenTensorFill([], "B_gt", shape=[1], values=[0.5])
    LR = init_net.ConstantFill([], "LR", shape=[1], value=-0.1)
    ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
    ITER = init_net.ConstantIntFill([], "ITER", shape=[1], value=0.)

    train_net = core.Net("train")
    X = train_net.GaussianFill([], "X", shape=[64, 2], mean=0.0, std=1.0)
    Y_gt = X.FC([W_gt, B_gt], "Y_gt")
    Y_pred = X.FC([W, B], "Y_pred")
    dist = train_net.SquaredL2Distance([Y_gt, Y_pred], "dist")
    loss = dist.AveragedLoss([], ["loss"])
    # Get gradients for all the computations above. Note that in fact we
    # don't need the gradient of the Y_gt computation, but we'll just leave
    # it there. In many cases, I am expecting one to load X and Y from the
    # disk, so there is really no operator that will calculate the Y_gt
    # input.
    input_to_grad = train_net.AddGradientOperators([loss], skip=2)
    # updates
    train_net.Iter(ITER, ITER)
    train_net.LearningRate(ITER, "LR", base_lr=-0.1, policy="step",
                           stepsize=20, gamma=0.9)
    train_net.WeightedSum([W, ONE, input_to_grad[str(W)], LR], W)
    train_net.WeightedSum([B, ONE, input_to_grad[str(B)], LR], B)
    for blob in [loss, W, B]:
        train_net.Print(blob, [])

    # the CPU part.
    plan = core.Plan("toy_regression")
    plan.AddStep(core.ExecutionStep("init", init_net))
    plan.AddStep(core.ExecutionStep("train", train_net, 200))

    workspace.RunPlan(plan)
    W_result = workspace.FetchBlob("W")
    B_result = workspace.FetchBlob("B")
    np.testing.assert_array_almost_equal(W_result, [[2.0, 1.5]], decimal=2)
    np.testing.assert_array_almost_equal(B_result, [0.5], decimal=2)
    workspace.ResetWorkspace()
def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
    workspace.FeedBlob('X', X)
    workspace.FeedBlob('dY', dY)
    workspace.FeedBlob('label', label)
    net = core.Net('net')
    net.PairWiseLossGradient(
        ['X', 'label', 'dY'],
        ['dX'],
    )
    plan = core.Plan('predict_data')
    plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
    workspace.RunPlan(plan)
    dx = workspace.FetchBlob('dX')
    sign = 1 if label[0] > label[1] else -1
    if label[0] == label[1]:
        self.assertEqual(np.asscalar(dx[0]), 0)
        return

    self.assertAlmostEqual(
        np.asscalar(dx[0]),
        np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))),
        delta=1e-2 * abs(np.asscalar(dx[0])))
    self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))

    delta = 1e-3
    up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
    down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
    workspace.FeedBlob('up_x', up_x)
    workspace.FeedBlob('down_x', down_x)
    new_net = core.Net('new_net')
    new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
    new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

    plan = core.Plan('predict_data')
    plan.AddStep(core.execution_step('predict_data', [new_net], num_iter=1))
    workspace.RunPlan(plan)
    down_output_pred = workspace.FetchBlob('down_output')
    up_output_pred = workspace.FetchBlob('up_output')
    np.testing.assert_allclose(
        np.asscalar(dx[0]),
        np.asscalar(0.5 * dY[0] *
                    (up_output_pred[0] - down_output_pred[0]) / delta),
        rtol=1e-2, atol=1e-2)
def benchmark(net, warmups=5, iters=100):
    for _ in range(warmups):
        workspace.RunNetOnce(net.Proto().SerializeToString())
    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("test-step", net, iters))
    before = time.time()
    workspace.RunPlan(plan.Proto().SerializeToString())
    after = time.time()
    print("Timing network, time taken per-iteration: {:.6f}ms".format(
        (after - before) / float(iters) * 1000.0))
    return after - before
def benchmark(ws, net, warmups=5, iters=100):
    for _ in range(warmups):
        ws.run(net)
    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("test-step", net, iters))
    before = time.time()
    ws.run(plan)
    after = time.time()
    print("Timing network, time taken per-iteration: {:.6f}ms".format(
        (after - before) / float(iters) * 1000.0))
    return after - before
def test_last_n_window_ops(self):
    collect_net = core.Net('collect_net')
    collect_net.GivenTensorFill(
        [],
        'input',
        shape=[3, 2],
        values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    )
    collect_net.LastNWindowCollector(
        ['input'],
        ['output'],
        num_to_collect=7,
    )
    plan = core.Plan('collect_data')
    plan.AddStep(
        core.execution_step('collect_data', [collect_net], num_iter=1))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob('output')
    self.assertSequenceEqual(
        [item for sublist in reference_result for item in sublist],
        [1, 2, 3, 4, 5, 6])

    plan = core.Plan('collect_data')
    plan.AddStep(
        core.execution_step('collect_data', [collect_net], num_iter=2))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob('output')
    self.assertSequenceEqual(
        [item for sublist in reference_result for item in sublist],
        [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6])

    plan = core.Plan('collect_data')
    plan.AddStep(
        core.execution_step('collect_data', [collect_net], num_iter=3))
    workspace.RunPlan(plan)
    reference_result = workspace.FetchBlob('output')
    self.assertSequenceEqual(
        [item for sublist in reference_result for item in sublist],
        [3, 4, 5, 6, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2])
def test_plan_run(self, blob_name, plan_name, net_name, value):
    ws = workspace.C.Workspace()
    plan = core.Plan(plan_name)
    net = core.Net(net_name)
    net.ConstantFill([], [blob_name], shape=[1], value=value)

    plan.AddStep(core.ExecutionStep("step", nets=[net], num_iter=1))

    ws.run(plan)
    self.assertIn(blob_name, ws.blobs)
    self.assertIn(net.Name(), ws.nets)
    np.testing.assert_allclose(
        [value], ws.blobs[blob_name].fetch(), atol=1e-4, rtol=1e-4)
def test_multithreaded_evaluation(self, x, n, w):
    def f(inputs, outputs):
        outputs[0].reshape(inputs[0].shape)
        outputs[0].data[...] = inputs[0].data

    ops = [CreatePythonOperator(f, ["x"], [str(i)]) for i in range(n)]
    net = core.Net("net")
    net.Proto().op.extend(ops)
    net.Proto().type = "dag"
    net.Proto().num_workers = w
    iters = 100
    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("test-step", net, iters))
    workspace.FeedBlob("x", x)
    workspace.RunPlan(plan.Proto().SerializeToString())
    for i in range(n):
        y = workspace.FetchBlob(str(i))
        np.testing.assert_almost_equal(x, y)
def _run_task_group(self, task_group):
    if task_group not in self._plan_caches:
        task = task_group.to_task()
        plan = core.Plan('task_group_plan')
        plan.AddStep(task.get_step())
        self._plan_caches[task_group] = (plan, task)
    plan, task = self._plan_caches[task_group]

    # make sure the output blobs belong to the parent workspace
    outputs = []
    for name in task.output_names():
        self._ws.create_blob(str(name))
        outputs.append(core.BlobReference(str(name)))
    task.set_outputs(outputs, _fetch_func=self._fetch_output)
    task_ws = (
        workspace.C.Workspace(self._ws)
        if task.workspace_type == WorkspaceType.PRIVATE else self._ws)
    with workspace.WorkspaceGuard(task_ws):
        task_ws.run(plan)
    stride=1, order="NHWC").MaxPool([], kernel=2, stride=2, order="NHWC"))
softmax = pool2.Flatten().FC([W3, B3]).Relu().FC([W4, B4]).Softmax()

# Cross entropy, and accuracy
xent = softmax.LabelCrossEntropy([label], "xent")
# The loss function.
loss = xent.AveragedLoss([], ["loss"])
# Get gradient
train_net.AddGradientOperators()
accuracy = softmax.Accuracy([label], "accuracy")
# parameter update.
for param in params:
    train_net.WeightedSum([param, ONE, param.Grad(), LR], param)
LR = train_net.Mul([LR, DECAY], "LR")
train_net.Print([accuracy], [])

# Run all on GPU.
#init_net.RunAllOnGPU()
#train_net.RunAllOnGPU()

plan = core.Plan("mnist_lenet_gc")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 1000))

with open('mnist_lenet_group_convolution_nhwc.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))
def test_collect_tensor_ops(self): init_net = core.Net("init_net") blobs = ["blob_1", "blob_2", "blob_3"] bvec_map = {} ONE = init_net.ConstantFill([], "ONE", shape=[1, 2], value=1) for b in blobs: init_net.ConstantFill([], [b], shape=[1, 2], value=0) bvec_map[b] = b + "_vec" init_net.CreateTensorVector([], [bvec_map[b]]) reader_net = core.Net("reader_net") for b in blobs: reader_net.Add([b, ONE], [b]) collect_net = core.Net("collect_net") num_to_collect = 1000 max_example_to_cover = 100000 bvec = [bvec_map[b] for b in blobs] collect_net.CollectTensor( bvec + blobs, bvec, num_to_collect=num_to_collect, ) print("Collect Net Proto: {}".format(collect_net.Proto())) plan = core.Plan("collect_data") plan.AddStep(core.execution_step("collect_init", init_net)) plan.AddStep( core.execution_step( "collect_data", [reader_net, collect_net], num_iter=max_example_to_cover ) ) workspace.RunPlan(plan) # concat the collected tensors concat_net = core.Net("concat_net") bconcated_map = {} bsize_map = {} for b in blobs: bconcated_map[b] = b + "_concated" bsize_map[b] = b + "_size" concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]]) concat_net.TensorVectorSize([bvec_map[b]], [bsize_map[b]]) workspace.RunNetOnce(concat_net) # check data reference_result = workspace.FetchBlob(bconcated_map[blobs[0]]) self.assertEqual( reference_result.shape, (min(num_to_collect, max_example_to_cover), 2) ) size = workspace.FetchBlob(bsize_map[blobs[0]]) self.assertEqual(tuple(), size.shape) self.assertEqual(min(num_to_collect, max_example_to_cover), size.item()) hist, _ = np.histogram( reference_result[:, 0], bins=10, range=(1, max_example_to_cover) ) print("Sample histogram: {}".format(hist)) self.assertTrue(all(hist > 0.6 * (num_to_collect / 10))) for i in range(1, len(blobs)): result = workspace.FetchBlob(bconcated_map[blobs[i]]) self.assertEqual(reference_result.tolist(), result.tolist())
def RunPlan(plan_or_step):
    # TODO(jiayq): refactor core.py/workspace.py to avoid circular deps
    import caffe2.python.core as core
    if isinstance(plan_or_step, core.ExecutionStep):
        plan_or_step = core.Plan(plan_or_step)
    return C.run_plan(StringifyProto(plan_or_step))
def test_rebatching_parallel_producer_consumer(
    self, num_producers, num_consumers, producer_input_size,
    producer_num_iterations, capacity
):
    ### Init ###
    total_inputs = (
        producer_num_iterations * producer_input_size * num_producers)
    inputs = []
    init_net = core.Net('init_net')
    queue = init_net.CreateRebatchingQueue(
        [], 1, capacity=capacity, num_blobs=1
    )

    ### Producers ###
    producer_steps = []
    for i in range(num_producers):
        name = 'producer_%d' % i
        net = core.Net(name)
        values = [
            producer_input_size * i + x for x in range(producer_input_size)
        ]
        for _ in range(producer_num_iterations):
            inputs.extend(values)
        tensors = net.GivenTensorIntFill(
            [], 1, shape=[producer_input_size], values=values
        )
        net.EnqueueRebatchingQueue([queue, tensors], [], enqueue_batch=True)

        step = core.execution_step(
            name, net, num_iter=producer_num_iterations
        )
        producer_steps.append(step)

    producer_step = core.execution_step(
        'producer',
        [
            core.execution_step(
                'producers', producer_steps, concurrent_substeps=True
            )
        ]
    )

    ### Consumers ###
    outputs = []

    def append(ins, outs):
        # Extend is atomic
        outputs.extend(ins[0].data.tolist())

    consumer_steps = []
    for i in range(num_consumers):
        # This is just one way of deterministically reading all the
        # elements. We make `num_consumers` almost equal splits
        # (the remainder goes to the last consumer).
        num_elements_to_read = total_inputs // num_consumers
        if i == num_consumers - 1:
            num_elements_to_read = num_elements_to_read \
                + total_inputs % num_consumers

        # If we have nothing to read, this consumer will be idle
        if (num_elements_to_read == 0):
            continue

        # Now we have to make a split on the number of iterations and the
        # read size for each iteration. This is again just one of many
        # deterministic ways of doing it. We factorize the total number of
        # elements we have to read and assign half of the factors to the
        # iterations and half to the read size.
        factors = list(primefac(num_elements_to_read))

        num_elements_per_iteration = functools.reduce(
            lambda x, y: x * y, factors[len(factors) // 2:], 1
        )

        num_iterations = functools.reduce(
            lambda x, y: x * y, factors[:len(factors) // 2], 1
        )

        name = 'consumer_%d' % i
        net = core.Net(name)
        blobs = net.DequeueRebatchingQueue(
            [queue], 1, num_elements=num_elements_per_iteration
        )
        net.Python(append)([blobs], 0)
        consumer_steps.append(
            core.execution_step(name, net, num_iter=num_iterations)
        )

    consumer_step = core.execution_step(
        'consumer', consumer_steps, concurrent_substeps=True
    )

    init_step = core.execution_step('init', init_net)
    worker_step = core.execution_step(
        'worker', [consumer_step, producer_step], concurrent_substeps=True
    )

    ### Execute Plan ###
    plan = core.Plan('test')
    plan.AddStep(init_step)
    plan.AddStep(worker_step)

    self.ws.run(plan)

    ### Check Results ###
    # We check that the outputs are a permutation of the inputs
    inputs.sort()
    outputs.sort()
    self.assertEqual(inputs, outputs)
def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as
    # well.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]
    model.param_init_net.GaussianFill(
        [], "data", shape=input_shape, mean=0.0, std=1.0)
    model.param_init_net.UniformIntFill(
        [], "label", shape=[arg.batch_size], min=0, max=999)

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.')

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for i in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations))
    start = time.time()
    workspace.RunPlan(plan)
    print('Spent: {}'.format((time.time() - start) / arg.iterations))
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)
data, label = train_net.TensorProtosDBInput(
    [], ["data", "label"], batch_size=64,
    db="gen/data/cifar/cifar-10-train-leveldb")
softmax = (data.Flatten([], "data_flatten")
           .FC([W1, B1], "hidden")
           .Relu([], "hidden_relu")
           .FC([W2, B2], 'pred')
           .Softmax([], "softmax"))
# Cross entropy, and accuracy
xent = softmax.LabelCrossEntropy([label], "xent")
accuracy = softmax.Accuracy([label], "accuracy")
# The loss function.
loss, xent_grad = xent.AveragedLoss([], ["loss", xent.Grad()])
# Get gradient, skipping the input and flatten layers.
train_net.AddGradientOperators(first=2)
# parameter update.
for param in [W1, B1, W2, B2]:
    train_net.WeightedSum([param, ONE, param.Grad(), LR], param)
LR = train_net.Mul([LR, DECAY], "LR")
train_net.Print([LR, accuracy])

# Run all on GPU.
init_net.RunAllOnGPU()
train_net.RunAllOnGPU()

plan = core.Plan("mnist_relu_network")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 10000))

with open('cifar_relu_network.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))
xent = softmax.LabelCrossEntropy([label], "xent")
# The loss function.
loss = xent.AveragedLoss([], ["loss"])
# Get gradient, skipping the input and flatten layers.
train_net.AddGradientOperators(skip=1)
accuracy = softmax.Accuracy([label], "accuracy")
# parameter update.
for param in [filter1, bias1, filter2, bias2, W3, B3, W4, B4]:
    train_net.WeightedSum([param, ONE, param.Grad(), LR], param)
LR = train_net.Mul([LR, DECAY], "LR")
train_net.Print([accuracy], [])

train_net._net.net_type = 'dag'
train_net._net.num_workers = 8

# CPU version
plan = core.Plan("mnist_lenet")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 1000))
with open('mnist_lenet_nhwc.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))

# GPU version
init_net.RunAllOnGPU()
train_net.RunAllOnGPU()
plan = core.Plan("mnist_lenet")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 1000))
with open('mnist_lenet_nhwc_gpu.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))
def BuildAndRunPlan(self, step):
    plan = core.Plan("test")
    plan.AddStep(control.Do('init', self.init_net_))
    plan.AddStep(step)
    self.assertEqual(workspace.RunPlan(plan), True)
def test_dataset_ops(self):
    """
    1. Defining the schema of our dataset.

    This example schema could represent, for example, a search query log.
    """
    schema = Struct(
        # fixed size vector, which will be stored as a matrix when batched
        ('dense', Scalar((np.float32, 3))),
        # could represent a feature map from feature ID to float value
        ('floats', Map(
            Scalar(np.int32), Scalar(np.float32)
        )),
        # could represent a multi-valued categorical feature map
        ('int_lists', Map(
            Scalar(np.int32),
            List(Scalar(np.int64)),
        )),
        # could represent a multi-valued, weighted categorical feature map
        (
            'id_score_pairs', Map(
                Scalar(np.int32),
                Map(
                    Scalar(np.int64),
                    Scalar(np.float32),
                    keys_name='ids',
                    values_name='scores'
                ),
            )
        ),
        # additional scalar information
        (
            'metadata', Struct(
                ('user_id', Scalar(np.int64)),
                ('user_embed', Scalar((np.float32, 2))),
                ('query', Scalar(str)),
            )
        ),
    )
    """
    This is what the flattened fields for this schema look like, along
    with their types. Each one of these fields will be stored, read and
    written as a tensor.
    """
    expected_fields = [
        ('dense', (np.float32, 3)),
        ('floats:lengths', np.int32),
        ('floats:values:keys', np.int32),
        ('floats:values:values', np.float32),
        ('int_lists:lengths', np.int32),
        ('int_lists:values:keys', np.int32),
        ('int_lists:values:values:lengths', np.int32),
        ('int_lists:values:values:values', np.int64),
        ('id_score_pairs:lengths', np.int32),
        ('id_score_pairs:values:keys', np.int32),
        ('id_score_pairs:values:values:lengths', np.int32),
        ('id_score_pairs:values:values:values:ids', np.int64),
        ('id_score_pairs:values:values:values:scores', np.float32),
        ('metadata:user_id', np.int64),
        ('metadata:user_embed', (np.float32, 2)),
        ('metadata:query', str),
    ]
    zipped = zip(
        expected_fields, schema.field_names(), schema.field_types()
    )
    for (ref_name, ref_type), name, dtype in zipped:
        self.assertEqual(ref_name, name)
        self.assertEqual(np.dtype(ref_type), dtype)
    """
    2. The contents of our dataset.

    Contents as defined below could represent, for example, a log of
    search queries along with dense, sparse features and metadata.
    The dataset below has 3 top-level entries.
    """
    contents_raw = [
        # dense
        [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]],
        # floats
        [1, 2, 3],  # len
        [11, 21, 22, 31, 32, 33],  # key
        [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
        # int lists
        [2, 0, 1],  # len
        [11, 12, 31],  # key
        [2, 4, 3],  # value:len
        [111, 112, 121, 122, 123, 124, 311, 312, 313],  # value:value
        # id score pairs
        [1, 2, 2],  # len
        [11, 21, 22, 31, 32],  # key
        [1, 1, 2, 2, 3],  # value:len
        [111, 211, 221, 222, 311, 312, 321, 322, 323],  # value:ids
        [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2, 32.3],  # val:score
        # metadata
        [123, 234, 456],  # user_id
        [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]],  # user_embed
        ['dog posts', 'friends who like to', 'posts about ca'],  # query
    ]
    # convert the above content to ndarrays, checking against the schema
    contents = from_blob_list(schema, contents_raw)
    """
    3. Creating and appending to the dataset.

    We first create an empty dataset with the given schema. Then, a Writer
    is used to append these entries to the dataset.
    """
    ds = dataset.Dataset(schema)
    net = core.Net('init')
    with core.NameScope('init'):
        ds.init_empty(net)
        content_blobs = NewRecord(net, contents)

    FeedRecord(content_blobs, contents)
    writer = ds.writer(init_net=net)
    writer.write_record(net, content_blobs)
    workspace.RunNetOnce(net)
    """
    4. Iterating through the dataset contents.
    If we were to iterate through the top level entries of our dataset,
    this is what we should expect to see:
    """
    entries_raw = [
        (
            [[1.1, 1.2, 1.3]],  # dense
            [1], [11], [1.1],  # floats
            [2], [11, 12], [2, 4],
            [111, 112, 121, 122, 123, 124],  # int lists
            [1], [11], [1], [111], [11.1],  # id score pairs
            [123], [[0.2, 0.8]], ['dog posts'],  # metadata
        ),
        (
            [[2.1, 2.2, 2.3]],  # dense
            [2], [21, 22], [2.1, 2.2],  # floats
            [0], [], [], [],  # int lists
            [2], [21, 22], [1, 2], [211, 221, 222],
            [21.1, 22.1, 22.2],  # id score pairs
            [234], [[0.5, 0.5]], ['friends who like to'],  # metadata
        ),
        (
            [[3.1, 3.2, 3.3]],  # dense
            [3], [31, 32, 33], [3.1, 3.2, 3.3],  # floats
            [1], [31], [3], [311, 312, 313],  # int lists
            [2], [31, 32], [2, 3], [311, 312, 321, 322, 323],
            [31.1, 31.2, 32.1, 32.2, 32.3],  # id score pairs
            [456], [[0.7, 0.3]], ['posts about ca'],  # metadata
        ),
        # after the end of the dataset, we will keep getting empty vectors
        ([], ) * 16,
        ([], ) * 16,
    ]
    entries = [from_blob_list(schema, e) for e in entries_raw]
    """
    Let's go ahead and create the reading nets. We will run the read net
    multiple times and assert that we are reading the entries the way we
    stated above.
    """
    read_init_net = core.Net('read_init')
    read_next_net = core.Net('read_next')
    reader = ds.reader(read_init_net)
    should_continue, batch = reader.read_record(read_next_net)

    workspace.RunNetOnce(read_init_net)
    workspace.CreateNet(read_next_net, True)

    for entry in entries:
        workspace.RunNet(str(read_next_net))
        actual = FetchRecord(batch)
        _assert_records_equal(actual, entry)
    """
    5. Reading/writing in a single plan.

    If all operations on the data are expressible as Caffe2 operators, we
    don't need to load the data into python: we can iterate through the
    dataset in a single Plan. Here we will process the dataset a little and
    store it in a second dataset. We can reuse the same Reader since it
    supports reset.
    """
    reset_net = core.Net('reset_net')
    reader.reset(reset_net)
    read_step, batch = reader.execution_step()
    """ We will add the line number * 1000 to the feature ids. """
    process_net = core.Net('process')
    line_no = Const(process_net, 0, dtype=np.int32)
    const_one = Const(process_net, 1000, dtype=np.int32)
    process_net.Add([line_no, const_one], [line_no])
    field = batch.floats.keys.get()
    process_net.Print(field, [])
    process_net.Add([field, line_no], field, broadcast=1, axis=0)
    """ Let's create a second dataset and append to it. """
    ds2 = dataset.Dataset(schema, name='dataset2')
    ds2.init_empty(reset_net)
    writer = ds2.writer(reset_net)
    writer.write_record(process_net, batch)
    # commit is not necessary for DatasetWriter, but we add it here for
    # generality of the example
    commit_net = core.Net('commit')
    writer.commit(commit_net)
    """ Time to create and run a plan which will do the processing. """
    plan = core.Plan('process')
    plan.AddStep(core.execution_step('reset', reset_net))
    plan.AddStep(read_step.AddNet(process_net))
    plan.AddStep(core.execution_step('commit', commit_net))
    workspace.RunPlan(plan)
    """ Now we should have dataset2 populated. """
    ds2_data = FetchRecord(ds2.content())
    field = ds2_data.floats.keys
    field.set(blob=field.get() - [1000, 2000, 2000, 3000, 3000, 3000])
    _assert_records_equal(contents, ds2_data)
    """
    6. Slicing a dataset.

    You can create a new schema from pieces of another schema and reuse
    the same data.
    """
    subschema = Struct(('top_level', schema.int_lists.values))
    int_list_contents = contents.int_lists.values.field_names()
    self.assertEqual(len(subschema.field_names()), len(int_list_contents))
    """
    7. Random access to a dataset.
    """
    read_init_net = core.Net('read_init')
    read_next_net = core.Net('read_next')

    idx = np.array([2, 1, 0])
    indices_blob = Const(read_init_net, idx, name='indices')
    reader = ds.random_reader(read_init_net, indices_blob)
    reader.computeoffset(read_init_net)

    should_stop, batch = reader.read_record(read_next_net)

    workspace.CreateNet(read_init_net, True)
    workspace.RunNetOnce(read_init_net)

    workspace.CreateNet(read_next_net, True)

    for i in range(len(entries)):
        k = idx[i] if i in idx else i
        entry = entries[k]
        workspace.RunNet(str(read_next_net))
        actual = FetchRecord(batch)
        _assert_records_equal(actual, entry)
    workspace.RunNet(str(read_next_net))
    self.assertEqual(True, workspace.FetchBlob(should_stop))
    """
    8. Random access to a dataset with loop_over=True.
    """
    read_init_net = core.Net('read_init')
    read_next_net = core.Net('read_next')

    idx = np.array([2, 1, 0])
    indices_blob = Const(read_init_net, idx, name='indices')
    reader = ds.random_reader(read_init_net, indices_blob, loop_over=True)
    reader.computeoffset(read_init_net)

    should_stop, batch = reader.read_record(read_next_net)

    workspace.CreateNet(read_init_net, True)
    workspace.RunNetOnce(read_init_net)

    workspace.CreateNet(read_next_net, True)

    for _ in range(len(entries) * 3):
        workspace.RunNet(str(read_next_net))
        self.assertEqual(False, workspace.FetchBlob(should_stop))
    """
    9. Sorting and shuffling a dataset.

    This sorts the dataset using the values of a certain column, then
    shuffles within each chunk of size batch_size * shuffle_size before
    shuffling the chunks.
    """
    read_init_net = core.Net('read_init')
    read_next_net = core.Net('read_next')

    reader = ds.random_reader(read_init_net)
    reader.sort_and_shuffle(read_init_net, 'int_lists:lengths', 1, 2)
    reader.computeoffset(read_init_net)

    should_continue, batch = reader.read_record(read_next_net)

    workspace.CreateNet(read_init_net, True)
    workspace.RunNetOnce(read_init_net)

    workspace.CreateNet(read_next_net, True)

    expected_idx = np.array([2, 1, 0])
    for i in range(len(entries)):
        k = expected_idx[i] if i in expected_idx else i
        entry = entries[k]
        workspace.RunNet(str(read_next_net))
        actual = FetchRecord(batch)
        _assert_records_equal(actual, entry)
Y_gt = X.FC([W_gt, B_gt], "Y_gt")
Y_pred = X.FC([W, B], "Y_pred")
dist = train_net.SquaredL2Distance([Y_gt, Y_pred], "dist")
loss = dist.AveragedLoss([], ["loss"])
# Get gradients for all the computations above. Note that in fact we don't
# need the gradient of the Y_gt computation, but we'll just leave it there.
# In many cases, I am expecting one to load X and Y from the disk, so there
# is really no operator that will calculate the Y_gt input.
train_net.AddGradientOperators(skip=2)
# updates
train_net.WeightedSum([W, ONE, "W_grad", LR], W)
train_net.WeightedSum([B, ONE, "B_grad", LR], B)
train_net.Print([loss, W, B], [])

# the CPU part.
plan = core.Plan("toy_regression")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 100))

with open('toy_regression.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))

# the GPU part
init_net.RunAllOnGPU()
train_net.RunAllOnGPU()

plan = core.Plan("toy_regression")
plan.AddNets([init_net, train_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("train", train_net, 100))
def test_record_queue(self):
    num_prod = 8
    num_consume = 3
    schema = Struct(
        ('floats', Map(Scalar(np.int32), Scalar(np.float32))),
    )
    contents_raw = [
        [1, 2, 3],  # len
        [11, 21, 22, 31, 32, 33],  # key
        [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
    ]
    contents = from_blob_list(schema, contents_raw)

    ds = Dataset(schema)
    net = core.Net('init')
    ds.init_empty(net)

    content_blobs = NewRecord(net, contents)
    FeedRecord(content_blobs, contents)
    writer = ds.writer(init_net=net)
    writer.write_record(net, content_blobs)
    reader = ds.reader(init_net=net)

    # prepare receiving dataset
    rec_dataset = Dataset(contents, name='rec')
    rec_dataset.init_empty(init_net=net)
    rec_dataset_writer = rec_dataset.writer(init_net=net)

    workspace.RunNetOnce(net)

    queue = RecordQueue(contents, num_threads=num_prod)

    def process(net, fields):
        new_fields = []
        for f in fields.field_blobs():
            new_f = net.Copy(f)
            new_fields.append(new_f)
        new_fields = from_blob_list(fields, new_fields)
        return new_fields

    q_reader, q_step, q_exit, fields = queue.build(reader, process)
    producer_step = core.execution_step('producer', [q_step, q_exit])

    consumer_steps = []
    for i in range(num_consume):
        name = 'queue_reader_' + str(i)
        net_consume = core.Net(name)
        should_stop, fields = q_reader.read_record(net_consume)
        step_consume = core.execution_step(name, net_consume)

        name = 'dataset_writer_' + str(i)
        net_dataset = core.Net(name)
        rec_dataset_writer.write(net_dataset, fields.field_blobs())
        step_dataset = core.execution_step(name, net_dataset)

        step = core.execution_step(
            'consumer_' + str(i),
            [step_consume, step_dataset],
            should_stop_blob=should_stop)
        consumer_steps.append(step)
    consumer_step = core.execution_step(
        'consumers', consumer_steps, concurrent_substeps=True)

    work_steps = core.execution_step(
        'work', [producer_step, consumer_step], concurrent_substeps=True)

    plan = core.Plan('test')
    plan.AddStep(work_steps)
    core.workspace.RunPlan(plan)
    data = workspace.FetchBlobs(rec_dataset.get_blobs())

    self.assertEqual(6, sum(data[0]))
    self.assertEqual(150, sum(data[1]))
    self.assertAlmostEqual(15, sum(data[2]), places=5)
def _compile_task_group(cls, task_group, setup_net_list=None):
    with Cluster():
        task = task_group.to_task()
    plan = core.Plan('task_group_plan')
    plan.AddStep(task.get_step())
    return (plan, task.output_list(), task.workspace_type)
W8 = init_net.ConstantFill([], "W8", shape=[1000, 4096])
B8 = init_net.ConstantFill([], "B8", shape=[1000], value=0.0)
pred = (pool5_flatten.FC([W6, B6]).Relu().Dropout(outputs=2)[0]
        .FC([W7, B7]).Relu().Dropout(outputs=2)[0]
        .FC([W8, B8]).Softmax())
xent = pred.LabelCrossEntropy([label], "xent")
# The loss function.
loss = xent.AveragedLoss([], ["loss"])
test_net.AddGradientOperators(first=2)
test_net.Print([loss], [])

dump_net = core.Net("dump")
for blob in [
        data, pool1, pool1a, pool1b, pool2, conv3a, conv3b, conv4a, conv4b,
        conv5a, conv5b, pool5_flatten]:
    dump_net.SaveFloatTensor([blob], [], file=str(blob))

init_net.RunAllOnGPU()
test_net.RunAllOnGPU()
dump_net.RunAllOnGPU()

plan = core.Plan("alexnet")
plan.AddNets([init_net, test_net, dump_net])
plan.AddStep(core.ExecutionStep("init", init_net))
plan.AddStep(core.ExecutionStep("first_run", test_net))
#plan.AddStep(core.ExecutionStep("subsequent_run", test_net, 10))
plan.AddStep(core.ExecutionStep("dump", dump_net))
with open('alexnet.pbtxt', 'w') as fid:
    fid.write(str(plan.Proto()))
db="gen/caffe2/data/mnist/mnist-train-nchw-minidb", db_type="minidb") softmax = data.Flatten([], "data_flatten").FC([W, B], "pred").Softmax([], "softmax") xent = softmax.LabelCrossEntropy([label], "xent") loss = xent.AveragedLoss([], ["loss"]) # Get gradient train_net.AddGradientOperators() accuracy = softmax.Accuracy([label], "accuracy") # parameter update. W = train_net.WeightedSum([W, ONE, "W_grad", LR], "W") B = train_net.WeightedSum([B, ONE, "B_grad", LR], "B") LR = train_net.Mul([LR, DECAY], "LR") train_net.PrintInt([it], []) train_net.Print([loss, accuracy, LR], []) train_net.Snapshot([it, W, B], db=snapshot_db_pattern, db_type="protodb", every=100) # Run all on GPU. # init_net.RunAllOnGPU() # train_net.RunAllOnGPU() plan = core.Plan("mnist_train") plan.AddNets([init_net, train_net]) plan.AddStep(core.ExecutionStep("init", init_net)) plan.AddStep(core.ExecutionStep("train", train_net, 1000)) with open('linear_classifier_plan.pbtxt', 'w') as fid: fid.write(str(plan.Proto()))