def test_pair_wise_loss_predictions(self, X, label, gc, dc): workspace.FeedBlob('X', X) workspace.FeedBlob('label', label) new_label = np.array([label[1], label[0]]) new_x = np.array([X[1], X[0]]) workspace.FeedBlob('new_x', new_x) workspace.FeedBlob('new_label', new_label) net = core.Net('net') net.PairWiseLoss(['X', 'label'], ['output']) net.PairWiseLoss(['new_x', 'new_label'], ['new_output']) plan = core.Plan('predict_data') plan.AddStep(core.execution_step('predict_data', [net], num_iter=1)) workspace.RunPlan(plan) output = workspace.FetchBlob('output') new_output = workspace.FetchBlob('new_output') sign = 1 if label[0] > label[1] else -1 if label[0] == label[1]: self.assertEqual(np.asscalar(output), 0) return self.assertAlmostEqual(np.asscalar(output), np.asscalar( np.log(1 + np.exp(sign * (X[1] - X[0])))), delta=1e-4) # check swapping row order doesn't alter overall loss self.assertAlmostEqual(output, new_output)
def testRunPlan(self): plan = core.Plan("test-plan") plan.AddNets([self.net]) plan.AddStep(core.ExecutionStep("test-step", self.net)) self.assertEqual(workspace.RunPlan(plan.Proto().SerializeToString()), True) self.assertEqual(workspace.HasBlob("testblob"), True)
def test_last_n_window_ops(self): collect_net = core.Net('collect_net') collect_net.GivenTensorFill( [], 'input', shape=[3, 2], values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], ) input_array =\ np.array(list(range(1, 7)), dtype=np.float32).reshape(3, 2) workspace.CreateBlob('output') workspace.FeedBlob('next', np.array(0, dtype=np.int32)) collect_net.LastNWindowCollector( ['output', 'next', 'input'], ['output', 'next'], num_to_collect=7, ) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=1) ) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') npt.assert_array_equal(input_array, reference_result) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=2) ) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') npt.assert_array_equal(input_array[[1, 2, 2, 0, 1, 2, 0]], reference_result) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=3) ) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') npt.assert_array_equal(input_array[[2, 0, 1, 2, 2, 0, 1]], reference_result)
def testToyRegression(self): """Tests a toy regression end to end. The test code carries a simple toy regression in the form y = 2.0 x1 + 1.5 x2 + 0.5 by randomly generating gaussian inputs and calculating the ground truth outputs in the net as well. It uses a standard SGD to then train the parameters. """ workspace.ResetWorkspace() init_net = core.Net("init") W = init_net.UniformFill([], "W", shape=[1, 2], min=-1., max=1.) B = init_net.ConstantFill([], "B", shape=[1], value=0.0) W_gt = init_net.GivenTensorFill([], "W_gt", shape=[1, 2], values=[2.0, 1.5]) B_gt = init_net.GivenTensorFill([], "B_gt", shape=[1], values=[0.5]) LR = init_net.ConstantFill([], "LR", shape=[1], value=-0.1) ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.) ITER = init_net.ConstantIntFill([], "ITER", shape=[1], value=0.) train_net = core.Net("train") X = train_net.GaussianFill([], "X", shape=[64, 2], mean=0.0, std=1.0) Y_gt = X.FC([W_gt, B_gt], "Y_gt") Y_pred = X.FC([W, B], "Y_pred") dist = train_net.SquaredL2Distance([Y_gt, Y_pred], "dist") loss = dist.AveragedLoss([], ["loss"]) # Get gradients for all the computations above. Note that in fact we # don't need to get the gradient the Y_gt computation, but we'll just # leave it there. In many cases, I am expecting one to load X and Y # from the disk, so there is really no operator that will calculate the # Y_gt input. input_to_grad = train_net.AddGradientOperators([loss], skip=2) # updates train_net.Iter(ITER, ITER) train_net.LearningRate(ITER, "LR", base_lr=-0.1, policy="step", stepsize=20, gamma=0.9) train_net.WeightedSum([W, ONE, input_to_grad[str(W)], LR], W) train_net.WeightedSum([B, ONE, input_to_grad[str(B)], LR], B) for blob in [loss, W, B]: train_net.Print(blob, []) # the CPU part. plan = core.Plan("toy_regression") plan.AddStep(core.ExecutionStep("init", init_net)) plan.AddStep(core.ExecutionStep("train", train_net, 200)) workspace.RunPlan(plan) W_result = workspace.FetchBlob("W") B_result = workspace.FetchBlob("B") np.testing.assert_array_almost_equal(W_result, [[2.0, 1.5]], decimal=2) np.testing.assert_array_almost_equal(B_result, [0.5], decimal=2) workspace.ResetWorkspace()
def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc): workspace.FeedBlob('X', X) workspace.FeedBlob('dY', dY) workspace.FeedBlob('label', label) net = core.Net('net') net.PairWiseLossGradient( ['X', 'label', 'dY'], ['dX'], ) plan = core.Plan('predict_data') plan.AddStep(core.execution_step('predict_data', [net], num_iter=1)) workspace.RunPlan(plan) dx = workspace.FetchBlob('dX') sign = 1 if label[0] > label[1] else -1 if label[0] == label[1]: self.assertEqual(np.asscalar(dx[0]), 0) return self.assertAlmostEqual(np.asscalar(dx[0]), np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))), delta=1e-2 * abs(np.asscalar(dx[0]))) self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1])) delta = 1e-3 up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32) down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32) workspace.FeedBlob('up_x', up_x) workspace.FeedBlob('down_x', down_x) new_net = core.Net('new_net') new_net.PairWiseLoss(['up_x', 'label'], ['up_output']) new_net.PairWiseLoss(['down_x', 'label'], ['down_output']) plan = core.Plan('predict_data') plan.AddStep(core.execution_step('predict_data', [new_net], num_iter=1)) workspace.RunPlan(plan) down_output_pred = workspace.FetchBlob('down_output') up_output_pred = workspace.FetchBlob('up_output') np.testing.assert_allclose( np.asscalar(dx[0]), np.asscalar(0.5 * dY[0] * (up_output_pred[0] - down_output_pred[0]) / delta), rtol=1e-2, atol=1e-2)
def test_collect_tensor_ops(self): init_net = core.Net('init_net') blobs = ['blob_1', 'blob_2', 'blob_3'] bvec_map = {} ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1) for b in blobs: init_net.ConstantFill([], [b], shape=[1, 2], value=0) bvec_map[b] = b + '_vec' init_net.CreateTensorVector([], [bvec_map[b]]) reader_net = core.Net('reader_net') for b in blobs: reader_net.Add([b, ONE], [b]) collect_net = core.Net('collect_net') num_to_collect = 1000 max_example_to_cover = 100000 bvec = [bvec_map[b] for b in blobs] collect_net.CollectTensor( bvec + blobs, bvec, num_to_collect=num_to_collect, ) print('Collect Net Proto: {}'.format(collect_net.Proto())) plan = core.Plan('collect_data') plan.AddStep(core.execution_step('collect_init', init_net)) plan.AddStep( core.execution_step('collect_data', [reader_net, collect_net], num_iter=max_example_to_cover)) workspace.RunPlan(plan) # concat the collected tensors concat_net = core.Net('concat_net') bconcated_map = {} for b in blobs: bconcated_map[b] = b + '_concated' concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]]) workspace.RunNetOnce(concat_net) # check data reference_result = workspace.FetchBlob(bconcated_map[blobs[0]]) self.assertEqual(reference_result.shape, (min(num_to_collect, max_example_to_cover), 2)) hist, _ = np.histogram(reference_result[:, 0], bins=10, range=(1, max_example_to_cover)) print('Sample histogram: {}'.format(hist)) self.assertTrue(all(hist > 0.7 * (num_to_collect / 10))) for i in range(1, len(blobs)): result = workspace.FetchBlob(bconcated_map[blobs[i]]) self.assertEqual(reference_result.tolist(), result.tolist())
def benchmark(net, warmups=5, iters=100): for _ in range(warmups): workspace.RunNetOnce(net.Proto().SerializeToString()) plan = core.Plan("plan") plan.AddStep(core.ExecutionStep("test-step", net, iters)) before = time.time() workspace.RunPlan(plan.Proto().SerializeToString()) after = time.time() print("Timing network, time taken per-iteration: {:.6f}ms".format( (after - before) / float(iters) * 1000.0)) return after - before
def test_last_n_window_ops(self): collect_net = core.Net('collect_net') collect_net.GivenTensorFill( [], 'input', shape=[3, 2], values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], ) collect_net.LastNWindowCollector( ['input'], ['output'], num_to_collect=7, ) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=1)) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') self.assertSequenceEqual( [item for sublist in reference_result for item in sublist], [1, 2, 3, 4, 5, 6]) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=2)) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') self.assertSequenceEqual( [item for sublist in reference_result for item in sublist], [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]) plan = core.Plan('collect_data') plan.AddStep( core.execution_step('collect_data', [collect_net], num_iter=3)) workspace.RunPlan(plan) reference_result = workspace.FetchBlob('output') self.assertSequenceEqual( [item for sublist in reference_result for item in sublist], [3, 4, 5, 6, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2])
def test_multithreaded_evaluation(self, x, n, w): def f(inputs, outputs): outputs[0].reshape(inputs[0].shape) outputs[0].data[...] = inputs[0].data ops = [CreatePythonOperator(f, ["x"], [str(i)]) for i in range(n)] net = core.Net("net") net.Proto().op.extend(ops) net.Proto().type = "dag" net.Proto().num_workers = w iters = 100 plan = core.Plan("plan") plan.AddStep(core.ExecutionStep("test-step", net, iters)) workspace.FeedBlob("x", x) workspace.RunPlan(plan.Proto().SerializeToString()) for i in range(n): y = workspace.FetchBlob(str(i)) np.testing.assert_almost_equal(x, y)
def test_atomic_ops(self): """ Test that both countdown and checksum are update atomically by having cowntdown count from 20k to 0 from parallel the workers and updating the checksum to the value fetched. If operations are trully atomic, each value from 1 to 20k should be fetched exactly once from the countdown, and fed exactly once to the checksum, such that at the end checksum must contain the exact value of sum[i=0..20000](i). """ init_net = core.Net('init') mutex_countdown = init_net.CreateMutex([]) mutex_checksum = init_net.CreateMutex([]) countdown = init_net.ConstantFill([], shape=[], value=20000, dtype=core.DataType.INT32) checksum = init_net.ConstantFill([], shape=[], value=0, dtype=core.DataType.INT32) minus_one = init_net.ConstantFill([], shape=[], value=-1, dtype=core.DataType.INT32) steps = [] for i in range(0, 100): net = core.Net('net:%d' % i) _, fetched_count = net.AtomicFetchAdd( [mutex_countdown, countdown, minus_one], [countdown, 'fetched_count:%d' % i]) net.AtomicFetchAdd([mutex_checksum, checksum, fetched_count], [checksum, 'not_used']) steps.append( core.execution_step('worker:%d' % i, net, num_iter=200)) super_step = core.execution_step('parent', steps, concurrent_substeps=True) plan = core.Plan('plan') plan.AddStep(core.execution_step('init', init_net)) plan.AddStep(super_step) workspace.RunPlan(plan) # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000 self.assertEquals(workspace.FetchBlob(checksum), 200010000)
def testConstructPlanFromSteps(self): step = core.ExecutionStep("test-step-as-plan", self.net) self.assertEqual(workspace.RunPlan(step), True) self.assertEqual(workspace.HasBlob("testblob"), True)
def Benchmark(model_gen, arg): model, input_size = model_gen(arg.order) model.Proto().type = arg.net_type model.Proto().num_workers = arg.num_workers # In order to be able to run everything without feeding more stuff, let's # add the data and label blobs to the parameter initialization net as well. if arg.order == "NCHW": input_shape = [arg.batch_size, 3, input_size, input_size] else: input_shape = [arg.batch_size, input_size, input_size, 3] if arg.model == "MLP": input_shape = [arg.batch_size, input_size] model.param_init_net.GaussianFill([], "data", shape=input_shape, mean=0.0, std=1.0) model.param_init_net.UniformIntFill([], "label", shape=[ arg.batch_size, ], min=0, max=999) if arg.forward_only: print('{}: running forward only.'.format(arg.model)) else: print('{}: running forward-backward.'.format(arg.model)) model.AddGradientOperators(["loss"]) AddParameterUpdate(model) if arg.order == 'NHWC': print( '==WARNING==\n' 'NHWC order with CuDNN may not be supported yet, so I might\n' 'exit suddenly.') if not arg.cpu: model.param_init_net.RunAllOnGPU() model.net.RunAllOnGPU() if arg.dump_model: # Writes out the pbtxt for benchmarks on e.g. Android with open("{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w") as fid: fid.write(str(model.param_init_net.Proto())) with open("{0}.pbtxt".format(arg.model, arg.batch_size), "w") as fid: fid.write(str(model.net.Proto())) workspace.RunNetOnce(model.param_init_net) workspace.CreateNet(model.net) for i in range(arg.warmup_iterations): workspace.RunNet(model.net.Proto().name) plan = core.Plan("plan") plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations)) start = time.time() workspace.RunPlan(plan) print('Spent: {}'.format((time.time() - start) / arg.iterations)) if arg.layer_wise_benchmark: print('Layer-wise benchmark.') workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)
def test_dataset_ops(self): """ 1. Defining the schema of our dataset. This example schema could represent, for example, a search query log. """ schema = Struct( # fixed size vector, which will be stored as a matrix when batched ('dense', Scalar((np.float32, 3))), # could represent a feature map from feature ID to float value ('floats', Map( Scalar(np.int32), Scalar(np.float32) )), # could represent a multi-valued categorical feature map ('int_lists', Map( Scalar(np.int32), List(Scalar(np.int64)), )), # could represent a multi-valued, weighted categorical feature map ( 'id_score_pairs', Map( Scalar(np.int32), Map( Scalar(np.int64), Scalar(np.float32), keys_name='ids', values_name='scores' ), ) ), # additional scalar information ( 'metadata', Struct( ('user_id', Scalar(np.int64)), ('user_embed', Scalar((np.float32, 2))), ('query', Scalar(str)), ) ), ) """ This is what the flattened fields for this schema look like, along with its type. Each one of these fields will be stored, read and writen as a tensor. """ expected_fields = [ ('dense', (np.float32, 3)), ('floats:lengths', np.int32), ('floats:values:keys', np.int32), ('floats:values:values', np.float32), ('int_lists:lengths', np.int32), ('int_lists:values:keys', np.int32), ('int_lists:values:values:lengths', np.int32), ('int_lists:values:values:values', np.int64), ('id_score_pairs:lengths', np.int32), ('id_score_pairs:values:keys', np.int32), ('id_score_pairs:values:values:lengths', np.int32), ('id_score_pairs:values:values:values:ids', np.int64), ('id_score_pairs:values:values:values:scores', np.float32), ('metadata:user_id', np.int64), ('metadata:user_embed', (np.float32, 2)), ('metadata:query', str), ] zipped = zip( expected_fields, schema.field_names(), schema.field_types() ) for (ref_name, ref_type), name, dtype in zipped: self.assertEquals(ref_name, name) self.assertEquals(np.dtype(ref_type), dtype) """ 2. The contents of our dataset. Contents as defined below could represent, for example, a log of search queries along with dense, sparse features and metadata. The datset below has 3 top-level entries. """ contents_raw = [ # dense [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]], # floats [1, 2, 3], # len [11, 21, 22, 31, 32, 33], # key [1.1, 2.1, 2.2, 3.1, 3.2, 3.3], # value # int lists [2, 0, 1], # len [11, 12, 31], # key [2, 4, 3], # value:len [111, 112, 121, 122, 123, 124, 311, 312, 313], # value:value # id score pairs [1, 2, 2], # len [11, 21, 22, 31, 32], # key [1, 1, 2, 2, 3], # value:len [111, 211, 221, 222, 311, 312, 321, 322, 323], # value:ids [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2, 32.3], # val:score # metadata [123, 234, 456], # user_id [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]], # user_embed ['dog posts', 'friends who like to', 'posts about ca'], # query ] # convert the above content to ndarrays, checking against the schema contents = from_blob_list(schema, contents_raw) """ 3. Creating and appending to the dataset. We first create an empty dataset with the given schema. Then, a Writer is used to append these entries to the dataset. """ ds = dataset.Dataset(schema) net = core.Net('init') with core.NameScope('init'): ds.init_empty(net) content_blobs = NewRecord(net, contents) FeedRecord(content_blobs, contents) writer = ds.writer(init_net=net) writer.write_record(net, content_blobs) workspace.RunNetOnce(net) """ 4. Iterating through the dataset contents. If we were to iterate through the top level entries of our dataset, this is what we should expect to see: """ entries_raw = [ ( [[1.1, 1.2, 1.3]], # dense [1], [11], [1.1], # floats [2], [11, 12], [2, 4], [111, 112, 121, 122, 123, 124], # intlst [1], [11], [1], [111], [11.1], # id score pairs [123], [[0.2, 0.8]], ['dog posts'], # metadata ), ( [[2.1, 2.2, 2.3]], # dense [2], [21, 22], [2.1, 2.2], # floats [0], [], [], [], # int list [2], [21, 22], [1, 2], [211, 221, 222], [21.1, 22.1, 22.2], [234], [[0.5, 0.5]], ['friends who like to'], # metadata ), ( [[3.1, 3.2, 3.3]], # dense [3], [31, 32, 33], [3.1, 3.2, 3.3], # floats [1], [31], [3], [311, 312, 313], # int lst [2], [31, 32], [2, 3], [311, 312, 321, 322, 323], [31.1, 31.2, 32.1, 32.2, 32.3], # id score list [456], [[0.7, 0.3]], ['posts about ca'], # metadata ), # after the end of the dataset, we will keep getting empty vectors ([], ) * 16, ([], ) * 16, ] entries = [from_blob_list(schema, e) for e in entries_raw] """ Let's go ahead and create the reading nets. We will run `read` net multiple times and assert that we are reading the entries the way we stated above. """ read_init_net = core.Net('read_init') read_next_net = core.Net('read_next') reader = ds.reader(read_init_net) should_continue, batch = reader.read_record(read_next_net) workspace.RunNetOnce(read_init_net) workspace.CreateNet(read_next_net, True) for entry in entries: workspace.RunNet(str(read_next_net)) actual = FetchRecord(batch) _assert_records_equal(actual, entry) """ 5. Reading/writing in a single plan If all of operations on the data are expressible as Caffe2 operators, we don't need to load the data to python, iterating through the dataset in a single Plan. Where we will process the dataset a little and store it in a second dataset. We can reuse the same Reader since it supports reset. """ reset_net = core.Net('reset_net') reader.reset(reset_net) read_step, batch = reader.execution_step() """ We will add the line number * 1000 to the feature ids. """ process_net = core.Net('process') line_no = Const(process_net, 0, dtype=np.int32) const_one = Const(process_net, 1000, dtype=np.int32) process_net.Add([line_no, const_one], [line_no]) field = batch.floats.keys.get() process_net.Print(field, []) process_net.Add([field, line_no], field, broadcast=1, axis=0) """ Lets create a second dataset and append to it. """ ds2 = dataset.Dataset(schema, name='dataset2') ds2.init_empty(reset_net) writer = ds2.writer(reset_net) writer.write_record(process_net, batch) # commit is not necessary for DatasetWriter but will add it for # generality of the example commit_net = core.Net('commit') writer.commit(commit_net) """ Time to create and run a plan which will do the processing """ plan = core.Plan('process') plan.AddStep(core.execution_step('reset', reset_net)) plan.AddStep(read_step.AddNet(process_net)) plan.AddStep(core.execution_step('commit', commit_net)) workspace.RunPlan(plan) """ Now we should have dataset2 populated. """ ds2_data = FetchRecord(ds2.content()) field = ds2_data.floats.keys field.set(blob=field.get() - [1000, 2000, 2000, 3000, 3000, 3000]) _assert_records_equal(contents, ds2_data) """ 6. Slicing a dataset You can create a new schema from pieces of another schema and reuse the same data. """ subschema = Struct(('top_level', schema.int_lists.values)) int_list_contents = contents.int_lists.values.field_names() self.assertEquals(len(subschema.field_names()), len(int_list_contents)) """ 7. Random Access a dataset """ read_init_net = core.Net('read_init') read_next_net = core.Net('read_next') idx = np.array([2, 1, 0]) indices_blob = Const(read_init_net, idx, name='indices') reader = ds.random_reader(read_init_net, indices_blob) reader.computeoffset(read_init_net) should_stop, batch = reader.read_record(read_next_net) workspace.CreateNet(read_init_net, True) workspace.RunNetOnce(read_init_net) workspace.CreateNet(read_next_net, True) for i in range(len(entries)): k = idx[i] if i in idx else i entry = entries[k] workspace.RunNet(str(read_next_net)) actual = FetchRecord(batch) _assert_records_equal(actual, entry) workspace.RunNet(str(read_next_net)) self.assertEquals(True, workspace.FetchBlob(should_stop)) """ 8. Random Access a dataset with loop_over = true """ read_init_net = core.Net('read_init') read_next_net = core.Net('read_next') idx = np.array([2, 1, 0]) indices_blob = Const(read_init_net, idx, name='indices') reader = ds.random_reader(read_init_net, indices_blob, loop_over=True) reader.computeoffset(read_init_net) should_stop, batch = reader.read_record(read_next_net) workspace.CreateNet(read_init_net, True) workspace.RunNetOnce(read_init_net) workspace.CreateNet(read_next_net, True) for _ in range(len(entries) * 3): workspace.RunNet(str(read_next_net)) self.assertEquals(False, workspace.FetchBlob(should_stop)) """ 9. Sort and shuffle a dataset This sort the dataset using the score of a certain column, and then shuffle within each chunk of size batch_size * shuffle_size before shuffling the chunks. """ read_init_net = core.Net('read_init') read_next_net = core.Net('read_next') reader = ds.random_reader(read_init_net) reader.sort_and_shuffle(read_init_net, 'int_lists:lengths', 1, 2) reader.computeoffset(read_init_net) should_continue, batch = reader.read_record(read_next_net) workspace.CreateNet(read_init_net, True) workspace.RunNetOnce(read_init_net) workspace.CreateNet(read_next_net, True) expected_idx = np.array([2, 1, 0]) for i in range(len(entries)): k = expected_idx[i] if i in expected_idx else i entry = entries[k] workspace.RunNet(str(read_next_net)) actual = FetchRecord(batch) _assert_records_equal(actual, entry)
def BuildAndRunPlan(self, step): plan = core.Plan("test") plan.AddStep(control.Do('init', self.init_net_)) plan.AddStep(step) self.assertEqual(workspace.RunPlan(plan), True)
def test_collect_tensor_ops(self): init_net = core.Net("init_net") blobs = ["blob_1", "blob_2", "blob_3"] bvec_map = {} ONE = init_net.ConstantFill([], "ONE", shape=[1, 2], value=1) for b in blobs: init_net.ConstantFill([], [b], shape=[1, 2], value=0) bvec_map[b] = b + "_vec" init_net.CreateTensorVector([], [bvec_map[b]]) reader_net = core.Net("reader_net") for b in blobs: reader_net.Add([b, ONE], [b]) collect_net = core.Net("collect_net") num_to_collect = 1000 max_example_to_cover = 100000 bvec = [bvec_map[b] for b in blobs] collect_net.CollectTensor( bvec + blobs, bvec, num_to_collect=num_to_collect, ) print("Collect Net Proto: {}".format(collect_net.Proto())) plan = core.Plan("collect_data") plan.AddStep(core.execution_step("collect_init", init_net)) plan.AddStep( core.execution_step( "collect_data", [reader_net, collect_net], num_iter=max_example_to_cover ) ) workspace.RunPlan(plan) # concat the collected tensors concat_net = core.Net("concat_net") bconcated_map = {} bsize_map = {} for b in blobs: bconcated_map[b] = b + "_concated" bsize_map[b] = b + "_size" concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]]) concat_net.TensorVectorSize([bvec_map[b]], [bsize_map[b]]) workspace.RunNetOnce(concat_net) # check data reference_result = workspace.FetchBlob(bconcated_map[blobs[0]]) self.assertEqual( reference_result.shape, (min(num_to_collect, max_example_to_cover), 2) ) size = workspace.FetchBlob(bsize_map[blobs[0]]) self.assertEqual(tuple(), size.shape) self.assertEqual(min(num_to_collect, max_example_to_cover), size.item()) hist, _ = np.histogram( reference_result[:, 0], bins=10, range=(1, max_example_to_cover) ) print("Sample histogram: {}".format(hist)) self.assertTrue(all(hist > 0.6 * (num_to_collect / 10))) for i in range(1, len(blobs)): result = workspace.FetchBlob(bconcated_map[blobs[i]]) self.assertEqual(reference_result.tolist(), result.tolist())