def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
    """Fast inference memonger must leave the loss and fc1_w gradient intact.

    Builds three stacked FC layers followed by Relu, Softmax,
    LabelCrossEntropy and AveragedLoss, adds gradient operators, and then
    runs both the original net and the net returned by
    ``memonger.optimize_inference_fast`` on the same random batch. The two
    runs must give almost-equal loss and ``fc1_w`` gradient, and
    ``count_blobs`` must report a smaller number for the optimized net.

    Args:
        input_dim: Input width of the first FC layer.
        output_dim: Output width of every FC layer; also the exclusive
            upper bound used when drawing random labels.
        batch_size: Number of rows in the random data/label batch.
        do: Device option copied onto both nets and used when feeding blobs.
    """
    model = model_helper.ModelHelper()

    hidden1 = brew.fc(
        model, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
    hidden2 = brew.fc(
        model, hidden1, "fc2", dim_in=output_dim, dim_out=output_dim)
    hidden3 = brew.fc(
        model, hidden2, "fc3", dim_in=output_dim, dim_out=output_dim)

    # Relu is handed fc3 as its output too, so it reuses that blob.
    activated = hidden3.Relu([], hidden3)
    pred = activated.Softmax([], "pred")
    xent = pred.LabelCrossEntropy(["label"], ["xent"])
    xent.AveragedLoss([], "loss")

    grad_map = model.AddGradientOperators(["loss"])
    fc1_w_grad = grad_map["fc1_w"]

    model.net.Proto().device_option.CopyFrom(do)
    model.param_init_net.Proto().device_option.CopyFrom(do)

    # Blobs the optimizer is told to treat as static: everything produced
    # by parameter initialization, plus the inputs, the loss, and the
    # gradient that is compared below.
    static_blobs = []
    for init_op in model.param_init_net.Proto().op:
        static_blobs.extend(init_op.output)
    static_blobs.extend(["data", "label", "loss", fc1_w_grad])

    optimized_net = memonger.optimize_inference_fast(
        model.Proto(), static_blobs)

    batch = np.random.randn(batch_size, input_dim).astype(np.float32)
    targets = np.random.randint(
        low=0, high=output_dim, size=(batch_size,)).astype(np.int32)

    workspace.RunNetOnce(model.param_init_net)
    workspace.FeedBlob("data", batch, device_option=do)
    workspace.FeedBlob("label", targets, device_option=do)

    grad_name = str(fc1_w_grad)

    # Reference run on the untouched net.
    workspace.RunNetOnce(model.net)
    loss = workspace.FetchBlob("loss")
    grad = workspace.FetchBlob(grad_name)

    # Same workspace, optimized net.
    workspace.RunNetOnce(optimized_net)
    optimized_loss = workspace.FetchBlob("loss")
    optimized_grad = workspace.FetchBlob(grad_name)

    np.testing.assert_almost_equal(loss, optimized_loss)
    np.testing.assert_almost_equal(grad, optimized_grad)

    self.assertLess(count_blobs(optimized_net), count_blobs(model.Proto()))
def test_fast_memonger_unique_outputs(self):
    """Ops in the fast-memonger output must never repeat an output blob.

    Builds two FC layers (``fc0`` and ``fc1``, fed by ``data0`` and
    ``data1``), sums every ordered pair of them, concatenates the sums and
    applies Relu into ``merged``. The net is then passed through
    ``memonger.optimize_inference_fast`` and every op of the result is
    checked for duplicate entries in its output list.
    """
    m = model_helper.ModelHelper()
    fc = []
    for i in range(2):
        # Each layer gets its own name so the two layers are distinct blobs.
        z = brew.fc(
            m, "data{}".format(i), "fc{}".format(i), dim_in=2, dim_out=2)
        fc.append(z)

    r = []
    # Trick is here to have same input appear twice in a same Sum:
    # the x-is-y pairs feed one blob to both inputs of the Sum.
    for x in fc:
        for y in fc:
            r.append(brew.sum(m, [x, y], 1))
    concated = brew.concat(m, r, "concated")
    brew.relu(m, concated, "merged")

    # Parameter-init outputs, the final result and the inputs are passed
    # to the optimizer as static blobs.
    static_blobs = \
        [o for op in m.param_init_net.Proto().op for o in op.output] + \
        ["merged"] + ["data{}".format(i) for i in range(len(fc))]

    optimized_net = memonger.optimize_inference_fast(
        m.Proto(), static_blobs)
    for op in optimized_net.op:
        # The op's text form is the failure message, to identify the op.
        self.assertEqual(len(op.output), len(set(op.output)), str(op))
def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
    """Compare a small FC net before and after ``optimize_inference_fast``.

    The net is three FC layers, Relu, Softmax, LabelCrossEntropy and
    AveragedLoss, with gradient operators added for ``loss``. Both the
    original and the optimized net are run on one random batch; their
    ``loss`` and ``fc1_w`` gradient must be almost equal, and the optimized
    net must have a smaller ``count_blobs`` result.

    Args:
        input_dim: Input width of the first FC layer.
        output_dim: Output width of all FC layers and exclusive upper bound
            of the random labels.
        batch_size: Number of samples in the random batch.
        do: Device option applied to both nets and to the fed blobs.
    """
    helper = model_helper.ModelHelper()

    # Stack fc1 -> fc2 -> fc3; only the first layer reads input_dim columns.
    top = "data"
    layer_specs = (
        ("fc1", input_dim),
        ("fc2", output_dim),
        ("fc3", output_dim),
    )
    for layer_name, width_in in layer_specs:
        top = brew.fc(
            helper, top, layer_name, dim_in=width_in, dim_out=output_dim)

    # Relu names fc3 as its own output; the rest of the chain ends in "loss".
    (
        top.Relu([], top)
        .Softmax([], "pred")
        .LabelCrossEntropy(["label"], ["xent"])
        .AveragedLoss([], "loss")
    )

    gradients = helper.AddGradientOperators(["loss"])
    weight_grad = gradients["fc1_w"]

    for net in (helper.net, helper.param_init_net):
        net.Proto().device_option.CopyFrom(do)

    init_outputs = [
        blob
        for init_op in helper.param_init_net.Proto().op
        for blob in init_op.output
    ]
    static_blobs = init_outputs + ["data", "label", "loss", weight_grad]
    optimized_net = memonger.optimize_inference_fast(
        helper.Proto(), static_blobs)

    features = np.random.randn(batch_size, input_dim).astype(np.float32)
    labels = np.random.randint(
        low=0, high=output_dim, size=(batch_size,)
    ).astype(np.int32)

    workspace.RunNetOnce(helper.param_init_net)
    workspace.FeedBlob("data", features, device_option=do)
    workspace.FeedBlob("label", labels, device_option=do)

    weight_grad_name = str(weight_grad)

    workspace.RunNetOnce(helper.net)
    baseline_loss = workspace.FetchBlob("loss")
    baseline_grad = workspace.FetchBlob(weight_grad_name)

    workspace.RunNetOnce(optimized_net)
    memonged_loss = workspace.FetchBlob("loss")
    memonged_grad = workspace.FetchBlob(weight_grad_name)

    np.testing.assert_almost_equal(baseline_loss, memonged_loss)
    np.testing.assert_almost_equal(baseline_grad, memonged_grad)

    blobs_after = count_blobs(optimized_net)
    blobs_before = count_blobs(helper.Proto())
    self.assertLess(blobs_after, blobs_before)
def test_resnet_forward_only_fast_simplenet(self):
    """Test C++ memonger that is only for simple nets.

    Builds a test-mode ResNet-50 with ``resnet.create_resnet50``, optimizes
    its net with ``memonger.optimize_inference_fast`` and checks that:

    * ``count_blobs`` is lower for the optimized net,
    * ``count_shared_blobs`` of the optimized net is strictly between
      0 and 7,
    * the original and optimized nets produce almost-equal
      ``gpu_0/last_out_L1000`` for the same random input.
    """
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    # The model is created inside the "gpu_0" name scope; the output blob
    # fetched below carries that prefix.
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput("gpu_0/data")
        resnet.create_resnet50(
            model,
            data,
            num_input_channels=3,
            num_labels=1000,
            is_test=True
        )

    count_before = count_blobs(model.net.Proto())

    t = time.time()
    # Input, final output and all external inputs are passed as static.
    optim_proto = memonger.optimize_inference_fast(
        model.net.Proto(),
        set(["gpu_0/data", "gpu_0/last_out_L1000"]).union(
            set(model.net.Proto().external_input))
    )
    print("Optimization took {} secs".format(time.time() - t))
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Print the numbers first so they are visible even if a check fails.
    print(count_after, count_before, num_shared_blobs)
    # Dedicated comparison asserts report both operands on failure.
    self.assertLess(count_after, count_before)
    self.assertGreater(num_shared_blobs, 0)
    self.assertLess(num_shared_blobs, 7)

    # Run model and compare results
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob("gpu_0/data", data)
    model.net.Proto().type = 'simple'

    workspace.RunNetOnce(model.net)
    loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    np.testing.assert_almost_equal(loss1, optimized_loss1)
def test_forward_only_fast_simplenet(
    create_model,
    last_out_blob,
    data_blob="gpu_0/data",
    num_labels=1000,
):
    """Run a model before and after ``optimize_inference_fast`` and report.

    Builds a model through ``create_model``, optimizes its net with
    ``memonger.optimize_inference_fast``, then runs the original and the
    optimized net on the same random input. No assertions are made here;
    the measurements are returned for the caller to check.

    Args:
        create_model: Callable invoked as ``create_model(model, data,
            num_input_channels=3, num_labels=num_labels, is_test=True)``.
        last_out_blob: Name of the output blob fetched after each run; it
            is also passed to the optimizer as a static blob.
        data_blob: Name of the external input blob that receives the data.
        num_labels: Forwarded to ``create_model``.

    Returns:
        A three-element list:
        ``[(count_after, count_before), num_shared_blobs,
        (loss1, optimized_loss1)]``. The middle element is a plain value,
        not a tuple.
    """
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    # The model is built inside the "gpu_0" name scope.
    with core.NameScope("gpu_0"):
        net_input = model.net.AddExternalInput(data_blob)
        create_model(
            model,
            net_input,
            num_input_channels=3,
            num_labels=num_labels,
            is_test=True,
        )

    count_before = count_blobs(model.net.Proto())

    started = time.time()
    net_proto = model.net.Proto()
    # Input, output and every external input are passed as static blobs.
    static_blobs = set([data_blob, last_out_blob]).union(
        set(model.net.Proto().external_input))
    optim_proto = memonger.optimize_inference_fast(net_proto, static_blobs)
    print("Optimization took {} secs".format(time.time() - started))

    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)
    print(count_after, count_before, num_shared_blobs)

    # Run model and compare results
    workspace.RunNetOnce(model.param_init_net)
    batch = np.random.rand(4, 3, 227, 227).astype(np.float32)
    workspace.FeedBlob(data_blob, batch)
    model.net.Proto().type = 'simple'

    workspace.RunNetOnce(model.net)
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)

    blob_counts = (count_after, count_before)
    losses = (loss1, optimized_loss1)
    return [blob_counts, num_shared_blobs, losses]
def test_fast_memonger_unique_outputs(self):
    """Check that no op in the optimized net lists an output blob twice.

    Two FC layers, ``fc0`` and ``fc1``, are created on inputs ``data0`` and
    ``data1``. Every ordered pair of layers is fed to a Sum, the sums are
    concatenated, and a Relu writes the result to ``merged``. After
    ``memonger.optimize_inference_fast`` each op's outputs are compared
    against their de-duplicated set.
    """
    m = model_helper.ModelHelper()
    fc = []
    for i in range(2):
        # The index goes into both names, giving two separate layers.
        z = brew.fc(
            m, "data{}".format(i), "fc{}".format(i), dim_in=2, dim_out=2)
        fc.append(z)

    r = []
    # Trick is here to have same input appear twice in a same Sum
    # (this happens for the pairs where x and y are the same layer).
    for x in fc:
        for y in fc:
            r.append(brew.sum(m, [x, y], 1))
    concated = brew.concat(m, r, "concated")
    brew.relu(m, concated, "merged")

    # Static blobs: parameter-init outputs, the final blob, and the inputs.
    static_blobs = \
        [o for op in m.param_init_net.Proto().op for o in op.output] + \
        ["merged"] + ["data{}".format(i) for i in range(len(fc))]

    optimized_net = memonger.optimize_inference_fast(
        m.Proto(), static_blobs)
    for op in optimized_net.op:
        # str(op) is the failure message, so the offending op is shown.
        self.assertEqual(len(op.output), len(set(op.output)), str(op))