def _run_and_report(label, net_name):
    """Run `net_name` for NUM_ITER iterations, print elapsed time and the
    achieved memory bandwidth, and return that bandwidth in GB/s.

    Bandwidth model: each iteration moves 4-byte floats through
    NUM_REPLICAS buffers of SHAPE_LEN x SHAPE_LEN elements (constants are
    module-level; assumed float32 — the 4 is bytes per element).
    """
    start = time.time()
    workspace.RunNet(net_name, NUM_ITER)
    elapsed = time.time() - start
    print("{} time:".format(label), elapsed)
    bandwidth = 4 * SHAPE_LEN * SHAPE_LEN * NUM_REPLICAS * NUM_ITER / elapsed / GB
    print("{} BW: {} GB/s".format(label, bandwidth))
    return bandwidth


def main():
    """Benchmark single-socket vs. cross-socket memory bandwidth.

    Builds two copies of the benchmark net (one NUMA-local, one crossing
    sockets), runs their init nets once, then times NUM_ITER iterations of
    each over 4 rounds, printing the bandwidth ratio per round.
    """
    # Precondition: the benchmark is meaningless without >= 2 NUMA nodes.
    # NOTE(review): assert is stripped under `python -O`; kept as-is to
    # preserve the original failure mode for callers.
    assert workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2
    single_init, single_net = build_net("single_net", False)
    cross_init, cross_net = build_net("cross_net", True)
    # Init nets are run once to allocate/fill blobs before timing.
    workspace.CreateNet(single_init)
    workspace.RunNet(single_init.Name())
    workspace.CreateNet(cross_init)
    workspace.RunNet(cross_init.Name())
    workspace.CreateNet(single_net)
    workspace.CreateNet(cross_net)
    for _ in range(4):
        single_bw = _run_and_report("Single socket", single_net.Name())
        cross_bw = _run_and_report("Cross socket", cross_net.Name())
        print("Single BW / Cross BW: {}".format(single_bw / cross_bw))
def main():
    """Time single-socket vs. cross-socket net execution.

    Builds a NUMA-local net and a cross-socket net, then over 4 rounds
    runs each for a fixed number of iterations and prints the wall-clock
    time of each run.
    """
    # Precondition: comparison requires at least two NUMA nodes.
    # NOTE(review): assert is stripped under `python -O`; kept as-is to
    # preserve the original failure mode.
    assert workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2
    num_iter = 5000  # iterations per timed RunNet call (was a repeated magic number)
    single_net = build_net("single_net", False)
    cross_net = build_net("cross_net", True)
    workspace.CreateNet(single_net)
    workspace.CreateNet(cross_net)
    for _ in range(4):
        start = time.time()
        workspace.RunNet(single_net.Name(), num_iter)
        print("Single socket time:", time.time() - start)
        start = time.time()
        workspace.RunNet(cross_net.Name(), num_iter)
        print("Cross socket time:", time.time() - start)
# NOTE(review): this line is a whitespace-mangled paste — three units are
# collapsed onto one physical line and the first one is incomplete:
#   1. the TAIL of a net-builder function (its `def` line is not visible here):
#      creates a CUDA DeviceOption for device 0, copies two CPU output blobs
#      to GPU, and returns the net;
#   2. a `NUMATest` unittest class (skipped unless NUMA is enabled, >= 2 NUMA
#      nodes exist, and GPU support is built) asserting that output_blob_0 /
#      output_blob_1 land on NUMA nodes 0 / 1 respectively;
#   3. the `if __name__ == '__main__'` guard running unittest.main().
# Left byte-identical: the enclosing function's signature and earlier body are
# outside this view, so the original indentation cannot be reconstructed safely.
gpu_device_option = caffe2_pb2.DeviceOption() gpu_device_option.device_type = caffe2_pb2.CUDA gpu_device_option.device_id = 0 net.CopyCPUToGPU("output_blob_0", "output_blob_0_gpu", device_option=gpu_device_option) net.CopyCPUToGPU("output_blob_1", "output_blob_1_gpu", device_option=gpu_device_option) return net @unittest.skipIf(not workspace.IsNUMAEnabled(), "NUMA is not enabled") @unittest.skipIf(workspace.GetNumNUMANodes() < 2, "Not enough NUMA nodes") @unittest.skipIf(not workspace.has_gpu_support, "No GPU support") class NUMATest(TestCase): def test_numa(self): net = build_test_net("test_numa") workspace.RunNetOnce(net) self.assertEqual(workspace.GetBlobNUMANode("output_blob_0"), 0) self.assertEqual(workspace.GetBlobNUMANode("output_blob_1"), 1) if __name__ == '__main__': unittest.main()
"--num-workers", type=int, default=1, help="the number of worker per numa node in the thread pool", ) parser.add_argument("--num-numa-nodes", type=int, default=1) args, extra_args = parser.parse_known_args() ### some basic setup ### np.random.seed(args.numpy_rand_seed) np.set_printoptions(precision=args.print_precision) global_options = [ "caffe2", "--caffe2_log_level=2", "--caffe2_cpu_numa_enabled=1" ] + extra_args workspace.GlobalInit(global_options) assert workspace.IsNUMAEnabled() ln = np.fromstring(args.arch, dtype=int, sep="-") # test prints print("mlp arch (" + str(ln.size - 1) + " layers, with input to output dimensions):") print(ln) ### prepare training data ### nbatches = int(np.ceil((args.data_size * 1.0) / args.mini_batch_size)) # inputs m0 = ln[0] lX = [] # targets ml = ln[ln.size - 1] lT = [] for j in range(0, nbatches):