def test_cross_nets(self):
    """Check the ops of the second net after cross-net device-copy injection.

    ``fc_w`` and ``fc_b`` are filled in ``init_net`` before the device
    scope is entered, while the ``FC`` op that consumes them is added to
    ``net`` under a CUDA device scope (GPU 1).  After
    ``core.InjectDeviceCopiesAmongNets`` the second returned net is
    expected to start with one ``CopyCPUToGPU`` op per parameter, followed
    by the ``FC`` op rewired to read the ``*_cuda_1`` copies.
    """
    net = core.Net("test")
    init_net = core.Net("init")

    device_option = caffe2_pb2.DeviceOption()
    device_option.device_type = caffe2_pb2.CUDA
    device_option.cuda_gpu_id = 1

    # Parameters are created before entering the device scope.
    weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
    bias = init_net.ConstantFill([], 'fc_b', shape=[10])

    with core.DeviceScope(device_option):
        net.FC(["data", weight, bias], "fc1")

    # "data" is declared as already living on the target device.
    data_remap = {'data': device_option}
    nets, _ = core.InjectDeviceCopiesAmongNets(
        [init_net, net], blob_to_device_init=data_remap)
    ops = nets[1]._net.op

    # Ops 0 and 1: one copy op per parameter, in FC input order.
    for index, copied_blob in enumerate(("fc_w_cuda_1", "fc_b_cuda_1")):
        op = ops[index]
        self.assertEqual(op.type, "CopyCPUToGPU")
        self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.output[0], copied_blob)

    # Op 2: the FC op, now consuming the copied parameters.
    op = ops[2]
    self.assertEqual(op.type, "FC")
    self.assertEqual(op.input[0], "data")
    self.assertEqual(op.input[1], "fc_w_cuda_1")
    self.assertEqual(op.input[2], "fc_b_cuda_1")
    self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
    self.assertEqual(op.device_option.cuda_gpu_id, 1)
def test_cross_nets(self):
    """Compare the second injected net against a reference text proto.

    Builds an ``init_net`` that fills ``fc_w``/``fc_b`` and a ``net`` whose
    ``FC`` op is added under a CUDA device scope (GPU 1), runs
    ``core.InjectDeviceCopiesAmongNets`` over both, and checks that the
    string form of the second returned net equals the expected proto text
    (with the net name blanked out).
    """
    net = core.Net("test")
    init_net = core.Net("init")

    device_option = caffe2_pb2.DeviceOption()
    device_option.device_type = caffe2_pb2.CUDA
    device_option.cuda_gpu_id = 1

    # Parameters are created before entering the device scope.
    weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
    bias = init_net.ConstantFill([], 'fc_b', shape=[10])

    with core.DeviceScope(device_option):
        net.FC(["data", weight, bias], "fc1")

    data_remap = {'data': device_option}
    nets, _ = core.InjectDeviceCopiesAmongNets(
        [init_net, net], blob_to_device_init=data_remap)

    # Expected text proto, one entry per line so the literal does not
    # depend on how this method is indented in the file.
    ref_str = "\n".join([
        'name: ""',
        'op {',
        '  input: "fc_w"',
        '  output: "fc_w_cuda_1"',
        '  name: ""',
        '  type: "CopyCPUToGPU"',
        '  device_option {',
        '    device_type: 1',
        '    cuda_gpu_id: 1',
        '  }',
        '}',
        'op {',
        '  input: "fc_b"',
        '  output: "fc_b_cuda_1"',
        '  name: ""',
        '  type: "CopyCPUToGPU"',
        '  device_option {',
        '    device_type: 1',
        '    cuda_gpu_id: 1',
        '  }',
        '}',
        'op {',
        '  input: "data"',
        '  input: "fc_w_cuda_1"',
        '  input: "fc_b_cuda_1"',
        '  output: "fc1"',
        '  name: ""',
        '  type: "FC"',
        '  device_option {',
        '    device_type: 1',
        '    cuda_gpu_id: 1',
        '  }',
        '}',
        'external_input: "data"',
        'external_input: "fc_w"',
        'external_input: "fc_b"',
    ])

    proto = nets[1].Proto()
    proto.name = ''  # Ignore the name
    self.assertEqual(str(proto).strip(), ref_str.strip())
def test_cross_nets(self):
    """Check copy injection when a blob is moved to the GPU in ``init_net``.

    ``fc_w``, ``fc_b`` and ``const`` are filled in ``init_net`` before the
    device scope is entered.  Under a CUDA device scope (GPU 1) ``const``
    is then updated in place by an ``Add`` in ``init_net``, and ``net``
    gets an ``FC`` op followed by an ``Add`` of its output with ``const``.
    After ``core.InjectDeviceCopiesAmongNets`` the second returned net is
    expected to contain two ``CopyCPUToGPU`` ops, the rewired ``FC`` op, and
    an ``Add`` reading ``const_cuda_1``, with ``const_cuda_1`` listed among
    its external inputs.
    """
    net = core.Net("test")
    init_net = core.Net("init")

    device_option = caffe2_pb2.DeviceOption()
    device_option.device_type = caffe2_pb2.CUDA
    device_option.cuda_gpu_id = 1

    # Blobs created before entering the device scope.
    weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
    bias = init_net.ConstantFill([], 'fc_b', shape=[10])
    const = init_net.ConstantFill([], 'const', shape=[], value=1.)

    with core.DeviceScope(device_option):
        const = init_net.Add([const, const], [const])
        fc_out = net.FC(["data", weight, bias], "fc1")
        net.Add([fc_out, const], [fc_out])

    data_remap = {'data': device_option}
    nets, _ = core.InjectDeviceCopiesAmongNets(
        [init_net, net], blob_to_device_init=data_remap)
    net_def = nets[1]._net
    ops = net_def.op

    # Ops 0 and 1: one copy op per FC parameter, in FC input order.
    for index, copied_blob in enumerate(("fc_w_cuda_1", "fc_b_cuda_1")):
        op = ops[index]
        self.assertEqual(op.type, "CopyCPUToGPU")
        self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.output[0], copied_blob)

    # Op 2: the FC op, now consuming the copied parameters.
    op = ops[2]
    self.assertEqual(op.type, "FC")
    self.assertEqual(op.input[0], "data")
    self.assertEqual(op.input[1], "fc_w_cuda_1")
    self.assertEqual(op.input[2], "fc_b_cuda_1")
    self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
    self.assertEqual(op.device_option.cuda_gpu_id, 1)

    # Op 3: the Add op reads the GPU-side name of ``const``.
    op = ops[3]
    self.assertEqual(op.type, "Add")
    self.assertEqual(op.input[0], "fc1")
    self.assertEqual(op.input[1], "const_cuda_1")

    # check that moved blob is in input to the new net
    for blob in ["data", "fc_w", "fc_b", "const_cuda_1"]:
        self.assertIn(blob, net_def.external_input)
def convert_model_gpu(args, net, init_net):
    """Build GPU-targeted copies of ``net`` and ``init_net``.

    Both nets are deep-copied, so the arguments are left untouched.  In the
    copied predict net every op gets the CUDA device option except the op
    types listed as CPU-only, for which the filter returns ``None``
    (presumably leaving the op as it is -- confirm against ``op_filter`` /
    ``convert_op_in_proto``).  In the copied init net, ops whose first
    output is ``"im_info"`` or ``"anchor"`` get the CPU device option and
    all others get the CUDA one.  Finally
    ``core.InjectDeviceCopiesAmongNets`` is run over
    ``[init_net_copy, net_copy]``.

    Args:
        args: object whose ``device`` attribute must equal ``'gpu'``.
        net: the predict net to convert.
        init_net: the matching init net.

    Returns:
        A two-element list taken from the first element of the injection
        result: its entry at index 1 first, then its entry at index 0
        (i.e. predict net, then init net, given the input order above).
    """
    assert args.device == 'gpu'

    gpu_net = copy.deepcopy(net)
    gpu_init_net = copy.deepcopy(init_net)

    cuda_option = mutils.get_device_option_cuda()
    cpu_option = mutils.get_device_option_cpu()

    # (op type, inputs) pairs handed to mutils.filter_op.
    cpu_only_ops = (
        ("CollectAndDistributeFpnRpnProposals", None),
        ("GenerateProposals", None),
        ("BBoxTransform", None),
        ("BoxWithNMSLimit", None),
    )
    # Init-net outputs that get the CPU device option.
    cpu_only_blobs = ("im_info", "anchor")

    @op_filter()
    def convert_op_gpu(op):
        is_cpu_only = any(
            mutils.filter_op(op, type=op_type, inputs=op_inputs)
            for op_type, op_inputs in cpu_only_ops
        )
        if is_cpu_only:
            return None
        op.device_option.CopyFrom(cuda_option)
        return [op]

    @op_filter()
    def convert_init_op_gpu(op):
        target = cpu_option if op.output[0] in cpu_only_blobs else cuda_option
        op.device_option.CopyFrom(target)
        return [op]

    convert_op_in_proto(gpu_init_net.Proto(), convert_init_op_gpu)
    convert_op_in_proto(gpu_net.Proto(), convert_op_gpu)

    injected_nets = core.InjectDeviceCopiesAmongNets(
        [gpu_init_net, gpu_net])[0]
    return [injected_nets[1], injected_nets[0]]