def RunningAllreduceWithGPUs(self, gpu_ids, allreduce_function):
    """A base function to test different scenarios.

    Creates one constant blob per GPU (the blob for GPU ``k`` is filled with
    ``k + 1``), hands the net to ``allreduce_function``, runs the net once
    and checks the ``"testblob_gpu_<k>_reduced"`` blob of every GPU against
    the expected sum.

    Inputs:
      gpu_ids: the ids of the GPUs taking part in the allreduce.
      allreduce_function: a callable invoked as
          allreduce_function(net, blob_names, "_reduced", gpu_ids); it is
          expected to add the reduction to the net.
    """
    workspace.ResetWorkspace()
    net = core.Net("mujitest")
    for gpu_id in gpu_ids:
        net.ConstantFill(
            [], "testblob_gpu_" + str(gpu_id),
            shape=[1, 2, 3, 4], value=float(gpu_id + 1),
            device_option=muji.OnGPU(gpu_id))
    allreduce_function(
        net, ["testblob_gpu_" + str(gpu_id) for gpu_id in gpu_ids],
        "_reduced", gpu_ids)
    workspace.RunNetOnce(net)
    # Each blob holds gpu_id + 1, so the reduced value is the sum of the ids
    # plus the number of GPUs.
    target_value = sum(gpu_ids) + len(gpu_ids)
    for blob in sorted(workspace.Blobs()):
        print('%s %s' % (blob, workspace.FetchBlob(blob)))
    for gpu_id in gpu_ids:
        # Fetch the reduced blob that belongs to *this* GPU; every GPU must
        # be verified, not just the last one.
        blob = workspace.FetchBlob(
            "testblob_gpu_" + str(gpu_id) + "_reduced")
        np.testing.assert_array_equal(
            blob, target_value,
            err_msg="gpu id %d of %s" % (gpu_id, str(gpu_ids)))
def testFetchBlobGPU(self):
    """Round-trips "testblob": run the net, fetch, overwrite, feed, re-fetch."""
    expected_shape = (1, 2, 3, 4)
    serialized_net = self.net.Proto().SerializeToString()
    run_ok = workspace.RunNetOnce(serialized_net)
    self.assertEqual(run_ok, True)

    # After the run the blob is expected to hold 1.0 everywhere.
    first_fetch = workspace.FetchBlob("testblob")
    self.assertEqual(first_fetch.shape, expected_shape)
    np.testing.assert_array_equal(first_fetch, 1.0)

    # Overwrite the fetched array and push it back into the workspace.
    first_fetch[:] = 2.0
    feed_ok = workspace.FeedBlob("testblob", first_fetch)
    self.assertEqual(feed_ok, True)

    # A second fetch must observe the values that were just fed.
    second_fetch = workspace.FetchBlob("testblob")
    self.assertEqual(second_fetch.shape, expected_shape)
    np.testing.assert_array_equal(second_fetch, 2.0)
def CheckNet(self, net, inputs=None, ignore=None):
    """Checks a network by inspecting all of its intermediate results, and
    see if things match.

    The net is run once per entry of self._device_options. The blobs fetched
    for the first device option are the reference every other device option
    is compared against.

    Inputs:
      net: the network to check. The device_option of each of its operators
          is overwritten with the device option being tested.
      inputs: optional mapping from blob name to the array fed under that
          name before each run. Defaults to no inputs.
      ignore: optional collection of output blob names to leave out of the
          comparison. Defaults to ignoring nothing.
    Outputs:
      boolean: True if no checked blob differs from the reference by more
          than self._threshold in any element, False otherwise.
    """
    # None instead of mutable defaults, so no object is shared across calls.
    if inputs is None:
        inputs = {}
    if ignore is None:
        ignore = ()
    old_ws_name = workspace.CurrentWorkspace()
    results = []
    blobs_to_check = [
        name
        for op in net.operators
        for name in op.outputs
        if name not in ignore
    ]
    workspace.SwitchWorkspace("_device_check_", True)
    try:
        for device_option in self._device_options:
            for name, arr in inputs.items():
                workspace.FeedBlob(name, arr, device_option)
            for op in net.operators:
                op.device_option.CopyFrom(device_option)
            workspace.RunNetOnce(net)
            results.append(
                [workspace.FetchBlob(name) for name in blobs_to_check])
        # After running on all devices, check correctness
        success = True
        for i in range(1, len(results)):
            for j, blob_name in enumerate(blobs_to_check):
                x = results[i][j]
                y = results[0][j]
                # NOTE: NaN differences compare False here and are therefore
                # not reported.
                if np.any(np.abs(x - y) > self._threshold):
                    print(' '.join([
                        'Failure in checking device option', str(i),
                        'and blob ', str(blob_name), '. The outputs are:']))
                    print(x.flatten())
                    print(y.flatten())
                    success = False
    finally:
        # Always hand the caller's workspace back, even if a run failed.
        workspace.SwitchWorkspace(old_ws_name)
    return success
def _testMiniAlexNet(self, order):
    """Runs the mini AlexNet (no dropout) through the CPU/CUDA device checker.

    Inputs:
      order: the storage order; "NCHW" selects a (4, 3, 227, 227) data blob,
          anything else a (4, 227, 227, 3) one.
    """
    # First, we get all the random initialization of parameters.
    model = self._MiniAlexNetNoDropout(order)
    workspace.ResetWorkspace()
    workspace.RunNetOnce(model.param_init_net)
    inputs = {}
    for param in model.params:
        param_name = str(param)
        inputs[param_name] = workspace.FetchBlob(param_name)

    data_shape = (4, 3, 227, 227) if order == "NCHW" else (4, 227, 227, 3)
    inputs["data"] = np.random.rand(*data_shape).astype(np.float32)
    inputs["label"] = np.array([1, 2, 3, 4]).astype(np.int32)

    cpu_device = caffe2_pb2.DeviceOption()
    cpu_device.device_type = caffe2_pb2.CPU
    gpu_device = caffe2_pb2.DeviceOption()
    gpu_device.device_type = caffe2_pb2.CUDA
    devices = [cpu_device, gpu_device]

    # The indices sometimes may be sensitive to small numerical differences
    # in the input, so we ignore checking them.
    ignored_blobs = ['_pool1_idx', '_pool2_idx', '_pool5_idx']
    checker = device_checker.DeviceChecker(1e-5, devices)
    ret = checker.CheckNet(model.net.Proto(), inputs, ignore=ignored_blobs)
    self.assertEqual(ret, True)
def GetLossAndGrad(self, op, grad_ops, x, input_name, outputs_with_grads):
    """Runs op on x and returns the loss and the gradient w.r.t. the input.

    The loss is half the sum of squares of the selected outputs; each
    selected output is also fed back as the gradient blob of that output
    before the gradient operators run.

    Inputs:
      op: the operator to run.
      grad_ops: the gradient operators, run after the output gradients are fed.
      x: the value fed into the blob called input_name.
      input_name: name of the input blob whose gradient is returned.
      outputs_with_grads: indices into op.outputs that contribute to the loss.
    Outputs:
      (loss, grad): the loss value and the fetched gradient of input_name.
    """
    device = self._device_option
    # Only the input under test is fed; nothing else changes between calls,
    # so the other inputs do not have to be fed again.
    workspace.FeedBlob(input_name, x, device)
    workspace.RunOperatorOnce(op)

    # Accumulate the loss and feed each output as its own gradient.
    loss = 0.
    for out_idx in outputs_with_grads:
        out_name = op.outputs[out_idx]
        out_value = workspace.FetchBlob(out_name)
        squared_sum = (out_value ** 2).sum()
        loss += squared_sum
        grad_name = core.GetGradientName(out_name)
        workspace.FeedBlob(grad_name, out_value, device)
    loss = loss / 2.

    # Run the gradient operators and read the gradient of the input.
    workspace.RunOperatorsOnce(grad_ops)
    input_grad_name = core.GetGradientName(input_name)
    grad = workspace.FetchBlob(input_grad_name)
    return loss, grad
def testBlobs(self):
    """Compares workspace blobs against the stored reference dumps.

    For every listed blob, the reference is loaded from
    data/testdata/caffe_translator/<name>_dump.npy. Shapes must match, and
    both arrays, divided by the maximum of the workspace blob, must agree to
    5 decimals.
    """
    names = [
        "conv1", "pool1", "norm1",
        "conv2", "pool2", "norm2",
        "conv3", "conv4", "conv5", "pool5",
        "fc6", "fc7", "fc8", "prob",
    ]
    for blob_name in names:
        print('%s %s' % ('Verifying ', blob_name))
        actual = workspace.FetchBlob(blob_name)
        dump_path = 'data/testdata/caffe_translator/' + blob_name + '_dump.npy'
        expected = np.load(dump_path)
        self.assertEqual(actual.shape, expected.shape)
        # Normalize both sides by the same factor before comparing.
        scale = np.max(actual)
        np.testing.assert_almost_equal(
            actual / scale, expected / scale, decimal=5)
def testMNISTNetworks(self):
    """Runs the MNIST training net through the CPU/CUDA device checker."""
    # First, we get all the random initialization of parameters.
    init_net, train_net = self._MNISTNetworks()
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)

    # Every blob present after initialization becomes an input of the check.
    inputs = {}
    for blob in workspace.Blobs():
        blob_name = str(blob)
        inputs[blob_name] = workspace.FetchBlob(blob_name)

    cpu_device = caffe2_pb2.DeviceOption()
    cpu_device.device_type = caffe2_pb2.CPU
    gpu_device = caffe2_pb2.DeviceOption()
    gpu_device.device_type = caffe2_pb2.CUDA
    devices = [cpu_device, gpu_device]

    checker = device_checker.DeviceChecker(1e-2, devices)
    ret = checker.CheckNet(train_net.Proto(), inputs)
    self.assertEqual(ret, True)
def CheckSimple(self, op, inputs, outputs_to_check):
    """Checks that an operator gives matching outputs on every device.

    The operator is run once per entry of self._device_options. The outputs
    fetched for the first device option are the reference every other
    device option is compared against.

    Inputs:
      op: the operator to be checked. Its device_option is overwritten with
          the device option being tested.
      inputs: the input data in numpy arrays, in the order of op.inputs.
      outputs_to_check: indices into op.outputs selecting the outputs that
          are compared between devices.
    Outputs:
      boolean: True if it passes, False if it does not pass.
    """
    # Entering the checker workspace
    old_ws_name = workspace.CurrentWorkspace()
    results = []
    workspace.SwitchWorkspace("_device_check_", True)
    try:
        for device_option in self._device_options:
            for input_idx, arr in enumerate(inputs):
                workspace.FeedBlob(op.inputs[input_idx], arr, device_option)
            op.device_option.CopyFrom(device_option)
            workspace.RunOperatorOnce(op)
            results.append([
                workspace.FetchBlob(op.outputs[idx])
                for idx in outputs_to_check
            ])
            # Everything is done, reset the workspace.
            workspace.ResetWorkspace()
        # After running on all devices, check correctness
        success = True
        for i in range(1, len(results)):
            for j, output_idx in enumerate(outputs_to_check):
                x = results[i][j]
                y = results[0][j]
                # NOTE: NaN differences compare False here and are therefore
                # not reported.
                if np.any(np.abs(x - y) > self._threshold):
                    # j is a position in outputs_to_check; the blob name has
                    # to be looked up through the output index stored there.
                    print(' '.join([
                        'Failure in checking device option', str(i),
                        'and output ', str(op.outputs[output_idx]),
                        '. The outputs are:']))
                    print(x.flatten())
                    print(y.flatten())
                    success = False
    finally:
        # Always hand the caller's workspace back, even if a run failed.
        workspace.SwitchWorkspace(old_ws_name)
    return success
def CheckSimple(self, op, inputs, outputs_to_check):
    """Checks the operator with different device implementations.

    The operator is run once per entry of self._device_options. The outputs
    fetched for the first device option are the reference every other
    device option is compared against.

    Inputs:
      op: the operator to be checked. Its device_option is overwritten with
          the device option being tested.
      inputs: the input data in numpy arrays, in the order of op.input.
      outputs_to_check: the outputs to check between devices, given as
          indices into op.output.
    Outputs:
      boolean: True if it passes, False if it does not pass.
    """
    # Entering the checker workspace
    old_ws_name = workspace.CurrentWorkspace()
    results = []
    workspace.SwitchWorkspace("_device_check_", True)
    try:
        for device_option in self._device_options:
            for input_idx, arr in enumerate(inputs):
                workspace.FeedBlob(op.input[input_idx], arr, device_option)
            op.device_option.CopyFrom(device_option)
            workspace.RunOperatorOnce(op)
            results.append([
                workspace.FetchBlob(op.output[idx])
                for idx in outputs_to_check
            ])
            # Everything is done, reset the workspace.
            workspace.ResetWorkspace()
        # After running on all devices, check correctness
        success = True
        for i in range(1, len(results)):
            for j, output_idx in enumerate(outputs_to_check):
                x = results[i][j]
                y = results[0][j]
                # NOTE: NaN differences compare False here and are therefore
                # not reported.
                if np.any(np.abs(x - y) > self._threshold):
                    # j is a position in outputs_to_check; the blob name has
                    # to be looked up through the output index stored there.
                    print(' '.join([
                        'Failure in checking device option', str(i),
                        'and output ', str(op.output[output_idx]),
                        '. The outputs are:']))
                    print(x.flatten())
                    print(y.flatten())
                    success = False
                #else:
                #  print ('Passed device pair (0, %d), %s %s' %
                #         (i, outputs_to_check[j], y.shape))
    finally:
        # Always hand the caller's workspace back, even if a run failed.
        workspace.SwitchWorkspace(old_ws_name)
    return success
# If you have multiple GPUs, you also might want to specify a gpu id.
#DEVICE_OPTION.device_type = caffe2_pb2.CUDA
#DEVICE_OPTION.cuda_gpu_id = 0

# Caffe2 has a concept of "workspace", which is similar to that of Matlab. Each workspace
# is a self-contained set of tensors and networks. In this case, we will just use the default
# workspace so we won't dive too deep into it.
workspace.SwitchWorkspace('default')

# First, we feed all the parameters to the workspace.
for param in tensors.protos:
    workspace.FeedBlob(param.name, param, DEVICE_OPTION)
# The network expects an input blob called "input", which we create here.
# The content of the input blob is going to be fed when we actually do
# classification.
workspace.CreateBlob("input")
# Specify the device option of the network, and then create it.
net.device_option.CopyFrom(DEVICE_OPTION)
workspace.CreateNet(net)

########################################
### MY CODE ############################
# Dump every parameter blob into its own HDF5 file, dump/<name>.h5, holding
# a single dataset named after the parameter.
import os

import h5py

# The output directory has to exist before h5py can create files in it.
if not os.path.isdir('dump'):
    os.makedirs('dump')

for param in tensors.protos:
    print(param.name)
    filters = workspace.FetchBlob(param.name)
    # The context manager closes the file even if create_dataset raises.
    with h5py.File('dump/' + param.name + '.h5', 'w') as h5f:
        h5f.create_dataset(param.name, data=filters)