def runOpOnInput(
    device_option,
    op,
    inputs,
    input_device_options=None,
):
    """Run a copy of `op` once on `inputs` and return the fetched outputs.

    The operator is deep-copied and the copy's device option is overwritten
    with `device_option`, so the caller's `op` is not modified. Everything
    runs inside `temp_workspace()`.

    Args:
        device_option: device option copied onto the operator; also the
            fallback device used when feeding an input blob.
        op: the operator to run.
        inputs: values fed, in order, to the blobs named in `op.input`.
        input_device_options: optional mapping from input blob name to the
            device option used to feed that blob. When falsy, the first
            element returned by `core.InferOpBlobDevicesAsDict` is used.

    Returns:
        A list holding one fetched blob per entry of `op.output`, in order.

    Raises:
        ValueError: if the operator declares more inputs than were supplied.
    """
    op_copy = copy.deepcopy(op)
    op_copy.device_option.CopyFrom(device_option)

    with temp_workspace():
        if len(inputs) < len(op_copy.input):
            raise ValueError(
                'must supply an input for each input on the op: %s vs %s' %
                (op_copy.input, inputs))

        feed_devices = (
            input_device_options or core.InferOpBlobDevicesAsDict(op_copy)[0]
        )
        for blob_name, value in zip(op_copy.input, inputs):
            workspace.FeedBlob(
                blob_name,
                value,
                device_option=feed_devices.get(blob_name, device_option),
            )

        workspace.RunOperatorOnce(op_copy)
        return [workspace.FetchBlob(name) for name in op_copy.output]
def runOpBenchmark(
    device_option,
    op,
    inputs,
    input_device_options=None,
    iterations=10,
):
    """Benchmark a copy of `op` wrapped in a single-operator net.

    The operator is deep-copied, its device option is overwritten with
    `device_option`, and it is placed alone in a fresh `NetDef`. The net is
    named after the operator, or "test" when the operator has no name.

    Args:
        device_option: device option copied onto the operator; also the
            fallback device used when feeding an input blob.
        op: the operator to benchmark.
        inputs: values fed, in order, to the blobs named in `op.input`.
        input_device_options: optional mapping from input blob name to the
            device option used to feed that blob. When falsy, the first
            element returned by `core.InferOpBlobDevicesAsDict` is used.
        iterations: forwarded to `workspace.BenchmarkNet` as its third
            argument.

    Returns:
        Whatever `workspace.BenchmarkNet` returns.
    """
    bench_op = copy.deepcopy(op)
    bench_op.device_option.CopyFrom(device_option)

    bench_net = caffe2_pb2.NetDef()
    bench_net.op.extend([bench_op])
    bench_net.name = bench_op.name or "test"

    with temp_workspace():
        feed_devices = (
            input_device_options or core.InferOpBlobDevicesAsDict(bench_op)[0]
        )
        for blob_name, value in zip(bench_op.input, inputs):
            workspace.FeedBlob(
                blob_name,
                value,
                device_option=feed_devices.get(blob_name, device_option),
            )

        workspace.CreateNet(bench_net)
        # NOTE(review): the literal 1 and True are presumably the warm-up run
        # count and a per-operator timing flag -- confirm against BenchmarkNet.
        return workspace.BenchmarkNet(bench_net.name, 1, iterations, True)
def assertRunOpRaises(
    self,
    device_option,
    op,
    inputs,
    input_device_options=None,
    exception=(Exception, ),
    regexp=None,
):
    """Assert that running `op` on `inputs` raises `exception`.

    A deep copy of `op` (with its device option overwritten by
    `device_option`) is run once inside `temp_workspace()` after the inputs
    have been fed.

    Args:
        device_option: device option copied onto the operator; also the
            fallback device used when feeding an input blob.
        op: the operator expected to fail.
        inputs: values fed, in order, to the blobs named in `op.input`.
        input_device_options: optional mapping from input blob name to the
            device option used to feed that blob. When falsy, the first
            element returned by `core.InferOpBlobDevicesAsDict` is used.
        exception: exception class (or tuple of classes) expected from
            `workspace.RunOperatorOnce`.
        regexp: when not None, the check goes through
            `six.assertRaisesRegex` with this pattern instead of the plain
            `assertRaises`.
    """
    failing_op = copy.deepcopy(op)
    failing_op.device_option.CopyFrom(device_option)

    with temp_workspace():
        feed_devices = (
            input_device_options
            or core.InferOpBlobDevicesAsDict(failing_op)[0]
        )
        for blob_name, value in zip(failing_op.input, inputs):
            workspace.FeedBlob(
                blob_name,
                value,
                device_option=feed_devices.get(blob_name, device_option),
            )

        if regexp is not None:
            six.assertRaisesRegex(
                self, exception, regexp,
                workspace.RunOperatorOnce, failing_op)
        else:
            self.assertRaises(
                exception, workspace.RunOperatorOnce, failing_op)
def assertValidationChecks(
    self,
    device_option,
    op,
    inputs,
    validator,
    input_device_options=None,
    as_kwargs=True,
    init_net=None,
):
    """Run `op` on `inputs` and pass the inputs and outputs to `validator`.

    A deep copy of `op` (with its device option overwritten by
    `device_option`) is run once inside `temp_workspace()`. The inputs are
    fed first, then `init_net` is run if given, then the operator.

    Args:
        device_option: device option copied onto the operator; also the
            fallback device used when feeding an input blob.
        op: the operator to run.
        inputs: values fed, in order, to the blobs named in `op.input`.
        validator: callable performing the checks. With `as_kwargs` it is
            called as `validator(**{blob_name: value})` covering the supplied
            inputs and every output; otherwise it is called as
            `validator(inputs=inputs, outputs=outputs)`.
        input_device_options: optional mapping from input blob name to the
            device option used to feed that blob. When falsy, the first
            element returned by `core.InferOpBlobDevicesAsDict` is used.
        as_kwargs: selects the calling convention described above. It
            requires all input and output blob names to be distinct.
        init_net: optional net run once after feeding the inputs and before
            running the operator.

    Raises:
        AssertionError: in `as_kwargs` mode, if a blob name occurs more than
            once among the operator's inputs and outputs.
    """
    if as_kwargs:
        assert len(set(list(op.input) + list(op.output))) == \
            len(op.input) + len(op.output), \
            "in-place ops are not supported in as_kwargs mode"
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=_input_device_options.get(n, device_option)
            )
        if init_net:
            workspace.RunNetOnce(init_net)
        workspace.RunOperatorOnce(op)
        outputs = [workspace.FetchBlob(n) for n in op.output]
        if as_kwargs:
            # Pair inputs and outputs with their own name lists. Zipping the
            # concatenated lists would bind outputs to the wrong names when
            # fewer inputs than `op.input` were supplied, and `inputs +
            # outputs` fails outright when `inputs` is a tuple.
            named_blobs = dict(zip(op.input, inputs))
            named_blobs.update(zip(op.output, outputs))
            validator(**named_blobs)
        else:
            validator(inputs=inputs, outputs=outputs)
def assertReferenceChecks(
    self,
    device_option,
    op,
    inputs,
    reference,
    input_device_options=None,
    threshold=1e-4,
    output_to_grad=None,
    grad_reference=None,
    atol=None,
    outputs_to_check=None,
    ensure_outputs_are_inferred=False,
):
    """
    This runs the reference Python function implementation
    (effectively calling `reference(*inputs)`), and compares that
    to the output of `op`, with an absolute/relative tolerance
    given by the `threshold` parameter.

    Useful for checking the implementation matches the Python
    (typically NumPy) implementation of the same functionality.

    Usage example:

        @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
        def test_softsign(self, X, inplace, gc, dc):
            op = core.CreateOperator(
                "Softsign", ["X"], ["X" if inplace else "Y"])

            def softsign(X):
                return (X / (1 + np.abs(X)),)

            self.assertReferenceChecks(gc, op, [X], softsign)

    Args:
        device_option: device option copied onto a deep copy of `op`; also
            the fallback device used when feeding an input blob and the
            device scope for the gradient check.
        op: the operator under test (the caller's object is not modified).
        inputs: values fed, in order, to the blobs named in `op.input`.
        reference: callable invoked as `reference(*inputs)`; must return a
            tuple or list with one entry per checked output.
        input_device_options: optional mapping from input blob name to the
            device option used to feed that blob. When falsy, the first
            element returned by `core.InferOpBlobDevicesAsDict` is used.
        threshold: relative tolerance, and the absolute tolerance too when
            `atol` is None. Also forwarded to the gradient check.
        output_to_grad: forwarded to `self._assertGradReferenceChecks`;
            required whenever `grad_reference` is given.
        grad_reference: when not None, the gradient reference check is run
            after the outputs have been compared.
        atol: absolute tolerance; defaults to `threshold`.
        outputs_to_check: indices into `op.output` to compare, paired in
            order with the reference outputs. When falsy, every output is
            checked and the reference must return exactly that many values.
        ensure_outputs_are_inferred: re-raise shape/type inference failures
            and forward the flag to `self._assertInferTensorChecks`.

    Returns:
        The list of fetched output blobs that were compared.

    Raises:
        ValueError: if the operator declares more inputs than were supplied.
        RuntimeError: if `reference` does not return a tuple or list, or if
            shape/type inference fails while either
            `ensure_outputs_are_inferred` is set or the environment variable
            CAFFE2_ASSERT_SHAPEINFERENCE equals '1'.
        AssertionError: if `grad_reference` is given without
            `output_to_grad`, or if a comparison fails.
    """
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    # Resolve the default once instead of on every loop iteration.
    if atol is None:
        atol = threshold

    with temp_workspace():
        if (len(op.input) > len(inputs)):
            raise ValueError(
                'must supply an input for each input on the op: %s vs %s' %
                (op.input, inputs))
        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=_input_device_options.get(n, device_option)
            )
        net = core.Net("opnet")
        net.Proto().op.extend([op])
        test_shape_inference = False
        try:
            (shapes, types) = workspace.InferShapesAndTypes([net])
            test_shape_inference = True
        except RuntimeError as e:
            # Temporarily catch runtime errors when inferring shape
            # and type info
            logging.warning(str(e))
            if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1' or \
                    ensure_outputs_are_inferred:
                raise
        workspace.RunNetOnce(net)
        reference_outputs = reference(*inputs)
        if not isinstance(reference_outputs, (tuple, list)):
            raise RuntimeError(
                "You are providing a wrong reference implementation. A "
                "proper one should return a tuple/list of numpy arrays.")
        if not outputs_to_check:
            self.assertEqual(len(reference_outputs), len(op.output))
            outputs_to_check = list(range(len(op.output)))
        outs = []
        for (output_index, ref) in zip(outputs_to_check, reference_outputs):
            output_blob_name = op.output[output_index]
            output = workspace.FetchBlob(output_blob_name)
            if output.dtype.kind in ('S', 'O'):
                # Byte-string and object arrays are compared for equality;
                # a tolerance is only applied to the other dtypes.
                np.testing.assert_array_equal(output, ref)
            else:
                np.testing.assert_allclose(
                    output, ref, atol=atol, rtol=threshold,
                    err_msg=(
                        'Output {0} is not matching the reference'.format(
                            output_blob_name,
                        )),
                )
            if test_shape_inference:
                # `shapes`/`types` are only bound when inference succeeded.
                self._assertInferTensorChecks(
                    output_blob_name, shapes, types, output,
                    ensure_output_is_inferred=ensure_outputs_are_inferred)
            outs.append(output)
        if grad_reference is not None:
            assert output_to_grad is not None, \
                "If grad_reference is set, " \
                "output_to_grad has to be set as well"

            with core.DeviceScope(device_option):
                self._assertGradReferenceChecks(
                    op, inputs, reference_outputs,
                    output_to_grad, grad_reference,
                    threshold=threshold)

        return outs
def CheckSimple(self, op, inputs, input_to_check, outputs_with_grads,
                grad_ops=None, input_device_options=None):
    """Checks the operator in a very simple fashion by stacking a sum of
    squares on the top.

    The gradient reported by the gradient operators is compared against a
    central-difference estimate computed by perturbing each element of the
    checked input by `self._stepsize` in both directions.

    Note that `op.device_option` is overwritten with `self._device_option`
    on the object passed in, and that `inputs[input_to_check]` is perturbed
    in place (and stepped back) while the estimate is computed.

    Inputs:
      op: the operator to be checked.
      inputs: the input data in numpy arrays.
      input_to_check: an index specifying which input blob we should
          check.
      outputs_with_grads: indices specifying which output blobs will we
          need to check gradients with. For these outputs, we will collect
          a squared sum and also feed in their gradients.
      grad_ops: the gradient operators. If not given, we will get the
          gradient operators from the gradient registry. The names of the
          gradient blobs are taken from the registry in either case, so
          `op` needs a registered gradient even when this is supplied.
      input_device_options: an optional mapping from input names to
          DeviceOptions (to override the default DeviceOption)
    Outputs:
      A tuple `(passed, grad, grad_estimate)`: `passed` is True if every
      element of `grad` is close to `grad_estimate` (absolute and relative
      tolerance `self._threshold`), False otherwise.
    Raises:
      Exception: if the gradient's shape differs from the input's shape.
    """
    # Entering the checker workspace
    old_ws_name = workspace.CurrentWorkspace()
    switched_workspace = self._workspace_name != old_ws_name
    if switched_workspace:
        workspace.SwitchWorkspace(self._workspace_name, True)

    try:
        op.device_option.CopyFrom(self._device_option)
        # TODO(jiayq): use the gradient registration instead of the old
        # hack.
        # The registry is always queried: it is the only source of the
        # gradient blob names, which are needed even for caller-supplied
        # gradient operators.
        registry_grad_ops, g_input = \
            core.GradientRegistry.GetGradientForOp(
                op, [s + '_grad' for s in op.output])
        if grad_ops is None:
            grad_ops = registry_grad_ops

        dims_to_check = inputs[input_to_check].size
        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                _input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        input_name = op.input[input_to_check]
        grad_name = g_input[input_to_check]
        _, grad = self.GetLossAndGrad(
            op, grad_ops, inputs[input_to_check], input_name, grad_name,
            outputs_with_grads)
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        for current_dim in range(dims_to_check):
            # Positive gradient
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads)
            # Negative gradient
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads)
            # Recover the value
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2

        # Now, check correctness
        fail_mat = ~np.isclose(
            grad, grad_estimate,
            atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx],
                             grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        return ret, grad, grad_estimate
    finally:
        # After finishing -- on success or on error -- clean things up so
        # the caller is never left inside the checker workspace.
        if switched_workspace:
            # We reset the workspace to make sure everything intermediate
            # is cleaned up. Note that there is no need to delete a
            # workspace - when empty it takes a very limited amount of
            # memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
def CheckSimple(
    self,
    op,
    inputs,
    input_to_check,
    outputs_with_grads,
    grad_ops=None,
    input_device_options=None,
    ensure_outputs_are_inferred=False,
):
    """Checks the operator in a very simple fashion by stacking a sum of
    squares on the top.

    The gradient reported by the gradient operators is compared against a
    central-difference estimate computed by perturbing elements of the
    checked input by `self._stepsize` in both directions. Unless the
    environment variable CAFFE2_FULL_GRAD_CHECK equals '1', only the first
    three and the last three elements are estimated numerically; for the
    others the estimate is copied from the computed gradient, so they always
    compare equal.

    Note that `op.device_option` is overwritten with `self._device_option`
    on the object passed in, and that `inputs[input_to_check]` is perturbed
    in place (and stepped back) while the estimate is computed.

    Inputs:
      op: the operator to be checked.
      inputs: the input data in numpy arrays.
      input_to_check: an index specifying which input blob we should
          check.
      outputs_with_grads: indices specifying which output blobs will we
          need to check gradients with. For these outputs, we will collect
          a squared sum and also feed in their gradients.
      grad_ops: the gradient operators. If not given, we will get the
          gradient operators from the gradient registry. The names of the
          gradient blobs are taken from the registry in either case, so
          `op` needs a registered gradient even when this is supplied.
      input_device_options: an optional mapping from input names to
          DeviceOptions (to override the default DeviceOption)
      ensure_outputs_are_inferred: if set will assert that the gradient
          output shapes matches the inferred shapes (by calling
          `self._assertInferTensorChecks(op, grad_ops)`).
    Outputs:
      A tuple `(passed, grad, grad_estimate)`: `passed` is True if every
      element of `grad` is close to `grad_estimate` (absolute and relative
      tolerance `self._threshold`), False otherwise.
    Raises:
      Exception: if the gradient's shape differs from the input's shape.
    """
    # Entering the checker workspace
    old_ws_name = workspace.CurrentWorkspace()
    switched_workspace = self._workspace_name != old_ws_name
    if switched_workspace:
        workspace.SwitchWorkspace(self._workspace_name, True)

    try:
        op.device_option.CopyFrom(self._device_option)
        # TODO(jiayq): use the gradient registration instead of the old
        # hack.
        # The registry is always queried: it is the only source of the
        # gradient blob names, which are needed even for caller-supplied
        # gradient operators.
        registry_grad_ops, g_input = getGradientForOp(op)
        if grad_ops is None:
            grad_ops = registry_grad_ops

        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                _input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        grad_name = g_input[input_to_check]
        _, grad = self.GetLossAndGrad(
            op, grad_ops, inputs, op.input, input_to_check, grad_name,
            outputs_with_grads,
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        if ensure_outputs_are_inferred:
            self._assertInferTensorChecks(op, grad_ops)

        full_grad_check = os.getenv('CAFFE2_FULL_GRAD_CHECK') == '1'

        dims_to_check = inputs[input_to_check].size
        for current_dim in range(dims_to_check):
            # Grad check is very expensive (as it involves running the op
            # from scratch for each of the input tensor elements). Thus,
            # let's run it by default only on a small subset of dimensions.
            # Here we apply very scientific approach: the first and the
            # last 3 elements of each tensor. Pass CAFFE2_FULL_GRAD_CHECK=1
            # env var to enable the full check
            if not full_grad_check and current_dim >= 3 and \
                    current_dim + 3 < dims_to_check:
                grad_estimate.flat[current_dim] = grad.flat[current_dim]
                continue
            # Positive gradient
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Negative gradient
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Recover the value
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2

        # Now, check correctness
        fail_mat = ~np.isclose(
            grad, grad_estimate,
            atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx],
                             grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        return ret, grad, grad_estimate
    finally:
        # After finishing -- on success or on error -- clean things up so
        # the caller is never left inside the checker workspace.
        if switched_workspace:
            # We reset the workspace to make sure everything intermediate
            # is cleaned up. Note that there is no need to delete a
            # workspace - when empty it takes a very limited amount of
            # memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)