def _backward(self, state, root_gradients, variables):
    '''
    Backpropagates supplied ``root_gradients`` for one or more of the output
    variables of the Function, to calculate gradients with respect to
    ``variables``. Formally, multiplies the values of ``root_gradients`` by
    the Jacobian of the Function and keeps the subset of the output that
    corresponds to ``variables``.

    This function calls :func:`backward`, which is to be implemented by the
    user. If more than one input of this Function has ``needs_gradient``
    set, the user's :func:`backward` is called as
    ``backward(state, root_gradients, variables)`` and is expected to fill
    ``variables`` itself. Otherwise it is called as
    ``backward(state, root_gradients)`` and its return value is assigned to
    every key of ``variables``.

    Args:
        state (BackPropState): state obtained from a previous call to the
         :func:`cntk.ops.Function.forward` method on this Function for the
         computation that this gradient backpropagation corresponds to.
         When ``self.as_numpy`` is false, ``None`` is accepted and replaced
         by ``self._none_state``.
        root_gradients (dict): the gradients that will be backpropagated.
         When ``self.as_numpy`` is true, the values are converted in place
         with ``variable_value_to_seq``. If it holds exactly one entry, only
         that entry's value is handed to the user's :func:`backward`.
        variables (dict): mapping whose keys are the input variables with
         respect to which the gradients have to be computed; the values are
         filled in by this call.

    Returns:
        None. The gradients are written into ``variables`` in place.

    Raises:
        ValueError: if ``self.as_numpy`` is false and ``state`` is neither
         ``None`` nor a ``BackPropState``, or if ``self.as_numpy`` is true
         and a gradient is still ``None`` after the user's
         :func:`backward` ran.
    '''
    # The device is only needed to convert results back when as_numpy is
    # set, so it is read only in that path. Reading it unconditionally
    # would fail on ``state is None`` before the fallback below could run.
    device = None

    if self.as_numpy:
        # Must be read before ``state`` is replaced by the user payload.
        device = state.device()
        for var in root_gradients:
            root_gradients[var] = variable_value_to_seq(root_gradients[var],
                                                        var)
        state = cntk_py.UserBackPropState.data(state)
    elif not isinstance(state, cntk_py.BackPropState):
        if state is not None:
            raise ValueError('if as_numpy=False, state must be of '
                             'type BackPropState')
        state = self._none_state

    map_if_possible(variables)

    if len(root_gradients) == 1:
        # Single root gradient: pass the bare value instead of the dict.
        root_gradients = next(iter(root_gradients.values()))

    possible_wrt = [inp for inp in self.inputs if inp.needs_gradient]
    if len(possible_wrt) > 1:
        self.backward(state, root_gradients, variables)
    else:
        result = self.backward(state, root_gradients)
        for key in variables:
            variables[key] = result

    if self.as_numpy:
        for key, grad in variables.items():
            if grad is None:
                raise ValueError(
                    'gradients were not provided for all variables')
            variables[key] = sanitize_batch(key, grad, None, device)
def _forward(self, arguments, outputs, device=None, outputs_to_retain=None):
    '''
    Computes the values of the specified variables in ``outputs``, using
    values provided in ``arguments`` that correspond to each input
    `Variable` of the function whose ``is_input`` is `True`.

    This function calls :func:`forward`, which is to be implemented by the
    user. With at most one requested output the user's :func:`forward` is
    called as ``forward(args, device, outputs_to_retain)`` and must return
    ``(state, result)``. With several outputs it is called as
    ``forward(args, outputs, device, outputs_to_retain)``, returns only the
    state, and is expected to fill ``outputs`` itself.

    Args:
        arguments (tuple): Value objects of the Function's input
        outputs (iterable): outputs to fetch values for.
        device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the
         device descriptor that contains the type and id of the device on
         which the computation is. If `None`, the default device is used.
        outputs_to_retain: forwarded unchanged to the user's
         :func:`forward`.

    Returns:
        A tuple ``(state, outputs)``. ``state`` is a BackPropState, which is
        used by :func:`backward`; ``outputs`` is the filled-in mapping.

    Raises:
        ValueError: if ``self.as_numpy`` is set and an output is still
         ``None`` after the user's :func:`forward` ran.
    '''
    if self.as_numpy:
        converted = []
        for idx, value in enumerate(arguments):
            converted.append(variable_value_to_seq(value, self.inputs[idx]))
        arguments = tuple(converted)

    map_if_possible(outputs)
    map_if_possible(outputs_to_retain)

    # A lone argument is handed over bare instead of as a 1-tuple.
    if len(arguments) > 1:
        args = arguments
    else:
        args = arguments[0]

    if len(outputs) > 1:
        state = self.forward(args, outputs, device, outputs_to_retain)
    else:
        state, result = self.forward(args, device, outputs_to_retain)
        for key in outputs:
            outputs[key] = result

    # Make sure the caller always receives a BackPropState object.
    if state is None:
        state = self._none_state
    elif not isinstance(state, cntk_py.BackPropState):
        state = cntk_py.UserBackPropState(self, device, state)

    if self.as_numpy:
        for key, value in outputs.items():
            if value is None:
                raise ValueError('not all outputs have been provided')
            # FIXME: seq_starts
            outputs[key] = sanitize_batch(key, value, None, device)

    return state, outputs
def backward(self, state, root_gradients, variables, as_numpy=True):
    '''
    Backpropagates supplied ``root_gradients`` for one or more of the output
    variables of the Function, to calculate gradients with respect to
    ``variables``. Formally, multiplies the values of ``root_gradients`` by
    the Jacobian of the Function and returns the subset of the output that
    corresponds to ``variables``.

    Example:
        >>> # compute the value and the derivative of the sigmoid at 0
        >>> v = C.input_variable(shape=(1,), needs_gradient=True)
        >>> f = C.sigmoid(v)
        >>> df, fv = f.forward({v:[[0]]}, [f.output], set([f.output]))
        >>> value = list(fv.values())[0]
        >>> grad = f.backward(df, {f.output: np.ones_like(value)}, set([v]))
        >>> value
        array([[[ 0.5]]], dtype=float32)
        >>> list(grad.values())[0]
        array([[[ 0.25]]], dtype=float32)

    Args:
        state (BackPropState): state obtained from a previous call to the
         :func:`cntk.ops.Function.forward` method on this Function for the
         computation that this gradient backpropagation corresponds to.
        root_gradients (dict): the gradients that will be backpropagated
        variables (set): a list of input variables with respect to which
         the gradients have to be computed.
        as_numpy (bool): whether to return the gradients as a NumPy array.
         Default True. Specifying this as False returns a CNTK Value which
         avoids a costly conversion but returns a somewhat opaque object.

    Note:
        See :meth:`~cntk.ops.functions.Function.forward` for more examples
        on passing input data.

    Returns:
        dict: mapping of ``variables`` to NumPy arrays (or to the
        unconverted values when ``as_numpy`` is False)
    '''
    target_device = state.device()
    sanitized_roots = sanitize_var_map(self.outputs, root_gradients,
                                       None, target_device)

    # One slot per requested variable; ``_backward`` fills them in place.
    gradients = dict.fromkeys(variables)
    self._backward(state, sanitized_roots, gradients)

    if not as_numpy:
        return gradients

    for wrt in gradients:
        gradients[wrt] = variable_value_to_seq(gradients[wrt], wrt)
    return gradients
def _forward(self, arguments, outputs, device=None, outputs_to_retain=None):
    '''
    Evaluates the user-defined :func:`forward` and stores the values of the
    variables requested in ``outputs``. The values in ``arguments``
    correspond to each input `Variable` of the function whose ``is_input``
    is `True`.

    The user's :func:`forward` is invoked in one of two ways: for zero or
    one requested output as ``forward(args, device, outputs_to_retain)``,
    returning ``(state, result)``; for several outputs as
    ``forward(args, outputs, device, outputs_to_retain)``, returning the
    state only and filling ``outputs`` itself.

    Args:
        arguments (tuple): Value objects of the Function's input
        outputs (iterable): outputs to fetch values for.
        device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the
         device descriptor that contains the type and id of the device on
         which the computation is. If `None`, the default device is used.
        outputs_to_retain: passed through to the user's :func:`forward`.

    Returns:
        A tuple ``(state, outputs)`` where ``state`` is a BackPropState
        instance, which is used by :func:`backward`.

    Raises:
        ValueError: if ``self.as_numpy`` is set and some output was left
         as ``None``.
    '''
    if self.as_numpy:
        arguments = tuple([
            variable_value_to_seq(value, self.inputs[position])
            for position, value in enumerate(arguments)
        ])

    for mapping in (outputs, outputs_to_retain):
        map_if_possible(mapping)

    # Unwrap the tuple when there is only a single argument.
    args = arguments[0] if len(arguments) <= 1 else arguments

    if len(outputs) <= 1:
        state, only_result = self.forward(args, device, outputs_to_retain)
        for out_var in outputs:
            outputs[out_var] = only_result
    else:
        state = self.forward(args, outputs, device, outputs_to_retain)

    if state is None:
        state = self._none_state
    else:
        already_wrapped = isinstance(state, cntk_py.BackPropState)
        if not already_wrapped:
            # Wrap arbitrary user state so it can travel through the
            # BackPropState interface.
            state = cntk_py.UserBackPropState(self, device, state)

    if self.as_numpy:
        for out_var, out_value in outputs.items():
            if out_value is None:
                raise ValueError('not all outputs have been provided')
            # FIXME: seq_starts
            outputs[out_var] = sanitize_batch(out_var, out_value, None,
                                              device)

    return state, outputs
def _backward(self, state, root_gradients, variables):
    '''
    Backpropagates supplied ``root_gradients`` for one or more of the output
    variables of the Function, to calculate gradients with respect to
    ``variables``. Formally, multiplies the values of ``root_gradients`` by
    the Jacobian of the Function and keeps the subset of the output that
    corresponds to ``variables``.

    This function calls :func:`backward`, which is to be implemented by the
    user. With more than one entry in ``variables`` the user's
    :func:`backward` is called as
    ``backward(user_state, root_gradients, variables)`` and is expected to
    fill ``variables`` itself. Otherwise it is called as
    ``backward(user_state, root_gradient)`` with the first root gradient
    only, and its return value is assigned to every key of ``variables``.

    Args:
        state (BackPropState): state obtained from a previous call to the
         :func:`cntk.ops.Function.forward` method on this Function for the
         computation that this gradient backpropagation corresponds to.
        root_gradients (dict): the gradients that will be backpropagated.
         The values are converted in place with ``variable_value_to_seq``.
        variables (dict): mapping whose keys are the input variables with
         respect to which the gradients have to be computed; the values are
         filled in by this call.

    Returns:
        None. The gradients are written into ``variables`` in place.

    Raises:
        ValueError: if ``root_gradients`` is empty in the single-variable
         case, or if a gradient is still ``None`` after the user's
         :func:`backward` ran.
    '''
    for var in root_gradients:
        root_gradients[var] = variable_value_to_seq(root_gradients[var], var)

    map_if_possible(variables)

    # The user's backward() gets the payload stored by forward(), not the
    # wrapping state object.
    user_state = cntk_py.UserBackPropState.data(state)

    if len(variables) > 1:
        self.backward(user_state, root_gradients, variables)
    else:
        # Take the first root gradient. The else clause catches the empty
        # case, which would otherwise leave ``rg`` unbound.
        for rg in root_gradients.values():
            break
        else:
            raise ValueError('no root gradients were provided')

        result = self.backward(user_state, rg)
        for key in variables:
            variables[key] = result

    # Loop-invariant, so it is looked up once.
    device = state.device()
    for key, grad in variables.items():
        if grad is None:
            raise ValueError(
                'gradients were not provided for all variables')
        variables[key] = sanitize_batch(key, grad, None, device)
def _backward(self, state, root_gradients, variables):
    '''
    Backpropagates supplied ``root_gradients`` for one or more of the output
    variables of the Function, to calculate gradients with respect to
    ``variables``. Formally, multiplies the values of ``root_gradients`` by
    the Jacobian of the Function and keeps the subset of the output that
    corresponds to ``variables``.

    This function calls :func:`backward`, which is to be implemented by the
    user. When ``variables`` has more than one entry, the user's
    :func:`backward` receives ``(user_state, root_gradients, variables)``
    and is expected to fill ``variables`` itself. Otherwise it receives
    ``(user_state, root_gradient)`` with the first root gradient only, and
    its return value is stored under every key of ``variables``.

    Args:
        state (BackPropState): state obtained from a previous call to the
         :func:`cntk.ops.Function.forward` method on this Function for the
         computation that this gradient backpropagation corresponds to.
        root_gradients (dict): the gradients that will be backpropagated.
         The values are converted in place with ``variable_value_to_seq``.
        variables (dict): mapping whose keys are the input variables with
         respect to which the gradients have to be computed; the values are
         filled in by this call.

    Returns:
        None. The gradients are written into ``variables`` in place.

    Raises:
        ValueError: if ``root_gradients`` is empty in the single-variable
         case, or if a gradient is still ``None`` after the user's
         :func:`backward` ran.
    '''
    for root_var in root_gradients:
        root_gradients[root_var] = variable_value_to_seq(
            root_gradients[root_var], root_var)

    map_if_possible(variables)

    # Unwrap the payload that forward() stored in the state object.
    user_state = cntk_py.UserBackPropState.data(state)

    if len(variables) > 1:
        self.backward(user_state, root_gradients, variables)
    else:
        # Pick the first root gradient. An empty mapping would leave
        # ``first_gradient`` unbound, so fail with a clear error instead.
        for first_gradient in root_gradients.values():
            break
        else:
            raise ValueError('no root gradients were provided')

        result = self.backward(user_state, first_gradient)
        for wrt in variables:
            variables[wrt] = result

    # The device does not change per variable; fetch it once.
    device = state.device()
    for wrt, gradient in variables.items():
        if gradient is None:
            raise ValueError(
                'gradients were not provided for all variables')
        variables[wrt] = sanitize_batch(wrt, gradient, None, device)
def asarray(variable, value):
    '''
    Converts a Value object to a sequence of NumPy arrays (if dense) or CSR
    arrays (if sparse).

    Args:
        variable: the variable that ``value`` belongs to; its ``shape`` is
         used to look up the sparse-to-dense conversion network.
        value: the Value object to convert.

    Returns:
        For dense data, the result of ``variable_value_to_seq``; for sparse
        data, a list with one ``scipy.sparse.csr_matrix`` per sequence.
    '''
    if not value.is_sparse():
        from cntk.utils import variable_value_to_seq
        return variable_value_to_seq(value, variable)

    to_dense = _sparse_to_dense_network_cache(variable.shape)

    warnings.warn(
        'converting Value object to CSR format might be very costly')

    # TODO: Add direct conversion, since creating an intermediate array might
    # be very slow
    dense_sequences = to_dense.eval(value, value.device())
    return [sparse.csr_matrix(sequence) for sequence in dense_sequences]
def forward(self, arguments, outputs, keep_for_backward=None, device=None):
    '''
    Computes the values of the specified variables in ``outputs``, using
    values provided in ``arguments`` that correspond to each input
    `Variable` of the function whose ``is_input`` is `True`.

    Example:
        >>> v = C.input_variable(shape=(3,))
        >>> f = C.reciprocal(v)
        >>> _, fv = f.forward({v:[[1, 2, 4]]}, [f.output])
        >>> list(fv.values())[0]
        array([[[ 1.  ,  0.5 ,  0.25]]], dtype=float32)

    Args:
        arguments: maps variables to their input data. The interpretation
         depends on the input type:

           * dict: keys are input variable or names, and values are the
             input data. To specify a minibatch, provide a list of arrays.
             The shape of each array must be compatible with the shape of
             the dictionary key. If the array denotes a sequence then the
             elements of the sequence are grouped along axis 0.
           * any other type: if node has a unique input, arguments is
             mapped to this input.

         For nodes with more than one input, only dict is allowed.

         In both cases, every sample in the data will be interpreted as a
         new sequence.

         Sequences can be marked as continuations of the same sequence in
         the previous minibatch (that is the sequence in the same slot).
         There are two possibilities for this:

          * specifying arguments as a `tuple` where the first element is
            used as arguments and the second one will be used as a list of
            bools, denoting whether a sequence is a new one (`True`) or a
            continuation of the sequence in the same slot of the previous
            minibatch (`False`). This will be applied to all batches.
          * specifying arguments as a dictionary of variables to tuples
            where the first element is used as arguments and the second
            one will be used as a list of bools, denoting whether a
            sequence is a new one (`True`) or a continuation of the
            sequence in the same slot of the previous minibatch (`False`).
            This will be applied to all batches.

         Data should be either NumPy arrays or a
         :class:`~cntk.io.MinibatchData` instance.
        outputs (iterable): outputs to fetch values for.
        keep_for_backward (set, default `None`): the subset of the
         Function's output variables for which gradients shall be
         calculated in a subsequent backward call. If `None`, the returned
         state will be `None` and a subsequent call to :func:`backward`
         will not be possible.
        device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the
         device descriptor that contains the type and id of the device on
         which the computation is. If `None`, the default device is used.

    Returns:
         A tuple (BackPropState, map of outputs to NumPy arrays). The
         BackPropState is a handle taken by :func:`backward`.
    '''
    if device is None:
        device = DeviceDescriptor.use_default_device()

    sanitized_args = sanitize_var_map(self.arguments, arguments, None, device)

    # One empty slot per requested output; filled by the base-class call.
    fetched = dict.fromkeys(outputs)
    retained = set(keep_for_backward) if keep_for_backward else set()

    state = super(Function, self)._forward(sanitized_args, fetched, device,
                                           retained)

    for out_var, raw_value in fetched.items():
        fetched[out_var] = variable_value_to_seq(raw_value, out_var)

    return state, fetched
def forward(self, arguments, outputs=None, keep_for_backward=None, device=None, as_numpy=True):
    '''
    Computes the values of the specified variables in ``outputs``, using
    values provided in ``arguments`` that correspond to each input
    `Variable` of the function (i.e. those that have ``is_input = True``).

    Example:
        >>> # Example of passing dense data
        >>> v = C.input_variable(shape=(3,))
        >>> f = C.reciprocal(v)
        >>> _, fv = f.forward({v:[[1, 2, 4]]})
        >>> list(fv.values())[0]
        array([[[ 1.  ,  0.5 ,  0.25]]], dtype=float32)

    Example:
        >>> # Passing sparse values as one-hot with a vocabulary size of 5
        >>> vocab_size = 5
        >>> v = C.input_variable(shape=(vocab_size,), is_sparse=True)
        >>> f = C.times(v, np.eye(vocab_size))
        >>> # Passing a batch of two sequences:
        >>> # 1st sequence: word 1
        >>> # 2nd sequence: words 2 and 4
        >>> batch = [[1],[2,4]]
        >>> sparse_batch = C.one_hot(batch, vocab_size)
        >>> _, fv = f.forward({v:sparse_batch})
        >>> list(fv.values())[0]
        [array([[ 0.,  1.,  0.,  0.,  0.]], dtype=float32),
         array([[ 0.,  0.,  1.,  0.,  0.],
                [ 0.,  0.,  0.,  0.,  1.]], dtype=float32)]

    Example:
        >>> # Doing the same, but with a CSR matrix from scipy.sparse
        >>> vocab_size = 5
        >>> from scipy.sparse import csr_matrix
        >>> v = C.input_variable(shape=(vocab_size,), is_sparse=True)
        >>> f = C.times(v, np.eye(vocab_size))
        >>> # Note that csr_matrix automatically uses a sparse representation underneath.
        >>> sparse_batch = [csr_matrix([[0,1,0,0,0]]), csr_matrix([[0,0,1,0,0], [0,0,0,0,1]])]
        >>> _, fv = f.forward({v:sparse_batch})
        >>> list(fv.values())[0]
        [array([[ 0.,  1.,  0.,  0.,  0.]], dtype=float32),
         array([[ 0.,  0.,  1.,  0.,  0.],
                [ 0.,  0.,  0.,  0.,  1.]], dtype=float32)]
        <BLANKLINE>
        >>> # Much more efficient, however, is to incrementally create CSR arrays.
        >>> # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html
        >>> # for more information.
        >>> def seq_to_csr_matrix(seq, vocab_size):
        ...     indptr = [0]
        ...     indices = []
        ...     data = []
        ...     for term_idx in seq:
        ...         indices.append(term_idx)
        ...         data.append(1)
        ...         indptr.append(len(indices))
        ...     return csr_matrix((data, indices, indptr), shape=(len(seq), vocab_size))
        >>> sparse_batch = [seq_to_csr_matrix(seq, vocab_size) for seq in batch]
        >>> _, fv = f.forward({v:sparse_batch})
        >>> list(fv.values())[0]
        [array([[ 0.,  1.,  0.,  0.,  0.]], dtype=float32),
         array([[ 0.,  0.,  1.,  0.,  0.],
                [ 0.,  0.,  0.,  0.,  1.]], dtype=float32)]

    Args:
        arguments: maps variables to their input data. The interpretation
         depends on the input type:

           * dict: keys are input variable or names, and values are the
             input data. To specify a minibatch, provide a list of arrays.
             The shape of each array must be compatible with the shape of
             the dictionary key. If the array denotes a sequence then the
             elements of the sequence are grouped along axis 0.
           * any other type: if node has a unique input, arguments is
             mapped to this input.

         For nodes with more than one input, only dict is allowed.

         In both cases, every sample in the data will be interpreted as a
         new sequence.

         Sequences can be marked as continuations of the same sequence in
         the previous minibatch (that is the sequence in the same slot).
         There are two possibilities for this:

          * specifying arguments as a `tuple` where the first element is
            used as arguments and the second one will be used as a list of
            bools, denoting whether a sequence is a new one (`True`) or a
            continuation of the sequence in the same slot of the previous
            minibatch (`False`). This will be applied to all batches.
          * specifying arguments as a dictionary of variables to tuples
            where the first element is used as arguments and the second
            one will be used as a list of bools, denoting whether a
            sequence is a new one (`True`) or a continuation of the
            sequence in the same slot of the previous minibatch (`False`).
            This will be applied to all batches.

         Data should be either NumPy arrays or a
         :class:`~cntk.io.MinibatchData` instance.
        outputs (iterable, optional): outputs to fetch values for. If not
         set, all outputs of the function will be fetched.
        keep_for_backward (set, default `None`): the subset of the
         Function's output variables for which gradients shall be
         calculated in a subsequent backward call. If `None`, the returned
         state will be `None` and a subsequent call to :func:`backward`
         will not be possible.
        device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the
         device descriptor that contains the type and id of the device on
         which the computation is. If `None`, the default device is used.
        as_numpy (bool): whether to return the result as a NumPy array.
         Default True. Specifying this as False returns a CNTK Value which
         avoids a costly conversion but returns a somewhat opaque object.

    Returns:
         A tuple (BackPropState, map of outputs to NumPy arrays). The
         BackPropState is a handle taken by :func:`backward`.
    '''
    if device is None:
        device = DeviceDescriptor.use_default_device()

    sanitized_args = sanitize_var_map(self.arguments, arguments, None, device)

    # Default to every output of the function when none were requested.
    requested = self.outputs if outputs is None else outputs

    # One empty slot per requested output; filled by the base-class call.
    fetched = dict.fromkeys(requested)
    retained = set(keep_for_backward) if keep_for_backward else set()

    state = super(Function, self)._forward(sanitized_args, fetched, device,
                                           retained)

    if as_numpy:
        for out_var, raw_value in fetched.items():
            fetched[out_var] = variable_value_to_seq(raw_value, out_var)

    return state, fetched