Example #1
  def grad(self, inputs, output_grads):
    if not self.lua_bw_func:
      # Unknown how to calculate gradient.
      return [T.DisconnectedType()() for inp in inputs]

    assert len(self.in_info) == len(inputs)
    assert len(self.out_info) == len(output_grads)
    out_info = [info.copy() for info in self.in_info]
    for idx, info in enumerate(out_info):
      # Refer to input shapes. See infer_shape().
      info["shape"] = [(idx, i) for i in range(info["ndim"])]
    out_info = [info for info in out_info if info.get("gradient", "") != "disconnected"]

    grad_op = TorchWrapperOp(
      name="grad-of-%s" % self.name,
      in_info=self.in_info + self.out_info,  # inputs + output_grads
      out_info=out_info,
      lua_file=self.lua_file,
      lua_fw_func=self.lua_bw_func
    )
    input_grads = grad_op(*(inputs + output_grads))
    assert len(out_info) == len(input_grads)

    results = []
    for info in self.in_info:
      if info.get("gradient", "") == "disconnected":
        results += [T.DisconnectedType()()]
      else:
        results += input_grads[:1]
        input_grads = input_grads[1:]
    assert len(input_grads) == 0
    assert len(results) == len(self.in_info)
    return results
Example #2
    def grad(self, inputs, output_grads):
        (doutput,) = output_grads
        if self.breakpoint_grad:
            doutput = Breakpoint(self.var_names, self.cond, self.tb,
                                 self.py_vars, True, True)(doutput,
                                                           *inputs[1:])
        return [doutput] + [T.DisconnectedType()() for _ in range(self.nvars)]
Example #3
def _theano_cpu_multi_batch_beam_grad(array,
                                      start_idxs,
                                      batch_lens,
                                      beam_width,
                                      wrap_mode,
                                      pad_left=0,
                                      pad_right=0,
                                      idx_dim=0,
                                      batch_dim=1,
                                      output_grad=None):
    # Note: This is slow and hacky. This will create an index-array of the size of the original array.
    # This is calculated on the CPU. The subtensor then can be done on the GPU, but we should avoid the first part.
    D_beam = output_grad
    prod_array_shape = T.prod(array.shape)
    prod_pad_left_shape = T.prod(pad_left.shape)
    prod_pad_right_shape = T.prod(pad_right.shape)
    D_array_tmp_size = prod_array_shape
    if wrap_mode == "pad":
        D_array_tmp_size += prod_pad_left_shape + prod_pad_right_shape
    D_array_tmp_flat = T.zeros([D_array_tmp_size],
                               dtype="float32")  # with pad values
    if wrap_mode == "pad":
        # Calculate the indices for D_pad_left/D_pad_right in D_array_tmp_flat.
        pad_left_idxs = T.arange(prod_pad_left_shape) + prod_array_shape
        pad_right_idxs = T.arange(
            prod_pad_right_shape) + prod_array_shape + prod_pad_left_shape
        pad_left_idxs = pad_left_idxs.reshape(pad_left.shape)
        pad_right_idxs = pad_right_idxs.reshape(pad_right.shape)
    else:
        pad_left_idxs = pad_right_idxs = 0
    all_idxs = T.arange(T.prod(array.shape)).reshape(array.shape)
    idxs = multi_batch_beam(array=all_idxs,
                            start_idxs=start_idxs,
                            batch_lens=batch_lens,
                            beam_width=beam_width,
                            wrap_mode=wrap_mode,
                            pad_left=pad_left_idxs,
                            pad_right=pad_right_idxs,
                            idx_dim=idx_dim,
                            batch_dim=batch_dim)
    D_array_tmp_flat = T.inc_subtensor(D_array_tmp_flat[idxs.flatten()],
                                       D_beam.flatten())
    if wrap_mode == "pad":
        D_array = D_array_tmp_flat[:prod_array_shape].reshape(array.shape)
        D_pad_left = D_array_tmp_flat[pad_left_idxs.flatten()].reshape(
            pad_left.shape)
        D_pad_right = D_array_tmp_flat[pad_right_idxs.flatten()].reshape(
            pad_right.shape)
    else:
        D_array = D_array_tmp_flat.reshape(array.shape)
        D_pad_left = D_pad_right = T.DisconnectedType()()

    return D_array, D_pad_left, D_pad_right
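The gradient above boils down to a scatter-add: each element of D_beam is accumulated into a flat zero tensor at the position its source element occupied in the (padded) original array, which inc_subtensor handles even when indices repeat. A minimal sketch of just that pattern, assuming plain Theano; the names idxs, d_beam and size are made up for illustration:

import numpy
import theano
import theano.tensor as T

idxs = T.lvector("idxs")      # flattened target positions (computed via multi_batch_beam above)
d_beam = T.fvector("d_beam")  # flattened gradient of the beam output
size = T.lscalar("size")      # number of elements of the (padded) original array

d_flat = T.zeros([size], dtype="float32")
# inc_subtensor accumulates repeated indices instead of overwriting them.
d_flat = T.inc_subtensor(d_flat[idxs], d_beam)

f = theano.function([idxs, d_beam, size], d_flat)
print(f(numpy.array([0, 2, 2], dtype="int64"),
        numpy.array([1., 2., 3.], dtype="float32"), 4))
# expected: [1. 0. 5. 0.]  (index 2 receives 2 + 3)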
Example #4
    def grad(self, inputs, grads):
        x, ilist = inputs
        gz, = grads
        assert len(inputs) == 2
        if self.sparse_grad:
            raise RuntimeError(
                "sparse grad not supported for AdvancedSubtensor1Floats")

        setinc, inpl = self.set_instead_of_inc, self.inplace
        inc_op = AdvancedIncSubtensor1Floats(set_instead_of_inc=setinc,
                                             inplace=inpl)
        rval1 = [inc_op(x.zeros_like(), gz, ilist)]
        return rval1 + [T.DisconnectedType()()] * (len(inputs) - 1)
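For comparison, the same gather/scatter relationship with stock Theano ops (a sketch only: plain x[idx] below uses the integer-index AdvancedSubtensor1, whereas the classes above are float-index variants): the gradient of a gather w.r.t. x is the output gradient scattered into zeros_like(x), and the gradient w.r.t. the index list is disconnected, exactly as returned above.

import numpy
import theano
import theano.tensor as T

x = T.fvector("x")
idx = T.lvector("idx")
y = x[idx]  # gather (AdvancedSubtensor1)

# Gradient w.r.t. x: the output gradient scattered back into zeros_like(x).
# Asking for the gradient w.r.t. idx would raise, since that input is disconnected.
gx = theano.grad(T.sum(y), wrt=x)

f = theano.function([x, idx], gx)
print(f(numpy.zeros(4, dtype="float32"), numpy.array([1, 3, 3], dtype="int64")))
# expected: [0. 1. 0. 2.]  (index 3 is gathered twice, so its gradient accumulates)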
Example #5
  def grad(self, inputs, output_grads):
    """
    For Theano.

    :param inputs:
    :param output_grads:
    :return:
    """
    if self.custom_grad:
      return self.custom_grad(self, inputs, output_grads)

    if not self.c_bw_code:
      # Unknown how to calculate gradient.
      return [T.DisconnectedType()() for inp in inputs]

    assert len(self.in_info) == len(inputs)
    assert len(self.out_info) == len(output_grads)

    # Some of output_grads might be of disconnected type.
    out_shapes = self.infer_shape(None, [v.shape for v in inputs])
    assert len(out_shapes) == len(output_grads)
    for i, out_grad in enumerate(output_grads):
      if isinstance(out_grad.type, T.DisconnectedType):
        output_grads[i] = T.zeros(out_shapes[i], dtype="float32")

    kwargs_for_grad = self.kwargs_for_grad_op()
    grad_op = self.__class__(**kwargs_for_grad)

    # noinspection PyCallingNonCallable
    grad_inputs = inputs + list(make_var_tuple(self(*inputs))) + output_grads
    grad_inputs = self._filter_grad_inputs(grad_inputs)
    assert len(grad_op.in_info) == len(grad_inputs)
    # noinspection PyCallingNonCallable
    grad_outputs = make_var_tuple(grad_op(*grad_inputs))
    assert len(grad_op.out_info) == len(grad_outputs)
    if grad_op.num_dummy_outs > 0:
      grad_outputs = grad_outputs[:-grad_op.num_dummy_outs]  # remove any dummy outputs

    return self.make_results_of_gradient(grad_outputs, disconnected_type=T.DisconnectedType())
Example #6
    def grad(self, inputs, output_grads):
        array, start_idxs, batch_lens, beam_width, pad_left, pad_right = inputs
        D_beam, = output_grads

        if not isinstance(pad_left, theano.Constant):
            raise NotImplementedError("D_pad_left not implemented...")
        if not isinstance(pad_right, theano.Constant):
            raise NotImplementedError("D_pad_right not implemented...")

        grad_op = MultiBatchBeamGradAddOp(wrap_mode=self.wrap_mode,
                                          zero_with_shape=True,
                                          array_ndim=array.ndim,
                                          idx_dim=self.idx_dim,
                                          batch_dim=self.batch_dim)
        D_array = grad_op(array.shape, start_idxs, batch_lens, beam_width,
                          D_beam)

        if self.wrap_mode == "wrap_around":
            D_pad_left = D_pad_right = T.DisconnectedType()()
        elif self.wrap_mode == "pad":
            D_pad_left = D_pad_right = T.DisconnectedType()()
            # XXX...
            # D_pad_left = T.zeros(pad_left.shape, dtype="float32")
            # D_pad_right = T.zeros(pad_right.shape, dtype="float32")
        else:
            assert False, self.wrap_mode

        # These inputs are all discrete-valued. The gradient is 0 almost everywhere
        # and undefined at the integer points, so we mark them as disconnected.
        D_start_idxs = T.DisconnectedType()()
        D_batch_lens = T.DisconnectedType()()
        D_beam_width = T.DisconnectedType()()

        return [
            D_array, D_start_idxs, D_batch_lens, D_beam_width, D_pad_left,
            D_pad_right
        ]
Example #7
    def grad(self, inputs, grads):
        g_output, = grads
        x, y, idx_list = inputs
        if x.dtype in theano.tensor.discrete_dtypes:
            # The output dtype is the same as x
            gx = x.zeros_like(dtype=theano.config.floatX)
            if y.dtype in theano.tensor.discrete_dtypes:
                gy = y.zeros_like(dtype=theano.config.floatX)
            else:
                gy = y.zeros_like()
        elif x.dtype in theano.tensor.complex_dtypes:
            raise NotImplementedError("No support for complex grad yet")
        else:
            if self.set_instead_of_inc:
                gx_op = AdvancedIncSubtensor1Floats(set_instead_of_inc=True,
                                                    inplace=self.inplace)
                gx = gx_op(g_output, y.zeros_like(), idx_list)
            else:
                gx = g_output
            gy = AdvancedSubtensor1Floats()(g_output, idx_list)
            gy = T.subtensor._sum_grad_over_bcasted_dims(y, gy)

        return [gx, gy] + [T.DisconnectedType()()]
Example #8
    def grad(self, inputs, doutputs):
        return [T.DisconnectedType()() for _ in inputs]
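Returning DisconnectedType for every input, as above, tells Theano that no gradient flows through the op at all. A minimal, self-contained sketch of such an op and of how theano.grad reacts to it; StopGrad is a made-up name for illustration:

import theano
import theano.tensor as T
from theano.gradient import DisconnectedType

class StopGrad(theano.Op):
    """Identity in the forward pass; reports no gradient connection backwards."""
    __props__ = ()

    def make_node(self, x):
        x = T.as_tensor_variable(x)
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        output_storage[0][0] = inputs[0].copy()

    def grad(self, inputs, doutputs):
        # Same pattern as the example above: every input is marked disconnected.
        return [DisconnectedType()() for _ in inputs]

x = T.fvector("x")
y = StopGrad()(x)
# By default theano.grad raises DisconnectedInputError here; 'ignore' (or 'warn')
# makes it return a zero gradient for the disconnected input instead.
gx = theano.grad(T.sum(y), wrt=x, disconnected_inputs="ignore")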
Example #9
            if self.is_grad:
                place = 'theano gradient eval'
            else:
                place = 'theano eval'
            print('Breakpoint in %s, created at' % place, file=sys.stderr)
            print('  ...', file=sys.stderr)
            traceback.print_list(self.tb[-4:], sys.stderr)
            ipdb.set_trace()
            pass    # in theano breakpoint

    def grad(self, inputs, output_grads):
        (doutput,) = output_grads
        if self.breakpoint_grad:
            doutput = Breakpoint(self.var_names, self.cond,
                                 self.tb, self.py_vars, True, True) \
                          (doutput, *inputs[1:])
        return [doutput] + [T.DisconnectedType()() for _ in range(self.nvars)]

_theano_types = (theano.tensor.basic.TensorConstant,
                 theano.tensor.basic.TensorVariable,
                 theano.compile.SharedVariable,
                 )

def is_theano_var(x):
    return isinstance(x, _theano_types)

def breakpoint(output, vars=None, cond=lambda v: True, grad=True):
    tb = tuple(traceback.extract_stack()[:-1])
    py_vars = {}
    if type(vars) not in (tuple, list, dict, type(None)):
        raise ValueError('vars keyword arg must be None, dict, list or tuple')
    if not isinstance(vars, dict):