Example #1
    def L_op(self, inputs, outputs, out_grads):
        x, k = inputs
        k_grad = grad_undefined(self, 1, k, "topk: k is not differentiable")

        if not (self.return_indices or self.return_values):
            x_grad = grad_undefined(
                self,
                0,
                x,
                "topk: cannot get gradient"
                " without both indices and values",
            )
        else:
            x_shp = theano.tensor.shape(x)
            z_grad = out_grads[0]
            ndim = x.ndim
            axis = self.axis % ndim
            grad_indices = [
                arange(x_shp[i]).dimshuffle([0] + ["x"] * (ndim - i - 1))
                if i != axis else outputs[-1] for i in range(ndim)
            ]
            x_grad = x.zeros_like(dtype=z_grad.dtype)
            x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)

        return [x_grad, k_grad]
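
For orientation, here is a minimal self-contained sketch (not taken from any example on this page; the Op name and its behaviour are hypothetical) of the pattern these examples share: a custom Theano Op whose differentiable input receives a real gradient while its integer input is declared non-differentiable via grad_undefined.

import theano.tensor as T
from theano.gof import Apply, Op
from theano.gradient import grad_undefined


class PassThroughWithK(Op):
    """Hypothetical Op: returns x unchanged and ignores the integer k."""

    def make_node(self, x, k):
        x = T.as_tensor_variable(x)
        k = T.as_tensor_variable(k)
        return Apply(self, [x, k], [x.type()])

    def perform(self, node, inputs, output_storage):
        x, k = inputs
        output_storage[0][0] = x.copy()

    def L_op(self, inputs, outputs, out_grads):
        x, k = inputs
        # Identity in x, so the gradient w.r.t. x is just the output gradient;
        # k is an integer argument, so its gradient is undefined.
        return [out_grads[0],
                grad_undefined(self, 1, k, "k is integer-valued and not differentiable")]
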
Example #2
    def L_op(self, inputs, outputs, output_grads):
        # Gradients computed by Op
        assert self.compute_grad and len(outputs) == 2
        gradients = outputs[1]
        assert gradients is not None

        # Gradient of the original function, used to apply the chain rule
        grad_op = output_grads[0]
        grad_shuffle = GpuDimShuffle(
            input_broadcastable=(False, False, False),
            new_order=(1, 0, 2))(gradients)
        grad_bdot = T.basic.batched_dot(grad_op, grad_shuffle)
        grad_shuffle_reverse = GpuDimShuffle(
            input_broadcastable=(False, False, False),
            new_order=(1, 0, 2))(grad_bdot)
        return [
            grad_shuffle_reverse,
            grad_undefined(self, 1, inputs[1]),
            grad_undefined(self, 2, inputs[2])
        ]
Example #3
 def grad(self, inputs, output_grads):
     return [
         self.gradients,
         grad_undefined(self, 1, inputs[1]),
         grad_undefined(self, 2, inputs[2]),
         grad_undefined(self, 3, inputs[3]),
         grad_undefined(self, 4, inputs[4])
     ]
Example #4
 def grad(self, inputs, output_grads):
   # self.gradients.shape = [seqLen, batchSize, outputSize]
   # output_grads[0].shape = [batchSize]  (one cost per sequence)
   # So, reshape output_grads to [1, batchSize, 1] for broadcasting
   output_grad = output_grads[0].reshape( (1, -1, 1) )
   return [output_grad * self.gradients,
           grad_undefined(self, 1, inputs[1]),
           grad_undefined(self, 2, inputs[2])]
Example #5
 def grad(self, inputs, output_grads):
   # self.gradients.shape = [seqLen, batchSize, outputSize]
   # output_grads[0].shape = [batchSize]  (one cost per sequence)
   # So, reshape output_grads to [1, batchSize, 1] for broadcasting
   output_grad = output_grads[0].reshape( (1, -1, 1) )
   return [output_grad * self.gradients,
           grad_undefined(self, 1, inputs[1]),
           grad_undefined(self, 2, inputs[2])]
Example #6
 def grad(self, inputs, output_grads):
     gradients = CPUCTCGrad()(*inputs)
     return [
         gradients,
         grad_undefined(self, 1, inputs[1]),
         grad_undefined(self, 2, inputs[2]),
         grad_undefined(self, 3, inputs[3]),
     ]
Example #7
 def grad(self, inputs, output_grads):
     # output_grads is a list of gradients, one per output; take the first
     gradients = output_grads[0] * CPUCTCGrad()(*inputs)
     return [
         gradients,
         grad_undefined(self, 1, inputs[1]),
         grad_undefined(self, 2, inputs[2]),
         grad_undefined(self, 3, inputs[3])
     ]
Example #8
  def grad(self, inputs, output_grads):
    # Enable gradient computation
    self.computeGradient.set_value(np.asarray([1], dtype=np.int32))

    return [self.gradients,
            grad_undefined(self, 1, inputs[1]),
            grad_undefined(self, 2, inputs[2]),
            grad_undefined(self, 3, inputs[3]),
            grad_undefined(self, 4, inputs[4])]
Example #9
    def L_op(self, inputs, outputs, output_grads):
        assert self.compute_grad and len(outputs) == 2
        gradients = outputs[1]
        assert gradients is not None

        grad_op = output_grads[0]
        total_grad = T.basic.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2)
        return [total_grad,
                grad_undefined(self, 1, inputs[1]),
                grad_undefined(self, 2, inputs[2])]
Example #10
    def L_op(self, inputs, outputs, output_grads):
        assert self.compute_grad and len(outputs) == 2
        gradients = outputs[1]
        assert gradients is not None

        grad_op = output_grads[0]
        total_grad = T.basic.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2)
        return [total_grad,
                grad_undefined(self, 1, inputs[1]),
                grad_undefined(self, 2, inputs[2])]
Example #11
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        Wgrad = gpu_sparse_block_outer(W.zeros_like(), h, go, inputIdx, outputIdx)
        hgrad = gpu_sparse_block_gemv(h.zeros_like(), W.dimshuffle((1, 0, 3, 2)), go, outputIdx, inputIdx)
        return [
            go,
            Wgrad,
            hgrad,
            grad_undefined(self, 3, inputIdx, "grad of inputIdx makes no sense"),
            grad_undefined(self, 4, outputIdx, "grad of outputIdx makes no sense"),
        ]
Example #12
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        outer_fun = SparseBlockOuter(self.inplace)
        gemv_fun = SparseBlockGemv(self.inplace)

        Wgrad = outer_fun(W.zeros_like(), h, go, inputIdx, outputIdx)
        hgrad = gemv_fun(h.zeros_like(), W.dimshuffle((1, 0, 3, 2)),
                         go, outputIdx, inputIdx)
        return [go, Wgrad, hgrad,
                grad_undefined(self, 3, inputIdx,
                               "grad of inputIdx makes no sense"),
                grad_undefined(self, 4, outputIdx,
                               "grad of outputIdx makes no sense")]
Example #13
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        Wgrad = sparse_block_outer_ss(W.zeros_like(),
                                      h, go, inputIdx, outputIdx)
        hgrad = sparse_block_gemv_ss(h.zeros_like(),
                                     W.dimshuffle((1, 0, 3, 2)),
                                     go,
                                     outputIdx, inputIdx)
        return [go, Wgrad, hgrad,
                grad_undefined(self, 3, inputIdx,
                               "grad of inputIdx makes no sense"),
                grad_undefined(self, 4, outputIdx,
                               "grad of outputIdx makes no sense")]
Example #14
  def grad(self, inp, grads):
    outs = self(*inp)
    grad_op = ROIPoolingGradOp(
        self.pooled_h, self.pooled_w, self.spatial_scale)
    data_grad = grad_op(*(inp + [outs[1], grads[0]]))

    return [data_grad, grad_undefined(self, 1, inp[1])]
Example #15
 def grad(self, inputs, ograd):
     return [
         gradient.grad_undefined(
             self, k, inp, "No gradient defined through random sampling op"
         )
         for k, inp in enumerate(inputs)
     ]
Example #16
    def L_op(self, inputs, outputs, output_grads):
        # Gradients computed by Op
        assert self.compute_grad and len(outputs) == 2
        gradients = outputs[1]
        assert gradients is not None

        # Gradient of the original function, used to apply the chain rule
        grad_op = output_grads[0]
        grad_shuffle = GpuDimShuffle(input_broadcastable=(False, False, False,),
                                     new_order=(1, 0, 2))(gradients)
        grad_bdot = T.basic.batched_dot(grad_op, grad_shuffle)
        grad_shuffle_reverse = GpuDimShuffle(input_broadcastable=(False, False, False,),
                                             new_order=(1, 0, 2))(grad_bdot)
        return [grad_shuffle_reverse,
                grad_undefined(self, 1, inputs[1]),
                grad_undefined(self, 2, inputs[2])]
Example #17
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        # might revise that interface to not have a huge output
        Wgrad = sparse_block_outer_ss(W.zeros_like(),
                                      h, go, inputIdx, outputIdx)
        hgrad = sparse_block_gemv_ss(h.zeros_like(),
                                     W.dimshuffle((1, 0, 3, 2)),
                                     go,
                                     outputIdx, inputIdx)
        return [go, Wgrad, hgrad,
                grad_undefined(self, 3, inputIdx,
                               "grad of inputIdx makes no sense"),
                grad_undefined(self, 4, outputIdx,
                               "grad of outputIdx makes no sense")]
Example #18
    def grad(self, inp, grads):
        outs = self(*inp)
        grad_op = ROIPoolingGradOp(self.pooled_h, self.pooled_w,
                                   self.spatial_scale)
        data_grad = grad_op(*(inp + [outs[1], grads[0]]))

        return [data_grad, grad_undefined(self, 1, inp[1])]
Example #19
 def grad(self, inputs, output_grads):
     a, axis = inputs
     indices = self.__get_argsort_indices(a, axis)
     inp_grad = output_grads[0][tuple(indices)]
     axis_grad = grad_undefined(
         self, 1, axis, "The gradient of sort is not defined "
         "with respect to the integer axes itself")
     return [inp_grad, axis_grad]
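
The sort gradient above routes output_grads[0] back through the inverse permutation computed by __get_argsort_indices. A standalone 1-D illustration of that routing in plain Theano (variable names are illustrative, not taken from the example):

import theano
import theano.tensor as T

x = T.vector("x")
g_sorted = T.vector("g_sorted")        # gradient w.r.t. sort(x)

# argsort(argsort(x)) is the inverse permutation: entry i gives the position
# of x[i] in the sorted output, so indexing with it sends each gradient back
# to the element it came from.
inv_perm = T.argsort(T.argsort(x))
g_x = g_sorted[inv_perm]

f = theano.function([x, g_sorted], g_x)
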
Example #20
 def grad(self, inputs, output_grads):
     # No grad defined for integers.
     inp, axis = inputs
     inp_grad = inp.zeros_like()
     axis_grad = grad_undefined(
         self, 1, axis, "argsort is not defined for non-integer axes so"
         " argsort(x, axis+eps) is undefined")
     return [inp_grad, axis_grad]
Example #21
    def grad(self, inp, grads):
        x, neib_shape, neib_step = inp
        gz, = grads

        if self.mode in ['valid', 'ignore_borders']:
            if (neib_shape is neib_step or
                neib_shape == neib_step or
                # Theano Constant == does not compare the data;
                # the equals function does that.
                (hasattr(neib_shape, "equals") and
                 neib_shape.equals(neib_step))):
                return [neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                        grad_undefined(self, 1, neib_shape),
                        grad_undefined(self, 2, neib_step)]
        return [grad_not_implemented(self, 0, x),
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step)]
Example #22
    def grad(self, inp, grads):
        x, neib_shape, neib_step = inp
        gz, = grads

        if self.mode in ['valid', 'ignore_borders']:
            if (neib_shape is neib_step or
                neib_shape == neib_step or
                # Theano Constant == does not compare the data;
                # the equals function does that.
                (hasattr(neib_shape, "equals") and
                 neib_shape.equals(neib_step))):
                return [neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                        grad_undefined(self, 1, neib_shape),
                        grad_undefined(self, 2, neib_step)]
        return [grad_not_implemented(self, 0, x),
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step)]
Example #23
 def grad(self, inputs, output_grads):
     # No grad defined for integers.
     inp, axis = inputs
     inp_grad = inp.zeros_like()
     axis_grad = grad_undefined(
         self, 1, axis,
         "argsort is not defined for non-integer axes so"
         " argsort(x, axis+eps) is undefined")
     return [inp_grad, axis_grad]
Example #24
 def grad(self, inputs, output_grads):
     a, axis = inputs
     indices = self.__get_argsort_indices(a, axis)
     inp_grad = output_grads[0][tuple(indices)]
     axis_grad = grad_undefined(
         self, 1, axis,
         "The gradient of sort is not defined "
         "with respect to the integer axes itself")
     return [inp_grad, axis_grad]
Example #25
    def L_op(self, inputs, outputs, out_grads):
        x, k = inputs
        k_grad = grad_undefined(self, 1, k, 'topk: k is not differentiable')

        if not (self.return_indices or self.return_values):
            x_grad = grad_undefined(
                self, 0, x, 'topk: cannot get gradient'
                ' without both indices and values')
        else:
            x_shp = theano.tensor.shape(x)
            z_grad = out_grads[0]
            ndim = x.ndim
            axis = self.axis % ndim
            grad_indices = [
                arange(x_shp[i]).dimshuffle([0] + ['x'] * (ndim - i - 1))
                if i != axis else outputs[-1] for i in range(ndim)]
            x_grad = x.zeros_like(dtype=z_grad.dtype)
            x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)

        return [x_grad, k_grad]
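
The else branch above scatters z_grad back to the positions reported by the indices output. A standalone sketch of the same set_subtensor pattern for a 2-D input with the top k taken along axis 1 (shapes and variable names are illustrative assumptions):

import theano
import theano.tensor as T

x = T.matrix("x")
z_grad = T.matrix("z_grad")            # gradient w.r.t. the k selected values
idx = T.lmatrix("idx")                 # indices of those values along axis 1

# Row indices broadcast against the (n_rows, k) index matrix, exactly like the
# arange(...).dimshuffle(...) terms in grad_indices above.
rows = T.arange(x.shape[0]).dimshuffle(0, "x")
x_grad = T.set_subtensor(T.zeros_like(x)[rows, idx], z_grad)

f = theano.function([x, z_grad, idx], x_grad)
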
Example #26
    def grad(self, inputs, grads):
        logger.warning("BernoulliOp.grad(...) called")

        prob = inputs[0]
        noise = inputs[1]
        #import ipdb; ipdb.set_trace()

        #g0 = prob.zeros_like().astype(theano.config.floatX)
        g0 = prob * grads[0]
        g1 = grad_undefined(self, 1, noise)
        return [g0, g1]
Example #27
    def grad(self, inputs, grads):
        logger.warning("BernoulliOp.grad(...) called")

        prob = inputs[0]
        noise = inputs[1]
        #import ipdb; ipdb.set_trace()

        #g0 = prob.zeros_like().astype(theano.config.floatX)
        g0 = prob * grads[0]
        g1 = grad_undefined(self, 1, noise)
        return [g0, g1]
Example #28
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(W, T.zeros_like(V[0, 0, 0,
                                                                 0, :]), d,
                                               dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ',b=' + b_name + ')')

        return [dCdV, dCdW, dCdb, dCdd]
Example #29
    def grad(self, inp, grads):
        x, neib_shape, neib_step = inp
        gz, = grads

        if self.mode in ['valid', 'ignore_borders']:
            if (neib_shape is neib_step or neib_shape == neib_step or
                    # Theano Constant == does not compare the data;
                    # the equals function does that.
                    (hasattr(neib_shape, "equals") and
                     neib_shape.equals(neib_step))):
                return [
                    neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                    grad_undefined(self, 1, neib_shape),
                    grad_undefined(self, 2, neib_step)
                ]

        if self.mode in ['valid']:
            # Iterate over neighborhood positions, summing contributions.
            def pos2map(pidx, pgz, prior_result, neib_shape, neib_step):
                '''
                Helper function that adds gradient contribution from a single
                neighborhood position i,j.
                pidx = Index of position within neighborhood.
                pgz  = Gradient of shape (batch_size*num_channels*neibs)
                prior_result  = Shape (batch_size, num_channels, rows, cols)
                neib_shape = Number of rows, cols in a neighborhood.
                neib_step  = Step sizes from image2neibs.
                '''
                nrows, ncols = neib_shape
                rstep, cstep = neib_step
                batch_size, num_channels, rows, cols = prior_result.shape
                i = pidx // ncols
                j = pidx - (i * ncols)
                # This position does not touch some img pixels in valid mode.
                result_indices = prior_result[:, :,
                                              i:(rows - nrows + i + 1):rstep,
                                              j:(cols - ncols + j + 1):cstep]
                newshape = (batch_size, num_channels) + \
                           ((rows - nrows) // rstep + 1,) + \
                           ((cols - ncols) // cstep + 1,)
                return T.inc_subtensor(result_indices, pgz.reshape(newshape))

            indices = T.arange(neib_shape[0] * neib_shape[1])
            pgzs = gz.dimshuffle((1, 0))
            result, _ = theano.scan(fn=pos2map,
                                    sequences=[indices, pgzs],
                                    outputs_info=T.zeros(x.shape),
                                    non_sequences=[neib_shape, neib_step])
            grad_input = result[-1]
            return [
                grad_input,
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step)
            ]

        return [
            grad_not_implemented(self, 0, x),
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step)
        ]
Example #30
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(
            W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ',b=' + b_name + ')')

        return [dCdV, dCdW, dCdb, dCdd]
Example #31
    def grad(self, inputs, output_gradients):
        C, d, WShape, B = inputs
        dLdA, = output_gradients

        z = T.zeros_like(C[0, 0, 0, 0, :])
        dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
        # d actually does affect the outputs, so it's not disconnected
        dLdd = grad_undefined(self, 1, d)
        # The shape of the weights doesn't affect the output elements
        dLdWShape = DisconnectedType()()
        dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)

        return [dLdC, dLdd, dLdWShape, dLdB]
Example #32
    def grad(self, inp, grads):
        axis, tensors = inp[0], inp[1:]
        gz, = grads

        rval = [grad_undefined(self, 0, axis)]

        out = ConcatenateGrad()(gz, axis, *tensors)

        if not isinstance(out, list):
            out = [out]

        rval = rval + out
        return rval
Example #33
    def grad(self, inp, grads):
        axis, tensors = inp[0], inp[1:]
        gz, = grads

        rval = [grad_undefined(self, 0, axis)]

        out = ConcatenateGrad()(gz, axis, *tensors)

        if not isinstance(out, list):
            out = [out]

        rval = rval + out
        return rval
Example #34
    def grad(self, inputs, output_gradients):
        C, d, WShape, B = inputs
        dLdA, = output_gradients

        z = T.zeros_like(C[0, 0, 0, 0, :])
        dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
        # d actually does affect the outputs, so it's not disconnected
        dLdd = grad_undefined(self, 1, d)
        # The shape of the weights doesn't affect the output elements
        dLdWShape = DisconnectedType()()
        dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)

        return [dLdC, dLdd, dLdWShape, dLdB]
Example #35
    def grad(self, inp, grads):
        x, neib_shape, neib_step = inp
        gz, = grads

        if self.mode in ['valid', 'ignore_borders']:
            if (neib_shape is neib_step or
                neib_shape == neib_step or
                # Theano Constant == does not compare the data;
                # the equals function does that.
                (hasattr(neib_shape, "equals") and
                 neib_shape.equals(neib_step))):
                return [neibs2images(gz, neib_shape, x.shape, mode=self.mode),
                        grad_undefined(self, 1, neib_shape),
                        grad_undefined(self, 2, neib_step)]

        if self.mode in ['valid']:
            # Iterate over neighborhood positions, summing contributions.
            def pos2map(pidx, pgz, prior_result, neib_shape, neib_step):
                '''
                Helper function that adds gradient contribution from a single
                neighborhood position i,j.
                pidx = Index of position within neighborhood.
                pgz  = Gradient of shape (batch_size*num_channels*neibs)
                prior_result  = Shape (batch_size, num_channels, rows, cols)
                neib_shape = Number of rows, cols in a neighborhood.
                neib_step  = Step sizes from image2neibs.
                '''
                nrows, ncols = neib_shape
                rstep, cstep = neib_step
                batch_size, num_channels, rows, cols = prior_result.shape
                i = pidx // ncols
                j = pidx - (i * ncols)
                # This position does not touch some img pixels in valid mode.
                result_indices = prior_result[:, :,
                                              i:(rows - nrows + i + 1):rstep,
                                              j:(cols - ncols + j + 1):cstep]
                newshape = (batch_size, num_channels) + \
                           ((rows - nrows) // rstep + 1,) + \
                           ((cols - ncols) // cstep + 1,)
                return T.inc_subtensor(result_indices, pgz.reshape(newshape))
            indices = T.arange(neib_shape[0] * neib_shape[1])
            pgzs = gz.dimshuffle((1, 0))
            result, _ = theano.scan(fn=pos2map,
                                    sequences=[indices, pgzs],
                                    outputs_info=T.zeros(x.shape),
                                    non_sequences=[neib_shape, neib_step])
            grad_input = result[-1]
            return [grad_input,
                    grad_undefined(self, 1, neib_shape),
                    grad_undefined(self, 2, neib_step)]

        return [grad_not_implemented(self, 0, x),
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step)]
Example #36
    def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        if a.dtype.startswith("complex"):
            return [None, None]

        # only valid for matrices
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = basic.abs_(offset)
        pos_offset_flag = basic.ge(offset, 0)
        neg_offset_flag = basic.lt(offset, 0)
        min_wh = basic.minimum(width, height)

        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        num_of_step = basic.minimum(
            min_wh,
            width * pos_offset_flag + height * neg_offset_flag - offset_abs)

        step = a.shape[1] + 1
        end = start + step * num_of_step

        # input of slice should be integer
        start = basic.cast(start, "int32")
        step = basic.cast(step, "int32")
        end = basic.cast(end, "int32")

        wr_val = grad.flatten()[start:end:step].sum()

        wr_offset = grad_undefined(
            self,
            2,
            offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined",
        )

        return [wr_a, wr_val, wr_offset]
Example #37
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0,
                                                                 0, :]), d)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        # not differentiable, since d affects the output elements
        dCdd = grad_undefined(self, 2, d)
        # disconnected, since RShape just determines the output shape
        dCdRShape = DisconnectedType()()

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon_dCdR'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon_H'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example #38
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        # not differentiable, since d affects the output elements
        dCdd = grad_undefined(self, 2, d)
        # disconnected, since RShape just determines the output shape
        dCdRShape = DisconnectedType()()

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon_dCdR'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon_H'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example #39
    def grad(self, inputs, outputs_gradients):
        a, *shape = inputs
        (dout, ) = outputs_gradients

        # Determine the dimensions that were added by broadcasting
        new_dims = list(range(dout.ndim - a.ndim))

        d_wrt_a = broadcast_to(dout, shape).sum(axis=new_dims)

        # Determine the dimensions that were broadcast
        _, shape_bcast = basic.alloc_validate_shape(shape)
        bcast_sums = [
            i
            for i, (a_b, s_b) in enumerate(zip(a.broadcastable, shape_bcast[-a.ndim:]))
            if a_b and not s_b
        ]

        if bcast_sums:
            d_wrt_a = d_wrt_a.sum(axis=bcast_sums, keepdims=True)

        return [d_wrt_a] + [
            grad_undefined(self, i, shp) for i, shp in enumerate(shape, 1)
        ]
Example #40
 def grad(self, inp, grads):
     return [grad_undefined(self, 0, inp[0])]
Example #41
 def grad(self, inp, grads):
     return [grad_undefined(self, i, inp[i])
             for i in xrange(3)]
Example #42
 def grad(self, inputs, ograd):
     return [gradient.grad_undefined(
                 self, k, inp,
                 'No gradient defined through random sampling op')
             for k, inp in enumerate(inputs)]
Example #43
 def grad(self, inp, grads):
     x, indx = inp
     gz, = grads
     return [GpuAssigner()(x, indx, gz), grad_undefined(self, 1, inp[1])]
Example #44
 def grad(self, inputs, output_grads):
   return [self.gradients,
           grad_undefined(self, 1, inputs[1]),
           grad_undefined(self, 2, inputs[2]),
           grad_undefined(self, 3, inputs[3]),
           grad_undefined(self, 4, inputs[4])]
Example #45
 def grad(self, inp, grads):
     return [grad_undefined(self, i, inp[i])
             for i in xrange(2)]
Example #46
 def grad(self, inp, grads):
     return [grad_undefined(self, 0, inp[0])]
Example #47
 def grad(self, inp, output_grads):
     return [grad_undefined(self, i, inp[i]) for i in xrange(len(inp))]