def grad(self, inp, grads):
    """Return one gradient entry per input of this op.

    Only the gradient with respect to ``softmaxes`` is computed, by
    delegating to ``masked_loss_dx``. The three remaining inputs are
    reported as "gradient not implemented".

    :param inp: the four inputs ``(softmaxes, y_idxes, y_lengths,
        y_startidxes)``.
    :param grads: one-element sequence holding the gradient on the output.
    :return: list of four gradient entries, in input order.
    """
    softmaxes, y_idxes, y_lengths, y_startidxes = inp
    g_costs, = grads
    return [
        masked_loss_dx(softmaxes, y_idxes, y_lengths, y_startidxes, g_costs),
        # The second argument is the position of the variable in ``inp``;
        # it must match so that error messages name the right input.
        grad_not_implemented(self, 1, y_idxes),
        grad_not_implemented(self, 2, y_lengths),
        grad_not_implemented(self, 3, y_startidxes),
    ]
def grad(self, inp, grads):
    """Build the gradient list for the four inputs of this op.

    ``masked_loss_dx`` supplies the gradient for ``softmaxes``; the other
    three inputs get "not implemented" placeholders.

    :param inp: ``(softmaxes, y_idxes, y_lengths, y_startidxes)``.
    :param grads: one-element sequence with the gradient on the output.
    :return: list with one gradient entry per input.
    """
    softmaxes, y_idxes, y_lengths, y_startidxes = inp
    g_costs, = grads
    d_softmaxes = masked_loss_dx(
        softmaxes, y_idxes, y_lengths, y_startidxes, g_costs)
    # Each placeholder carries the true position of its input (1, 2, 3)
    # rather than a repeated 1, so diagnostics point at the right variable.
    return [
        d_softmaxes,
        grad_not_implemented(self, 1, y_idxes),
        grad_not_implemented(self, 2, y_lengths),
        grad_not_implemented(self, 3, y_startidxes),
    ]
def grad(self, inp, grads):
    """Return one gradient entry per input of this op.

    The gradient with respect to ``y`` comes from ``masked_sum_dx``;
    ``y_starts`` and ``y_lengths`` are reported as not implemented.

    :param inp: ``(y, y_starts, y_lengths)``.
    :param grads: one-element sequence with the gradient on the output.
    :return: list of three gradient entries, in input order.
    """
    y, y_starts, y_lengths = inp
    g_costs, = grads
    return [
        masked_sum_dx(y, y_starts, y_lengths, g_costs),
        # Positions must match the input order: y_starts is input 1,
        # y_lengths is input 2.
        grad_not_implemented(self, 1, y_starts),
        grad_not_implemented(self, 2, y_lengths),
    ]
def grad(self, inputs, gradients):
    """Return one gradient entry per input of this op.

    The output gradient is permuted so that ``self.axis`` comes first,
    indexed with ``nonconstants`` along that leading axis, and permuted
    again with the same pattern. ``new_length`` and ``nonconstants`` are
    reported as not implemented.

    :param inputs: ``(x, new_length, nonconstants)``.
    :param gradients: sequence whose first element is the output gradient.
    :return: list of three gradient entries, in input order.
    """
    x, new_length, nonconstants = inputs
    d_out = gradients[0]
    # ``range`` must be materialised: on Python 3 a range object has no
    # remove()/insert() methods.
    swap = list(range(self.ndim))
    swap.remove(self.axis)
    swap.insert(0, self.axis)
    # NOTE(review): re-applying ``swap`` only undoes the first dimshuffle
    # when the permutation is its own inverse (axis 0 or 1); confirm the
    # intended behaviour for axis >= 2.
    return [
        d_out.dimshuffle(swap)[nonconstants].dimshuffle(swap),
        grad_not_implemented(self, 1, new_length),
        grad_not_implemented(self, 2, nonconstants),
    ]
def grad(self, inputs, gradients):
    """Build the gradient list for ``(x, new_length, nonconstants)``.

    The gradient for ``x`` is the output gradient with ``self.axis`` moved
    to the front, indexed by ``nonconstants``, then dimshuffled with the
    same pattern once more. The other two inputs get "not implemented"
    placeholders.

    :param inputs: ``(x, new_length, nonconstants)``.
    :param gradients: sequence whose first element is the output gradient.
    :return: list of three gradient entries, in input order.
    """
    x, new_length, nonconstants = inputs
    d_out = gradients[0]
    # Build the "axis first" permutation as a real list; a Python 3
    # ``range`` cannot be mutated with remove()/insert().
    swap = list(range(self.ndim))
    swap.remove(self.axis)
    swap.insert(0, self.axis)
    # NOTE(review): applying ``swap`` twice restores the original axis
    # order only when ``swap`` is self-inverse (axis 0 or 1) - confirm
    # for axis >= 2.
    d_x = d_out.dimshuffle(swap)[nonconstants].dimshuffle(swap)
    return [
        d_x,
        grad_not_implemented(self, 1, new_length),
        grad_not_implemented(self, 2, nonconstants),
    ]
def grad(self, inp, grads):
    """Return gradient entries for ``(kerns, top, output, desc, alpha, beta)``.

    The incoming gradient is made contiguous, then fed to
    ``GpuDnnConvGradW`` (for ``kerns``) and ``GpuDnnConv`` (for ``top``).
    The ``output`` slot receives the incoming gradient scaled by ``beta``,
    ``desc`` is disconnected, and ``alpha``/``beta`` are not implemented.

    :return: 6-tuple of gradient entries, in input order.
    """
    kerns, top, _output, desc, alpha, beta = inp
    (g_img,) = grads
    g_img = gpu_contiguous(g_img)

    grad_kerns = GpuDnnConvGradW()(g_img, top, empty_like(kerns), desc)
    grad_top = GpuDnnConv()(g_img, kerns, empty_like(top), desc)
    grad_alpha = grad_not_implemented(self, 4, alpha)
    grad_beta = grad_not_implemented(self, 5, beta)

    return (
        grad_kerns * alpha,
        grad_top * alpha,
        g_img * beta,
        DisconnectedType()(),
        grad_alpha,
        grad_beta,
    )
def grad(self, inp, grads):
    """Return gradient entries for ``(kerns, top, output, desc, alpha, beta)``.

    Uses ``GpuDnn3dConvGradW`` for ``kerns`` and ``GpuDnn3dConv`` for
    ``top``, each writing into a buffer from ``gpu_alloc_empty`` with the
    matching shape. The ``output`` slot gets the incoming gradient times
    ``beta``; ``desc`` is disconnected; ``alpha``/``beta`` are not
    implemented.

    :return: 6-tuple of gradient entries, in input order.
    """
    kerns, top, _output, desc, alpha, beta = inp
    (g_img,) = grads
    g_img = gpu_contiguous(g_img)

    kerns_buf = gpu_alloc_empty(*kerns.shape)
    grad_kerns = GpuDnn3dConvGradW()(g_img, top, kerns_buf, desc)
    top_buf = gpu_alloc_empty(*top.shape)
    grad_top = GpuDnn3dConv()(g_img, kerns, top_buf, desc)
    grad_alpha = grad_not_implemented(self, 4, alpha)
    grad_beta = grad_not_implemented(self, 5, beta)

    return (
        grad_kerns * alpha,
        grad_top * alpha,
        g_img * beta,
        DisconnectedType()(),
        grad_alpha,
        grad_beta,
    )
def grad(self, inp, grads):
    """Return gradient entries for ``(img, kerns, output, desc, alpha, beta)``.

    The incoming gradient is made contiguous, then fed to
    ``GpuDnnConvGradI`` (for ``img``) and ``GpuDnnConvGradW`` (for
    ``kerns``). The ``output`` slot receives the incoming gradient scaled
    by ``beta``, ``desc`` is disconnected, and ``alpha``/``beta`` are not
    implemented.

    :return: list of six gradient entries, in input order.
    """
    img, kerns, _output, desc, alpha, beta = inp
    (g_top,) = grads
    g_top = gpu_contiguous(g_top)

    grad_img = GpuDnnConvGradI()(kerns, g_top, empty_like(img), desc)
    grad_kerns = GpuDnnConvGradW()(img, g_top, empty_like(kerns), desc)
    grad_alpha = grad_not_implemented(self, 4, alpha)
    grad_beta = grad_not_implemented(self, 5, beta)

    return [
        grad_img * alpha,
        grad_kerns * alpha,
        g_top * beta,
        DisconnectedType()(),
        grad_alpha,
        grad_beta,
    ]
def grad(self, inp, grads):
    """Return gradient entries for ``(img, top, output, desc, alpha, beta)``.

    Uses ``GpuDnn3dConvGradI`` for ``img`` and ``GpuDnn3dConv`` for
    ``top``, each writing into a buffer from ``gpu_alloc_empty`` with the
    matching shape. The ``output`` slot gets the incoming gradient times
    ``beta``; ``desc`` is disconnected; ``alpha``/``beta`` are not
    implemented.

    :return: 6-tuple of gradient entries, in input order.
    """
    img, top, _output, desc, alpha, beta = inp
    (g_kerns,) = grads
    g_kerns = gpu_contiguous(g_kerns)

    img_buf = gpu_alloc_empty(*img.shape)
    grad_img = GpuDnn3dConvGradI()(g_kerns, top, img_buf, desc)
    top_buf = gpu_alloc_empty(*top.shape)
    grad_top = GpuDnn3dConv()(img, g_kerns, top_buf, desc)
    grad_alpha = grad_not_implemented(self, 4, alpha)
    grad_beta = grad_not_implemented(self, 5, beta)

    return (
        grad_img * alpha,
        grad_top * alpha,
        g_kerns * beta,
        DisconnectedType()(),
        grad_alpha,
        grad_beta,
    )
def grad(self, inp, grads):
    """Build the gradient tuple for ``(kerns, top, output, desc, alpha, beta)``.

    ``GpuDnnConvGradW`` produces the ``kerns`` gradient and ``GpuDnnConv``
    the ``top`` gradient, both scaled by ``alpha``. The ``output`` slot is
    the contiguous incoming gradient times ``beta``. ``desc`` is marked
    disconnected and the scalars are marked not implemented.

    :return: 6-tuple of gradient entries, in input order.
    """
    kerns, top, _unused_output, desc, alpha, beta = inp
    incoming = gpu_contiguous(grads[0]) if len(grads) == 1 else None
    if incoming is None:
        # Preserve the original strict one-element unpacking error.
        incoming, = grads

    d_kerns = GpuDnnConvGradW()(incoming, top, empty_like(kerns), desc)
    d_top = GpuDnnConv()(incoming, kerns, empty_like(top), desc)
    not_impl_alpha = grad_not_implemented(self, 4, alpha)
    not_impl_beta = grad_not_implemented(self, 5, beta)

    scaled_kerns = d_kerns * alpha
    scaled_top = d_top * alpha
    scaled_out = incoming * beta
    return (scaled_kerns, scaled_top, scaled_out,
            DisconnectedType()(), not_impl_alpha, not_impl_beta)
def grad(self, inp, grads):
    """Build the gradient list for ``(img, kerns, output, desc, alpha, beta)``.

    ``GpuDnnConvGradI`` produces the ``img`` gradient and
    ``GpuDnnConvGradW`` the ``kerns`` gradient, both scaled by ``alpha``.
    The ``output`` slot is the contiguous incoming gradient times
    ``beta``. ``desc`` is marked disconnected and the scalars are marked
    not implemented.

    :return: list of six gradient entries, in input order.
    """
    img, kerns, _unused_output, desc, alpha, beta = inp
    [incoming] = grads
    incoming = gpu_contiguous(incoming)

    d_img = GpuDnnConvGradI()(kerns, incoming, empty_like(img), desc)
    d_kerns = GpuDnnConvGradW()(img, incoming, empty_like(kerns), desc)
    not_impl_alpha = grad_not_implemented(self, 4, alpha)
    not_impl_beta = grad_not_implemented(self, 5, beta)

    result = [d_img * alpha, d_kerns * alpha, incoming * beta]
    result.append(DisconnectedType()())
    result.append(not_impl_alpha)
    result.append(not_impl_beta)
    return result
def grad(self, inputs, grads):
    """Return gradient entries for ``(v, x)``.

    The gradient with respect to ``v`` is not implemented. The gradient
    with respect to ``x`` is ``gz * (jv(v - 1, x) - jv(v + 1, x)) / 2``.
    """
    v, x = inputs
    [gz] = grads
    lower_order = jv(v - 1, x)
    higher_order = jv(v + 1, x)
    d_v = grad_not_implemented(self, 0, v)
    d_x = gz * (lower_order - higher_order) / 2.0
    return [d_v, d_x]
def grad(self, inp, grads):
    """Return gradient entries for ``(x, neib_shape, neib_step)``.

    * In ``'valid'`` / ``'ignore_borders'`` mode, when ``neib_shape`` and
      ``neib_step`` compare as the same, the gradient for ``x`` is
      ``neibs2images`` applied to the output gradient.
    * Otherwise, in ``'valid'`` mode, a ``theano.scan`` walks over every
      position inside a neighbourhood and accumulates its contribution
      with ``inc_subtensor``.
    * In every other case the gradient for ``x`` is not implemented.

    The gradients for ``neib_shape`` and ``neib_step`` are always
    undefined.
    """
    x, neib_shape, neib_step = inp
    gz, = grads
    mode = self.mode

    if mode in ['valid', 'ignore_borders']:
        # Theano Constant ``==`` does not compare the data; the
        # ``equals`` method does.
        shape_is_step = (
            neib_shape is neib_step or
            neib_shape == neib_step or
            (hasattr(neib_shape, "equals") and
             neib_shape.equals(neib_step))
        )
        if shape_is_step:
            return [
                neibs2images(gz, neib_shape, x.shape, mode=mode),
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step),
            ]

    if mode == 'valid':
        def add_position(pos, pos_grad, accum, nshape, nstep):
            """Add the gradient contribution of one neighbourhood position.

            pos      = index of the position within the neighbourhood.
            pos_grad = gradient slice for that position.
            accum    = running result, unpacked as
                       (batch_size, num_channels, rows, cols).
            nshape   = number of rows, cols in a neighbourhood.
            nstep    = step sizes from image2neibs.
            """
            n_rows, n_cols = nshape
            row_step, col_step = nstep
            batch_size, num_channels, rows, cols = accum.shape
            i = pos // n_cols
            j = pos - (i * n_cols)
            # This position does not touch some img pixels in valid mode.
            target = accum[:, :,
                           i:(rows - n_rows + i + 1):row_step,
                           j:(cols - n_cols + j + 1):col_step]
            out_rows = (rows - n_rows) // row_step + 1
            out_cols = (cols - n_cols) // col_step + 1
            target_shape = (batch_size, num_channels, out_rows, out_cols)
            return T.inc_subtensor(target, pos_grad.reshape(target_shape))

        positions = T.arange(neib_shape[0] * neib_shape[1])
        per_position_grads = gz.dimshuffle((1, 0))
        steps, _ = theano.scan(fn=add_position,
                               sequences=[positions, per_position_grads],
                               outputs_info=T.zeros(x.shape),
                               non_sequences=[neib_shape, neib_step])
        # The last scan step holds the fully accumulated gradient.
        return [
            steps[-1],
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step),
        ]

    return [
        grad_not_implemented(self, 0, x),
        grad_undefined(self, 1, neib_shape),
        grad_undefined(self, 2, neib_step),
    ]
def grad(self, inputs, grads):
    """Return gradient entries for ``(v, x)``.

    The gradient with respect to ``v`` is not implemented. The gradient
    with respect to ``x`` is ``gz * (iv(v - 1, x) + iv(v + 1, x)) / 2``.
    """
    v, x = inputs
    [gz] = grads
    lower_order = iv(v - 1, x)
    higher_order = iv(v + 1, x)
    d_v = grad_not_implemented(self, 0, v)
    d_x = gz * (lower_order + higher_order) / 2.
    return [d_v, d_x]
def grad(self, inp, grads):
    """Return gradient entries for ``(coding, one_of_n)``.

    The ``coding`` gradient is delegated to
    ``hierarchical_crossentropy_categorical_1hot_grad`` together with the
    hierarchy data stored on this op; the ``one_of_n`` gradient is not
    implemented.
    """
    coding, one_of_n = inp
    [g_y] = grads
    d_coding = hierarchical_crossentropy_categorical_1hot_grad(
        g_y,
        coding,
        one_of_n,
        self._hierarchy,
        self._inv_hierarchy,
        self._level_list,
    )
    d_one_of_n = grad_not_implemented(self, 1, one_of_n)
    return [d_coding, d_one_of_n]
def grad(self, inp, grads):
    """Build the gradient list for ``(x, neib_shape, neib_step)``.

    Three outcomes are possible for the ``x`` gradient:

    1. mode is ``'valid'`` or ``'ignore_borders'`` and ``neib_shape``
       matches ``neib_step``: use ``neibs2images`` on the output gradient;
    2. mode is ``'valid'``: accumulate per-position contributions with
       ``theano.scan`` and ``inc_subtensor``;
    3. anything else: not implemented.

    ``neib_shape`` and ``neib_step`` always get undefined gradients.
    """
    x, neib_shape, neib_step = inp
    gz, = grads

    if self.mode in ['valid', 'ignore_borders']:
        # Theano Constant ``==`` does not compare the data; the
        # ``equals`` method does.
        if neib_shape is neib_step:
            matches = True
        elif neib_shape == neib_step:
            matches = True
        else:
            matches = (hasattr(neib_shape, "equals") and
                       neib_shape.equals(neib_step))
        if matches:
            d_x = neibs2images(gz, neib_shape, x.shape, mode=self.mode)
            return [d_x,
                    grad_undefined(self, 1, neib_shape),
                    grad_undefined(self, 2, neib_step)]

    if self.mode not in ['valid']:
        return [grad_not_implemented(self, 0, x),
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step)]

    # Iterate over neighbourhood positions, summing contributions.
    def scatter_one(idx, idx_grad, running, shape_arg, step_arg):
        """Add the gradient contribution of a single position i, j.

        idx       = index of the position within the neighbourhood.
        idx_grad  = gradient slice for that position.
        running   = accumulated result, unpacked as
                    (batch_size, num_channels, rows, cols).
        shape_arg = number of rows, cols in a neighbourhood.
        step_arg  = step sizes from image2neibs.
        """
        nrows, ncols = shape_arg
        rstep, cstep = step_arg
        batch_size, num_channels, rows, cols = running.shape
        i = idx // ncols
        j = idx - (i * ncols)
        row_slice = slice(i, (rows - nrows + i + 1), rstep)
        col_slice = slice(j, (cols - ncols + j + 1), cstep)
        # This position does not touch some img pixels in valid mode.
        window = running[:, :, row_slice, col_slice]
        window_shape = (batch_size, num_channels) + \
            ((rows - nrows) // rstep + 1,) + \
            ((cols - ncols) // cstep + 1,)
        return T.inc_subtensor(window, idx_grad.reshape(window_shape))

    n_positions = neib_shape[0] * neib_shape[1]
    scan_out, _ = theano.scan(fn=scatter_one,
                              sequences=[T.arange(n_positions),
                                         gz.dimshuffle((1, 0))],
                              outputs_info=T.zeros(x.shape),
                              non_sequences=[neib_shape, neib_step])
    # Only the final scan step contains every contribution.
    return [scan_out[-1],
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step)]
def grad(self, inp, grads):
    """Return gradient entries for ``(x, neib_shape, neib_step)``.

    The ``x`` gradient is ``neibs2images`` of the output gradient when the
    mode is ``'valid'`` or ``'ignore_borders'`` and ``neib_shape`` matches
    ``neib_step``; otherwise it is not implemented. The other two inputs
    always get undefined gradients.
    """
    x, neib_shape, neib_step = inp
    [gz] = grads

    if self.mode in ['valid', 'ignore_borders']:
        # Theano Constant ``==`` does not compare the data; the
        # ``equals`` method does.
        shape_is_step = (
            neib_shape is neib_step or
            neib_shape == neib_step or
            (hasattr(neib_shape, "equals") and
             neib_shape.equals(neib_step))
        )
        if shape_is_step:
            d_x = neibs2images(gz, neib_shape, x.shape, mode=self.mode)
            return [
                d_x,
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step),
            ]

    return [
        grad_not_implemented(self, 0, x),
        grad_undefined(self, 1, neib_shape),
        grad_undefined(self, 2, neib_step),
    ]
def grad(self, inp, grads):
    """Build the gradient list for ``(x, neib_shape, neib_step)``.

    ``neibs2images`` provides the ``x`` gradient only when the mode is
    ``'valid'`` or ``'ignore_borders'`` and ``neib_shape`` matches
    ``neib_step``. In all other cases the ``x`` gradient is not
    implemented. ``neib_shape`` and ``neib_step`` gradients are undefined.
    """
    x, neib_shape, neib_step = inp
    gz, = grads

    supported_mode = self.mode in ['valid', 'ignore_borders']
    # Theano Constant ``==`` does not compare the data; the ``equals``
    # method does. The checks run only for a supported mode, in the same
    # order as before.
    if supported_mode and (
            neib_shape is neib_step or
            neib_shape == neib_step or
            (hasattr(neib_shape, "equals") and
             neib_shape.equals(neib_step))):
        first = neibs2images(gz, neib_shape, x.shape, mode=self.mode)
    else:
        first = grad_not_implemented(self, 0, x)

    return [first,
            grad_undefined(self, 1, neib_shape),
            grad_undefined(self, 2, neib_step)]
def grad(self, inp, grads):
    """Report the gradient as not implemented for the first two inputs."""
    placeholders = []
    for position in (0, 1):
        placeholders.append(
            grad_not_implemented(self, position, inp[position]))
    return placeholders
def grad(self, inputs, gout):
    """Report the gradient of the single input as not implemented.

    ``inputs`` must contain exactly one variable; ``gout`` is unused.
    """
    [only_input] = inputs
    placeholder = grad_not_implemented(self, 0, only_input)
    return [placeholder]
def grad(self, inputs, ograds):
    """Report the gradient as not implemented for every input.

    :param inputs: the op's input variables.
    :param ograds: gradients on the outputs (unused).
    :return: list with one "not implemented" entry per input, each tagged
        with that input's position.
    """
    # enumerate() replaces xrange(len(...)), which is unavailable on
    # Python 3 unless imported from a compatibility layer.
    return [
        grad_not_implemented(self, position, variable)
        for position, variable in enumerate(inputs)
    ]
def grad(self, inputs, ograds):
    """Return a "not implemented" gradient entry for each input.

    :param inputs: the op's input variables.
    :param ograds: gradients on the outputs (unused).
    :return: list of placeholders, one per input, in input order.
    """
    # Iterating with enumerate() avoids the Python 2-only ``xrange`` and
    # keeps each placeholder's position in step with its variable.
    return [grad_not_implemented(self, idx, var)
            for idx, var in enumerate(inputs)]
def grad(self, inp, grads):
    """Build the gradient list for ``(y, y_starts, y_lengths)``.

    ``masked_sum_dx`` supplies the gradient for ``y``; the other two
    inputs get "not implemented" placeholders.

    :param inp: ``(y, y_starts, y_lengths)``.
    :param grads: one-element sequence with the gradient on the output.
    :return: list of three gradient entries, in input order.
    """
    y, y_starts, y_lengths = inp
    g_costs, = grads
    d_y = masked_sum_dx(y, y_starts, y_lengths, g_costs)
    # y_lengths is input 2 (not 1); the position is what error messages
    # use to identify the offending input.
    return [d_y,
            grad_not_implemented(self, 1, y_starts),
            grad_not_implemented(self, 2, y_lengths)]
def grad(self, inputs, grads):
    """Return gradient entries for ``(y_true, y_score)``.

    A fresh ``rocGrad`` op is applied to the output gradient and both
    inputs; its result is returned in the first slot. The second slot is
    a "not implemented" placeholder for ``y_score``.
    """
    y_true, y_score = inputs
    [g_y] = grads
    # NOTE(review): the computed gradient is placed in the ``y_true``
    # position while ``y_score`` is marked not implemented - confirm this
    # ordering is intended.
    first_entry = rocGrad()(g_y, y_true, y_score)
    second_entry = grad_not_implemented(self, 1, y_score)
    return [first_entry, second_entry]
def grad(self, inputs, grads):
    """Build the gradient list for ``(v, x)``.

    No gradient is implemented for ``v``. For ``x`` the result is the
    output gradient times the average of ``iv(v - 1, x)`` and
    ``iv(v + 1, x)``.
    """
    v, x = inputs
    (gz,) = grads
    neighbours_sum = iv(v - 1, x) + iv(v + 1, x)
    return [
        grad_not_implemented(self, 0, v),
        gz * neighbours_sum / 2.,
    ]
def grad(self, inp, grads):
    """Return gradient entries for the four inputs of this op.

    The first entry applies ``GpuExtractDiag2D`` to the output gradient
    and ``inp[1]``, with ``keepdims`` set when ``inp[0]`` is
    two-dimensional. Inputs 1 to 3 are reported as not implemented.
    """
    keep = (inp[0].ndim == 2)
    entries = [GpuExtractDiag2D(keepdims=keep)(grads[0], inp[1])]
    for position in range(1, 4):
        entries.append(grad_not_implemented(self, position, inp[position]))
    return entries
def grad(self, inputs, gout):
    """Return a "not implemented" gradient for the op's only input.

    Unpacking enforces that ``inputs`` holds exactly one variable;
    ``gout`` is not used.
    """
    input_x, = inputs
    return [
        grad_not_implemented(self, 0, input_x),
    ]
def grad(self, inp, grads):
    """Return gradient entries for the two inputs of this op.

    The first entry applies ``GpuAllocDiag2D`` to the output gradient,
    ``inp[1]`` and the unpacked shape of ``inp[0]``. The second input is
    reported as not implemented.
    """
    out_grad = grads[0]
    second_input = inp[1]
    first_shape = inp[0].shape
    first_entry = GpuAllocDiag2D()(out_grad, second_input, *first_shape)
    second_entry = grad_not_implemented(self, 1, second_input)
    return [first_entry, second_entry]
def grad(self, inp, grads):
    """Return gradient entries for ``(coding, one_of_n)``.

    A fresh ``rocGrad`` op is applied to the output gradient and both
    inputs to produce the ``coding`` gradient; the ``one_of_n`` gradient
    is not implemented.
    """
    coding, one_of_n = inp
    [g_y] = grads
    # NOTE(review): the original bound this op to a local named
    # ``crossentropy_categorical_1hot_grad`` although it is a ``rocGrad``
    # instance - confirm which gradient op is intended.
    roc_grad_op = rocGrad()
    d_coding = roc_grad_op(g_y, coding, one_of_n)
    return [d_coding, grad_not_implemented(self, 1, one_of_n)]