Example #1
class MaskLayerImpl(Layer):

    expected_inputs = {
        'default': StructureTemplate('T', 'B', '...'),
        'mask': StructureTemplate('T', 'B', 1)
    }

    computes_no_input_deltas_for = ['mask']

    def setup(self, kwargs, in_shapes):
        outputs = OrderedDict()
        outputs['default'] = in_shapes['default']
        return outputs, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler

        flat_inp = flatten_time_and_features(buffers.inputs.default)
        flat_mask = flatten_time(buffers.inputs.mask)
        flat_out = flatten_time_and_features(buffers.outputs.default)

        _h.mult_mv(flat_inp, flat_mask, out=flat_out)

    def backward_pass(self, buffers):
        _h = self.handler

        flat_out_deltas = flatten_time_and_features(
            buffers.output_deltas.default)
        tmp = self.handler.allocate(flat_out_deltas.shape)
        flat_mask = flatten_time(buffers.inputs.mask)
        flat_in_deltas = flatten_time_and_features(
            buffers.input_deltas.default)

        _h.mult_mv(flat_out_deltas, flat_mask, tmp)
        _h.add_tt(tmp, flat_in_deltas, flat_in_deltas)
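A minimal NumPy sketch of the arithmetic this mask layer performs (illustrative only; it does not use the library's handler API, and the shapes are made up): the output is the input scaled per (time, batch) position by the mask, and the incoming deltas are scaled the same way on the backward pass.

import numpy as np

T, B, F = 4, 3, 5                           # hypothetical sizes
x = np.random.randn(T, B, F)                # 'default' input
mask = (np.random.rand(T, B, 1) > 0.5).astype(x.dtype)

out = x * mask                              # forward: broadcast mask over features
out_deltas = np.random.randn(T, B, F)
in_deltas = out_deltas * mask               # backward: deltas pass only where mask == 1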
Example #2
class HighwayLayerImpl(Layer):
    expected_inputs = {'H': StructureTemplate('T', 'B', '...'),
                       'T': StructureTemplate('T', 'B', '...'),
                       'x': StructureTemplate('T', 'B', '...')}

    def setup(self, kwargs, in_shapes):
        # 'H', 'T' and 'x' must have the same shape
        if in_shapes['H'] != in_shapes['T']:
            raise LayerValidationError(
                "{}: H and T must have the same shape but got {} and {}"
                .format(self.name, in_shapes['H'], in_shapes['T']))
        if in_shapes['H'] != in_shapes['x']:
            raise LayerValidationError(
                "{}: H and x must have the same shape but got {} and {}"
                .format(self.name, in_shapes['H'], in_shapes['x']))

        outputs = OrderedDict()
        outputs['default'] = BufferStructure(
            'T', 'B', *self.in_shapes['x'].feature_shape)
        return outputs, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        x = buffers.inputs.x
        H = buffers.inputs.H
        T = buffers.inputs.T
        y = buffers.outputs.default

        tmp = _h.zeros(x.shape)
        _h.subtract_tt(H, x, out=tmp)
        _h.mult_tt(T, tmp, out=tmp)
        _h.add_tt(tmp, x, out=y)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        x = buffers.inputs.x
        H = buffers.inputs.H
        T = buffers.inputs.T
        dx = buffers.input_deltas.x
        dH = buffers.input_deltas.H
        dT = buffers.input_deltas.T
        dy = buffers.output_deltas.default

        tmp = _h.ones(dx.shape)
        _h.subtract_tt(tmp, T, out=tmp)
        _h.mult_add_tt(tmp, dy, out=dx)

        _h.mult_add_tt(T, dy, out=dH)

        _h.subtract_tt(H, x, out=tmp)
        _h.mult_add_tt(tmp, dy, out=dT)
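The pass above can be restated in plain NumPy (a sketch of the math only, assuming T is a gate in (0, 1)): y = T*H + (1 - T)*x, hence dx = (1 - T)*dy, dH = T*dy and dT = (H - x)*dy.

import numpy as np

x, H = np.random.randn(3, 2, 4), np.random.randn(3, 2, 4)
T_gate = 1.0 / (1.0 + np.exp(-np.random.randn(3, 2, 4)))  # a gate in (0, 1)

y = T_gate * (H - x) + x                   # same as T*H + (1 - T)*x

dy = np.random.randn(*y.shape)
dx = (1.0 - T_gate) * dy
dH = T_gate * dy
dT = (H - x) * dy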
Example #3
class SquaredErrorLayerImpl(Layer):

    expected_inputs = {
        'default': StructureTemplate('T', 'B', '...'),
        'targets': StructureTemplate('T', 'B', '...')
    }
    expected_kwargs = {}
    computes_no_input_deltas_for = ['targets']
    takes_no_output_deltas_from = ['predictions']

    def setup(self, kwargs, in_shapes):
        # 'default' and 'targets' must have same shape
        in_shape = in_shapes['default'].feature_shape
        tar_shape = in_shapes['targets'].feature_shape
        if in_shape != tar_shape:
            raise LayerValidationError(
                "{}: default and targets must have same feature shapes but "
                "got {} and {}".format(self.name, in_shape, tar_shape))

        outputs = OrderedDict()
        outputs['predictions'] = BufferStructure('T', 'B', *in_shape)
        outputs['loss'] = BufferStructure('T', 'B', *in_shape)

        internals = OrderedDict()
        internals['diff'] = BufferStructure('T', 'B', *in_shape)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        x = flatten_time_and_features(buffers.inputs.default)
        t = flatten_time_and_features(buffers.inputs.targets)
        diff = flatten_time_and_features(buffers.internals.diff)
        y = flatten_time_and_features(buffers.outputs.predictions)
        loss = flatten_time_and_features(buffers.outputs.loss)

        # calculate
        _h.copy_to(x, y)
        _h.subtract_tt(x, t, out=diff)
        _h.mult_tt(diff, diff, out=loss)
        _h.mult_st(0.5, loss, out=loss)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        dloss = flatten_time_and_features(buffers.output_deltas.loss)
        diff = flatten_time_and_features(buffers.internals.diff)
        dx = flatten_time_and_features(buffers.input_deltas.default)

        # calculate
        _h.mult_add_tt(dloss, diff, dx)
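Stated in plain NumPy (illustrative sketch, shapes made up): the loss is 0.5*(x - t)**2 per element, so the input delta is diff * dloss.

import numpy as np

x = np.random.randn(4, 2, 3)     # 'default' input (predictions)
t = np.random.randn(4, 2, 3)     # 'targets'

diff = x - t
loss = 0.5 * diff ** 2           # forward

dloss = np.ones_like(loss)       # upstream deltas on 'loss'
dx = dloss * diff                # backward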
Example #4
class MaskLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...'),
                       'mask': StructureTemplate('T', 'B', '...')}

    computes_no_input_deltas_for = ['mask']

    def setup(self, kwargs, in_shapes):
        in_shape = in_shapes['default'].feature_shape
        expected_shape = in_shape[:-1] + (1,)

        if in_shapes['mask'].feature_shape == (1,):
            self.flatten_dim = 2
        elif in_shapes['mask'].feature_shape in [expected_shape, in_shape]:
            self.flatten_dim = len(in_shape) + 1
        else:
            raise LayerValidationError(
                "Shape of the mask did not match shape of the default inputs. "
                "Should be either ('T', 'B', 1) or {} or {}, but was {}".format(
                    ('T', 'B') + expected_shape,
                    in_shapes['default'].shape,
                    in_shapes['mask']))
        outputs = OrderedDict()
        outputs['default'] = in_shapes['default']
        return outputs, OrderedDict(), OrderedDict()

    def flatten_buffer(self, buffer):
        pre = buffer.shape[:self.flatten_dim]
        post = buffer.shape[self.flatten_dim:]
        return buffer.reshape((int(product(pre)), int(product(post))))

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler

        flat_inp = self.flatten_buffer(buffers.inputs.default)
        flat_mask = self.flatten_buffer(buffers.inputs.mask)
        flat_out = self.flatten_buffer(buffers.outputs.default)

        _h.mult_mv(flat_inp, flat_mask, out=flat_out)

    def backward_pass(self, buffers):
        _h = self.handler

        flat_out_deltas = self.flatten_buffer(buffers.output_deltas.default)
        tmp = self.handler.allocate(flat_out_deltas.shape)
        flat_mask = self.flatten_buffer(buffers.inputs.mask)
        flat_in_deltas = self.flatten_buffer(buffers.input_deltas.default)

        _h.mult_mv(flat_out_deltas, flat_mask, tmp)
        _h.add_tt(tmp, flat_in_deltas, flat_in_deltas)
Example #5
class SquaredDifferenceLayerImpl(Layer):

    expected_inputs = {'inputs_1': StructureTemplate('T', 'B', '...'),
                       'inputs_2': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {}

    def setup(self, kwargs, in_shapes):
        # 'inputs_1' and 'inputs_2' must have same shape
        f_shape1 = in_shapes['inputs_1'].feature_shape
        f_shape2 = in_shapes['inputs_2'].feature_shape
        if f_shape1 != f_shape2:
            raise LayerValidationError(
                "{}: inputs_1 and inputs_2 must have same feature shapes but "
                "got {} and {}".format(self.name, f_shape1, f_shape2))

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', *f_shape1)

        internals = OrderedDict()
        feature_shape = self.in_shapes['inputs_1'].feature_shape
        internals['diff'] = BufferStructure('T', 'B', *feature_shape)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        diff = flatten_time_and_features(buffers.internals.diff)
        outputs = flatten_time_and_features(buffers.outputs.default)

        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=diff)
        _h.mult_tt(diff, diff, out=outputs)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        out_deltas = flatten_time_and_features(buffers.output_deltas.default)
        diff = flatten_time_and_features(buffers.internals.diff)
        dinputs_1 = flatten_time_and_features(buffers.input_deltas.inputs_1)
        dinputs_2 = flatten_time_and_features(buffers.input_deltas.inputs_2)

        tmp = _h.allocate(out_deltas.shape)
        # calculate
        _h.mult_st(2, out_deltas, out=out_deltas)
        _h.mult_add_tt(out_deltas, diff, out=dinputs_1)
        _h.mult_st(-1, diff, out=tmp)
        _h.mult_add_tt(out_deltas, tmp, out=dinputs_2)
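The same computation as a NumPy sketch (illustrative only): out = (x1 - x2)**2, so dx1 = 2*diff*dout and dx2 = -2*diff*dout, which is what the backward pass above accumulates.

import numpy as np

x1 = np.random.randn(4, 2, 3)
x2 = np.random.randn(4, 2, 3)

diff = x1 - x2
out = diff ** 2                  # forward

dout = np.random.randn(*out.shape)
dx1 = 2.0 * dout * diff          # backward
dx2 = -2.0 * dout * diff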
Example #6
class SquareLayerImpl(Layer):
    # accept inputs in any format
    expected_inputs = {'default': StructureTemplate('...')}
    # no kwargs supported
    expected_kwargs = {}

    # For a custom layer we need to implement the following 3 methods:

    def setup(self, kwargs, in_shapes):
        # In this method we set up the buffer structure of the layer.
        # We can use the kwargs passed to this layer (here we don't)
        # and the shapes of the inputs (an OrderedDict[str, BufferStructure]).

        # This layer is elementwise so the output shapes should be the same as
        # the input shapes
        outputs = in_shapes
        parameters = OrderedDict()  # No parameters so this is empty
        internals = OrderedDict()  # Also no need for internal buffers
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        self.handler.mult_tt(inputs, inputs, outputs)
        self.handler.mult_st(0.5, outputs, outputs)

    def backward_pass(self, buffers):
        inputs = buffers.inputs.default
        output_deltas = buffers.output_deltas.default
        input_deltas = buffers.input_deltas.default
        self.handler.mult_add_tt(inputs, output_deltas, input_deltas)
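Since this example computes 0.5*x**2 with input delta x*dy, the math can be sanity-checked outside the framework with a small NumPy finite-difference comparison (purely illustrative; all names below are made up):

import numpy as np

def forward(x):
    return 0.5 * x ** 2

def backward(x, dy):
    return x * dy                         # input deltas for 0.5 * x**2

x = np.random.randn(5)
dy = np.ones_like(x)                      # pretend the loss is forward(x).sum()

analytic = backward(x, dy)
eps = 1e-6
numeric = np.array([
    (forward(x + eps * e).sum() - forward(x - eps * e).sum()) / (2 * eps)
    for e in np.eye(len(x))
])
assert np.allclose(analytic, numeric, atol=1e-4)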
Example #7
class DropoutLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'drop_prob'}

    def setup(self, kwargs, in_shapes):
        self.drop_prob = kwargs.get('drop_prob', 0.5)

        outputs = OrderedDict()
        outputs['default'] = in_shapes['default']

        internals = OrderedDict()
        internals['mask'] = self.in_shapes['default']
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler

        if training_pass:
            _h.generate_probability_mask(buffers.internals.mask,
                                         1 - self.drop_prob)
            _h.mult_tt(buffers.inputs.default,
                       buffers.internals.mask,
                       out=buffers.outputs.default)
            _h.mult_st(1 / (1 - self.drop_prob),
                       buffers.outputs.default,
                       out=buffers.outputs.default)
        else:
            _h.copy_to(buffers.inputs.default, buffers.outputs.default)

    def backward_pass(self, buffers):
        self.handler.mult_add_tt(buffers.output_deltas.default,
                                 buffers.internals.mask,
                                 buffers.input_deltas.default)
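A NumPy sketch of inverted dropout as used above (illustrative only): at training time the kept units are scaled by 1/(1 - drop_prob), at evaluation time the input passes through unchanged; the exact derivative of the scaled forward pass carries the same mask and scale factor.

import numpy as np

drop_prob = 0.5
x = np.random.randn(4, 2, 3)

# training pass: inverted dropout
mask = (np.random.rand(*x.shape) < (1.0 - drop_prob)).astype(x.dtype)
y_train = x * mask / (1.0 - drop_prob)

# evaluation pass: identity
y_eval = x.copy()

# backward: deltas flow only through the kept units
dy = np.random.randn(*x.shape)
dx = dy * mask / (1.0 - drop_prob)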
Example #8
class FullyConnectedLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'size', 'activation'}

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'rel')
        self.size = kwargs.get('size', self.in_shapes['default'].feature_shape)
        if isinstance(self.size, int):
            self.size = (self.size, )

        if not isinstance(self.size, (tuple, list)):
            raise LayerValidationError('size must be int or tuple of ints but '
                                       'was {}'.format(self.size))
        in_size = in_shapes['default'].feature_size

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', *self.size)
        out_size = outputs['default'].feature_size

        parameters = OrderedDict()
        parameters['W'] = BufferStructure(out_size, in_size)
        parameters['bias'] = BufferStructure(out_size)

        internals = OrderedDict()
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        inputs = flatten_time_and_features(buffers.inputs.default)
        outputs = flatten_time_and_features(buffers.outputs.default)

        # calculate outputs
        _h.dot_mm(inputs, W, outputs, transb=True)
        _h.add_mv(outputs, bias.reshape((1, bias.shape[0])), outputs)
        _h.inplace_act_func[self.activation](outputs)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        dW, dbias = buffers.gradients
        inputs = flatten_time_and_features(buffers.inputs.default)
        outputs = flatten_time_and_features(buffers.outputs.default)
        in_deltas = flatten_time_and_features(buffers.input_deltas.default)
        out_deltas = flatten_time_and_features(buffers.output_deltas.default)

        # calculate in_deltas and gradients
        _h.inplace_act_func_deriv[self.activation](outputs, out_deltas)
        _h.dot_add_mm(out_deltas, W, out=in_deltas)
        _h.dot_mm(out_deltas, inputs, out=dW, transa=True)
        _h.sum_t(out_deltas, axis=0, out=dbias)
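For reference, the fully connected math over the flattened (T*B, features) matrices in plain NumPy (a sketch with made-up sizes, using ReLU since the layer defaults to 'rel'): Y = f(X W^T + b), and the gradients are dW = dZ^T X, dbias = sum(dZ), dX = dZ W, where dZ is the delta after applying the activation derivative.

import numpy as np

M, in_size, out_size = 8, 5, 3            # M = T*B after flattening
X = np.random.randn(M, in_size)
W = np.random.randn(out_size, in_size)
b = np.random.randn(out_size)

Z = X @ W.T + b                           # pre-activation
Y = np.maximum(Z, 0.0)                    # ReLU

dY = np.random.randn(M, out_size)
dZ = dY * (Z > 0)                         # activation derivative
dW = dZ.T @ X
db = dZ.sum(axis=0)
dX = dZ @ W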
Example #9
class L2DecayLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {}

    def setup(self, kwargs, in_shapes):
        outputs = OrderedDict()
        outputs['loss'] = BufferStructure('T', 'B', 1)

        parameters = OrderedDict()
        internals = OrderedDict()
        internals['tmp'] = in_shapes['default']
        internals['dsq_activations'] = BufferStructure(
            *in_shapes['default'].shape, is_backward_only=True)

        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        assert isinstance(_h, Handler)
        inputs = buffers.inputs.default
        tmp = buffers.internals.tmp
        outputs = buffers.outputs.loss

        # reshape
        flat_inputs = flatten_time_and_features(inputs)
        flat_tmp = flatten_time_and_features(tmp)
        flat_outputs = flatten_time(outputs)

        # compute
        _h.mult_tt(flat_inputs, flat_inputs, flat_tmp)
        _h.mult_st(0.5, flat_tmp, flat_tmp)
        _h.sum_t(flat_tmp, 1, flat_outputs)

    def backward_pass(self, buffers):
        _h = self.handler
        assert isinstance(_h, Handler)
        inputs = buffers.inputs.default
        tmp = buffers.internals.tmp
        output_deltas = buffers.output_deltas.loss
        input_deltas = buffers.input_deltas.default

        # reshape
        flat_inputs = flatten_time_and_features(inputs)
        flat_tmp = flatten_time_and_features(tmp)
        flat_output_deltas = flatten_time(output_deltas)
        flat_input_deltas = flatten_time_and_features(input_deltas)

        # compute
        _h.mult_mv(flat_inputs, flat_output_deltas, flat_tmp)
        _h.add_tt(flat_tmp, flat_input_deltas, flat_input_deltas)
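In plain NumPy (illustrative sketch): the per-(time, batch) loss is 0.5 times the sum over features of x**2, and the backward pass scales the inputs by the loss deltas, broadcast back over the features.

import numpy as np

x = np.random.randn(4, 2, 3)                         # 'default' input
loss = 0.5 * (x ** 2).sum(axis=-1, keepdims=True)    # forward, shape (T, B, 1)

dloss = np.ones((4, 2, 1))                           # upstream deltas on 'loss'
dx = x * dloss                                       # backward, broadcast over features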
Example #10
class MaskLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...'),
                       'mask': StructureTemplate('T', 'B', '...')}

    computes_no_input_deltas_for = ['mask']

    def setup(self, kwargs, in_shapes):
        in_shape = in_shapes['default'].feature_shape
        if in_shapes['mask'].feature_shape not in [(1,), in_shape]:
            raise LayerValidationError(
                "Shape of the mask did not match shape of the default inputs. "
                "Should be either ('T', 'B', 1) or {}, but was {}".format(
                    in_shapes['default'].shape, in_shapes['mask'].shape))
        outputs = OrderedDict()
        outputs['default'] = in_shapes['default']
        return outputs, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler

        flat_inp = flatten_time_and_features(buffers.inputs.default)
        flat_mask = flatten_time_and_features(buffers.inputs.mask)
        flat_out = flatten_time_and_features(buffers.outputs.default)

        _h.mult_mv(flat_inp, flat_mask, out=flat_out)

    def backward_pass(self, buffers):
        _h = self.handler

        flat_out_deltas = flatten_time_and_features(
            buffers.output_deltas.default)
        tmp = self.handler.allocate(flat_out_deltas.shape)
        flat_mask = flatten_time_and_features(buffers.inputs.mask)
        flat_in_deltas = flatten_time_and_features(
            buffers.input_deltas.default)

        _h.mult_mv(flat_out_deltas, flat_mask, tmp)
        _h.add_tt(tmp, flat_in_deltas, flat_in_deltas)
Example #11
class MergeLayerImpl(Layer):
    expected_inputs = {
        'inputs_1': StructureTemplate('...'),
        'inputs_2': StructureTemplate('...')
    }
    expected_kwargs = {}

    def setup(self, kwargs, in_shapes):
        # 'inputs_1' and 'inputs_2' must have same shape except for last dim
        shape_prefix1 = in_shapes['inputs_1'].shape[:-1]
        shape_prefix2 = in_shapes['inputs_2'].shape[:-1]
        if shape_prefix1 != shape_prefix2:
            raise LayerValidationError(
                "{}: The shapes of inputs_1 and inputs_2 may only differ in "
                "the last dimension but got {} and {}".format(
                    self.name, in_shapes['inputs_1'].shape,
                    in_shapes['inputs_2'].shape))

        combined_size = (in_shapes['inputs_1'].shape[-1] +
                         in_shapes['inputs_2'].shape[-1])
        out_shape = shape_prefix1 + (combined_size, )
        outputs = OrderedDict()
        outputs['default'] = BufferStructure(*out_shape)

        parameters = OrderedDict()
        internals = OrderedDict()
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        self.handler.merge_tt(buffers.inputs.inputs_1, buffers.inputs.inputs_2,
                              buffers.outputs.default)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        self.handler.split_add_tt(buffers.output_deltas.default,
                                  buffers.input_deltas.inputs_1,
                                  buffers.input_deltas.inputs_2)
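The merge is a concatenation along the last axis, and the backward pass splits the deltas back into the two inputs; a NumPy sketch (illustrative only):

import numpy as np

a = np.random.randn(4, 2, 3)
b = np.random.randn(4, 2, 5)

merged = np.concatenate([a, b], axis=-1)     # forward

dmerged = np.random.randn(*merged.shape)
da = dmerged[..., :a.shape[-1]]              # backward: split the deltas
db = dmerged[..., a.shape[-1]:]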
Example #12
class DeltasScalingLayerImpl(Layer):
    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'factor'}

    def setup(self, kwargs, in_shapes):
        if 'factor' not in kwargs:
            raise LayerValidationError('Missing required "factor" argument')
        self.factor = kwargs['factor']
        out_shapes = in_shapes
        return out_shapes, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        self.handler.copy_to(buffers.inputs.default, buffers.outputs.default)

    def backward_pass(self, buffers):
        self.handler.mult_add_st(self.factor, buffers.output_deltas.default,
                                 buffers.input_deltas.default)
Example #13
class ElementwiseLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'activation'}

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'rel')
        return in_shapes, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        self.handler.act_func[self.activation](buffers.inputs.default,
                                               buffers.outputs.default)

    def backward_pass(self, buffers):
        tmp = self.handler.allocate(buffers.input_deltas.default.shape)
        self.handler.act_func_deriv[self.activation](
            buffers.inputs.default, buffers.outputs.default,
            buffers.output_deltas.default, tmp)
        self.handler.add_tt(buffers.input_deltas.default, tmp,
                            buffers.input_deltas.default)
Example #14
class LossLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('...')}
    expected_kwargs = {'importance'}

    def setup(self, kwargs, in_shapes):
        assert self.name != 'total_loss'

        self.importance = kwargs.get('importance', 1.0)
        self.batch_index = None
        if in_shapes['default'].scales_with_time:
            self.batch_index = 1
        elif in_shapes['default'].scales_with_batch_size:
            self.batch_index = 0

        outputs = OrderedDict()
        outputs['loss'] = BufferStructure(1)
        return outputs, OrderedDict(), OrderedDict()

    def forward_pass(self, buffers, training_pass=True):
        if self.batch_index is None:
            batch_size = 1.0
        else:
            batch_size = buffers.inputs.default.shape[self.batch_index]

        self.handler.sum_t(buffers.inputs.default,
                           None,
                           buffers.outputs.loss.reshape(tuple()))
        self.handler.mult_st(self.importance / batch_size,
                             buffers.outputs.loss,
                             buffers.outputs.loss)

    def backward_pass(self, buffers):
        if self.batch_index is None:
            batch_size = 1.0
        else:
            batch_size = buffers.inputs.default.shape[self.batch_index]
        self.handler.add_st(self.importance / batch_size,
                            buffers.input_deltas.default,
                            buffers.input_deltas.default)
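A NumPy sketch of what this loss layer does (illustrative; here assuming the input scales with time, so the batch size is read from axis 1): the scalar loss is importance * inputs.sum() / batch_size, and every input delta receives that same constant factor.

import numpy as np

importance = 1.0
inputs = np.random.randn(4, 2, 3)             # a ('T', 'B', ...) shaped input
batch_size = inputs.shape[1]

loss = importance * inputs.sum() / batch_size     # forward (a scalar)
din = np.zeros_like(inputs)
din += importance / batch_size                    # backward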
Example #15
def test_illegal_structure_template_raise(shape):
    with pytest.raises(StructureValidationError):
        StructureTemplate(*shape)
Example #16
class LstmLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', 'F')}
    expected_kwargs = {'size', 'activation'}

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'tanh')
        in_size = in_shapes['default'].feature_size
        self.size = kwargs.get('size', in_size)
        if not isinstance(self.size, int):
            raise LayerValidationError('size must be int but was {}'.
                                       format(self.size))

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', self.size,
                                             context_size=1)

        parameters = OrderedDict()
        parameters['Wz'] = BufferStructure(self.size, in_size)
        parameters['Wi'] = BufferStructure(self.size, in_size)
        parameters['Wf'] = BufferStructure(self.size, in_size)
        parameters['Wo'] = BufferStructure(self.size, in_size)

        parameters['pi'] = BufferStructure(1, self.size)
        parameters['pf'] = BufferStructure(1, self.size)
        parameters['po'] = BufferStructure(1, self.size)

        parameters['Rz'] = BufferStructure(self.size, self.size)
        parameters['Ri'] = BufferStructure(self.size, self.size)
        parameters['Rf'] = BufferStructure(self.size, self.size)
        parameters['Ro'] = BufferStructure(self.size, self.size)
        parameters['bz'] = BufferStructure(self.size)
        parameters['bi'] = BufferStructure(self.size)
        parameters['bf'] = BufferStructure(self.size)
        parameters['bo'] = BufferStructure(self.size)

        internals = OrderedDict()
        internals['Za'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Zb'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ia'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ib'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Fa'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Fb'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Oa'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ob'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ca'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Cb'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['dZa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dZb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dIa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dIb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dFa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dFb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dOa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dOb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dCa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dCb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals
        x = buffers.inputs.default
        y = buffers.outputs.default

        time_size, batch_size, in_size = x.shape

        flat_x = flatten_time(x)
        flat_Za = flatten_time(Za[:-1])
        flat_Ia = flatten_time(Ia[:-1])
        flat_Fa = flatten_time(Fa[:-1])
        flat_Oa = flatten_time(Oa[:-1])
        _h.dot_mm(flat_x, Wz, flat_Za, transb=True)
        _h.dot_mm(flat_x, Wi, flat_Ia, transb=True)
        _h.dot_mm(flat_x, Wf, flat_Fa, transb=True)
        _h.dot_mm(flat_x, Wo, flat_Oa, transb=True)

        for t in range(time_size):
            # Block input
            _h.dot_add_mm(y[t - 1], Rz, Za[t], transb=True)
            _h.add_mv(Za[t], bz.reshape((1, self.size)), Za[t])
            _h.act_func[self.activation](Za[t], Zb[t])

            # Input Gate
            _h.dot_add_mm(y[t - 1], Ri, Ia[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pi, Ia[t])
            _h.add_mv(Ia[t], bi.reshape((1, self.size)), Ia[t])
            _h.sigmoid(Ia[t], Ib[t])

            # Forget Gate
            _h.dot_add_mm(y[t - 1], Rf, Fa[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pf, Fa[t])
            _h.add_mv(Fa[t], bf.reshape((1, self.size)), Fa[t])
            _h.sigmoid(Fa[t], Fb[t])

            # Cell
            _h.mult_tt(Ib[t], Zb[t], Ca[t])
            _h.mult_add_tt(Fb[t], Ca[t - 1], Ca[t])

            # Output Gate
            _h.dot_add_mm(y[t - 1], Ro, Oa[t], transb=True)
            _h.mult_add_mv(Ca[t], po, Oa[t])
            _h.add_mv(Oa[t], bo.reshape((1, self.size)), Oa[t])
            _h.sigmoid(Oa[t], Ob[t])

            # Block output
            _h.act_func[self.activation](Ca[t], Cb[t])
            _h.mult_tt(Ob[t], Cb[t], y[t])

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo) = buffers.parameters
        (dWz, dWi, dWf, dWo,
         dpi, dpf, dpo,
         dRz, dRi, dRf, dRo,
         dbz, dbi, dbf, dbo) = buffers.gradients

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals

        x = buffers.inputs.default
        dx = buffers.input_deltas.default
        y = buffers.outputs.default
        deltas = buffers.output_deltas.default

        dy = _h.allocate(y.shape)
        _h.fill(dCa, 0.0)

        time_size, batch_size, in_size = x.shape
        for t in range(time_size - 1, -1, - 1):
            # Accumulate recurrent deltas
            _h.copy_to(deltas[t], dy[t])
            _h.dot_add_mm(dIa[t + 1], Ri, dy[t])
            _h.dot_add_mm(dFa[t + 1], Rf, dy[t])
            _h.dot_add_mm(dOa[t + 1], Ro, dy[t])
            _h.dot_add_mm(dZa[t + 1], Rz, dy[t])

            # Peephole connection part:
            _h.mult_add_mv(dIa[t + 1], pi, dCa[t])
            _h.mult_add_mv(dFa[t + 1], pf, dCa[t])

            # Output Gate
            _h.mult_tt(dy[t], Cb[t], dOb[t])
            _h.sigmoid_deriv(Oa[t], Ob[t], dOb[t], dOa[t])
            # Peephole connection
            _h.mult_add_mv(dOa[t], po, dCa[t])

            # Cell
            _h.mult_tt(dy[t], Ob[t], dCb[t])
            _h.act_func_deriv[self.activation](Ca[t], Cb[t], dCb[t], dCb[t])
            _h.add_tt(dCa[t], dCb[t], dCa[t])
            _h.mult_add_tt(dCa[t + 1], Fb[t + 1], dCa[t])

            # Forget Gate
            _h.mult_tt(dCa[t], Ca[t - 1], dFb[t])
            _h.sigmoid_deriv(Fa[t], Fb[t], dFb[t], dFa[t])

            # Input Gate
            _h.mult_tt(dCa[t], Zb[t], dIb[t])
            _h.sigmoid_deriv(Ia[t], Ib[t], dIb[t], dIa[t])

            # Block Input
            _h.mult_tt(dCa[t], Ib[t], dZb[t])
            _h.act_func_deriv[self.activation](Za[t], Zb[t], dZb[t], dZa[t])

        flat_inputs = flatten_time(x)
        flat_dinputs = flatten_time(dx)

        flat_dIa = flatten_time(dIa[:-1])
        flat_dFa = flatten_time(dFa[:-1])
        flat_dOa = flatten_time(dOa[:-1])
        flat_dZa = flatten_time(dZa[:-1])

        # Calculate in_deltas and gradients
        _h.dot_add_mm(flat_dIa, Wi, flat_dinputs)
        _h.dot_add_mm(flat_dFa, Wf, flat_dinputs)
        _h.dot_add_mm(flat_dOa, Wo, flat_dinputs)
        _h.dot_add_mm(flat_dZa, Wz, flat_dinputs)

        _h.dot_add_mm(flat_dIa, flat_inputs, dWi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_inputs, dWf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_inputs, dWo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_inputs, dWz, transa=True)

        dbias_tmp = _h.allocate(dbz.shape)
        _h.sum_t(flat_dIa, axis=0, out=dbias_tmp)
        _h.add_tt(dbi, dbias_tmp, dbi)
        _h.sum_t(flat_dFa, axis=0, out=dbias_tmp)
        _h.add_tt(dbf, dbias_tmp, dbf)
        _h.sum_t(flat_dOa, axis=0, out=dbias_tmp)
        _h.add_tt(dbo, dbias_tmp, dbo)
        _h.sum_t(flat_dZa, axis=0, out=dbias_tmp)
        _h.add_tt(dbz, dbias_tmp, dbz)

        flat_outputs = flatten_time(y[:-2])
        flat_cell = flatten_time(Ca[:-2])
        flat_cell2 = flatten_time(Ca[:-1])

        dWco_tmp = _h.allocate(flat_cell2.shape)
        dWc_tmp = _h.allocate(dpo.shape)

        # Output gate Peephole
        _h.mult_tt(flat_cell2, flat_dOa, dWco_tmp)
        _h.sum_t(dWco_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpo, dWc_tmp, dpo)

        flat_dIa = flatten_time(dIa[1:-1])
        flat_dFa = flatten_time(dFa[1:-1])
        flat_dOa = flatten_time(dOa[1:-1])
        flat_dZa = flatten_time(dZa[1:-1])

        _h.dot_add_mm(flat_dIa, flat_outputs, dRi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_outputs, dRf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_outputs, dRo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_outputs, dRz, transa=True)

        _h.dot_add_mm(dIa[0], y[-1], dRi, transa=True)
        _h.dot_add_mm(dFa[0], y[-1], dRf, transa=True)
        _h.dot_add_mm(dOa[0], y[-1], dRo, transa=True)
        _h.dot_add_mm(dZa[0], y[-1], dRz, transa=True)

        # Other Peephole connections
        dWcif_tmp = _h.allocate(flat_cell.shape)
        _h.mult_tt(flat_cell, flat_dIa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(flat_cell, flat_dFa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)

        dWcif_tmp = _h.allocate(dIa[0].shape)
        _h.mult_tt(Ca[-1], dIa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(Ca[-1], dFa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)
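The forward recursion above corresponds to the standard LSTM gate equations with peepholes; a single-time-step NumPy sketch (illustrative only, with made-up sizes and tanh as the block activation):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

B, in_size, size = 2, 4, 3
x_t = np.random.randn(B, in_size)
y_prev = np.zeros((B, size))              # previous output (context)
c_prev = np.zeros((B, size))              # previous cell state (context)
Wz, Wi, Wf, Wo = (np.random.randn(size, in_size) for _ in range(4))
Rz, Ri, Rf, Ro = (np.random.randn(size, size) for _ in range(4))
pi, pf, po = (np.random.randn(size) for _ in range(3))
bz, bi, bf, bo = (np.random.randn(size) for _ in range(4))

z = np.tanh(x_t @ Wz.T + y_prev @ Rz.T + bz)                # block input
i = sigmoid(x_t @ Wi.T + y_prev @ Ri.T + c_prev * pi + bi)  # input gate
f = sigmoid(x_t @ Wf.T + y_prev @ Rf.T + c_prev * pf + bf)  # forget gate
c = i * z + f * c_prev                                      # cell state
o = sigmoid(x_t @ Wo.T + y_prev @ Ro.T + c * po + bo)       # output gate
y_t = o * np.tanh(c)                                        # block output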
Example #17
class Pooling2DLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'kernel_size', 'type', 'stride',
                       'padding', 'activation_function'}

    def setup(self, kwargs, in_shapes):
        assert 'kernel_size' in kwargs, "kernel_size must be specified for " \
                                        "Pooling2D"
        assert 'type' in kwargs, "type must be specified for Pooling2D"
        kernel_size = kwargs['kernel_size']
        ptype = kwargs['type']
        padding = kwargs.get('padding', 0)
        stride = kwargs.get('stride', (1, 1))
        in_shape = self.in_shapes['default'].feature_shape
        assert ptype in ('max', 'avg')
        assert type(padding) is int and padding >= 0, \
            "Invalid padding: {}".format(padding)
        assert type(kernel_size) in [list, tuple] and \
            len(kernel_size) == 2, "Kernel size must be a list or tuple " \
                                   "of length 2: {}".format(kernel_size)
        assert type(stride) in [list, tuple] and len(stride) == 2, \
            "Stride must be list or tuple of length 2: {}".format(stride)
        assert stride[0] >= 0 and stride[1] >= 0, \
            "Invalid stride: {}".format(stride)
        assert isinstance(in_shape, tuple) and len(in_shape) == 3, \
            "PoolingLayer2D must have 3 dimensional input but input " \
            "shape was %s" % in_shape

        self.kernel_size = tuple(kernel_size)
        self.type = ptype
        self.padding = padding
        self.stride = tuple(stride)
        output_height = ((in_shape[0] + 2 * padding - kernel_size[0]) //
                         stride[0]) + 1
        output_width = ((in_shape[1] + 2 * padding - kernel_size[1]) //
                        stride[1]) + 1
        assert output_height > 0 and output_width > 0, \
            "Evaluated output height and width must be positive but were " \
            "({}, {})".format(output_height, output_width)
        output_shape = (output_height, output_width, in_shape[2])

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', *output_shape)

        internals = OrderedDict()
        if self.type == 'max':
            argmax_shape = outputs['default'].feature_shape
            internals['argmax'] = BufferStructure('T', 'B', *argmax_shape)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride,
                                       flat_argmax)
        elif self.type == 'avg':
            _h.avgpool2d_forward_batch(flat_inputs, self.kernel_size,
                                       flat_outputs, self.padding, self.stride)

    def backward_pass(self, buffers):

        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)
        flat_outputs = flatten_time(outputs)

        if self.type == 'max':
            argmax = buffers.internals.argmax
            flat_argmax = flatten_time(argmax)
            _h.maxpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride, flat_argmax,
                                        flat_in_deltas, flat_out_deltas)
        elif self.type == 'avg':
            _h.avgpool2d_backward_batch(flat_inputs, self.kernel_size,
                                        flat_outputs, self.padding,
                                        self.stride,
                                        flat_in_deltas, flat_out_deltas)
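The output spatial size computed in setup() follows the usual pooling formula; a tiny helper to illustrate it (the function name is made up for the demo):

def pool_output_size(in_size, kernel, stride, padding):
    # mirrors the formula used in setup() above
    return (in_size + 2 * padding - kernel) // stride + 1

assert pool_output_size(28, 2, 2, 0) == 14   # 28x28 map, 2x2 kernel, stride 2
assert pool_output_size(7, 3, 2, 1) == 4     # with padding 1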
Example #18
class BinomialCrossEntropyLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...'),
                       'targets': StructureTemplate('T', 'B', '...')}

    expected_kwargs = {}

    computes_no_input_deltas_for = ['targets']

    def setup(self, kwargs, in_shapes):
        if in_shapes['default'] != in_shapes['targets']:
            raise LayerValidationError("{}: default and targets must have the "
                                       "same shapes but got {} and {}"
                                       .format(self.name,
                                               in_shapes['default'],
                                               in_shapes['targets']))
        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', 1)

        feature_shape = in_shapes['default'].feature_shape
        internals = OrderedDict()
        internals['cee'] = BufferStructure('T', 'B', *feature_shape)
        internals['ceed'] = BufferStructure('T', 'B', *feature_shape,
                                            is_backward_only=True)

        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        y = buffers.inputs.default
        t = buffers.inputs.targets
        cee = buffers.internals.cee
        cee_sum = buffers.outputs.default

        # the binomial cross entropy error is given by
        # - t * ln(y) - (1-t) * ln(1-y)
        tmp = _h.ones(cee.shape)
        _h.subtract_tt(tmp, y, cee)     # cee = 1-y
        _h.subtract_tt(tmp, t, tmp)     # tmp  = 1-t
        _h.clip_t(cee, 1e-6, 1.0, cee)
        _h.log_t(cee, cee)              # cee = ln(1-y)
        _h.mult_tt(tmp, cee, tmp)  # tmp = (1-t) * ln(1-y)

        _h.clip_t(y, 1e-6, 1.0, cee)
        _h.log_t(cee, cee)              # cee = ln(y)
        _h.mult_tt(t, cee, cee)    # cee = t * ln(y)

        _h.add_tt(tmp, cee, cee)        # cee = (1-t) * ln(1-y) + t * ln(y)

        # reshape for summation
        cee = flatten_time_and_features(cee)
        cee_sum = flatten_time(cee_sum)
        _h.sum_t(cee, axis=1, out=cee_sum)
        _h.mult_st(-1, cee_sum, cee_sum)  # * -1

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        ceed_sum = buffers.output_deltas.default
        ceed = buffers.internals.ceed
        tmp = _h.allocate(ceed.shape)

        y = buffers.inputs.default
        t = buffers.inputs.targets

        yd = buffers.input_deltas.default

        # the derivative of the binomial cross entropy error is given by
        # (y - t) / (y - y²)

        _h.mult_tt(y, y, ceed)       # ceed = y²
        _h.subtract_tt(y, ceed, ceed)     # ceed = y - y²
        _h.clip_t(ceed, 1e-6, 1.0, ceed)  # clip

        _h.subtract_tt(y, t, tmp)         # tmp = y - t

        _h.divide_tt(tmp, ceed, ceed)     # ceed = (y - t) / (y - y²)

        # ceed_sum has only one feature dimension due to summation,
        # so we broadcast to all feature dimensions
        _h.broadcast_t(ceed_sum, 2, tmp)
        _h.mult_tt(ceed, tmp, ceed)

        _h.add_tt(ceed, yd, yd)
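The math in the two passes above, restated as a NumPy sketch (illustrative only): the per-element cross entropy is -(t*ln(y) + (1 - t)*ln(1 - y)), summed over features, and its derivative is (y - t) / (y - y**2), broadcast back over the feature dimensions.

import numpy as np

y = np.clip(np.random.rand(4, 2, 3), 1e-6, 1 - 1e-6)   # predictions in (0, 1)
t = (np.random.rand(4, 2, 3) > 0.5).astype(y.dtype)    # binary targets

cee = t * np.log(y) + (1 - t) * np.log(1 - y)
loss = -cee.sum(axis=-1, keepdims=True)                 # forward, shape (T, B, 1)

dloss = np.ones((4, 2, 1))                              # deltas on the summed loss
dy = dloss * (y - t) / np.clip(y - y * y, 1e-6, 1.0)    # backward, broadcast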
Example #19
def test_structure_template_matches4(shape, expected):
    st = StructureTemplate(1, 2, 7)
    struct = BufferStructure(*shape)
    assert st.matches(struct) == expected
Example #20
class HighwayRNNCoupledGatesLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {
        'size', 'activation', 'recurrence_depth', 'block_size', 'sizes_list'
    }

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'tanh')
        self.size = kwargs.get('size', self.in_shapes['default'].feature_size)
        self.recurrence_depth = kwargs.get('recurrence_depth', 1)
        if not isinstance(self.size, int):
            raise LayerValidationError('size must be int but was {}'.format(
                self.size))
        if not isinstance(self.recurrence_depth, int):
            raise LayerValidationError(
                'recurrence_depth must be int but was {}'.format(
                    self.recurrence_depth))
        in_size = self.in_shapes['default'].feature_size

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T',
                                             'B',
                                             self.size,
                                             context_size=1)
        parameters = OrderedDict()
        parameters['W_H'] = BufferStructure(self.size, in_size)
        parameters['W_T'] = BufferStructure(self.size, in_size)
        parameters['R_T'] = BufferStructure(self.recurrence_depth, self.size,
                                            self.size)
        parameters['bias_T'] = BufferStructure(self.recurrence_depth,
                                               self.size)
        parameters['R_H'] = (BufferStructure(self.recurrence_depth, self.size,
                                             self.size))
        parameters['bias_H'] = BufferStructure(self.recurrence_depth,
                                               self.size)

        internals = OrderedDict()
        for i in range(self.recurrence_depth):
            internals['H_{}'.format(i)] = BufferStructure('T',
                                                          'B',
                                                          self.size,
                                                          context_size=1)
            internals['T_{}'.format(i)] = BufferStructure('T',
                                                          'B',
                                                          self.size,
                                                          context_size=1)
            internals['Y_{}'.format(i)] = BufferStructure('T',
                                                          'B',
                                                          self.size,
                                                          context_size=1)
            internals['dH_{}'.format(i)] = BufferStructure(
                'T', 'B', self.size, context_size=1, is_backward_only=True)
            internals['dT_{}'.format(i)] = BufferStructure(
                'T', 'B', self.size, context_size=1, is_backward_only=True)
            internals['dY_{}'.format(i)] = BufferStructure(
                'T', 'B', self.size, context_size=1, is_backward_only=True)

        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W_H, W_T, R_T, bias_T, R_H, bias_H = buffers.parameters

        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        H_list = []
        T_list = []
        Y_list = []

        for i in range(self.recurrence_depth):
            H_list.append(buffers.internals['H_{}'.format(i)])
            T_list.append(buffers.internals['T_{}'.format(i)])
            Y_list.append(buffers.internals['Y_{}'.format(i)])

        flat_inputs = flatten_time_and_features(inputs)

        flat_H = flatten_time(H_list[0][:-1])
        flat_T = flatten_time(T_list[0][:-1])

        _h.dot_mm(flat_inputs, W_H, flat_H, transb=True)
        _h.dot_mm(flat_inputs, W_T, flat_T, transb=True)

        for t in range(inputs.shape[0]):
            for i in range(self.recurrence_depth):
                if i == 0:
                    x = outputs[t - 1]
                    _h.dot_add_mm(x, R_T[i], T_list[i][t], transb=True)
                    _h.add_mv(T_list[i][t], bias_T[i].reshape((1, self.size)),
                              T_list[i][t])
                    _h.inplace_act_func['sigmoid'](T_list[i][t])
                    _h.dot_add_mm(x, R_H[i], H_list[i][t], transb=True)
                    _h.add_mv(H_list[i][t], bias_H[i].reshape((1, self.size)),
                              H_list[i][t])
                    _h.inplace_act_func[self.activation](H_list[i][t])
                else:
                    x = Y_list[i - 1][t]
                    _h.dot_mm(x, R_T[i], T_list[i][t], transb=True)
                    _h.add_mv(T_list[i][t], bias_T[i].reshape((1, self.size)),
                              T_list[i][t])
                    _h.inplace_act_func['sigmoid'](T_list[i][t])
                    _h.dot_mm(x, R_H[i], H_list[i][t], transb=True)
                    _h.add_mv(H_list[i][t], bias_H[i].reshape((1, self.size)),
                              H_list[i][t])
                    _h.inplace_act_func[self.activation](H_list[i][t])

                if i == 0:
                    _h.mult_tt(T_list[i][t], H_list[i][t], out=Y_list[i][t])
                    tmp = _h.ones(H_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_add_tt(tmp, outputs[t - 1], out=Y_list[i][t])
                else:
                    _h.mult_tt(T_list[i][t], H_list[i][t], out=Y_list[i][t])
                    tmp = _h.ones(H_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_add_tt(tmp, Y_list[i - 1][t], out=Y_list[i][t])
            _h.copy_to(Y_list[self.recurrence_depth - 1][t], outputs[t])

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler

        W_H, W_T, R_T, bias_T, R_H, bias_H = buffers.parameters
        dW_H, dW_T, dR_T, dbias_T, dR_H, dbias_H = buffers.gradients

        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default

        H_list = []
        T_list = []
        Y_list = []
        dH_list = []
        dT_list = []
        dY_list = []

        for i in range(self.recurrence_depth):
            H_list.append(buffers.internals['H_{}'.format(i)])
            T_list.append(buffers.internals['T_{}'.format(i)])
            Y_list.append(buffers.internals['Y_{}'.format(i)])
            dH_list.append(buffers.internals['dH_{}'.format(i)])
            dT_list.append(buffers.internals['dT_{}'.format(i)])
            dY_list.append(buffers.internals['dY_{}'.format(i)])

        t = inputs.shape[0] - 1
        _h.copy_to(doutputs[t], dY_list[self.recurrence_depth - 1][t])

        for i in range(self.recurrence_depth - 1, -1, -1):
            if i == 0:
                _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                tmp = _h.ones(dH_list[i][t].shape)
                _h.subtract_tt(H_list[i][t], outputs[t - 1], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                     dT_list[i][t])
                _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                           dH_list[i][t])
            else:
                _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                tmp = _h.ones(dH_list[i][t].shape)
                _h.subtract_tt(tmp, T_list[i][t], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dY_list[i - 1][t])

                _h.subtract_tt(H_list[i][t], Y_list[i - 1][t], tmp)
                _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                     dT_list[i][t])
                _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                           dH_list[i][t])
                _h.dot_add_mm(dT_list[i][t], R_T[i], dY_list[i - 1][t])
                _h.dot_add_mm(dH_list[i][t], R_H[i], dY_list[i - 1][t])

        for t in range(inputs.shape[0] - 2, -1, -1):
            _h.dot_add_mm(dT_list[0][t + 1], R_T[0], doutputs[t])
            _h.dot_add_mm(dH_list[0][t + 1], R_H[0], doutputs[t])
            tmp = _h.ones(dH_list[0][t + 1].shape)
            _h.subtract_tt(tmp, T_list[0][t + 1], tmp)
            _h.mult_add_tt(dY_list[0][t + 1], tmp, doutputs[t])
            _h.copy_to(doutputs[t], dY_list[self.recurrence_depth - 1][t])

            for i in range(self.recurrence_depth - 1, -1, -1):
                if i == 0:
                    _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                    tmp = _h.ones(dH_list[i][t].shape)
                    _h.subtract_tt(H_list[i][t], outputs[t - 1], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                    _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                         dT_list[i][t])
                    _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                               dH_list[i][t])
                else:
                    _h.mult_tt(dY_list[i][t], T_list[i][t], dH_list[i][t])
                    tmp = _h.ones(dH_list[i][t].shape)
                    _h.subtract_tt(tmp, T_list[i][t], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dY_list[i - 1][t])

                    _h.subtract_tt(H_list[i][t], Y_list[i - 1][t], tmp)
                    _h.mult_tt(dY_list[i][t], tmp, dT_list[i][t])

                    _h.inplace_act_func_deriv['sigmoid'](T_list[i][t],
                                                         dT_list[i][t])
                    _h.inplace_act_func_deriv[self.activation](H_list[i][t],
                                                               dH_list[i][t])
                    _h.dot_add_mm(dT_list[i][t], R_T[i], dY_list[i - 1][t])
                    _h.dot_add_mm(dH_list[i][t], R_H[i], dY_list[i - 1][t])

        flat_inputs = flatten_time_and_features(inputs)
        flat_dinputs = flatten_time_and_features(dinputs)
        flat_dH = flatten_time(dH_list[0][:-1])
        flat_dT = flatten_time(dT_list[0][:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dH, W_H, flat_dinputs)
        _h.dot_add_mm(flat_dH, flat_inputs, dW_H, transa=True)
        _h.dot_add_mm(flat_dT, W_T, flat_dinputs)
        _h.dot_add_mm(flat_dT, flat_inputs, dW_T, transa=True)

        for i in range(self.recurrence_depth):
            dbias_tmp = _h.allocate(dbias_H[i].shape)
            flat_dH = flatten_time(dH_list[i][:-1])
            flat_dT = flatten_time(dT_list[i][:-1])
            _h.sum_t(flat_dT, axis=0, out=dbias_tmp)
            _h.add_tt(dbias_T[i], dbias_tmp, dbias_T[i])
            _h.sum_t(flat_dH, axis=0, out=dbias_tmp)
            _h.add_tt(dbias_H[i], dbias_tmp, dbias_H[i])

        for i in range(self.recurrence_depth):
            if i == 0:
                flat_outputs = flatten_time(outputs[:-2])
                flat_dH = flatten_time(dH_list[i][1:-1])
                flat_dT = flatten_time(dT_list[i][1:-1])
                _h.dot_add_mm(flat_dT, flat_outputs, dR_T[i], transa=True)
                _h.dot_add_mm(dT_list[i][0], outputs[-1], dR_T[i], transa=True)

                _h.dot_add_mm(flat_dH, flat_outputs, dR_H[i], transa=True)
                _h.dot_add_mm(dH_list[i][0], outputs[-1], dR_H[i], transa=True)
            else:
                flat_outputs = flatten_time(Y_list[i - 1][:-1])
                flat_dH = flatten_time(dH_list[i][:-1])
                flat_dT = flatten_time(dT_list[i][:-1])
                _h.dot_add_mm(flat_dT, flat_outputs, dR_T[i], transa=True)
                _h.dot_add_mm(flat_dH, flat_outputs, dR_H[i], transa=True)
Example #21
def test_structure_template_matches4(shape, expected):
    st = StructureTemplate(1, 2, 7)
    struct = BufferStructure(*shape)
    assert st.matches(struct) == expected
Example #22
class BatchNormLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'decay', 'epsilon'}

    def setup(self, kwargs, in_shapes):
        self.epsilon = kwargs.get('epsilon', 1.0e-5)
        self.decay = kwargs.get('decay', 0.9)
        assert 0.0 <= self.decay <= 1.0, "Decay must be between 0 and 1."

        outputs = OrderedDict()
        outputs['default'] = in_shapes['default']

        parameters = OrderedDict()
        buf = BufferStructure(self.in_shapes['default'].feature_shape[-1])
        parameters['gamma'] = buf
        parameters['beta'] = buf
        parameters['mu'] = buf
        parameters['sigma'] = buf

        internals = OrderedDict()
        internals['sigma_b'] = buf
        internals['centered'] = self.in_shapes['default']
        internals['x_hat'] = self.in_shapes['default']

        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler
        sigma_b, centered, x_hat = buffers.internals
        gamma, beta, mu, sigma = buffers.parameters
        # Note: all buffers are flattened to 2D here, so we skip the flat_ prefix
        inputs = flatten_all_but_last(buffers.inputs.default)
        centered = flatten_all_but_last(centered)
        x_hat = flatten_all_but_last(x_hat)
        out = flatten_all_but_last(buffers.outputs.default)
        m = inputs.shape[0]

        if training_pass:
            mu_b = sigma_b  # temporarily reuse this buffer under another name
            # Calculate the (negative) batch mean
            _h.sum_t(inputs, 0, mu_b)
            _h.mult_st(-1.0 / m, mu_b, mu_b)

            # Adjust mu as an exponential moving average
            # TODO: Find better way
            _h.mult_st(self.decay, mu, mu)
            _h.mult_add_st(1.0 - self.decay, mu_b, mu)

            mu = mu_b

        # Calculate the centered activations
        _h.add_mv(inputs, mu.reshape((1, mu.size)), centered)

        if training_pass:
            sigma2 = sigma_b  # temporarily reuse this buffer under another name
            centered2 = x_hat  # temporarily reuse this buffer under another name
            # Calculate the variance
            _h.mult_tt(centered, centered, centered2)
            _h.sum_t(centered2, 0, sigma2)
            _h.mult_st(1.0 / m, sigma2, sigma2)  # TODO m-1 instead?
            _h.add_st(self.epsilon, sigma2, sigma2)  # (numerically stabilized)

            # Standard deviation
            _h.sqrt_t(sigma2, sigma_b)

            # Adjust sigma as an exponential moving average
            # FIXME: This is clearly a hack and wrong
            _h.mult_st(self.decay, sigma, sigma)
            _h.mult_add_st(1.0 - self.decay, sigma_b, sigma)

            sigma = sigma_b

        # compute normalized inputs
        _h.divide_mv(centered, sigma.reshape((1, sigma.size)), x_hat)

        # Compute outputs
        _h.mult_mv(x_hat, gamma.reshape((1, gamma.size)), out)
        _h.add_mv(out, beta.reshape((1, beta.size)), out)

    def backward_pass(self, buffers):
        _h = self.handler
        sigma_b, centered, x_hat = buffers.internals
        gamma = buffers.parameters.gamma
        dgamma = buffers.gradients.gamma
        dbeta = buffers.gradients.beta
        # Note: all buffers are flattened to 2D here, so we skip the flat_ prefix
        x_hat = flatten_all_but_last(x_hat)
        outdeltas = flatten_all_but_last(buffers.output_deltas.default)
        indeltas = flatten_all_but_last(buffers.input_deltas.default)
        m = outdeltas.shape[0]

        big_tmp = _h.allocate(x_hat.shape)  # big
        small_tmp = _h.allocate(gamma.shape)  # small

        # ------------- Gradients ---------------
        # Calculate dgamma
        tmp = big_tmp
        dgamma_tmp = small_tmp
        _h.mult_tt(outdeltas, x_hat, tmp)
        _h.sum_t(tmp, axis=0, out=dgamma_tmp)
        _h.add_tt(dgamma_tmp, dgamma, dgamma)

        _h.mult_st(1 / m, dgamma_tmp, dgamma_tmp)
        term1 = big_tmp
        _h.mult_mv(x_hat, dgamma_tmp.reshape((1, gamma.size)), term1)

        # Calculate dbeta
        dbeta_tmp = small_tmp
        _h.sum_t(outdeltas, axis=0, out=dbeta_tmp)
        _h.add_tt(dbeta_tmp, dbeta, dbeta)
        _h.mult_st(1 / m, dbeta_tmp, dbeta_tmp)

        # ------------- Deltas ---------------
        term2 = big_tmp
        term3 = big_tmp
        _h.subtract_tt(outdeltas, term1, term2)
        _h.subtract_mv(term2, dbeta_tmp.reshape((1, dbeta.size)), term3)

        # get normalization factor (gamma / sigma_b)
        coeff = small_tmp
        _h.divide_tt(gamma, sigma_b, coeff)

        term4 = big_tmp
        _h.mult_mv(term3, coeff.reshape((1, coeff.size)), term4)
        _h.add_tt(term4, indeltas, indeltas)
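
For reference, the forward pass above is standard batch normalization over the flattened (time * batch) axis. Below is a minimal NumPy sketch of the same arithmetic, ignoring the running-average bookkeeping and the handler API; all names are illustrative.

import numpy as np

def batch_norm_forward(x, gamma, beta, epsilon=1e-5):
    # x: flattened activations of shape (T * B, features)
    mu = x.mean(axis=0)                    # per-feature batch mean
    var = x.var(axis=0) + epsilon          # stabilized batch variance
    x_hat = (x - mu) / np.sqrt(var)        # normalized activations
    return gamma * x_hat + beta            # scale and shift

x = np.random.randn(8, 4)
out = batch_norm_forward(x, np.ones(4), np.zeros(4))
print(out.mean(axis=0), out.std(axis=0))   # roughly 0 and 1 per feature
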
Example #23
class RecurrentLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {'size', 'activation'}

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'tanh')
        self.size = kwargs.get('size', self.in_shapes['default'].feature_size)
        if not isinstance(self.size, int):
            raise LayerValidationError('size must be int but was {}'.format(
                self.size))

        in_size = self.in_shapes['default'].feature_size

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T',
                                             'B',
                                             self.size,
                                             context_size=1)
        parameters = OrderedDict()
        parameters['W'] = BufferStructure(self.size, in_size)
        parameters['R'] = BufferStructure(self.size, self.size)
        parameters['bias'] = BufferStructure(self.size)

        internals = OrderedDict()
        internals['Ha'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['dHa'] = BufferStructure('T',
                                           'B',
                                           self.size,
                                           context_size=1,
                                           is_backward_only=True)
        internals['dHb'] = BufferStructure('T',
                                           'B',
                                           self.size,
                                           context_size=1,
                                           is_backward_only=True)
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, R, bias = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        Ha = buffers.internals.Ha

        flat_inputs = flatten_time_and_features(inputs)
        flat_H = flatten_time(Ha[:-1])

        _h.dot_mm(flat_inputs, W, flat_H, transb=True)
        _h.add_mv(flat_H, bias.reshape((1, self.size)), flat_H)

        for t in range(inputs.shape[0]):
            _h.dot_add_mm(outputs[t - 1], R, Ha[t], transb=True)
            _h.act_func[self.activation](Ha[t], outputs[t])

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, R, bias = buffers.parameters
        dW, dR, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        dinputs = buffers.input_deltas.default
        doutputs = buffers.output_deltas.default
        Ha, dHa, dHb = buffers.internals

        _h.copy_to(doutputs, dHb)
        T = inputs.shape[0] - 1
        _h.act_func_deriv[self.activation](Ha[T], outputs[T], dHb[T], dHa[T])
        for t in range(T - 1, -1, -1):
            _h.dot_add_mm(dHa[t + 1], R, dHb[t])
            _h.act_func_deriv[self.activation](Ha[t], outputs[t], dHb[t],
                                               dHa[t])

        flat_inputs = flatten_time_and_features(inputs)
        flat_dinputs = flatten_time_and_features(dinputs)
        flat_dHa = flatten_time(dHa[:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dHa, W, flat_dinputs)
        _h.dot_add_mm(flat_dHa, flat_inputs, dW, transa=True)
        dbias_tmp = _h.allocate(dbias.shape)
        _h.sum_t(flat_dHa, axis=0, out=dbias_tmp)
        _h.add_tt(dbias, dbias_tmp, dbias)

        flat_outputs = flatten_time(outputs[:-2])
        flat_dHa = flatten_time(dHa[1:-1])
        _h.dot_add_mm(flat_dHa, flat_outputs, dR, transa=True)
        _h.dot_add_mm(dHa[0], outputs[-1], dR, transa=True)
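
The forward recursion above computes Ha[t] = x[t] W^T + y[t-1] R^T + bias followed by the activation function. A minimal NumPy sketch of that recursion (illustrative names, zero initial state instead of the context slot):

import numpy as np

def rnn_forward(x, W, R, bias, activation=np.tanh):
    # x: (time, batch, in_size); W: (size, in_size); R: (size, size)
    time_size, batch_size, _ = x.shape
    size = W.shape[0]
    y = np.zeros((time_size, batch_size, size))
    h_prev = np.zeros((batch_size, size))      # initial state
    for t in range(time_size):
        Ha = x[t] @ W.T + h_prev @ R.T + bias
        y[t] = activation(Ha)
        h_prev = y[t]
    return y

x = np.random.randn(5, 2, 3)
y = rnn_forward(x, 0.1 * np.random.randn(4, 3),
                0.1 * np.random.randn(4, 4), np.zeros(4))
print(y.shape)                                  # (5, 2, 4)
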
class SoftmaxCELayerImpl(Layer):

    expected_inputs = {
        'default': StructureTemplate('T', 'B', '...'),
        'targets': StructureTemplate('T', 'B', '...')
    }

    computes_no_input_deltas_for = ['targets']
    takes_no_output_deltas_from = ['probabilities']

    def setup(self, kwargs, in_shapes):
        in_shape = in_shapes['default'].feature_shape
        tar_shape = in_shapes['targets'].feature_shape

        if len(tar_shape) != len(in_shape):
            raise LayerValidationError('Default input and targets must have '
                                       'the same number of dimensions.')
        if tar_shape[:-1] != in_shape[:-1]:
            raise LayerValidationError('All dimensions except last must match '
                                       'for default input and targets.')
        if tar_shape[-1] != 1:
            raise LayerValidationError('Last dimension of targets must be '
                                       'size 1.')

        outputs = OrderedDict()
        outputs['probabilities'] = BufferStructure('T', 'B', *in_shape)
        outputs['loss'] = BufferStructure('T', 'B', *tar_shape)

        internals = OrderedDict()
        internals['t_bin'] = BufferStructure('T',
                                             'B',
                                             *in_shape,
                                             is_backward_only=True)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        targets = buffers.inputs.targets
        probabilities = buffers.outputs.probabilities
        loss = buffers.outputs.loss

        # reshape
        flat_inputs = flatten_all_but_last(inputs)
        flat_probs = flatten_all_but_last(probabilities)
        flat_loss = flatten_all_but_last(loss)
        flat_targets = flatten_all_but_last(targets)

        # softmax
        _h.softmax_m(flat_inputs, flat_probs)

        # the multinomial cross entropy error is given by
        # - sum over i: p_i * ln(y_i)
        # now our targets are indices so all p_i = 0 except for i=t
        _h.fill(loss, 0.)
        _h.index_m_by_v(flat_probs, flat_targets, flat_loss)
        _h.clip_t(flat_loss, 1e-6, 1.0, flat_loss)
        _h.log_t(loss, loss)
        _h.mult_st(-1, loss, loss)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        targets = buffers.inputs.targets
        probs = buffers.outputs.probabilities

        dinputs = buffers.input_deltas.default
        dloss = buffers.output_deltas.loss
        t_bin = buffers.internals.t_bin

        # reshape
        flat_probs = flatten_all_but_last(probs)
        flat_targets = flatten_all_but_last(targets)
        flat_t_bin = flatten_all_but_last(t_bin)
        flat_dloss = flatten_all_but_last(dloss)
        flat_dinputs = flatten_all_but_last(dinputs)

        # derivative of multinomial cross-entropy error wrt softmax:
        # y - t
        _h.binarize_v(flat_targets, flat_t_bin)
        _h.mult_st(-1, flat_t_bin, flat_t_bin)
        _h.add_tt(flat_t_bin, flat_probs, flat_t_bin)
        _h.mult_mv(flat_t_bin, flat_dloss, flat_t_bin)
        _h.add_tt(flat_t_bin, flat_dinputs, flat_dinputs)
def test_structure_template_matches3(shape, expected):
    st = StructureTemplate('T', 'B', '...')
    struct = BufferStructure(*shape)
    assert st.matches(struct) == expected
class SoftmaxFiddleLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...'),
                       'targets': StructureTemplate('T', 'B', '...')}

    computes_no_input_deltas_for = ['targets']
    takes_no_output_deltas_from = ['predictions']

    def setup(self, kwargs, in_shapes):
        in_shape = in_shapes['default'].feature_shape
        tar_shape = in_shapes['targets'].feature_shape

        if len(tar_shape) != len(in_shape):
            raise LayerValidationError('Default input and targets must have '
                                       'the same number of dimensions.')
        if tar_shape != in_shape:
            raise LayerValidationError('All dimensions must match '
                                       'for default input and targets.')

        outputs = OrderedDict()
        outputs['predictions'] = BufferStructure('T', 'B', *in_shape)
        outputs['loss'] = BufferStructure('T', 'B', *in_shape)

        internals = OrderedDict()
        internals['dcee'] = BufferStructure('T', 'B', *in_shape,
                                            is_backward_only=True)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs = buffers.inputs.default
        targets = buffers.inputs.targets
        predictions = buffers.outputs.predictions
        loss = buffers.outputs.loss

        # reshape
        flat_inputs = flatten_all_but_last(inputs)
        flat_probs = flatten_all_but_last(predictions)
        flat_loss = flatten_all_but_last(loss)
        flat_targets = flatten_all_but_last(targets)

        # softmax
        _h.softmax_m(flat_inputs, flat_probs)

        # the multinomial cross entropy error is given by
        # - sum over i: p_i * ln(y_i)
        _h.copy_to(flat_probs, flat_loss)
        _h.clip_t(flat_loss, 1e-6, 1.0, flat_loss)
        _h.log_t(flat_loss, flat_loss)
        _h.mult_tt(flat_loss, flat_targets, flat_loss)
        _h.mult_st(-1, loss, loss)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        targets = flatten_time_and_features(buffers.inputs.targets)
        probs = flatten_time_and_features(buffers.outputs.predictions)

        dinputs = flatten_time_and_features(buffers.input_deltas.default)
        dloss = flatten_time_and_features(buffers.output_deltas.loss)

        dcee = flatten_time_and_features(buffers.internals.dcee)

        # derivative of multinomial cross-entropy error wrt softmax:
        # y - t

        _h.subtract_tt(probs, targets, dcee)  # y - t
        _h.mult_mv(dcee, dloss, dcee)  # out_delta * (y - t)
        _h.add_tt(dcee, dinputs, dinputs)
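
Unlike the previous softmax layer, the targets here are full distributions rather than class indices, so the loss is the elementwise product -t * ln(y) and the delta is again y - t. A minimal NumPy sketch under that assumption (illustrative names):

import numpy as np

def softmax_ce_dist(inputs, targets):
    # targets: probability distributions with the same shape as inputs
    e = np.exp(inputs - inputs.max(axis=1, keepdims=True))
    probs = e / e.sum(axis=1, keepdims=True)
    loss = -targets * np.log(np.clip(probs, 1e-6, 1.0))
    grad = probs - targets                            # y - t
    return probs, loss, grad

targets = np.array([[0.7, 0.2, 0.1], [0.1, 0.1, 0.8]])
probs, loss, grad = softmax_ce_dist(np.random.randn(2, 3), targets)
print(loss.sum(axis=1), grad.shape)
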
Example #27
def test_structure_template_matches1(shape, expected):
    st = StructureTemplate('T', 'B', 1, 3)
    assert st.matches(BufferStructure(*shape)) == expected
Example #28
def test_structure_template_matches3(shape, expected):
    st = StructureTemplate('T', 'B', '...')
    struct = BufferStructure(*shape)
    assert st.matches(struct) == expected
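
These test functions take (shape, expected) pairs supplied by a parametrization that is not shown in these snippets. A plausible way to drive them with pytest, assuming the StructureTemplate and BufferStructure imports used throughout; the listed cases are illustrative guesses, not the library's original test data:

import pytest

@pytest.mark.parametrize('shape, expected', [
    (('T', 'B', 5), True),       # any feature shape should match '...'
    (('T', 'B', 2, 3), True),
    ((4, 5), False),             # no time/batch dimensions
])
def test_structure_template_matches3(shape, expected):
    st = StructureTemplate('T', 'B', '...')
    struct = BufferStructure(*shape)
    assert st.matches(struct) == expected
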
Example #29
class SquaredDifferenceLayerImpl(Layer):

    expected_inputs = {
        'inputs_1': StructureTemplate('T', 'B', '...'),
        'inputs_2': StructureTemplate('T', 'B', '...')
    }
    expected_kwargs = {}

    def setup(self, kwargs, in_shapes):
        # 'inputs_1' and 'inputs_2' must have same shape
        f_size1 = in_shapes['inputs_1'].feature_size
        f_size2 = in_shapes['inputs_2'].feature_size
        if f_size1 != f_size2:
            raise LayerValidationError(
                "{}: inputs_1 and inputs_2 must have same feature sizes but "
                "got {} and {}".format(self.name,
                                       in_shapes['inputs_1'].feature_shape,
                                       in_shapes['inputs_2'].feature_shape))

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', 1)

        internals = OrderedDict()
        feature_size = self.in_shapes['inputs_1'].feature_size
        internals['squared_diff'] = BufferStructure('T', 'B', feature_size)
        internals['grad_diff'] = BufferStructure('T',
                                                 'B',
                                                 feature_size,
                                                 is_backward_only=True)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        diff = flatten_time_and_features(buffers.internals.squared_diff)
        diff_sum = flatten_time(buffers.outputs.default)

        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=diff)
        _h.mult_tt(diff, diff, out=diff)
        _h.sum_t(diff, axis=1, out=diff_sum)
        _h.mult_st(0.5, diff_sum, out=diff_sum)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        inputs_1 = flatten_time_and_features(buffers.inputs.inputs_1)
        inputs_2 = flatten_time_and_features(buffers.inputs.inputs_2)
        out_deltas = buffers.output_deltas.default
        grad_diff = buffers.internals.grad_diff
        dinputs_1 = flatten_time_and_features(buffers.input_deltas.inputs_1)
        dinputs_2 = flatten_time_and_features(buffers.input_deltas.inputs_2)

        tmp = _h.allocate(inputs_2.shape)
        # out_deltas has only one feature dimension due to summation,
        # so we broadcast to all feature dimensions
        _h.broadcast_t(out_deltas, 2, grad_diff)

        grad_diff = flatten_time(grad_diff)
        # calculate
        _h.subtract_tt(inputs_1, inputs_2, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_1)

        _h.subtract_tt(inputs_2, inputs_1, out=tmp)
        _h.mult_add_tt(grad_diff, tmp, dinputs_2)
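
The layer above computes 0.5 * sum((x1 - x2)^2) over the feature dimension for every (time, batch) entry, and its input deltas are the broadcast output delta times (x1 - x2) and (x2 - x1) respectively. A minimal NumPy sketch of that arithmetic (illustrative names):

import numpy as np

def squared_difference(x1, x2):
    # x1, x2: flattened activations of shape (T * B, features)
    diff = x1 - x2
    out = 0.5 * (diff * diff).sum(axis=1, keepdims=True)  # one value per row
    d_x1 = diff        # still to be scaled by the incoming output delta
    d_x2 = -diff
    return out, d_x1, d_x2

x1 = np.random.randn(6, 3)
x2 = np.random.randn(6, 3)
out, d1, d2 = squared_difference(x1, x2)
print(out.shape)       # (6, 1)
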
Example #30
class ClockworkLstmLayerImpl(Layer):
    expected_kwargs = {'size', 'activation'}
    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}

    computes_no_gradients_for = ['timing']

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'tanh')
        self.size = kwargs.get('size', in_shapes['default'].feature_size)

        if not isinstance(self.size, int):
            raise LayerValidationError('size must be int but was {}'.
                                       format(self.size))

        in_size = in_shapes['default'].feature_size

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', self.size,
                                             context_size=1)

        parameters = OrderedDict()
        parameters['Wz'] = BufferStructure(self.size, in_size)
        parameters['Wi'] = BufferStructure(self.size, in_size)
        parameters['Wf'] = BufferStructure(self.size, in_size)
        parameters['Wo'] = BufferStructure(self.size, in_size)

        parameters['pi'] = BufferStructure(1, self.size)
        parameters['pf'] = BufferStructure(1, self.size)
        parameters['po'] = BufferStructure(1, self.size)

        parameters['Rz'] = BufferStructure(self.size, self.size)
        parameters['Ri'] = BufferStructure(self.size, self.size)
        parameters['Rf'] = BufferStructure(self.size, self.size)
        parameters['Ro'] = BufferStructure(self.size, self.size)

        parameters['bz'] = BufferStructure(self.size)
        parameters['bi'] = BufferStructure(self.size)
        parameters['bf'] = BufferStructure(self.size)
        parameters['bo'] = BufferStructure(self.size)

        parameters['timing'] = BufferStructure(self.size)

        internals = OrderedDict()
        internals['Za'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Zb'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ia'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ib'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Fa'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Fb'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Oa'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ob'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Ca'] = BufferStructure('T', 'B', self.size, context_size=1)
        internals['Cb'] = BufferStructure('T', 'B', self.size, context_size=1)

        internals['dZa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dZb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dIa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dIb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dFa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dFb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dOa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dOb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dCa'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)
        internals['dCb'] = BufferStructure('T', 'B', self.size, context_size=1,
                                           is_backward_only=True)

        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo,
         timing) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals
        x = buffers.inputs.default
        y = buffers.outputs.default
        time_size, batch_size = x.shape[0], x.shape[1]

        # Temporary variable to be filled with the current value of time t
        tmp = _h.zeros(timing.shape)
        cond = _h.zeros(y[0].shape)

        flat_x = flatten_time_and_features(x)
        flat_Za = flatten_time(Za[:-1])
        flat_Ia = flatten_time(Ia[:-1])
        flat_Fa = flatten_time(Fa[:-1])
        flat_Oa = flatten_time(Oa[:-1])
        _h.dot_mm(flat_x, Wz, flat_Za, transb=True)
        _h.dot_mm(flat_x, Wi, flat_Ia, transb=True)
        _h.dot_mm(flat_x, Wf, flat_Fa, transb=True)
        _h.dot_mm(flat_x, Wo, flat_Oa, transb=True)

        for t in range(time_size):

            # Block input
            _h.dot_add_mm(y[t - 1], Rz, Za[t], transb=True)
            _h.add_mv(Za[t], bz.reshape((1, self.size)), Za[t])
            _h.act_func[self.activation](Za[t], Zb[t])

            # Input Gate
            _h.dot_add_mm(y[t - 1], Ri, Ia[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pi, Ia[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Ia[t], bi.reshape((1, self.size)), Ia[t])
            _h.sigmoid(Ia[t], Ib[t])

            # Forget Gate
            _h.dot_add_mm(y[t - 1], Rf, Fa[t], transb=True)
            _h.mult_add_mv(Ca[t - 1], pf, Fa[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Fa[t], bf.reshape((1, self.size)), Fa[t])
            _h.sigmoid(Fa[t], Fb[t])

            # Cell
            _h.mult_tt(Ib[t], Zb[t], Ca[t])
            _h.mult_add_tt(Fb[t], Ca[t - 1], Ca[t])

            # Output Gate
            _h.dot_add_mm(y[t - 1], Ro, Oa[t], transb=True)
            _h.mult_add_mv(Ca[t], po, Oa[t])  # ADDED PEEPHOLE CONNECTION
            _h.add_mv(Oa[t], bo.reshape((1, self.size)), Oa[t])
            _h.sigmoid(Oa[t], Ob[t])

            # Block output
            _h.act_func[self.activation](Ca[t], Cb[t])
            _h.mult_tt(Ob[t], Cb[t], y[t])

            if t > 0:
                _h.fill(tmp, t)
                _h.modulo_tt(tmp, timing, tmp)
                _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)

                # Reset Cell
                _h.copy_to_if(Ca[t-1], Ca[t], cond)
                # Reset Block output
                _h.copy_to_if(y[t-1], y[t], cond)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler

        (dWz, dWi, dWf, dWo,
         dpi, dpf, dpo,
         dRz, dRi, dRf, dRo,
         dbz, dbi, dbf, dbo,
         dtiming) = buffers.gradients

        (Wz, Wi, Wf, Wo,
         pi, pf, po,
         Rz, Ri, Rf, Ro,
         bz, bi, bf, bo,
         timing) = buffers.parameters

        (Za, Zb, Ia, Ib, Fa, Fb, Oa, Ob, Ca, Cb,
         dZa, dZb, dIa, dIb, dFa, dFb, dOa, dOb, dCa, dCb) = buffers.internals

        x = buffers.inputs.default
        dx = buffers.input_deltas.default
        y = buffers.outputs.default
        deltas = buffers.output_deltas.default

        dy = _h.allocate(y.shape)

        time_size, batch_size = x.shape[0], x.shape[1]

        # Temporary variable to be filled with the current value of time t
        tmp = _h.zeros(timing.shape)

        _h.fill(dCa, 0.0)
        cond = _h.zeros(y[0].shape)

        for t in range(time_size - 1, -1, - 1):
            # Accumulate recurrent deltas
            _h.add_tt(dy[t], deltas[t], dy[t])
            _h.fill(tmp, t)
            _h.modulo_tt(tmp, timing, tmp)
            _h.broadcast_t(tmp.reshape((1, tmp.shape[0])), 0, cond)

            _h.dot_add_mm(dIa[t + 1], Ri, dy[t])
            _h.dot_add_mm(dFa[t + 1], Rf, dy[t])
            _h.dot_add_mm(dOa[t + 1], Ro, dy[t])
            _h.dot_add_mm(dZa[t + 1], Rz, dy[t])

            _h.mult_add_mv(dIa[t + 1], pi, dCa[t])
            _h.mult_add_mv(dFa[t + 1], pf, dCa[t])

            # Output Gate
            _h.mult_tt(dy[t], Cb[t], dOb[t])
            _h.fill_if(dOb[t], 0, cond)  # Set inactive to 0
            _h.sigmoid_deriv(Oa[t], Ob[t], dOb[t], dOa[t])
            # Output influence on peephole:
            _h.mult_add_mv(dOa[t], po, dCa[t])

            # Cell
            _h.mult_tt(dy[t], Ob[t], dCb[t])
            _h.act_func_deriv[self.activation](Ca[t], Cb[t], dCb[t], dCb[t])
            _h.fill_if(dCb[t], 0, cond)
            _h.add_tt(dCa[t], dCb[t], dCa[t])
            _h.mult_add_tt(dCa[t + 1], Fb[t + 1], dCa[t])

            # Forget Gate
            _h.mult_tt(dCa[t], Ca[t - 1], dFb[t])
            _h.sigmoid_deriv(Fa[t], Fb[t], dFb[t], dFa[t])

            # Input Gate
            _h.mult_tt(dCa[t], Zb[t], dIb[t])
            _h.sigmoid_deriv(Ia[t], Ib[t], dIb[t], dIa[t])

            # Block Input
            _h.mult_tt(dCa[t], Ib[t], dZb[t])
            _h.act_func_deriv[self.activation](Za[t], Zb[t], dZb[t], dZa[t])

            # Copy over the error from previous inactive nodes
            _h.add_into_if(dy[t], dy[t-1], cond)
            _h.add_into_if(dCa[t], dCa[t-1], cond)

            # Undo updates to inactive nodes:
            _h.fill_if(dIa[t], 0, cond)
            _h.fill_if(dFa[t], 0, cond)
            _h.fill_if(dZa[t], 0, cond)
            _h.fill_if(Fb[t], 0, cond)

        # Same as for standard RNN:
        flat_inputs = flatten_time_and_features(x)
        flat_dinputs = flatten_time_and_features(dx)

        flat_dIa = flatten_time(dIa[:-1])
        flat_dFa = flatten_time(dFa[:-1])
        flat_dOa = flatten_time(dOa[:-1])
        flat_dZa = flatten_time(dZa[:-1])

        # calculate in_deltas and gradients
        _h.dot_add_mm(flat_dIa, Wi, flat_dinputs)
        _h.dot_add_mm(flat_dFa, Wf, flat_dinputs)
        _h.dot_add_mm(flat_dOa, Wo, flat_dinputs)
        _h.dot_add_mm(flat_dZa, Wz, flat_dinputs)

        _h.dot_add_mm(flat_dIa, flat_inputs, dWi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_inputs, dWf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_inputs, dWo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_inputs, dWz, transa=True)

        dbias_tmp = _h.allocate(dbz.shape)
        _h.sum_t(flat_dIa, axis=0, out=dbias_tmp)
        _h.add_tt(dbi, dbias_tmp, dbi)
        _h.sum_t(flat_dFa, axis=0, out=dbias_tmp)
        _h.add_tt(dbf, dbias_tmp, dbf)
        _h.sum_t(flat_dOa, axis=0, out=dbias_tmp)
        _h.add_tt(dbo, dbias_tmp, dbo)
        _h.sum_t(flat_dZa, axis=0, out=dbias_tmp)
        _h.add_tt(dbz, dbias_tmp, dbz)

        flat_outputs = flatten_time(y[:-2])

        flat_cell = flatten_time(Ca[:-2])
        flat_cell2 = flatten_time(Ca[:-1])

        dWco_tmp = _h.allocate(flat_cell2.shape)
        dWc_tmp = _h.allocate(dpo.shape)
        # Peephole connection output weight:
        _h.mult_tt(flat_cell2, flat_dOa, dWco_tmp)
        _h.sum_t(dWco_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpo, dWc_tmp, dpo)

        flat_dIa = flatten_time(dIa[1:-1])
        flat_dFa = flatten_time(dFa[1:-1])
        flat_dOa = flatten_time(dOa[1:-1])
        flat_dZa = flatten_time(dZa[1:-1])

        _h.dot_add_mm(flat_dIa, flat_outputs, dRi, transa=True)
        _h.dot_add_mm(flat_dFa, flat_outputs, dRf, transa=True)
        _h.dot_add_mm(flat_dOa, flat_outputs, dRo, transa=True)
        _h.dot_add_mm(flat_dZa, flat_outputs, dRz, transa=True)

        _h.dot_add_mm(dIa[0], dy[-1], dRi, transa=True)
        _h.dot_add_mm(dFa[0], dy[-1], dRf, transa=True)
        _h.dot_add_mm(dOa[0], dy[-1], dRo, transa=True)
        _h.dot_add_mm(dZa[0], dy[-1], dRz, transa=True)

        # Other Peephole connections
        dWcif_tmp = _h.allocate(flat_cell.shape)
        _h.mult_tt(flat_cell, flat_dIa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(flat_cell, flat_dFa, dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)

        dWcif_tmp = _h.allocate(dIa[0].shape)
        _h.mult_tt(dCa[-1], dIa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpi, dWc_tmp, dpi)
        _h.mult_tt(dCa[-1], dFa[0], dWcif_tmp)
        _h.sum_t(dWcif_tmp, axis=0, out=dWc_tmp)
        _h.add_tt(dpf, dWc_tmp, dpf)
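
The clockwork behaviour above hinges on the cond mask built from t % timing: a unit is updated only at time steps that are multiples of its timing period, and otherwise its cell state and output are copied forward unchanged (with the corresponding deltas passed back in the backward pass). A minimal NumPy sketch of that update rule for a single step (illustrative names):

import numpy as np

def clockwork_step(t, timing, new_state, prev_state):
    # A unit is inactive when t is not a multiple of its period,
    # mirroring the modulo_tt / copy_to_if combination above.
    inactive = (t % timing) != 0
    return np.where(inactive, prev_state, new_state)

timing = np.array([1, 2, 4, 8])
prev = np.zeros(4)
new = np.ones(4)
print(clockwork_step(6, timing, new, prev))   # [1. 1. 0. 0.]: only periods 1 and 2 fire
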
Example #31
class SigmoidCELayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...'),
                       'targets': StructureTemplate('T', 'B', '...')}

    computes_no_input_deltas_for = ['targets']
    takes_no_output_deltas_from = ['predictions']

    def setup(self, kwargs, in_shapes):
        in_shape = in_shapes['default'].feature_shape
        tar_shape = in_shapes['targets'].feature_shape

        if tar_shape != in_shape:
            raise LayerValidationError('input and targets must have the same '
                                       'shapes. But got {} != {}'
                                       .format(in_shape, tar_shape))

        outputs = OrderedDict()
        outputs['predictions'] = BufferStructure('T', 'B', *in_shape)
        outputs['loss'] = BufferStructure('T', 'B', *in_shape)

        internals = OrderedDict()
        internals['dcee'] = BufferStructure('T', 'B', *in_shape,
                                            is_backward_only=True)
        return outputs, OrderedDict(), internals

    def forward_pass(self, buffers, training_pass=True):
        _h = self.handler
        assert isinstance(_h, Handler)

        inputs = flatten_time_and_features(buffers.inputs.default)
        targets = flatten_time_and_features(buffers.inputs.targets)
        loss = flatten_time_and_features(buffers.outputs.loss)
        prob = flatten_time_and_features(buffers.outputs.predictions)

        # Apply sigmoid
        _h.sigmoid(inputs, prob)

        # the binomial cross entropy error is given by
        # - (t * ln(y) + (1-t) * ln(1-y))
        tmp = _h.ones(prob.shape)
        _h.subtract_tt(tmp, prob, loss)     # loss = 1-y
        _h.subtract_tt(tmp, targets, tmp)     # tmp  = 1-t
        _h.clip_t(loss, 1e-6, 1.0, loss)
        _h.log_t(loss, loss)              # loss = ln(1-y)
        _h.mult_tt(tmp, loss, tmp)  # tmp = (1-t) * ln(1-y)

        _h.clip_t(prob, 1e-6, 1.0, loss)
        _h.log_t(loss, loss)              # loss = ln(y)
        _h.mult_tt(targets, loss, loss)    # loss = t * ln(y)

        _h.add_tt(tmp, loss, loss)        # loss = (1-t) * ln(1-y) + t * ln(y)

        _h.mult_st(-1, loss, loss)  # * -1

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        assert isinstance(_h, Handler)

        dinputs = flatten_time_and_features(buffers.input_deltas.default)
        dloss = flatten_time_and_features(buffers.output_deltas.loss)
        dcee = flatten_time_and_features(buffers.internals.dcee)
        targets = flatten_time_and_features(buffers.inputs.targets)
        prob = flatten_time_and_features(buffers.outputs.predictions)

        _h.subtract_tt(prob, targets, dcee)  # y - t
        _h.mult_mv(dcee, dloss, dcee)        # out_delta * (y - t)
        _h.add_tt(dcee, dinputs, dinputs)
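
The forward and backward passes above implement elementwise binomial cross-entropy: loss = -(t * ln(y) + (1 - t) * ln(1 - y)) with y = sigmoid(x), and the delta with respect to the pre-sigmoid inputs is y - t. A minimal NumPy sketch (illustrative names):

import numpy as np

def sigmoid_ce(inputs, targets):
    y = 1.0 / (1.0 + np.exp(-inputs))
    y_c = np.clip(y, 1e-6, 1.0)
    one_minus_y_c = np.clip(1.0 - y, 1e-6, 1.0)
    loss = -(targets * np.log(y_c) + (1.0 - targets) * np.log(one_minus_y_c))
    grad = y - targets            # derivative w.r.t. the pre-sigmoid inputs
    return y, loss, grad

y, loss, grad = sigmoid_ce(np.random.randn(4, 3),
                           np.random.randint(0, 2, (4, 3)).astype(float))
print(loss.shape, grad.shape)     # (4, 3) (4, 3)
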
def test_structure_template_matches1(shape, expected):
    st = StructureTemplate('T', 'B', 1, 3)
    assert st.matches(BufferStructure(*shape)) == expected
Example #33
class Convolution2DLayerImpl(Layer):

    expected_inputs = {'default': StructureTemplate('T', 'B', '...')}
    expected_kwargs = {
        'num_filters', 'kernel_size', 'stride', 'padding', 'activation'
    }

    def setup(self, kwargs, in_shapes):
        self.activation = kwargs.get('activation', 'tanh')
        assert 'num_filters' in kwargs, "num_filters must be specified " \
                                        " for ConvolutionLayer"
        assert 'kernel_size' in kwargs, "kernel_size must be specified " \
                                        "for ConvolutionLayer"
        self.num_filters = kwargs['num_filters']
        self.kernel_size = kwargs['kernel_size']
        self.stride = tuple(kwargs.get('stride', (1, 1)))
        self.padding = kwargs.get('padding', 0)
        assert type(self.padding) is int and self.padding >= 0, \
            "Invalid padding: {}".format(self.padding)
        assert type(self.kernel_size) in [list, tuple] and \
            len(self.kernel_size) == 2, "Kernel size must be list or " \
                                        "tuple  of length 2: {}".format(
                                        self.kernel_size)
        assert type(self.stride) in [list, tuple] and len(self.stride) == 2, \
            "Stride must be list or tuple of length 2: {}".format(self.stride)
        in_shape = self.in_shapes['default'].feature_shape
        assert self.stride[0] >= 1 and self.stride[1] >= 1, \
            "Invalid stride: {}".format(self.stride)
        assert isinstance(in_shape, tuple) and len(in_shape) == 3, \
            "ConvolutionLayer2D must have 3 dimensional input but input " \
            "shape was {}".format(in_shape)
        num_input_maps = in_shape[2]
        num_filters = self.num_filters
        kernel_x, kernel_y = self.kernel_size
        padding, stride = self.padding, self.stride
        output_height = (
            (in_shape[0] + 2 * padding - kernel_x) // stride[0]) + 1
        output_width = (
            (in_shape[1] + 2 * padding - kernel_y) // stride[1]) + 1
        out_shape = (output_height, output_width, num_filters)

        outputs = OrderedDict()
        outputs['default'] = BufferStructure('T', 'B', *out_shape)

        parameters = OrderedDict()
        parameters['W'] = BufferStructure(num_filters, kernel_x, kernel_y,
                                          num_input_maps)
        parameters['bias'] = BufferStructure(num_filters)

        internals = OrderedDict()
        return outputs, parameters, internals

    def forward_pass(self, buffers, training_pass=True):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_outputs = flatten_time(outputs)

        # calculate outputs
        _h.conv2d_forward_batch(flat_inputs, W, bias, flat_outputs,
                                self.padding, self.stride)
        _h.inplace_act_func[self.activation](outputs)

    def backward_pass(self, buffers):
        # prepare
        _h = self.handler
        W, bias = buffers.parameters
        dW, dbias = buffers.gradients
        inputs = buffers.inputs.default
        outputs = buffers.outputs.default
        in_deltas = buffers.input_deltas.default
        out_deltas = buffers.output_deltas.default

        # reshape
        flat_inputs = flatten_time(inputs)
        flat_in_deltas = flatten_time(in_deltas)
        flat_out_deltas = flatten_time(out_deltas)

        # calculate in_deltas and gradients
        _h.inplace_act_func_deriv[self.activation](outputs, out_deltas)
        _h.conv2d_backward_batch(flat_inputs, W, self.padding, self.stride,
                                 flat_in_deltas, flat_out_deltas, dW, dbias)
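
The output shape computed in setup() follows the usual convolution formula out = (in + 2 * padding - kernel) // stride + 1 per spatial dimension. A quick sanity check of that formula (illustrative names only):

def conv2d_output_shape(in_shape, kernel_size, stride, padding, num_filters):
    # in_shape: (height, width, channels)
    height = (in_shape[0] + 2 * padding - kernel_size[0]) // stride[0] + 1
    width = (in_shape[1] + 2 * padding - kernel_size[1]) // stride[1] + 1
    return (height, width, num_filters)

print(conv2d_output_shape((28, 28, 1), (5, 5), (1, 1), 0, 32))   # (24, 24, 32)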