Example #1
0
    def get_output_for(self, input, **kwargs):
        """Pool ``input`` with ``pool.pool_2d``.

        When ``self.pad`` is ``'strictsamex'`` the input is first padded at
        the front of axis 2 by ``ceil(pool_size[0] / 2)`` entries, pooling
        then runs with no padding of its own, and the pooled result is cut
        back to at most the original length of axis 2.  Any other value of
        ``self.pad`` is passed straight to ``pool_2d``.

        Side effect: the 'strictsamex' path sets ``self.ignore_border`` to
        ``True`` on the layer itself.
        """
        strict_same = self.pad == 'strictsamex'

        if strict_same:
            assert(self.stride[0] == 1)
            leading = int(np.ceil(self.pool_size[0] / 2.))
            original_length = input.shape[2]

            self.ignore_border = True
            input = padding.pad(input, [(leading, 0)], batch_ndim=2)
            pool_padding = (0, 0)
        else:
            pool_padding = self.pad

        pooled = pool.pool_2d(
            input,
            ds=self.pool_size,
            st=self.stride,
            ignore_border=self.ignore_border,
            padding=pool_padding,
            mode=self.mode,
        )

        if not strict_same:
            return pooled
        # Trim axis 2 back to (at most) the pre-padding length.
        return pooled[:, :, :original_length or None, :]
Example #2
0
    def get_output_for(self, input, input_shape=None, **kwargs):
        """Convolve ``input`` with ``self.W``, add the bias, apply the nonlinearity.

        ``input_shape`` overrides ``self.input_shape`` when the method is
        called directly with a differently shaped input.

        Padding modes handled here (``self.pad``):

        * ``'same'`` with stride ``(1, 1)``: run a ``'full'`` convolution and
          crop ``filter_size // 2`` entries from both ends of axes 2 and 3.
        * ``'full'``: ``'full'`` convolution, no explicit padding.
        * ``'same'`` (other strides): pad both sides of axes 2 and 3 by
          ``filter_size // 2`` and run a ``'valid'`` convolution.
        * ``'strictsamex'``: pad axis 2 only, by ``filter_size[0] - 1``
          entries split so the extra one (if any) goes in front, then run a
          ``'valid'`` convolution.
        * anything else: treated as a pair of per-axis symmetric widths.
        """
        if input_shape is None:
            input_shape = self.input_shape

        if self.stride == (1, 1) and self.pad == 'same':
            # Emulate a 'same' convolution by cropping a 'full' one.
            full = self.convolution(input, self.W, subsample=self.stride,
                                    input_shape=input_shape,
                                    filter_shape=self.get_W_shape(),
                                    border_mode='full')
            crop_x = self.filter_size[0] // 2
            crop_y = self.filter_size[1] // 2
            rows = slice(crop_x, -crop_x or None)
            cols = slice(crop_y, -crop_y or None)
            conved = full[:, :, rows, cols]
        else:
            # Pick the border mode and the explicit input padding, if any.
            border_mode = 'valid'
            if self.pad == 'full':
                border_mode = 'full'
                widths = [(0, 0), (0, 0)]
            elif self.pad == 'same':
                half_x = self.filter_size[0] // 2
                half_y = self.filter_size[1] // 2
                widths = [(half_x, half_x), (half_y, half_y)]
            elif self.pad == 'strictsamex':
                total = self.filter_size[0] - 1
                back = total // 2
                widths = [(total - back, back), (0, 0)]
            else:
                widths = [(self.pad[0], self.pad[0]),
                          (self.pad[1], self.pad[1])]

            if widths != [(0, 0), (0, 0)]:
                input = padding.pad(input, widths, batch_ndim=2)
                # Keep the static shape hint in step with the padded input.
                grown = tuple(
                    None if dim is None else dim + before + after
                    for dim, (before, after) in zip(input_shape[2:4], widths)
                )
                input_shape = (input_shape[0], input_shape[1]) + grown

            conved = self.convolution(input, self.W, subsample=self.stride,
                                      input_shape=input_shape,
                                      filter_shape=self.get_W_shape(),
                                      border_mode=border_mode)

        if self.b is None:
            return self.nonlinearity(conved)

        if self.untie_biases:
            pattern = ('x', 0, 1, 2)
        else:
            pattern = ('x', 0, 'x', 'x')
        return self.nonlinearity(conved + self.b.dimshuffle(*pattern))
Example #3
0
    def get_output_for(self, input, **kwargs):
        """Run ``pool.pool_2d`` over ``input``, with a 'strictsamex' mode.

        In 'strictsamex' mode the front of axis 2 is padded with
        ``ceil(pool_size[0] / 2)`` entries before pooling, ``pool_2d`` gets
        ``(0, 0)`` as its own padding, and the result is sliced to at most
        the original length of axis 2.  Otherwise ``self.pad`` is forwarded
        to ``pool_2d`` as is.

        Note that the 'strictsamex' path assigns ``True`` to
        ``self.ignore_border`` on the layer.
        """
        if self.pad != 'strictsamex':
            length = None
            pool_padding = self.pad
        else:
            assert (self.stride[0] == 1)
            front = int(np.ceil(self.pool_size[0] / 2.))
            length = input.shape[2]

            self.ignore_border = True
            input = padding.pad(input, [(front, 0)], batch_ndim=2)
            pool_padding = (0, 0)

        # Attributes are read only now, after the mode handling above.
        options = dict(ds=self.pool_size,
                       st=self.stride,
                       ignore_border=self.ignore_border,
                       padding=pool_padding,
                       mode=self.mode)
        pooled = pool.pool_2d(input, **options)

        if self.pad == 'strictsamex':
            pooled = pooled[:, :, :length or None, :]

        return pooled
def test_pad(batch_ndim, val, width=3):
    """Check ``pad`` with a uniform width against ``np.pad``.

    The first ``batch_ndim`` axes are expected to stay unpadded; every
    remaining axis is padded by ``width`` on both sides with ``val``.
    """
    from lasagne.theano_extensions.padding import pad

    data = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    symbolic = T.tensor4()
    padded_theano = pad(symbolic, width, val, batch_ndim).eval(
        {symbolic: data})

    np_widths = tuple(
        (0, 0) if axis < batch_ndim else (width, width)
        for axis in range(len(data.shape))
    )
    padded_numpy = np.pad(data, np_widths, mode="constant",
                          constant_values=val)

    assert (padded_theano == padded_numpy).all()
Example #5
0
def test_pad(val, width=3, batch_ndim=2):
    """Compare ``pad`` with ``np.pad`` for a single width on all padded axes.

    Axes before ``batch_ndim`` get ``(0, 0)`` in the NumPy reference; the
    others get ``(width, width)`` filled with ``val``.
    """
    from lasagne.theano_extensions.padding import pad

    ones = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    var = T.tensor4()
    result = pad(var, width, val, batch_ndim).eval({var: ones})

    reference_widths = []
    for axis in range(len(ones.shape)):
        if axis >= batch_ndim:
            reference_widths.append((width, width))
        else:
            reference_widths.append((0, 0))
    reference = np.pad(ones, tuple(reference_widths), mode='constant',
                       constant_values=val)

    assert (result == reference).all()
Example #6
0
        def step(input_n, cell_previous, hid_previous, *args):
            """Compute one recurrence step and return ``[cell, hid]``.

            ``input_n`` is convolved with ``W_in_stacked`` (plus
            ``b_stacked``) here unless ``self.precompute_input`` is set.
            The previous hidden state is padded with one leading entry and
            convolved with ``W_hid_stacked``; the sum gives the stacked gate
            pre-activations, which ``slice_w`` splits into the input gate,
            forget gate, cell input and output gate (indices 0 to 3).
            """
            if not self.precompute_input:
                projected = T.nnet.conv2d(input_n,
                                          W_in_stacked,
                                          None,
                                          None,
                                          subsample=(1, 1),
                                          border_mode='half',
                                          filter_flip=False)
                input_n = projected + b_stacked

            # Pad one entry in front so the 'valid' convolution below sees
            # an extra leading position of the hidden state.
            padded_hid = pad(hid_previous, [(1, 0)], 0, 2)
            recurrent = conv1d_mc1(padded_hid,
                                   W_hid_stacked,
                                   None,
                                   None,
                                   subsample=(1, ),
                                   border_mode='valid',
                                   filter_flip=False)
            gates = input_n + recurrent

            # Optional gradient clipping on the stacked pre-activations.
            if self.grad_clipping:
                bound = self.grad_clipping
                gates = theano.gradient.grad_clip(gates, -bound, bound)

            # Split the stacked pre-activations into the four gates.
            ingate, forgetgate, cell_input, outgate = [
                slice_w(gates, index) for index in range(4)
            ]

            if self.peepholes:
                # Peephole connections from the previous cell state.
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # New cell state: gated carry-over plus gated candidate.
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                # The output-gate peephole uses the *new* cell state.
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            hid = outgate * self.nonlinearity(cell)
            return [cell, hid]
Example #7
0
    def get_output_for(self, input, input_shape=None, **kwargs):
        """Apply the 3D convolution, the bias and the nonlinearity.

        ``input`` is indexed as a 5D tensor whose axes 2, 3 and 4 are the
        convolved ones.  ``input_shape`` is accepted for interface
        compatibility only: the convolution calls below take no shape hints,
        so it is not used.

        Padding modes (``self.pad``):

        * ``'same'`` with stride ``(1, 1, 1)``: run a ``'full'`` convolution
          and crop it back to the input's extent on axes 2-4.
        * ``'full'``: ``'full'`` convolution, no explicit padding.
        * ``'same'`` (other strides): pad both sides of axes 2-4 by
          ``filter_size // 2`` and run a ``'valid'`` convolution.
        * anything else: treated as three per-axis symmetric pad widths.
        """
        if self.stride == (1, 1, 1) and self.pad == 'same':
            conved = self.convolution(img=input,
                                      kerns=self.W,
                                      border_mode='full',
                                      subsample=self.stride,
                                      conv_mode='conv')
            shift_x = (self.filter_size[0] - 1) // 2
            shift_y = (self.filter_size[1] - 1) // 2
            shift_z = (self.filter_size[2] - 1) // 2
            conved = conved[:, :,
                            shift_x:input.shape[2] + shift_x,
                            shift_y:input.shape[3] + shift_y,
                            shift_z:input.shape[4] + shift_z]
        else:
            if self.pad == 'full':
                border_mode = 'full'
                pad = [(0, 0), (0, 0), (0, 0)]
            elif self.pad == 'same':
                border_mode = 'valid'
                pad = [(self.filter_size[0] // 2,
                        self.filter_size[0] // 2),
                       (self.filter_size[1] // 2,
                        self.filter_size[1] // 2),
                       (self.filter_size[2] // 2,
                        self.filter_size[2] // 2)]
            else:
                border_mode = 'valid'
                pad = [(self.pad[0], self.pad[0]),
                       (self.pad[1], self.pad[1]),
                       (self.pad[2], self.pad[2])]

            if pad != [(0, 0), (0, 0), (0, 0)]:
                # The three width pairs belong to axes 2, 3 and 4, so only
                # the first two axes are left unpadded (batch_ndim=2).
                # batch_ndim=3 would shift the widths onto axes 3, 4 and a
                # non-existent axis 5.
                input = padding.pad(input, pad, batch_ndim=2)

            conved = self.convolution(img=input,
                                      kerns=self.W,
                                      border_mode=border_mode,
                                      subsample=self.stride,
                                      conv_mode='conv')

        if self.b is None:
            activation = conved
        elif self.untie_biases:
            activation = conved + self.b.dimshuffle('x', 0, 1, 2, 3)
        else:
            activation = conved + self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        return self.nonlinearity(activation)
def test_pad_width_per_border(batch_ndim, val=0):
    """Check ``pad`` with a distinct (before, after) width per axis.

    ``pad`` receives only the widths of the axes from ``batch_ndim`` on;
    the NumPy reference uses ``(0, 0)`` for the leading ``batch_ndim`` axes.
    """
    from lasagne.theano_extensions.padding import pad

    per_axis = [(1, 2), (3, 4), (1, 2), (3, 4)]

    data = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    symbolic = T.tensor4()
    padded = pad(symbolic, per_axis[batch_ndim:], val, batch_ndim)
    from_theano = padded.eval({symbolic: data})

    np_widths = tuple(
        (0, 0) if axis < batch_ndim else pair
        for axis, pair in enumerate(per_axis)
    )
    from_numpy = np.pad(data, np_widths, mode='constant',
                        constant_values=val)

    assert (from_theano == from_numpy).all()
def test_pad_width_per_border(batch_ndim, val=0):
    """Compare ``pad`` with ``np.pad`` using asymmetric per-axis widths.

    Only the widths for axes at or after ``batch_ndim`` are handed to
    ``pad``; the reference leaves the earlier axes unpadded.
    """
    from lasagne.theano_extensions.padding import pad

    widths = [(1, 2), (3, 4), (1, 2), (3, 4)]

    ones = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    var = T.tensor4()
    result = pad(var, widths[batch_ndim:], val, batch_ndim).eval({var: ones})

    reference_widths = []
    for axis, pair in enumerate(widths):
        reference_widths.append(pair if axis >= batch_ndim else (0, 0))
    reference = np.pad(ones, tuple(reference_widths), mode="constant",
                       constant_values=val)

    assert (result == reference).all()
Example #10
0
    def get_output_for(self, input, input_shape=None, **kwargs):
        """Convolve ``input`` along axis 2 and apply the nonlinearity.

        ``input_shape`` overrides ``self.input_shape`` when the method is
        called directly with a differently shaped input.  No bias is added.

        Padding modes (``self.pad``):

        * ``'same'`` with stride ``(1,)``: ``'full'`` convolution cropped by
          ``filter_size[0] // 2`` at both ends.
        * ``'full'``: ``'full'`` convolution, no explicit padding.
        * ``'same'`` (other strides): pad ``filter_size[0] // 2`` in front
          and ``(filter_size[0] - 1) // 2`` behind, ``'valid'`` convolution.
        * ``'strictsame'``: sets ``self.stride`` to ``(1,)`` on the layer and
          pads ``filter_size[0] - 1`` entries in total, the larger share in
          front, then runs a ``'valid'`` convolution.
        * anything else: ``self.pad[0]`` entries on both sides.
        """
        if input_shape is None:
            input_shape = self.input_shape

        if self.stride == (1, ) and self.pad == 'same':
            # Emulate a 'same' convolution by cropping a 'full' one.
            full = self.convolution(input,
                                    self.W,
                                    subsample=self.stride,
                                    input_shape=input_shape,
                                    filter_shape=self.get_W_shape(),
                                    border_mode='full')
            crop = self.filter_size[0] // 2
            return self.nonlinearity(full[:, :, crop:-crop or None])

        # Pick the border mode and the explicit (front, back) padding.
        border_mode = 'valid'
        if self.pad == 'full':
            border_mode = 'full'
            widths = (0, 0)
        elif self.pad == 'same':
            widths = (self.filter_size[0] // 2,
                      (self.filter_size[0] - 1) // 2)
        elif self.pad == 'strictsame':
            self.stride = (1, )
            total = self.filter_size[0] - 1
            back = total // 2
            widths = (total - back, back)
        else:
            widths = (self.pad[0], self.pad[0])

        if widths != (0, 0):
            input = padding.pad(input, [widths], batch_ndim=2)
            # Keep the static shape hint in step with the padded input.
            length = input_shape[2]
            if length is not None:
                length = length + widths[0] + widths[1]
            input_shape = (input_shape[0], input_shape[1], length)

        conved = self.convolution(input,
                                  self.W,
                                  subsample=self.stride,
                                  input_shape=input_shape,
                                  filter_shape=self.get_W_shape(),
                                  border_mode=border_mode)
        return self.nonlinearity(conved)
Example #11
0
    def get_weights(self, h_t, w_tm1, M_t, **kwargs):
        """Compute the addressing weights of all heads for one time step.

        From the hidden state ``h_t`` the key, beta, gate, shift and gamma
        values are produced, then combined with the previous weights
        ``w_tm1`` and the memory ``M_t`` in four stages: content addressing,
        interpolation, convolutional shift and sharpening.  The returned
        weights are normalised along axis 2.
        """
        # QKFIX: Get the size of the batches from the 1st head
        batch_size = self.heads[0].input_shape[0]
        num_heads = len(self.heads)
        # Heads are folded into the leading axis for the 2D-only operations.
        static_rows = None if batch_size is None else batch_size * num_heads
        flat_rows = h_t.shape[0] * num_heads
        memory_size = self.memory_shape[0]

        def affine(weight, bias):
            return T.dot(h_t, weight) + bias

        k_t = self.nonlinearity_key(
            affine(self.W_hid_to_key, self.b_hid_to_key))
        beta_t = self.nonlinearity_beta(
            affine(self.W_hid_to_beta, self.b_hid_to_beta))
        g_t = self.nonlinearity_gate(
            affine(self.W_hid_to_gate, self.b_hid_to_gate))

        # QKFIX: If the nonlinearity is softmax (which is usually the case),
        # then the activations need to be reshaped (T.nnet.softmax only
        # accepts 2D inputs)
        shift_pre = affine(self.W_hid_to_shift, self.b_hid_to_shift)
        try:
            s_t = self.nonlinearity_shift(shift_pre)
        except ValueError:
            flat_shift = shift_pre.reshape((flat_rows, self.num_shifts))
            s_t = self.nonlinearity_shift(flat_shift).reshape(shift_pre.shape)

        gamma_t = self.nonlinearity_gamma(
            affine(self.W_hid_to_gamma, self.b_hid_to_gamma))

        # Content Addressing (3.3.1)
        beta_t = T.addbroadcast(beta_t, 2)
        betaK = beta_t * similarities.cosine_similarity(k_t, M_t)
        w_c = lasagne.nonlinearities.softmax(betaK.flatten(ndim=2))
        w_c = w_c.reshape(betaK.shape)

        # Interpolation (3.3.2)
        g_t = T.addbroadcast(g_t, 2)
        w_g = g_t * w_c + (1. - g_t) * w_tm1

        # Convolutional Shift (3.3.2)
        # NOTE: This library is using a flat (zero-padded) convolution
        # instead of the circular convolution from the original paper. In
        # practice, this change has a minimal impact.
        w_g_padded = w_g.reshape((flat_rows, memory_size))
        w_g_padded = w_g_padded.dimshuffle(0, 'x', 'x', 1)
        conv_filter = s_t.reshape((flat_rows, self.num_shifts))
        conv_filter = conv_filter.dimshuffle(0, 'x', 'x', 1)
        pad = (self.num_shifts // 2, (self.num_shifts - 1) // 2)
        w_g_padded = padding.pad(w_g_padded, [pad], batch_ndim=3)
        convolution = T.nnet.conv2d(
            w_g_padded,
            conv_filter,
            input_shape=(static_rows, 1, 1, memory_size + pad[0] + pad[1]),
            filter_shape=(static_rows, 1, 1, self.num_shifts),
            subsample=(1, 1),
            border_mode='valid')
        # Row i is paired with filter i: keep only the diagonal of the
        # (row, filter) result grid.
        diagonal = T.arange(flat_rows)
        w_tilde = convolution[diagonal, diagonal, 0, :]
        w_tilde = w_tilde.reshape((h_t.shape[0], num_heads, memory_size))

        # Sharpening (3.3.2)
        gamma_t = T.addbroadcast(gamma_t, 2)
        w = T.pow(w_tilde + 1e-6, gamma_t)
        return w / T.sum(w, axis=2).dimshuffle(0, 1, 'x')
Example #12
0
    def get_output_for(self, input, input_shape=None, **kwargs):
        """Run the 1D convolution over axis 2, then the nonlinearity.

        ``input_shape`` replaces ``self.input_shape`` when given.  The
        padding mode comes from ``self.pad``: ``'full'``, ``'same'``,
        ``'strictsame'`` or a sequence whose first item is the symmetric pad
        width.  ``'same'`` with stride ``(1,)`` is emulated by cropping a
        ``'full'`` convolution; every other mode pads the input explicitly
        (when needed) and convolves in ``'valid'`` or ``'full'`` mode.
        ``'strictsame'`` also assigns ``(1,)`` to ``self.stride``.  No bias
        is added.
        """
        if input_shape is None:
            input_shape = self.input_shape

        crop_full_conv = self.stride == (1,) and self.pad == 'same'

        if not crop_full_conv:
            # Explicit (front, back) padding plus the matching border mode.
            if self.pad == 'full':
                border_mode, pad = 'full', (0, 0)
            elif self.pad == 'same':
                border_mode = 'valid'
                pad = (self.filter_size[0] // 2,
                       (self.filter_size[0] - 1) // 2)
            elif self.pad == 'strictsame':
                self.stride = (1,)
                border_mode = 'valid'
                extra = self.filter_size[0] - 1
                behind = extra // 2
                pad = (extra - behind, behind)
            else:
                border_mode, pad = 'valid', (self.pad[0], self.pad[0])

            if pad != (0, 0):
                input = padding.pad(input, [pad], batch_ndim=2)
                # Grow the static length hint by the padding just applied.
                padded_length = (None if input_shape[2] is None
                                 else input_shape[2] + pad[0] + pad[1])
                input_shape = (input_shape[0], input_shape[1], padded_length)

            conved = self.convolution(input, self.W, subsample=self.stride,
                                      input_shape=input_shape,
                                      filter_shape=self.get_W_shape(),
                                      border_mode=border_mode)
        else:
            # 'same' at stride 1: crop a 'full' convolution at both ends.
            conved = self.convolution(input, self.W, subsample=self.stride,
                                      input_shape=input_shape,
                                      filter_shape=self.get_W_shape(),
                                      border_mode='full')
            crop = self.filter_size[0] // 2
            conved = conved[:, :, crop:-crop or None]

        return self.nonlinearity(conved)
Example #13
0
def test_convolutional_shift():
    """Check the padded-``conv2d`` shift against a direct NumPy computation.

    Sixteen batches of four heads, each with 128 weights and a 3-tap shift
    filter, are folded into the leading axis, zero-padded and convolved;
    the diagonal (row ``i`` with filter ``i``) is compared with a manual
    three-term sum per position.
    """
    batch, heads, memory = 16, 4, 128
    num_shifts = 3
    rows = batch * heads

    weights_var, shift_var = T.tensor3s('weights', 'shift')

    weights_reshaped = weights_var.reshape((rows, memory))
    weights_reshaped = weights_reshaped.dimshuffle(0, 'x', 'x', 1)
    shift_reshaped = shift_var.reshape((rows, num_shifts))
    shift_reshaped = shift_reshaped.dimshuffle(0, 'x', 'x', 1)
    pad = (num_shifts // 2, (num_shifts - 1) // 2)
    weights_padded = padding.pad(weights_reshaped, [pad], batch_ndim=3)
    convolution = T.nnet.conv2d(
        weights_padded,
        shift_reshaped,
        input_shape=(rows, 1, 1, memory + pad[0] + pad[1]),
        filter_shape=(rows, 1, 1, num_shifts),
        subsample=(1, 1),
        border_mode='valid')
    diagonal = T.arange(rows)
    w_tilde = convolution[diagonal, diagonal, 0, :]
    w_tilde = w_tilde.reshape((batch, heads, memory))

    convolutional_shift_fn = theano.function([weights_var, shift_var], w_tilde)

    weights = np.random.rand(batch, heads, memory)
    shift = np.random.rand(batch, heads, 3)

    weight_tilde = convolutional_shift_fn(weights, shift)

    # Filters in T.nnet.conv2d are reversed: tap 2 multiplies the left
    # neighbour, tap 1 the position itself, tap 0 the right neighbour.
    # Terms are accumulated in that order; out-of-range neighbours are
    # skipped at the two ends.
    weight_tilde_manual = np.zeros_like(weight_tilde)
    weight_tilde_manual[:, :, 1:] += shift[:, :, 2:3] * weights[:, :, :-1]
    weight_tilde_manual += shift[:, :, 1:2] * weights
    weight_tilde_manual[:, :, :-1] += shift[:, :, 0:1] * weights[:, :, 1:]

    assert weight_tilde.shape == (16, 4, 128)
    assert np.allclose(weight_tilde, weight_tilde_manual)
Example #14
0
    def get_output_for(self, h_t, w_tm1, M_t, **kwargs):
        """Compute this head's addressing weights for one time step.

        The key, beta, gate, shift and gamma sub-layers are evaluated on the
        hidden state ``h_t`` (the key is multiplied by the optional sign
        sub-layer's output), then combined with the previous weights
        ``w_tm1`` and the memory ``M_t``: content addressing, interpolation,
        convolutional shift and sharpening.

        Each row of the result is handled independently: row ``i`` is
        shifted with its own filter and normalised by its own sum, so the
        output is also correct for more than one row.  For a single row the
        result equals the previous "first filter / global sum" formulation.
        """
        if self.sign is not None:
            sign_t = self.sign.get_output_for(h_t, **kwargs)
        else:
            sign_t = 1.
        k_t = self.key.get_output_for(h_t, **kwargs)
        beta_t = self.beta.get_output_for(h_t, **kwargs)
        g_t = self.gate.get_output_for(h_t, **kwargs)
        s_t = self.shift.get_output_for(h_t, **kwargs)
        gamma_t = self.gamma.get_output_for(h_t, **kwargs)

        # Content Addressing (3.3.1)
        beta_t = T.addbroadcast(beta_t, 1)
        betaK = beta_t * similarities.cosine_similarity(sign_t * k_t, M_t)
        w_c = lasagne.nonlinearities.softmax(betaK)

        # Interpolation (3.3.2)
        g_t = T.addbroadcast(g_t, 1)
        w_g = g_t * w_c + (1. - g_t) * w_tm1

        # Convolutional Shift (3.3.2)
        w_g_padded = w_g.dimshuffle(0, 'x', 'x', 1)
        conv_filter = s_t.dimshuffle(0, 'x', 'x', 1)
        pad = (self.num_shifts // 2, (self.num_shifts - 1) // 2)
        w_g_padded = padding.pad(w_g_padded, [pad], batch_ndim=3)
        convolution = T.nnet.conv2d(
            w_g_padded,
            conv_filter,
            input_shape=(self.input_shape[0], 1, 1,
                         self.memory_shape[0] + pad[0] + pad[1]),
            filter_shape=(self.input_shape[0], 1, 1, self.num_shifts),
            subsample=(1, 1),
            border_mode='valid')
        # conv2d pairs every row with every filter; keep only the diagonal
        # so that row i is shifted by its own filter i (taking filter 0 for
        # all rows is only right when there is a single row).
        rows = T.arange(w_g.shape[0])
        w_tilde = convolution[rows, rows, 0, :]

        # Sharpening (3.3.2)
        gamma_t = T.addbroadcast(gamma_t, 1)
        w = T.pow(w_tilde + 1e-6, gamma_t)
        # Normalise each row by its own sum rather than by the grand total.
        w /= T.sum(w, axis=1).dimshuffle(0, 'x')

        return w
Example #15
0
    def get_output_for(self, h_t, w_tm1, M_t, **kwargs):
        """Return the head's addressing weights for the current time step.

        Sub-layer outputs (key, beta, gate, shift, gamma and the optional
        sign) are computed from ``h_t`` and combined with the previous
        weights ``w_tm1`` and the memory ``M_t`` through content addressing,
        interpolation, convolutional shift and sharpening.

        Rows are treated independently: the shift uses each row's own filter
        and the final normalisation divides each row by its own sum.  With a
        single row this matches the earlier behaviour (first filter, global
        sum); with several rows it no longer mixes them.
        """
        if self.sign is not None:
            sign_t = self.sign.get_output_for(h_t, **kwargs)
        else:
            sign_t = 1.
        k_t = self.key.get_output_for(h_t, **kwargs)
        beta_t = self.beta.get_output_for(h_t, **kwargs)
        g_t = self.gate.get_output_for(h_t, **kwargs)
        s_t = self.shift.get_output_for(h_t, **kwargs)
        gamma_t = self.gamma.get_output_for(h_t, **kwargs)

        # Content Addressing (3.3.1)
        beta_t = T.addbroadcast(beta_t, 1)
        betaK = beta_t * similarities.cosine_similarity(sign_t * k_t, M_t)
        w_c = lasagne.nonlinearities.softmax(betaK)

        # Interpolation (3.3.2)
        g_t = T.addbroadcast(g_t, 1)
        w_g = g_t * w_c + (1. - g_t) * w_tm1

        # Convolutional Shift (3.3.2)
        w_g_padded = w_g.dimshuffle(0, 'x', 'x', 1)
        conv_filter = s_t.dimshuffle(0, 'x', 'x', 1)
        pad = (self.num_shifts // 2, (self.num_shifts - 1) // 2)
        w_g_padded = padding.pad(w_g_padded, [pad], batch_ndim=3)
        padded_length = self.memory_shape[0] + pad[0] + pad[1]
        convolution = T.nnet.conv2d(
            w_g_padded, conv_filter,
            input_shape=(self.input_shape[0], 1, 1, padded_length),
            filter_shape=(self.input_shape[0], 1, 1, self.num_shifts),
            subsample=(1, 1),
            border_mode='valid')
        # The convolution yields one result per (row, filter) pair; select
        # the diagonal so every row keeps the result of its own filter
        # instead of the first row's filter.
        diagonal = T.arange(w_g.shape[0])
        w_tilde = convolution[diagonal, diagonal, 0, :]

        # Sharpening (3.3.2)
        gamma_t = T.addbroadcast(gamma_t, 1)
        w = T.pow(w_tilde + 1e-6, gamma_t)
        # Per-row normalisation (a global sum would couple the rows).
        w /= T.sum(w, axis=1).dimshuffle(0, 'x')

        return w
Example #16
0
def test_convolutional_shift():
    """Verify the zero-padded ``conv2d`` shift against a NumPy reference.

    The (16, 4, 128) weights and (16, 4, 3) shift filters are flattened to
    64 rows, padded, convolved, and the per-row diagonal is compared with a
    manually accumulated three-tap sum.
    """
    n_batch, n_heads, n_memory = 16, 4, 128
    n_rows = n_batch * n_heads
    num_shifts = 3

    weights_var, shift_var = T.tensor3s('weights', 'shift')

    flat_weights = weights_var.reshape((n_rows, n_memory))
    flat_weights = flat_weights.dimshuffle(0, 'x', 'x', 1)
    flat_shift = shift_var.reshape((n_rows, num_shifts))
    flat_shift = flat_shift.dimshuffle(0, 'x', 'x', 1)
    pad = (num_shifts // 2, (num_shifts - 1) // 2)
    weights_padded = padding.pad(flat_weights, [pad], batch_ndim=3)
    convolution = T.nnet.conv2d(
        weights_padded, flat_shift,
        input_shape=(n_rows, 1, 1, n_memory + pad[0] + pad[1]),
        filter_shape=(n_rows, 1, 1, num_shifts),
        subsample=(1, 1),
        border_mode='valid')
    row_index = T.arange(n_rows)
    w_tilde = convolution[row_index, row_index, 0, :]
    w_tilde = w_tilde.reshape((n_batch, n_heads, n_memory))

    convolutional_shift_fn = theano.function([weights_var, shift_var], w_tilde)

    weights = np.random.rand(n_batch, n_heads, n_memory)
    shift = np.random.rand(n_batch, n_heads, 3)

    weight_tilde = convolutional_shift_fn(weights, shift)

    # Filters in T.nnet.conv2d are reversed, so tap 2 pairs with the left
    # neighbour and tap 0 with the right one.  The three contributions are
    # added in the order left, centre, right; neighbours that fall outside
    # the memory are left out.
    expected = np.zeros_like(weight_tilde)
    left_tap, centre_tap, right_tap = (shift[:, :, 2:3],
                                       shift[:, :, 1:2],
                                       shift[:, :, 0:1])
    expected[:, :, 1:] += left_tap * weights[:, :, :-1]
    expected += centre_tap * weights
    expected[:, :, :-1] += right_tap * weights[:, :, 1:]

    assert weight_tilde.shape == (16, 4, 128)
    assert np.allclose(weight_tilde, expected)
Example #17
0
    def get_output_for(self, input, input_shape=None, **kwargs):
        """Convolve ``input`` with ``self.W``, add the bias, apply the nonlinearity.

        ``input_shape`` overrides ``self.input_shape`` when the method is
        called directly with a differently shaped input.

        Padding modes (``self.pad``):

        * ``'same'`` with stride ``(1, 1)``: run a ``'full'`` convolution and
          crop it back to the input's extent on axes 2 and 3.
        * ``'full'``: ``'full'`` convolution, no explicit padding.
        * ``'same'`` (other strides): pad ``filter_size // 2`` in front and
          ``(filter_size - 1) // 2`` behind on axes 2 and 3, then run a
          ``'valid'`` convolution.
        * anything else: treated as a pair of per-axis symmetric widths.

        When ``self.b`` is set it is added to the convolution result (per
        position if ``self.untie_biases``, otherwise per channel).
        """
        if input_shape is None:
            input_shape = self.input_shape

        if self.stride == (1, 1) and self.pad == 'same':
            # simulate same convolution by cropping a full convolution
            conved = self.convolution(input,
                                      self.W,
                                      subsample=self.stride,
                                      image_shape=input_shape,
                                      filter_shape=self.get_W_shape(),
                                      border_mode='full')
            shift_x = (self.filter_size[0] - 1) // 2
            shift_y = (self.filter_size[1] - 1) // 2
            conved = conved[:, :, shift_x:input.shape[2] + shift_x,
                            shift_y:input.shape[3] + shift_y]
        else:
            # no padding needed, or explicit padding of input needed
            if self.pad == 'full':
                border_mode = 'full'
                pad = [(0, 0), (0, 0)]
            elif self.pad == 'same':
                border_mode = 'valid'
                pad = [
                    (self.filter_size[0] // 2, (self.filter_size[0] - 1) // 2),
                    (self.filter_size[1] // 2, (self.filter_size[1] - 1) // 2)
                ]
            else:
                border_mode = 'valid'
                pad = [(self.pad[0], self.pad[0]), (self.pad[1], self.pad[1])]
            if pad != [(0, 0), (0, 0)]:
                input = padding.pad(input, pad, batch_ndim=2)
                # Keep the static shape hint in step with the padded input.
                input_shape = (input_shape[0], input_shape[1],
                               None if input_shape[2] is None else
                               input_shape[2] + pad[0][0] + pad[0][1],
                               None if input_shape[3] is None else
                               input_shape[3] + pad[1][0] + pad[1][1])
            conved = self.convolution(input,
                                      self.W,
                                      subsample=self.stride,
                                      image_shape=input_shape,
                                      filter_shape=self.get_W_shape(),
                                      border_mode=border_mode)

        # The bias must take effect: a later unconditional
        # ``activation = conved`` would silently discard it.
        if self.b is None:
            activation = conved
        elif self.untie_biases:
            activation = conved + self.b.dimshuffle('x', 0, 1, 2)
        else:
            activation = conved + self.b.dimshuffle('x', 0, 'x', 'x')

        return self.nonlinearity(activation)
 def get_output_for(self, input, **kwargs):
     """Return ``input`` padded via ``padding.pad``.

     The layer's ``width``, ``val`` and ``batch_ndim`` attributes are passed
     on positionally, in that order.
     """
     pad_arguments = (self.width, self.val, self.batch_ndim)
     return padding.pad(input, *pad_arguments)
 def get_output_for(self, input, **kwargs):
     """Pad ``input`` with ``padding.pad`` using this layer's settings.

     ``self.width``, ``self.val`` and ``self.batch_ndim`` are forwarded as
     the second, third and fourth positional arguments.
     """
     width, fill, unpadded_axes = self.width, self.val, self.batch_ndim
     padded = padding.pad(input, width, fill, unpadded_axes)
     return padded
Example #20
0
    def get_weights(self, h_t, w_tm1, M_t, **kwargs):
        """Return the addressing weights of every head for one time step.

        The hidden state ``h_t`` is projected to key, beta, gate, shift and
        gamma values; together with the previous weights ``w_tm1`` and the
        memory ``M_t`` they go through content addressing, interpolation,
        convolutional shift and sharpening.  The result is normalised along
        axis 2.
        """
        # QKFIX: Get the size of the batches from the 1st head
        batch_size = self.heads[0].input_shape[0]
        num_heads = len(self.heads)
        n_memory = self.memory_shape[0]
        # Static (possibly unknown) and symbolic size of the axis obtained
        # by folding the heads into the leading axis.
        rows_hint = None if batch_size is None else batch_size * num_heads
        n_rows = h_t.shape[0] * num_heads

        key_pre = T.dot(h_t, self.W_hid_to_key) + self.b_hid_to_key
        k_t = self.nonlinearity_key(key_pre)
        beta_pre = T.dot(h_t, self.W_hid_to_beta) + self.b_hid_to_beta
        beta_t = self.nonlinearity_beta(beta_pre)
        gate_pre = T.dot(h_t, self.W_hid_to_gate) + self.b_hid_to_gate
        g_t = self.nonlinearity_gate(gate_pre)

        # QKFIX: If the nonlinearity is softmax (which is usually the case),
        # then the activations need to be reshaped (T.nnet.softmax only
        # accepts 2D inputs)
        shift_pre = T.dot(h_t, self.W_hid_to_shift) + self.b_hid_to_shift
        try:
            s_t = self.nonlinearity_shift(shift_pre)
        except ValueError:
            s_t = self.nonlinearity_shift(
                shift_pre.reshape((n_rows, self.num_shifts)))
            s_t = s_t.reshape(shift_pre.shape)

        gamma_pre = T.dot(h_t, self.W_hid_to_gamma) + self.b_hid_to_gamma
        gamma_t = self.nonlinearity_gamma(gamma_pre)

        # Content Addressing (3.3.1)
        beta_t = T.addbroadcast(beta_t, 2)
        betaK = beta_t * similarities.cosine_similarity(k_t, M_t)
        w_c = lasagne.nonlinearities.softmax(betaK.flatten(ndim=2))
        w_c = w_c.reshape(betaK.shape)

        # Interpolation (3.3.2)
        g_t = T.addbroadcast(g_t, 2)
        w_g = g_t * w_c + (1. - g_t) * w_tm1

        # Convolutional Shift (3.3.2)
        # NOTE: This library is using a flat (zero-padded) convolution
        # instead of the circular convolution from the original paper. In
        # practice, this change has a minimal impact.
        pad = (self.num_shifts // 2, (self.num_shifts - 1) // 2)
        w_g_padded = padding.pad(
            w_g.reshape((n_rows, n_memory)).dimshuffle(0, 'x', 'x', 1),
            [pad], batch_ndim=3)
        conv_filter = s_t.reshape(
            (n_rows, self.num_shifts)).dimshuffle(0, 'x', 'x', 1)
        convolution = T.nnet.conv2d(
            w_g_padded, conv_filter,
            input_shape=(rows_hint, 1, 1, n_memory + pad[0] + pad[1]),
            filter_shape=(rows_hint, 1, 1, self.num_shifts),
            subsample=(1, 1),
            border_mode='valid')
        # Only the (row i, filter i) results are wanted.
        index = T.arange(n_rows)
        w_tilde = convolution[index, index, 0, :].reshape(
            (h_t.shape[0], num_heads, n_memory))

        # Sharpening (3.3.2)
        gamma_t = T.addbroadcast(gamma_t, 2)
        w = T.pow(w_tilde + 1e-6, gamma_t)
        return w / T.sum(w, axis=2).dimshuffle(0, 1, 'x')