예제 #1
0
파일: decode.py 프로젝트: the-moliver/kfs
    def call(self, x, mask=None):
        """Convolve the input with ``self.kernel`` and square the response.

        The input is permuted with axes ``(0, 2, 1)`` and given a trailing
        singleton axis before ``K.conv2d``. That axis is squeezed away and the
        permutation undone before squaring. ``mask`` is accepted but unused.
        """
        swapped = K.permute_dimensions(x, (0, 2, 1))
        conv_input = K.expand_dims(swapped, -1)

        response = K.squeeze(K.conv2d(conv_input, self.kernel), -1)
        response = K.permute_dimensions(response, (0, 2, 1))

        return K.square(response)
    def call(self, x):
        """Collapse axis 1 of ``x`` into an attention-weighted sum.

        Only the TensorFlow backend is supported (enforced by an assert).
        """
        assert(K.backend() == 'tensorflow')

        # Run the stacked sigmoid layers on the (0, 2, 1)-permuted input,
        # then restore the original axis order.
        hidden = K.permute_dimensions(x, (0, 2, 1))
        for depth in range(0, self.attention_depth):
            hidden = K.sigmoid(K.dot(hidden, self.Ws[depth]) + self.bs[depth])
        hidden = K.permute_dimensions(hidden, (0, 2, 1))

        # Project with Wf (the resulting singleton last axis is squeezed away),
        # add the bias and squash with tanh.
        projected = K.dot(hidden, K.expand_dims(self.Wf, -1))
        scores = K.tanh(K.squeeze(projected, -1) + self.bias)

        # For each hidden state, work out how much it contributes to the
        # context vector. This is the core of attention: a softmax over
        # axis 1, i.e. exp(x) / sum(exp(xi)).
        exp_scores = K.exp(scores)
        normaliser = K.sum(exp_scores, axis=1, keepdims=True)
        weights = exp_scores / K.cast(normaliser, K.floatx())

        # To inspect the internal probabilities while testing that attention
        # works as expected, return `weights` at this point instead.

        # Scale each hidden value by its probability and sum over axis 1.
        weighted = x * K.expand_dims(weights, -1)
        return K.sum(weighted, axis=1)
예제 #3
0
    def get_output(self, train=False):
        """Decode ``self.output_length`` steps with ``self._step`` via theano.scan.

        The layer input is projected step-by-step through ``W_h``/``b_h`` and
        handed to every step as a non-sequence. The last slice of the
        (1, 0, 2)-permuted input seeds the first step. Final hidden and cell
        states are recorded as updates on this layer (when stateful without
        an external state input) and on every layer in ``self.state_outputs``.

        Returns the scan outputs permuted with axes ``(1, 0, 2)``.
        """
        H = self.get_input(train)
        X = K.permute_dimensions(H, (1, 0, 2))[-1]

        def project(x, states):
            # Stateless affine map applied by K.rnn at every step.
            return K.dot(x, self.W_h) + self.b_h, []

        _, H, _ = K.rnn(project, H, [], mask=None)

        reuse_stored_states = (self.stateful or self.state_input
                               or len(self.state_outputs) > 0)
        if reuse_stored_states:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        # Order must match the positional parameters of self._step.
        shared = [H, self.U_i, self.U_f, self.U_o, self.U_c,
                  self.W_i, self.W_f, self.W_c, self.W_o,
                  self.W_x, self.W_a, self.V_i, self.V_f, self.V_c,
                  self.V_o, self.b_i, self.b_f, self.b_c,
                  self.b_o, self.b_x, self.b_a]
        (outputs, hidden_states, cell_states), updates = theano.scan(
            self._step,
            n_steps=self.output_length,
            outputs_info=[X] + initial_states,
            non_sequences=shared)

        final_states = [hidden_states[-1], cell_states[-1]]
        if self.stateful and not self.state_input:
            self.updates = [(self.states[i], final_states[i]) for i in range(2)]
        for o in self.state_outputs:
            o.updates = [(o.states[i], final_states[i]) for i in range(2)]

        return K.permute_dimensions(outputs, (1, 0, 2))
예제 #4
0
 def attend_function(self, inputs, mask=None):
     """Swap the first two axes of ``inputs`` (and ``mask``) and delegate.

     Args:
         inputs: rank-3 tensor; assumed to come from an unroller, so its
             first two axes are exchanged before the parent ``call``.
         mask: optional tensor permuted the same way, or ``None``.

     Returns:
         Whatever the parent class's ``call`` returns for the permuted input.
     """
     # b,n,f -> b,f via b,n broadcasted
     inputs = K.permute_dimensions(inputs, (1, 0, 2))
     # Compare against None explicitly: a symbolic mask tensor has no usable
     # truth value, so `if mask:` fails as soon as a mask is supplied.
     if mask is not None:
         mask = K.permute_dimensions(mask, (1, 0, 2))
     return super(Accumulator, self).call(inputs, mask)
예제 #5
0
    def call(self, x, mask=None):
        """Run the parent LSTM over a skewed, direction-permuted copy of ``x``.

        ``self.direction`` selects the axis permutation applied before the
        parent call and the one applied to its result. Any value other than
        'Down' or 'Right' raises ``Exception``.

        NOTE(review): in the stateful case the parent result is discarded and
        this method returns None. That looks unintended; confirm with callers.
        """
        if self.direction == 'Down':
            inward, outward = (0, 3, 1, 2), (0, 2, 3, 1)
        elif self.direction == 'Right':
            inward, outward = (0, 2, 1, 3), (0, 2, 1, 3)
        else:
            raise Exception('ERROR: Unknown direction')

        X = K.permute_dimensions(x, inward)

        if self.stateful:
            super(DiagLSTM, self).call(X, mask)
            return None

        # Optionally flip axis 1 before skewing and flip it back afterwards.
        if self.reverse:
            X = X[:, ::-1, :, :]
        res = super(DiagLSTM, self).call(Utils.Skew(X), mask)
        unskew = Utils.Unskew(res)
        if self.reverse:
            unskew = unskew[:, ::-1, :, :]

        return K.permute_dimensions(unskew, outward)
    def call(self, X, mask=None):
        """Apply a 1D transposed convolution by routing through the 2D op.

        The rank-3 input gets a singleton axis and is permuted so
        ``tf.nn.conv2d_transpose`` can be used. The bias is added, the
        permutation undone and the singleton axis squeezed away. ``mask`` is
        accepted but unused. Intermediate shapes are printed for debugging.
        """
        # 1D -> 2D
        batch = K.shape(X)[0]
        width = deconv_output_length(K.shape(X)[1],
                                    self.filter_length,
                                    self.padding,
                                    self.strides[2])

        print("Output width: ", width)

        print("Input shape: ", K.shape(X))
        X = K.expand_dims(X,2)
        print("Input shape after expand: ", K.shape(X))
        X = K.permute_dimensions(X, (0, 2, 1, 3))
        print("Input shape after permute: ", K.shape(X))
        # tf.pack was renamed tf.stack in TensorFlow 1.0; prefer the new name
        # and fall back to the old one on older installs.
        stack = getattr(tf, 'stack', None) or tf.pack
        deconv_shape = stack([batch, 1, width, self.nb_filter])
        print("Deconv shape: ", deconv_shape)
        conv_out = tf.nn.conv2d_transpose(X, self.W, strides=self.strides,
                                          padding=self.padding.upper(),
                                          output_shape=deconv_shape)

        output = conv_out + K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
        print("Output shape: ", K.shape(output))
        output =  K.permute_dimensions(output, (0, 2, 1, 3))
        print("Output shape after permute: ", K.shape(output))
        # 2D -> 1D
        output = K.squeeze(output,2)
        print("Output shape after squeeze: ", K.shape(output))
        return output
예제 #7
0
    def _step(self,
              x_tm1,
              h_tm1, c_tm1, H,
              u_i, u_f, u_o, u_c, w_i, w_f, w_c, w_o, w_x, w_a, v_i, v_f, v_c, v_o, b_i, b_f, b_c, b_o, b_x, b_a):
        """Run one decoding step: attend over ``H``, then apply LSTM-style gates.

        Args:
            x_tm1: output emitted by the previous step.
            h_tm1: previous hidden state.
            c_tm1: previous cell state.
            H: sequence that is attended over at every step.
            u_*: weights applied to ``h_tm1`` for each gate.
            w_i, w_f, w_c, w_o: weights applied to ``x_tm1`` for each gate.
            w_x, b_x: output projection applied to the new hidden state.
            w_a, b_a: affine map producing the attention energies.
            v_*: weights applied to the attention context ``v`` for each gate.
            b_i, b_f, b_c, b_o: gate biases.

        Returns:
            Tuple ``(x_t, h_t, c_t)``: new output, hidden state and cell state.
        """

        # Tile the previous cell state self.input_length times and add it to H.
        s_tm1 = K.repeat(c_tm1, self.input_length)
        e = H + s_tm1
        def a(x, states):
            # Stateless affine map applied by K.rnn at each step of `e`.
            output = K.dot(x, w_a) + b_a
            return output, []
        _, energy, _ = K.rnn(a, e, [], mask=None)
        # The 'linear' activation is looked up and applied to the energies.
        energy = activations.get('linear')(energy)
        # Move the last axis to the front and keep only its first slice.
        # Presumably that axis has size 1 (w_a maps to a scalar); TODO confirm.
        energy = K.permute_dimensions(energy, (2, 0, 1))
        energy = energy[0]
        alpha = K.softmax(energy)
        # Tile the attention weights self.hidden_dim times, then swap the last
        # two axes so they can be multiplied element-wise with H.
        alpha = K.repeat(alpha, self.hidden_dim)
        alpha = K.permute_dimensions(alpha, (0, 2 , 1))
        weighted_H = H * alpha
        
        # Attention context: weighted sum of H over axis 1.
        v = K.sum(weighted_H, axis=1)

        # Input and context contributions to each gate.
        xi_t = K.dot(x_tm1, w_i) + K.dot(v, v_i) + b_i
        xf_t = K.dot(x_tm1, w_f) + K.dot(v, v_f) + b_f
        xc_t = K.dot(x_tm1, w_c) + K.dot(v, v_c) + b_c
        xo_t = K.dot(x_tm1, w_o) + K.dot(v, v_o) + b_o

        # Add the recurrent contributions and apply the gate nonlinearities.
        i_t = self.inner_activation(xi_t + K.dot(h_tm1, u_i))
        f_t = self.inner_activation(xf_t + K.dot(h_tm1, u_f))
        c_t = f_t * c_tm1 + i_t * self.activation(xc_t + K.dot(h_tm1, u_c))
        o_t = self.inner_activation(xo_t + K.dot(h_tm1, u_o))
        h_t = o_t * self.activation(c_t)

        # Project the new hidden state to this step's output.
        x_t = K.dot(h_t, w_x) + b_x
        return x_t, h_t, c_t
    def call(self, inputs):
        """Rearrange channel blocks into spatial positions, scaling by ``self.size``.

        Args:
            inputs: rank-4 tensor laid out according to ``self.data_format``
                ('channels_first' or 'channels_last').

        Returns:
            Tensor whose height and width are multiplied by ``self.size`` and
            whose channel count is divided by ``rh * rw``. Any other
            ``data_format`` falls through and returns None.

        Raises:
            ValueError: if ``inputs`` is not rank 4.
        """
        input_shape = K.int_shape(inputs)
        if len(input_shape) != 4:
            # Build one message string. Passing the shape as a second argument
            # makes the exception render as a tuple instead of a sentence.
            raise ValueError('Inputs should have rank ' +
                             str(4) +
                             '; Received input shape: ' + str(input_shape))

        if self.data_format == 'channels_first':
            batch_size, c, h, w = input_shape
            if batch_size is None:
                # Unknown batch size: let reshape infer it.
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
            # Interleave each scale factor with its spatial axis.
            out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
            out = K.reshape(out, (batch_size, oc, oh, ow))
            return out

        elif self.data_format == 'channels_last':
            batch_size, h, w, c = input_shape
            if batch_size is None:
                # Unknown batch size: let reshape infer it.
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
            # Interleave each scale factor with its spatial axis.
            out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
            out = K.reshape(out, (batch_size, oh, ow, oc))
            return out
예제 #9
0
파일: decode.py 프로젝트: the-moliver/kfs
    def call(self, x, mask=None):
        """Filter the input with the real and/or complex filter banks.

        The real bank (``W_r``) is applied as a plain convolution. The complex
        bank is the magnitude of a filter pair (``W_c1``, ``W_c2``). When both
        are configured the two responses are concatenated on axis 1. If the
        real bank is disabled the reshaped input itself takes its place.
        ``mask`` is accepted but unused.
        """
        x = K.permute_dimensions(x, (0, 2, 1))
        x = K.reshape(x, (-1, self.input_length))
        x = K.expand_dims(K.expand_dims(x, 1), -1)

        def convolve(kernel):
            # Every filter bank shares the same convolution settings.
            return K.conv2d(x, kernel, strides=self.subsample,
                            border_mode=self.border_mode,
                            dim_ordering='th')

        real_part = x if self.real_filts is None else convolve(self.W_r)

        if self.complex_filts is None:
            output = real_part
        else:
            first = convolve(self.W_c1)
            second = convolve(self.W_c2)
            # Epsilon keeps the square root away from an exact zero.
            magnitude = K.sqrt(K.square(first) + K.square(second) + K.epsilon())
            output = K.concatenate((real_part, magnitude), axis=1)

        output_shape = self.get_output_shape_for((None, self.input_length, self.input_dim))
        output = K.squeeze(output, 3)  # drop the dummy trailing axis
        output = K.permute_dimensions(output, (2, 1, 0))
        return K.reshape(output, (-1, output_shape[1], output.shape[1]*output.shape[2]))
예제 #10
0
 def reverse(x):
     """Reverse ``x`` along axis 1.

     Works on rank-2 and rank-3 tensors. A rank-2 input is temporarily given
     a trailing singleton axis so both cases share the same permutations.

     Returns:
         Tensor with the same rank as ``x``, reversed along axis 1.
     """
     was_2d = K.ndim(x) == 2
     if was_2d:
         x = K.expand_dims(x, -1)
     # Bring axis 1 to the front, flip it, then restore the axis order.
     rev = K.permute_dimensions(x, (1, 0, 2))[::-1]
     rev = K.permute_dimensions(rev, (1, 0, 2))
     # Drop the helper axis only after the last 3-axis permute. Squeezing
     # earlier leaves a rank-2 tensor that the (1, 0, 2) pattern cannot handle.
     if was_2d:
         rev = K.squeeze(rev, -1)
     return rev
예제 #11
0
def semantic_matrix(argv):
	"""Return the pairwise cosine-similarity matrix of two rank-3 tensors.

	``argv`` must hold exactly two tensors ``(q, a)``. Each is normalised by
	its L2 norm along axis 2, and ``SAFE_EPSILON`` is added to the
	denominator to avoid division by zero.
	"""
	assert len(argv) == 2
	q, a = argv[0], argv[1]
	swap_last_two = [0,2,1]

	def l2_norm(t):
		# Norm along axis 2, keeping the axis so batch_dot can pair them up.
		return K.sqrt((t ** 2).sum(axis=2, keepdims=True))

	q_sqrt = l2_norm(q)
	a_sqrt = l2_norm(a)
	denominator = K.batch_dot(q_sqrt, K.permute_dimensions(a_sqrt, swap_last_two))
	numerator = K.batch_dot(q, K.permute_dimensions(a, swap_last_two))
	return numerator / (denominator + SAFE_EPSILON)
예제 #12
0
    def call(self, X,  mask=None):
        """Apply a 2D transposed convolution plus bias.

        Axis 1 of the input is moved to the end for
        ``tf.nn.conv2d_transpose`` and moved back on the result. ``mask`` is
        accepted but unused.
        """
        moved = K.permute_dimensions(X, (0, 2, 3, 1))
        deconv = tf.nn.conv2d_transpose(moved, self.W, strides=self.strides,
                                        padding=self.padding.upper(),
                                        output_shape=self.deconv_shape)

        bias = K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
        return K.permute_dimensions(deconv + bias, (0, 3, 1, 2))
예제 #13
0
파일: local.py 프로젝트: Abhipray/keras
    def call(self, x, mask=None):
        """Apply a 2D convolution with un-shared weights.

        Each output position has its own slice of ``self.W``. The matching
        input window is flattened and multiplied with it. With 'th' ordering
        on the Theano backend this is done one window at a time; otherwise
        all windows are stacked and multiplied in a single ``K.batch_dot``.
        The bias (if enabled) and activation are applied afterwards.

        Raises:
            Exception: if ``self.dim_ordering`` is neither 'th' nor 'tf'.
        """
        stride_row, stride_col = self.subsample
        _, feature_dim, nb_filter = self.W_shape

        def windows():
            # Yield (flat position, row slice, column slice) in row-major order.
            for i in range(self.output_row):
                for j in range(self.output_col):
                    rows = slice(i * stride_row, i * stride_row + self.nb_row)
                    cols = slice(j * stride_col, j * stride_col + self.nb_col)
                    yield i * self.output_col + j, rows, cols

        def flatten(patch):
            return K.reshape(patch, (1, -1, feature_dim))

        grid_shape = (self.output_row, self.output_col, -1, nb_filter)

        if self.dim_ordering == 'th':
            if K._backend == 'theano':
                pieces = [K.dot(flatten(x[:, :, rows, cols]), self.W[pos, :, :])
                          for pos, rows, cols in windows()]
                output = K.concatenate(pieces, axis=0)
            else:
                stacked = K.concatenate(
                    [flatten(x[:, :, rows, cols]) for _, rows, cols in windows()],
                    axis=0)
                output = K.batch_dot(stacked, self.W)
            output = K.reshape(output, grid_shape)
            output = K.permute_dimensions(output, (2, 3, 0, 1))
        elif self.dim_ordering == 'tf':
            stacked = K.concatenate(
                [flatten(x[:, rows, cols, :]) for _, rows, cols in windows()],
                axis=0)
            output = K.batch_dot(stacked, self.W)
            output = K.reshape(output, grid_shape)
            output = K.permute_dimensions(output, (2, 0, 1, 3))
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        if self.bias:
            # dim_ordering was validated above, so it is 'th' or 'tf' here.
            if self.dim_ordering == 'th':
                bias_shape = (1, nb_filter, self.output_row, self.output_col)
            else:
                bias_shape = (1, self.output_row, self.output_col, nb_filter)
            output += K.reshape(self.b, bias_shape)

        return self.activation(output)
예제 #14
0
 def f(X):
     """Normalise ``X`` by a power of a windowed sum of squares over axis 1.

     ``n``, ``k``, ``alpha`` and ``beta`` are taken from the enclosing scope.
     For each position the scale is ``k`` plus ``alpha`` times the sum of
     squares over a window of ``n`` neighbours on axis 1, raised to ``beta``.
     """
     _, ch, _, _ = X.shape  # batch, channel, row, column
     half = n // 2
     squared = K.square(X)
     # Move axis 1 to the end, pad it by `half` on each side, move it back.
     padded = K.spatial_2d_padding(
         K.permute_dimensions(squared, (0, 2, 3, 1)), (0, half))
     padded = K.permute_dimensions(padded, (0, 3, 1, 2))
     scale = k
     for offset in range(n):
         window = padded[:, offset:offset + ch, :, :]
         scale += alpha * window
     return X / scale ** beta
예제 #15
0
파일: Utils.py 프로젝트: shiretzet/PixelRNN
def Skew(inputs):
    """Skew a rank-4 tensor by shifting each slice of its last axis.

    Slice ``i`` of the last axis is written into a zero buffer at offset ``i``
    along axis 1. Axis 1 of the result therefore grows to
    ``shape[1] + shape[3] - 1``; the other axes keep their size.
    """
    shape = K.shape(inputs)
    size0, size1, size2, size3 = shape[0], shape[1], shape[2], shape[3]

    # Put the last axis first so theano.scan iterates over it.
    inputs_ = K.permute_dimensions(inputs, (3,0,1,2))
    buffer_ = T.zeros((size3, size0, size1 + size3 - 1, size2))

    def place(buf, inp, i):
        # Copy one slice into its shifted window of the zero buffer.
        return T.set_subtensor(buf[:, i:i+size1, :], inp[:,:,:])

    res, _ = theano.scan(fn=place,
                         sequences=[buffer_, inputs_, T.arange(size3)])
    return K.permute_dimensions(res, (1,2,3,0))
예제 #16
0
    def call(self, x, mask=None):
        """Look up embeddings from ``self.W`` for the indices in ``x``.

        If ``x`` (or ``mask``) is a two-element list, only its first element
        is used. When ``0 < self.dropout < 1``, a random binary mask over all
        but the last axis of ``W`` is applied during training, rescaled by
        ``1 / retain_p``.

        In 'matrix' mode this is a plain ``K.gather``. In 'tensor' mode ``x``
        must be rank 3 and the lookup is done per leading index of ``W`` (see
        the inline comments).

        Raises:
            Exception: if ``self.mode`` is neither 'matrix' nor 'tensor'.
        """
        if isinstance(x, list): 
            x,_ = x
        if mask is not None and isinstance(mask, list):
            mask,_ = mask
        if 0. < self.dropout < 1.:
            retain_p = 1. - self.dropout
            # One dropout decision per entry of W's leading axes; expand_dims
            # lets it broadcast across the last axis.
            dims = self.W._keras_shape[:-1]
            B = K.random_binomial(dims, p=retain_p) * (1. / retain_p)
            B = K.expand_dims(B)
            W = K.in_train_phase(self.W * B, self.W)
        else:
            W = self.W
        
        if self.mode == 'matrix':
            return K.gather(W,x)
        elif self.mode == 'tensor':
            # Quick and dirty: only rank-3 inputs are allowed in tensor mode.
            assert K.ndim(x) == 3
            # Put the sequence axis first, gather, then take the diagonal
            # across the shared batch dimension.
            # In other words, W is (B, S, F)
            # and the incoming x is (B, S, A).
            inds = K.arange(self.W._keras_shape[0])
            # An earlier version gathered and then called
            # .diagonal(axis1=0, axis2=3), but that has no gradient.
            # Tensor abc goes to bac; indexed with xyz it goes to xyzac.
            # x == a, so reshape to xayzc == xxyzc,
            # then take the diagonal on the first two axes: xyzc.
            out = K.gather(K.permute_dimensions(W, (1,0,2)), x) 
            out = K.permute_dimensions(out, (0,3,1,2,4))
            # Indexing with (inds, inds) picks the diagonal of the first two axes.
            out = K.gather(out, (inds, inds))
            return out
        else:
            raise Exception('sanity check. should not be here.')

        #all_dims = T.arange(len(self.W._keras_shape))
        #first_shuffle = [all_dims[self.embed_dim]] + all_dims[:self.embed_dim] + all_dims[self.embed_dim+1:]
        ## 1. take diagonal from 0th to
        ## chang eof tactics
        ## embed on time or embed on batch. that's all I'm supporting.  
        ## if it's embed on time, then, x.ndim+1 is where batch will be, and is what
        ## i need to take the diagonal over. 
        ## now dim shuffle the xdims + 1 to the front.
        #todo: get second shuffle or maybe find diagonal calculations
        #out = K.gather(W, x)
        #return out

        ### reference
        #A = S(np.arange(60).reshape(3,4,5))
        #x = S(np.random.randint(0, 4, (3,4,10)))
        #x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[T.arange(A.shape[0]), T.arange(A.shape[0])]
예제 #17
0
파일: decode.py 프로젝트: the-moliver/kfs
    def call(self, x, mask=None):
        """Convolve with ``self.kernel`` and square only the later filters.

        The first ``self.nb_simple`` entries on the last axis of the response
        are passed through unchanged. The remaining entries are squared. Both
        groups are concatenated back on the last axis. ``mask`` is accepted
        but unused.
        """
        conv_input = K.expand_dims(K.permute_dimensions(x, (0, 2, 1)), -1)

        response = K.squeeze(K.conv2d(conv_input, self.kernel), -1)
        response = K.permute_dimensions(response, (0, 2, 1))

        split = self.nb_simple
        linear_part = response[:, :, :split]
        squared_part = K.square(response[:, :, split:])

        return K.concatenate((linear_part, squared_part), axis=-1)
예제 #18
0
	def get_initial_states(self, x):
		"""Build all-zero initial states ``[M, h, w]`` sized from ``x``.

		Each state starts from ``zeros_like(x[:, 0, 0])`` and is stacked up to
		the required extra axes, then permuted so the sample axis comes first:
		``M`` is (nb_samples, nb_slots, memory_size), ``h`` is
		(nb_samples, memory_size) and ``w`` is (nb_samples, nb_slots).
		"""
		def stacked_zeros(*sizes):
			# Start at (nb_samples,) and prepend one axis per requested size.
			state = K.zeros_like(x[:, 0, 0])
			for size in sizes:
				state = K.pack([state] * size)
			return state

		# (memory_size, nb_slots, nb_samples) -> (nb_samples, nb_slots, memory_size)
		M = K.permute_dimensions(
			stacked_zeros(self.nb_slots, self.memory_size), (2, 1, 0))
		# (memory_size, nb_samples) -> (nb_samples, memory_size)
		h = K.permute_dimensions(stacked_zeros(self.memory_size), (1, 0))
		# (nb_slots, nb_samples) -> (nb_samples, nb_slots)
		w = K.permute_dimensions(stacked_zeros(self.nb_slots), (1, 0))
		return [M, h, w]
예제 #19
0
    def call(self, X,  mask=None):
        """Apply a 1D transposed convolution by routing through the 2D op.

        A singleton axis is inserted at position 2 and the axes are permuted
        for ``tf.nn.conv2d_transpose``. After the bias is added the
        permutation is undone and the singleton axis removed. ``mask`` is
        accepted but unused.
        """
        # 1D -> 2D
        widened = K.expand_dims(X, 2)
        widened = K.permute_dimensions(widened, (0, 2, 3, 1))
        deconv = tf.nn.conv2d_transpose(widened, self.W, strides=self.strides,
                                        padding=self.padding.upper(),
                                        output_shape=self.deconv_shape)

        bias = K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
        restored = K.permute_dimensions(deconv + bias, (0, 3, 1, 2))
        # 2D -> 1D
        return K.squeeze(restored, 2)
예제 #20
0
    def get_output(self, train=False):
        """Apply a 1D convolution, bias and activation via ``K.conv2d``.

        NOTE(review): the tensor convolved is the ``train`` argument itself,
        not ``self.get_input(train)``. Callers must pass the input tensor
        there; confirm this is intended.
        """
        # Add a dummy trailing axis and swap axes 1 and 2 for the 2D conv.
        conv_input = K.expand_dims(train, -1)
        conv_input = K.permute_dimensions(conv_input, (0, 2, 1, 3))
        conv_out = K.conv2d(conv_input, self.W, strides=self.subsample,
                            border_mode=self.border_mode,
                            dim_ordering='th')

        biased = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
        activated = self.activation(biased)
        # Drop the dummy axis again and restore the original axis order.
        squeezed = K.squeeze(activated, 3)
        return K.permute_dimensions(squeezed, (0, 2, 1))
예제 #21
0
    def call(self, x, mask=None):
        """Run the parent layer on a direction-permuted copy of ``x``.

        ``self.direction`` ('Down' or 'Right') selects the permutation applied
        before the parent call and the one applied to its result. Any other
        value raises ``Exception``.
        """
        if self.direction == 'Down':
            inward, outward = (0, 2, 1, 3), (0, 2, 1, 3)
        elif self.direction == 'Right':
            inward, outward = (0, 3, 1, 2), (0, 2, 3, 1)
        else:
            raise Exception('ERROR: Unknown direction')

        X = K.permute_dimensions(x, inward)
        result = super(PyramidSTM, self).call(X, mask)
        return K.permute_dimensions(result, outward)
예제 #22
0
def recurrence(y_i, h):
    """Attend over ``h``, join the context with ``y_i`` and run it through a GRU.

    ``s``, ``encoding_dim`` and ``output_dim`` are not defined here; they are
    read from an enclosing scope. A fresh GRU wrapped in a new ``Model`` is
    built on every call.

    Shape comments below are the original author's.
    NOTE(review): they are not verifiable from this block alone.
    """
    h_permute = K.permute_dimensions(h, [0, 2, 1])  # (batch_size, encoding_dim, input_length)
    e = K.l2_normalize(
        K.batch_dot(h_permute, s, axes=1),  # (batch_size, input_length)
        axis=1)  # (batch_size, input_length)

    # eqn 6: attention weights
    alpha = K.softmax(e)  # (batch_size, input_length)

    # eqn 5: context vector as the attention-weighted combination of h
    c = K.batch_dot(h, alpha, axes=1)  # (batch_size, encoding_dim)

    # Concatenate context and y_i, then add a length-1 axis at position 1.
    recurrence_result = K.expand_dims(
        K.concatenate([c, y_i], axis=1),
        dim=1)  # (batch_size, 1, 2 * encoding_dim)

    expanded_h = Input(shape=(1, 2 * encoding_dim),
                       name='expanded_h')
    gru = Sequential([
        GRU(output_dim,
            return_sequences=False,
            input_shape=(1, 2 * encoding_dim))
    ])
    # NOTE(review): with return_sequences=False the GRU output is presumably
    # (batch_size, output_dim), not (batch_size, 1, output_dim). Confirm.
    model = Model(input=[expanded_h],
                  output=[gru(expanded_h)])  # (batch_size, 1, output_dim)
    return model(recurrence_result)
예제 #23
0
    def get_output(self, train=False):
        """Decode ``self.output_length`` steps with ``self._step`` via theano.scan.

        The layer input ``v`` seeds the first step and is also handed to every
        step as a non-sequence. Final hidden and cell states are recorded as
        updates on this layer (when stateful without an external state input)
        and on every layer in ``self.state_outputs``.

        Returns the scan outputs permuted with axes ``(1, 0, 2)``.
        """
        v = self.get_input(train)

        reuse_stored_states = (self.stateful or self.state_input
                               or len(self.state_outputs) > 0)
        if reuse_stored_states:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(v)

        # Order must match the positional parameters of self._step.
        shared = [v, self.U_i, self.U_f, self.U_o, self.U_c,
                  self.W_i, self.W_f, self.W_c, self.W_o,
                  self.W_x, self.V_i, self.V_f, self.V_c,
                  self.V_o, self.b_i, self.b_f, self.b_c,
                  self.b_o, self.b_x]
        (outputs, hidden_states, cell_states), updates = theano.scan(
            self._step,
            n_steps=self.output_length,
            outputs_info=[v] + initial_states,
            non_sequences=shared)

        final_states = [hidden_states[-1], cell_states[-1]]
        if self.stateful and not self.state_input:
            self.updates = [(self.states[i], final_states[i]) for i in range(2)]
        for o in self.state_outputs:
            o.updates = [(o.states[i], final_states[i]) for i in range(2)]

        return K.permute_dimensions(outputs, (1, 0, 2))
def gram_matrix(x):
    """Return the Gram matrix of a rank-3 tensor, divided by its element count.

    The last axis is moved to the front and the rest flattened, so the result
    holds the inner products between slices of the last axis of ``x``.
    """
    # Assumes x is (height, width, depth), so this gives (depth, height, width).
    # (2, 1, 0) would also work; (2, 0, 1) may suit the memory layout better.
    depth_first = K.permute_dimensions(x, (2,0,1))
    # batch_flatten turns the features into a 2D array.
    flat = K.batch_flatten(depth_first)
    gram = K.dot(flat, K.transpose(flat))
    return gram / x.get_shape().num_elements()
예제 #25
0
        def teacher_forced(h, states):
            """Compute one GRU step fed with the true previous output.

            ``h`` stacks the previous layer's input and the true input on
            axis 1. ``h_tm1`` and ``rec_dp_mask`` are read from the enclosing
            scope, and ``states`` is not used here.

            Returns:
                ``K.stack([output, output])``: the new state twice.

            Raises:
                ValueError: if ``self.implementation`` is not 0.
            """
            # switching from (batch_size, previous_layer_input|true_input, output_dim)
            #    to ( previous_layer_input|true_input, batch_size, output_dim)
            axes = [1, 0] + list(range(2, K.ndim(h)))
            h = K.permute_dimensions(h, axes)

            prev_layer_input = h[0:1, :, :]
            true_input = h[1:, :, :self.units]

            # Teacher forcing: the true input stands in for the sampled output.
            prev_sampled_output = true_input

            if self.implementation == 0:
                # The last axis packs the z, r and h pre-activations in order.
                x_z = prev_layer_input[0, :, :self.units]
                x_r = prev_layer_input[0, :, self.units: 2 * self.units]
                x_h = prev_layer_input[0, :, 2 * self.units:]
            else:
                # str() is required: implementation is compared with the int 0
                # above, and str + int raises TypeError, not this ValueError.
                raise ValueError('Implementation type ' + str(self.implementation) + ' is invalid')

            z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                                      self.recurrent_kernel_z))
            r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                                      self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1 * rec_dp_mask[2],
                                       self.recurrent_kernel_h) +
                                 K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

            output = z * h_tm1 + (1. - z) * hh

            return K.stack([output, output])
예제 #26
0
def make_patches_grid(x, patch_size, patch_stride):
    '''Cut image `x` into a regular grid of square patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns `(patches, patches_norm)`, where `patches_norm` is the L2 norm of
    each patch over its channel and pixel axes, with those axes kept.
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    batched = K.expand_dims(x, 0)
    dims = K.shape(batched)
    num_channels = dims[-3]
    num_rows = 1 + (dims[-2] - patch_size) // patch_stride
    num_cols = 1 + (dims[-1] - patch_size) // patch_stride

    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    patches = images2neibs(batched, window, step, mode='valid')

    # neibs come out grouped per channel; regroup them per patch position
    per_channel = K.shape(patches)[0] // num_channels
    patches = K.reshape(
        patches, (num_channels, per_channel, patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))

    # lay out as a 2d grid (rows, cols, channels, px, py)
    grid_shape = (num_rows, num_cols, num_channels, patch_size, patch_size)
    patches = K.reshape(patches, grid_shape)

    squared_sum = K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True)
    return patches, K.sqrt(squared_sum)
예제 #27
0
def gram_matrix(x):
    """Return the Gram matrix of the flattened features of ``x``.

    With "th" dim ordering ``x`` is flattened as is. Otherwise its last axis
    is moved to the front first.
    """
    if K.image_dim_ordering() != "th":
        x = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(x)
    return K.dot(features, K.transpose(features))
예제 #28
0
    def call(self, inputs, **kwargs):
        """Route the input capsules to ``self.num_capsule`` output capsules.

        Follows the routing algorithm from Hinton's paper, but the logits are
        replaced on every round (b = <u,v>) instead of accumulated
        (b = b + <u,v>). This change can improve the feature representation
        of the capsule.

        To get standard routing, replace
            b = K.batch_dot(o, votes, [2, 3])
        with
            b += K.batch_dot(o, votes, [2, 3])
        """
        if self.share_weights:
            votes = K.conv1d(inputs, self.kernel)
        else:
            votes = K.local_conv1d(inputs, self.kernel, [1], [1])

        in_shape = K.shape(inputs)
        votes = K.reshape(votes,
                          (in_shape[0], in_shape[1],
                           self.num_capsule, self.dim_capsule))
        votes = K.permute_dimensions(votes, (0, 2, 1, 3))

        b = K.zeros_like(votes[:, :, :, 0])
        print(self.routings)
        for i in range(self.routings):
            c = K.softmax(b, 1)
            o = self.activation(K.batch_dot(c, votes, [2, 2]))
            # The logits are not refreshed after the final round.
            if i >= self.routings - 1:
                continue
            b = K.batch_dot(o, votes, [2, 3])
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
        return o
예제 #29
0
 def call(self, x, mask=None):
     """Decode ``self.output_length`` steps, attending over the input ``x``.

     The last slice of the (1, 0, 2)-permuted input seeds the decoder. It is
     repeated ``self.output_length`` times as the RNN input and appended to
     the initial states. The full input is passed to every step as the last
     constant.

     Returns:
         The per-step outputs from ``K.rnn``. The final states are kept on
         ``self.states_to_transfer``.
     """
     print("AttentionDecoder.call")
     H = x
     x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :]
     if self.stateful or self.state_input or len(self.state_outputs) > 0:
         # Copy so the += below does not grow self.states itself.
         initial_states = self.states[:]
     else:
         initial_states = self.get_initial_states(H)
     constants = self.get_constants(H) + [H]
     y_0 = x
     x = K.repeat(x, self.output_length)
     initial_states += [y_0]
     last_output, outputs, states = K.rnn(
         self.step,
         x,
         initial_states,
         go_backwards=self.go_backwards,
         mask=mask,
         constants=constants,
         unroll=self.unroll,
         input_length=self.output_length)
     if self.stateful and not self.state_input:
         # Materialise the pairs: on Python 3 a bare zip() is a one-shot
         # iterator that is empty after its first traversal.
         self.updates = list(zip(self.states, states))
     self.states_to_transfer = states
     return outputs
예제 #30
0
        def free_running(h, states):
            """Compute one GRU step fed with the previously generated output.

            ``initial_states``, ``h_tm1``, ``rec_dp_mask`` and
            ``random_cutoff_vec`` are read from the enclosing scope, and
            ``states`` is not used here.

            Returns:
                ``K.stack([output, final_output])``: the new state and its
                sampled version from ``self.output_sampling``.

            Raises:
                ValueError: if ``self.implementation`` is not 0.
            """

            prev_generated_output = initial_states[0][1:, :, :]
            prev_sampled_output = prev_generated_output

            # switching from (batch_size, previous_layer_input|true_input, output_dim)
            #    to ( previous_layer_input|true_input, batch_size, output_dim)
            axes = [1, 0] + list(range(2, K.ndim(h)))
            h = K.permute_dimensions(h, axes)

            prev_layer_input = h[0:1, :, :]

            if self.implementation == 0:
                # The last axis packs the z, r and h pre-activations in order.
                x_z = prev_layer_input[0, :, :self.units]
                x_r = prev_layer_input[0, :, self.units: 2 * self.units]
                x_h = prev_layer_input[0, :, 2 * self.units:]
            else:
                # Fail with a clear error instead of hitting unbound x_z below.
                raise ValueError('Implementation type ' + str(self.implementation) + ' is invalid')

            z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                                      self.recurrent_kernel_z))
            r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                                      self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1 * rec_dp_mask[2],
                                       self.recurrent_kernel_h) +
                                 K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

            output = z * h_tm1 + (1. - z) * hh

            final_output = self.output_sampling(output, random_cutoff_vec)

            return K.stack([output, final_output])
 def _outer(AB):
     """Batch-multiply ``AB[1]`` with the transposed ``AB[0]``, then transpose.

     Both transposes swap the last two axes of a rank-3 tensor.
     """
     swap_last_two = (0, 2, 1)
     att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], swap_last_two))
     return K.permute_dimensions(att_ji, swap_last_two)
예제 #32
0
    def call(self, x, mask=None):
        """Pool a fixed-size feature grid out of each region of interest.

        Args:
            x: two-element sequence ``(img, rois)``. ``rois`` is indexed as
                ``rois[0, roi_idx, 0..3]`` to read x, y, w and h of each
                region, so only the first batch entry is used.
            mask: accepted but unused.

        Returns:
            Tensor reshaped to (1, num_rois, pool_size, pool_size,
            nb_channels). For 'channels_first' the channel axis is then moved
            to position 2.
        """

        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            # NOTE: `x` is rebound here from the input pair to the ROI's x offset.
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            # Size of one pooling cell along each direction.
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'channels_first':
                # Max-pool every cell of the pool_size x pool_size grid separately.
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        # Guarantee every cell spans at least one pixel.
                        x2 = x1 + K.maximum(1, x2 - x1)
                        y2 = y1 + K.maximum(1, y2 - y1)

                        new_shape = [
                            input_shape[0], input_shape[1], y2 - y1, x2 - x1
                        ]

                        x_crop = img[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'channels_last':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                # Crop the ROI and resize it straight to the pooled size.
                rs = tf.image.resize(img[:, y:y + h, x:x + w, :],
                                     size=(self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output,
                                 (1, self.num_rois, self.pool_size,
                                  self.pool_size, self.nb_channels))

        if self.dim_ordering == 'channels_first':
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
        else:
            # Identity permutation: the layout is already as required.
            final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output
예제 #33
0
    def local_conv3d(self, inputs, kernel, kernel_size, strides, output_shape, data_format=None):
        """Apply 3D conv with un-shared weights.
        # Arguments
            inputs: 5D tensor indexed as
                    (batch, channels, rows, cols, z)
                    if data_format='channels_first'
                    or as
                    (batch, rows, cols, z, channels)
                    if data_format='channels_last'.
            kernel: the unshared weight for convolution,
                    with shape (output_items, feature_dim, filters)
            kernel_size: a tuple of 3 integers, specifying the extent of
                        the convolution window along rows, cols and z.
            strides: a tuple of 3 integers, specifying the strides
                    of the convolution along rows, cols and z.
            output_shape: a tuple with (output_row, output_col, output_z)
            data_format: the data format, channels_first or channels_last;
                        defaults to `K.image_data_format()`.
        # Returns
            A 5D tensor laid out as
            (batch, filters, output_row, output_col, output_z)
            if data_format='channels_first'
            or as
            (batch, output_row, output_col, output_z, filters)
            if data_format='channels_last'.
        # Raises
            ValueError: if `data_format` is neither
                        `channels_last` or `channels_first`.
        """
        if data_format is None:
            data_format = K.image_data_format()
        if data_format not in {'channels_first', 'channels_last'}:
            raise ValueError('Unknown data_format: ' + str(data_format))

        stride_row, stride_col, stride_z = strides
        output_row, output_col, output_z = output_shape
        _, feature_dim, filters = K.int_shape(kernel)
        channels_first = data_format == 'channels_first'

        def window(index, stride, extent):
            # Input span covered by output position `index` along one axis.
            return slice(index * stride, index * stride + extent)

        # One flattened input window per output position, in (row, col, z) order.
        xs = []
        for i in range(output_row):
            for j in range(output_col):
                for k in range(output_z):
                    rows = window(i, stride_row, kernel_size[0])
                    cols = window(j, stride_col, kernel_size[1])
                    depth = window(k, stride_z, kernel_size[2])
                    if channels_first:
                        patch = inputs[:, :, rows, cols, depth]
                    else:
                        patch = inputs[:, rows, cols, depth, :]
                    xs.append(K.reshape(patch, (1, -1, feature_dim)))

        stacked = K.concatenate(xs, axis=0)
        output = K.batch_dot(stacked, kernel)
        output = K.reshape(output,
                           (output_row, output_col, output_z, -1, filters))

        # Bring the inferred (batch) axis to the front in the requested layout.
        axes = (3, 4, 0, 1, 2) if channels_first else (3, 0, 1, 2, 4)
        return K.permute_dimensions(output, axes)
예제 #34
0
def minibatch_discriminator(x):
    """Compute minibatch-discrimination features from the input tensor x.

    The permutation pattern has three entries, so `x` must be a rank-3
    tensor. Every slice along the first axis is compared with every other
    slice: the absolute differences are summed over axis 2 (an L1
    distance), passed through `exp(-d)`, and the resulting similarities
    are summed over the remaining trailing axis.
    """
    # x with a new trailing axis, against x rotated so that its first axis
    # becomes the trailing one; the subtraction broadcasts the two together.
    expanded = K.expand_dims(x, 3)
    rotated = K.expand_dims(K.permute_dimensions(x, [1, 2, 0]), 0)
    pairwise = expanded - rotated

    distances = K.sum(K.abs(pairwise), axis=2)
    similarities = K.exp(-distances)
    return K.sum(similarities, axis=2)
    def call(self, inputs):
        """Run every attention head over the graph and merge the results.

        # Arguments
            inputs: indexable pair; `inputs[0]` is the node-feature tensor X
                (batch x N x F) and `inputs[1]` is the adjacency tensor A
                (batch x N x N). Both are asserted to be rank 3.

        # Returns
            `self.activation` applied to the merged heads: the heads are
            concatenated on the last axis when
            `self.attn_heads_reduction == 'concat'`, otherwise they are
            averaged.
        """
        X = inputs[0]  # Node features (batch x N x F)
        A = inputs[1]  # Adjacency matrix (batch x N x N)

        assert K.ndim(X) == 3
        assert K.ndim(A) == 3

        outputs = []
        for h in range(self.attn_heads):
            # Parameters belonging to head h
            kernel = self.kernels[h]
            attn_kernel_self = self.attn_kernels_self[h]
            attn_kernel_neighs = self.attn_kernels_neighs[h]
            if self.use_bias:
                bias = self.biases[h]

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (batch x N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            # so the "self" and "neighbour" halves of the attention kernel
            # can be applied separately and added afterwards.
            attn_for_self = K.dot(features, attn_kernel_self)
            attn_for_neighs = K.dot(features, attn_kernel_neighs)
            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]

            # Swap the last two axes of the neighbour term
            trans_attn_for_neighs = K.permute_dimensions(
                attn_for_neighs, (0, 2, 1))

            # Additive attention; presumably the two terms are
            # (batch x N x 1) and (batch x 1 x N) -- TODO confirm against
            # the attention kernel shapes.
            scores = attn_for_self + trans_attn_for_neighs  # (batch x N x N) via broadcasting

            # Add nonlinearity
            scores = LeakyReLU(alpha=0.2)(scores)

            # Mask values before the softmax (Vaswani et al., 2017):
            # positions where A is 0 receive -10e9 (i.e. -1e10), positions
            # where A is 1 receive 0.
            # NOTE(review): assumes A only holds 0/1 entries -- confirm.
            mask = (1.0 - A) * -10e9
            scores = scores + mask

            # Feed masked values to softmax
            attn_weights = K.softmax(
                scores)  # (batch x N x N), attention coefficients

            # Dropout is applied to the attention coefficients and to the
            # transformed features independently.
            dropout_attn_coeffs = Dropout(self.attn_dropout)(
                attn_weights)  # (batch x N x N)
            dropout_features = Dropout(self.feature_dropout)(features)

            # Linear combination with neighbors' features
            # (batch x N x N) * (batch x N x F') = (batch x N x F')
            node_features = K.batch_dot(dropout_attn_coeffs, dropout_features)

            if self.use_bias:
                node_features = K.bias_add(node_features, bias)

            outputs.append(node_features)

        # Reduce the attention heads output according to the reduction method
        if self.attn_heads_reduction == 'concat':
            output = K.concatenate(outputs, -1)  # (batch x N x KF')
        else:
            output = K.mean(K.stack(outputs, axis=0),
                            axis=0)  # (batch x N x F')

        output = self.activation(output)

        return output
예제 #36
0
def gram_matrix(x):
    """Return the Gram matrix of the rank-3 tensor x, scaled by its size.

    The last axis of `x` is moved to the front and the remaining axes are
    flattened, so each row holds one slice along that axis. The product of
    that matrix with its own transpose is divided by the total number of
    elements in `x` (taken from its static shape).
    """
    leading_last_axis = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(leading_last_axis)
    gram = K.dot(flat, K.transpose(flat))
    return gram / x.get_shape().num_elements()
예제 #37
0
    def call(self, inputs):
        """Apply the complex-valued convolution to `inputs`.

        The layer kernel stores the real and the imaginary filters side by
        side on its last axis (first `self.filters` entries are real, the
        rest imaginary). They are optionally transformed from a spectral
        parametrization and/or weight-normalized, then assembled into one
        real-valued kernel so that a single real convolution carries out
        the complex product.

        # Arguments
            inputs: input tensor. Half of its channel axis is used as the
                complex input dimension.
                NOTE(review): presumably the channels are laid out as
                [real part, imaginary part] -- confirm against the caller.

        # Returns
            The convolution output, with the bias added when
            `self.use_bias` is set and `self.activation` applied when it is
            not None.
        """
        channel_axis = 1 if self.data_format == 'channels_first' else -1
        # Half of the channels; used below for the kernel's static shape.
        input_dim    = K.shape(inputs)[channel_axis] // 2
        # Split the stored kernel into real and imaginary filters along its
        # last axis.
        if self.rank == 1:
            f_real   = self.kernel[:, :, :self.filters]
            f_imag   = self.kernel[:, :, self.filters:]
        elif self.rank == 2:
            f_real   = self.kernel[:, :, :, :self.filters]
            f_imag   = self.kernel[:, :, :, self.filters:]
        elif self.rank == 3:
            f_real   = self.kernel[:, :, :, :, :self.filters]
            f_imag   = self.kernel[:, :, :, :, self.filters:]

        # The 1D backend functions take scalar stride/dilation, hence the
        # [0] indexing for rank 1.
        convArgs = {"strides":       self.strides[0]       if self.rank == 1 else self.strides,
                    "padding":       self.padding,
                    "data_format":   self.data_format,
                    "dilation_rate": self.dilation_rate[0] if self.rank == 1 else self.dilation_rate}
        # Pick the backend convolution matching the rank and direction.
        if self.transposed:
            convFunc = {1: K.conv1d_transpose,
                        2: K.conv2d_transpose,
                        3: K.conv3d_transpose}[self.rank]
        else:
            convFunc = {1: K.conv1d,
                        2: K.conv2d,
                        3: K.conv3d}[self.rank]

        # processing if the weights are assumed to be represented in the
        # spectral domain
        # NOTE(review): only ranks 1 and 2 are handled here; for rank 3 the
        # kernel is used unchanged even when spectral_parametrization is set.

        if self.spectral_parametrization:
            if   self.rank == 1:
                # Move the filter axis to the front, stack real over
                # imaginary on axis 0, flatten the two leading axes, apply
                # the ifft helper, then undo the reshaping and permutation.
                f_real = K.permute_dimensions(f_real, (2,1,0))
                f_imag = K.permute_dimensions(f_imag, (2,1,0))
                f      = K.concatenate([f_real, f_imag], axis=0)
                fshape = K.shape(f)
                f      = K.reshape(f, (fshape[0] * fshape[1], fshape[2]))
                f      = ifft(f)
                f      = K.reshape(f, fshape)
                f_real = f[:fshape[0]//2]
                f_imag = f[fshape[0]//2:]
                f_real = K.permute_dimensions(f_real, (2,1,0))
                f_imag = K.permute_dimensions(f_imag, (2,1,0))
            elif self.rank == 2:
                # Same procedure as rank 1, with two spatial axes and the
                # ifft2 helper.
                f_real = K.permute_dimensions(f_real, (3,2,0,1))
                f_imag = K.permute_dimensions(f_imag, (3,2,0,1))
                f      = K.concatenate([f_real, f_imag], axis=0)
                fshape = K.shape(f)
                f      = K.reshape(f, (fshape[0] * fshape[1], fshape[2], fshape[3]))
                f      = ifft2(f)
                f      = K.reshape(f, fshape)
                f_real = f[:fshape[0]//2]
                f_imag = f[fshape[0]//2:]
                f_real = K.permute_dimensions(f_real, (2,3,1,0))
                f_imag = K.permute_dimensions(f_imag, (2,3,1,0))

        # In case of weight normalization, real and imaginary weights are
        # normalized

        if self.normalize_weight:
            ker_shape = self.kernel_shape
            nb_kernels = ker_shape[-2] * ker_shape[-1]
            # View each filter part as a 2D matrix:
            # (prod(kernel_size), nb_kernels).
            kernel_shape_4_norm = (np.prod(self.kernel_size), nb_kernels)
            reshaped_f_real = K.reshape(f_real, kernel_shape_4_norm)
            reshaped_f_imag = K.reshape(f_imag, kernel_shape_4_norm)
            # reduction_axes ends up as [0]: statistics are taken over the
            # flattened kernel positions, separately for each column.
            reduction_axes = list(range(2))
            del reduction_axes[-1]
            mu_real = K.mean(reshaped_f_real, axis=reduction_axes)
            mu_imag = K.mean(reshaped_f_imag, axis=reduction_axes)

            # Means reshaped to (1, nb_kernels) so they broadcast over rows.
            broadcast_mu_shape = [1] * 2
            broadcast_mu_shape[-1] = nb_kernels
            broadcast_mu_real = K.reshape(mu_real, broadcast_mu_shape)
            broadcast_mu_imag = K.reshape(mu_imag, broadcast_mu_shape)
            reshaped_f_real_centred = reshaped_f_real - broadcast_mu_real
            reshaped_f_imag_centred = reshaped_f_imag - broadcast_mu_imag
            # Second-order statistics of the centred weights; epsilon is
            # added to each of the three terms.
            Vrr = K.mean(reshaped_f_real_centred ** 2, axis=reduction_axes) + self.epsilon
            Vii = K.mean(reshaped_f_imag_centred ** 2, axis=reduction_axes) + self.epsilon
            Vri = K.mean(reshaped_f_real_centred * reshaped_f_imag_centred,
                         axis=reduction_axes) + self.epsilon

            # NOTE(review): the un-centred weights are what gets passed to
            # complex_normalization (with center=False); the centred copies
            # are only used for the statistics above -- confirm intended.
            normalized_weight = complex_normalization(
                K.concatenate([reshaped_f_real, reshaped_f_imag], axis=-1),
                Vrr, Vii, Vri,
                beta = None,
                gamma_rr = self.gamma_rr,
                gamma_ri = self.gamma_ri,
                gamma_ii = self.gamma_ii,
                scale=True,
                center=False,
                axis=-1
            )

            # Split the normalized matrix back into its two halves and
            # restore the kernel shape.
            normalized_real = normalized_weight[:, :nb_kernels]
            normalized_imag = normalized_weight[:, nb_kernels:]
            f_real = K.reshape(normalized_real, self.kernel_shape)
            f_imag = K.reshape(normalized_imag, self.kernel_shape)

        # Performing complex convolution

        # Record the static shape on the tensors (slicing/reshaping above
        # does not set `_keras_shape`).
        f_real._keras_shape = self.kernel_shape
        f_imag._keras_shape = self.kernel_shape

        # Block kernel: [f_real, -f_imag] stacked on the input-channel axis
        # form the first half of the output filters, [f_imag, f_real] the
        # second half.
        cat_kernels_4_real = K.concatenate([f_real, -f_imag], axis=-2)
        cat_kernels_4_imag = K.concatenate([f_imag,  f_real], axis=-2)
        cat_kernels_4_complex = K.concatenate([cat_kernels_4_real, cat_kernels_4_imag], axis=-1)
        cat_kernels_4_complex._keras_shape = self.kernel_size + (2 * input_dim, 2 * self.filters)

        output = convFunc(inputs, cat_kernels_4_complex, **convArgs)

        if self.use_bias:
            output = K.bias_add(
                output,
                self.bias,
                data_format=self.data_format
            )

        if self.activation is not None:
            output = self.activation(output)

        return output
def gram_matrix(x):
    """Return the (unnormalised) Gram matrix of the rank-3 tensor x.

    The last axis of `x` is moved to the front, the remaining axes are
    flattened, and the resulting matrix is multiplied by its transpose.
    """
    leading_last_axis = backend.permute_dimensions(x, (2, 0, 1))
    flat = backend.batch_flatten(leading_last_axis)
    return backend.dot(flat, backend.transpose(flat))
예제 #39
0
    def _interpolate(image, sampled_grids, output_size):
        """Bilinearly sample `image` at the locations in `sampled_grids`.

        # Arguments
            image: rank-4 tensor; it is permuted with (0, 2, 3, 1) and the
                result is read as (batch, height, width, channels).
            sampled_grids: tensor whose slice `[:, 0:1, :]` holds the x
                coordinates and `[:, 1:2, :]` the y coordinates.
                NOTE(review): presumably normalized to [-1, 1] -- confirm.
            output_size: pair whose product is the number of sampled
                points per image.

        # Returns
            The weighted sum of the four neighbouring pixel values for
            every sampled point, with one row per point and one column per
            channel.
        """
        image = K.permute_dimensions(image, (0, 2, 3, 1))
        batch_size = K.shape(image)[0]
        height = K.shape(image)[1]
        width = K.shape(image)[2]
        num_channels = K.shape(image)[3]

        # Flattened sampling coordinates for the whole batch.
        x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
        y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

        # Rescale: -1 maps to 0, +1 maps to width (resp. height).
        x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
        y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

        # Integer corner coordinates surrounding each sample.
        x0 = K.cast(x, 'int32')
        x1 = x0 + 1
        y0 = K.cast(y, 'int32')
        y1 = y0 + 1

        # Uses the static shape, so height and width must be known when
        # the graph is built.
        max_x = int(K.int_shape(image)[2] - 1)
        max_y = int(K.int_shape(image)[1] - 1)

        # Keep the corners inside the image.
        x0 = K.clip(x0, 0, max_x)
        x1 = K.clip(x1, 0, max_x)
        y0 = K.clip(y0, 0, max_y)
        y1 = K.clip(y1, 0, max_y)

        # Offset of each image's first pixel in the flattened batch,
        # repeated once per sampled point.
        pixels_batch = K.arange(0, batch_size) * (height * width)
        pixels_batch = K.expand_dims(pixels_batch, axis=-1)
        flat_output_size = output_size[0] * output_size[1]
        base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
        base = K.flatten(base)

        # Flat index of the start of row y0: base + (y0 * width)
        base_y0 = y0 * width
        base_y0 = base + base_y0
        # Flat index of the start of row y1: base + (y1 * width)
        base_y1 = y1 * width
        base_y1 = base_y1 + base

        # Flat indices of the four corners:
        # a = (y0, x0), b = (y1, x0), c = (y0, x1), d = (y1, x1)
        indices_a = base_y0 + x0
        indices_b = base_y1 + x0
        indices_c = base_y0 + x1
        indices_d = base_y1 + x1

        # One row per pixel, one column per channel.
        flat_image = K.reshape(image, shape=(-1, num_channels))
        flat_image = K.cast(flat_image, dtype='float32')
        pixel_values_a = K.gather(flat_image, indices_a)
        pixel_values_b = K.gather(flat_image, indices_b)
        pixel_values_c = K.gather(flat_image, indices_c)
        pixel_values_d = K.gather(flat_image, indices_d)

        x0 = K.cast(x0, 'float32')
        x1 = K.cast(x1, 'float32')
        y0 = K.cast(y0, 'float32')
        y1 = K.cast(y1, 'float32')

        # Bilinear weights: each corner is weighted by the area of the
        # rectangle spanned by the sample and the opposite corner.
        # NOTE(review): the weights are built from the *clipped* corners, so
        # for samples on or beyond the border they can sum to something
        # other than 1 -- confirm this is intended.
        area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
        area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
        area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
        area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

        values_a = area_a * pixel_values_a
        values_b = area_b * pixel_values_b
        values_c = area_c * pixel_values_c
        values_d = area_d * pixel_values_d
        return values_a + values_b + values_c + values_d
예제 #40
0
 def stack_and_transpose(x):
     """Stack a list of per-step tensors and put the batch axis first.

     `x` is a list of length T whose elements are
     batch_size x output_vocab_size tensors; the result is a single
     batch_size x T x output_vocab_size tensor.
     """
     time_major = K.stack(x)  # T x batch_size x output_vocab_size
     return K.permute_dimensions(time_major, pattern=(1, 0, 2))
예제 #41
0
def NN_model(args, training=True):
    """Build a CNN + bidirectional-GRU recognition model with a CTC loss head.

    A convolutional backbone selected by `args.model` is followed by a
    dense layer, two pairs of forward/backward GRUs and a softmax over
    `para.num_classes`. The input size comes from the module-level
    `N_COL` / `N_ROW`.

    # Arguments
        args: object with a `model` attribute, one of 'densenet121',
            'resnet18', 'resnet18_2222', 'resnet34', 'resnet50',
            'resnet101'.
        training: when True the model also takes the label and length
            inputs and outputs the CTC loss; when False it outputs the
            softmax predictions.

    # Returns
        If `training` is True: a tuple `(model, conv_shape[1])`, where
        `conv_shape[1]` is the second dimension of the backbone output
        after the axis swap (the sequence length fed to the GRUs).
        Otherwise: the prediction model alone.
        NOTE(review): the return type differs between the two modes, so
        callers must unpack accordingly.

    # Raises
        TypeError: if `args.model` is not one of the supported names.
    """
    # Only read here, never assigned; the declarations are kept as-is.
    global N_COL
    global N_ROW

    # Select the convolutional backbone (all built without their top).
    if args.model == 'densenet121':
        from keras.applications.densenet import DenseNet121
        input_tensor = Input(shape=(N_COL, N_ROW, 3))
        base_model = DenseNet121(input_shape=(N_COL, N_ROW, 3),
                                 include_top=False,
                                 weights='imagenet',
                                 input_tensor=input_tensor,
                                 pooling=None)

    elif args.model == 'resnet18':
        import resnet
        # num_outputs is a placeholder value; include_top is False.
        NOT_CARE = 1
        base_model = resnet.ResnetBuilder.build_resnet_18(input_shape=(N_COL,
                                                                       N_ROW,
                                                                       3),
                                                          num_outputs=NOT_CARE,
                                                          include_top=False)
    elif args.model == 'resnet18_2222':
        import resnet
        NOT_CARE = 1
        base_model = resnet.ResnetBuilder.build_resnet_18_2222(
            input_shape=(N_COL, N_ROW, 3),
            num_outputs=NOT_CARE,
            include_top=False)
    elif args.model == 'resnet34':
        import resnet
        NOT_CARE = 1
        base_model = resnet.ResnetBuilder.build_resnet_34(input_shape=(N_COL,
                                                                       N_ROW,
                                                                       3),
                                                          num_outputs=NOT_CARE,
                                                          include_top=False)
    elif args.model == 'resnet50':
        import resnet
        NOT_CARE = 1
        base_model = resnet.ResnetBuilder.build_resnet_50(input_shape=(N_COL,
                                                                       N_ROW,
                                                                       3),
                                                          num_outputs=NOT_CARE,
                                                          include_top=False)
    elif args.model == 'resnet101':
        import resnet
        NOT_CARE = 1
        base_model = resnet.ResnetBuilder.build_resnet_101(
            input_shape=(N_COL, N_ROW, 3),
            num_outputs=NOT_CARE,
            include_top=False)

    else:
        raise TypeError('model should be in the list of the supported model!')

    print('Input col: ', N_COL)
    print('Input row: ', N_ROW)

    x = base_model.output
    #CNN to RNN
    x = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1, 3)))(
        x)  # switchaxes from [b,h,w,c] to [b,w,h,c]
    # Shape after the axis swap; original note: resnet 18 -> (?, 16, 32, 256)
    conv_shape = x.get_shape()
    print('conv_shape', conv_shape)
    # Merge the last two axes so each step along axis 1 becomes one
    # feature vector for the recurrent layers.
    x = Reshape(target_shape=(int(conv_shape[1]),
                              int(conv_shape[2] * conv_shape[3])),
                name='reshape')(x)
    x = Dense(para.dense_size,
              activation='relu',
              kernel_initializer='he_normal',
              name='dense1')(x)
    #x = BatchNormalization()(x)
    # GRU RNN
    # NOTE(review): the GRU layers use the `init` keyword while the Dense
    # layers use `kernel_initializer` -- confirm the installed Keras
    # version accepts both spellings.
    # First bidirectional pair: forward and backward outputs are summed.
    gru_1 = GRU(para.rnn_size,
                return_sequences=True,
                init='he_normal',
                name='gru1')(x)
    gru_1b = GRU(para.rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 init='he_normal',
                 name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])
    gru1_merged = BatchNormalization()(gru1_merged)

    # Second bidirectional pair: forward and backward outputs are
    # concatenated.
    gru_2 = GRU(para.rnn_size,
                return_sequences=True,
                init='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(para.rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 init='he_normal',
                 name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])
    gru2_merged = BatchNormalization()(gru2_merged)

    # Per-step class scores followed by a softmax.
    inner = Dense(para.num_classes,
                  kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    # Extra inputs that are only needed to compute the CTC loss.
    labels = Input(name='the_labels',
                   shape=[para.max_text_len],
                   dtype='float32')  # (None ,7)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length,
                                   label_length])  #(None, 1)

    if training:
        return Model(
            inputs=[base_model.input, labels, input_length, label_length],
            outputs=loss_out), conv_shape[1]
    else:
        return Model(inputs=[base_model.input], outputs=y_pred)
예제 #42
0
def merge_heads(x):
    """Swap axes 1 and 2 of the rank-4 tensor x and fuse its last two axes.

    This is the inverse layout change of splitting a channel axis into
    separate attention heads: after the swap the two trailing axes are
    collapsed into one whose size is their product.
    """
    swapped = K.permute_dimensions(x, [0, 2, 1, 3])
    dims = shape_list(swapped)
    fused = np.prod(dims[-2:])
    return K.reshape(swapped, dims[:-2] + [fused])
예제 #43
0
def split_heads(x, n: int, k: bool = False):  # B, L, C
    """Split the last axis of x into `n` heads and move the head axis forward.

    The last axis (size C) is reshaped into (n, C // n). The result is then
    permuted with [0, 2, 1, 3], or with [0, 2, 3, 1] when `k` is true,
    which additionally leaves the sequence axis last.
    """
    dims = shape_list(x)
    channels = dims[-1]
    per_head = K.reshape(x, dims[:-1] + [n, channels // n])
    if k:
        order = [0, 2, 3, 1]
    else:
        order = [0, 2, 1, 3]
    return K.permute_dimensions(per_head, order)
예제 #44
0
def correlation_layer(x):
    """Correlate the two branches held in x[0] and x[1].

    Axis 1 is squeezed out of both branches, the last two axes of the
    second branch are swapped, and the batch-wise product of the two is
    returned with axis 1 squeezed out again.
    """
    left = squeeze(x[0], 1)
    right = squeeze(x[1], 1)
    right_swapped = permute_dimensions(right, (0, 2, 1))
    product = batch_dot(left, right_swapped)
    return squeeze(product, 1)
예제 #45
0
 def _call(self, features, edges):
     """Aggregate `features` through the transposed `edges`, then normalise.

     The last two axes of `edges` are swapped before the batch-wise
     product with `features`; the result is divided by the sum of `edges`
     over axis 2 (kept as a dimension) plus the backend epsilon, which
     guards against division by zero.
     """
     edges_swapped = K.permute_dimensions(edges, (0, 2, 1))
     aggregated = K.batch_dot(edges_swapped, features)
     normaliser = K.sum(edges, axis=2, keepdims=True) + K.epsilon()
     return aggregated / normaliser
def gram_matrix(x):
    """Return the (unnormalised) Gram matrix of the rank-3 tensor x.

    The last axis of `x` becomes the row axis of a flattened matrix, which
    is then multiplied by its own transpose.
    """
    flat = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(flat, K.transpose(flat))
예제 #47
0
    def attention(self,
                  pre_q,
                  pre_v,
                  pre_k,
                  out_seq_len: int,
                  d_model: int,
                  training=None):
        """
        Calculates the output of the attention once the affine transformations
        of the inputs are done. Here's the shapes of the arguments:
        :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
        :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
        :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
        :param out_seq_len: the length of the output sequence
        :param d_model: dimensionality of the model (by the paper)
        :param training: Passed by Keras. Should not be defined manually.
          Optional scalar tensor indicating if we're in training
          or inference phase.
        :return: tensor of shape (batch_size, out_seq_len, d_model): the
          merged attention heads multiplied by `self.output_weights`.
        :raises NotImplementedError: if memory-compressed attention
          (`self.compression_window_size` is not None) is combined with
          `self.use_masking`.
        """
        # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
        q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
        v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

        if self.compression_window_size is None:
            # Plain attention: K goes straight to its transposed layout.
            k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
        else:
            # Memory-compressed attention described in paper
            # "Generating Wikipedia by Summarizing Long Sequences"
            # (https://arxiv.org/pdf/1801.10198.pdf)
            # It compresses keys and values using 1D-convolution which reduces
            # the size of Q * K_transposed from roughly seq_len^2
            # to convoluted_seq_len^2. If we use strided convolution with
            # window size = 3 and stride = 3, memory requirements of such
            # memory-compressed attention will be 9 times smaller than
            # that of the original version.
            if self.use_masking:
                raise NotImplementedError(
                    "Masked memory-compressed attention has not "
                    "been implemented yet")
            k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
            # The nested expression below reads inside-out (Step 1 -> 4)
            # and is applied to K and V with their own kernel and bias.
            k, v = [
                K.reshape(
                    # Step 4: Return the result to its original dimensions
                    # (batch_size, num_heads, seq_len, d_model//heads)
                    K.bias_add(
                        # Step 3: ... and add bias
                        K.conv1d(
                            # Step 2: we "compress" K and V using strided conv
                            K.reshape(
                                # Step 1: we reshape K and V to
                                # (batch * num_heads, seq_len, d_model//heads)
                                item,
                                (-1, K.int_shape(item)[-2],
                                 d_model // self.num_heads)),
                            kernel,
                            strides=self.compression_window_size,
                            padding='valid',
                            data_format='channels_last'),
                        bias,
                        data_format='channels_last'),
                    # new shape: the first two (dynamic) dimensions of the
                    # item, an inferred sequence length, and the head size
                    K.concatenate(
                        [K.shape(item)[:2], [-1, d_model // self.num_heads]]))
                for item, kernel, bias in ((k, self.k_conv_kernel,
                                            self.k_conv_bias),
                                           (v, self.v_conv_kernel,
                                            self.v_conv_bias))
            ]
            k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])
        # In both branches k_transposed is now shaped
        # (batch_size, num_heads, d_model//heads, seq_len)
        # for further matrix multiplication
        # Scaling factor: square root of the per-head dimensionality.
        sqrt_d = K.constant(np.sqrt(d_model // self.num_heads),
                            dtype=K.floatx())
        q_shape = K.int_shape(q)
        k_t_shape = K.int_shape(k_transposed)
        v_shape = K.int_shape(v)
        # before performing batch_dot all tensors are being converted to 3D
        # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
        # performs identically on all backends
        # Inside-out: (Q . K^T) / sqrt_d -> optional mask -> softmax ->
        # optional dropout -> product with V -> back to 4D.
        attention_heads = K.reshape(
            K.batch_dot(
                self.apply_dropout_if_needed(K.softmax(
                    self.mask_attention_if_needed(
                        K.batch_dot(
                            K.reshape(q, (-1, ) + q_shape[-2:]),
                            K.reshape(k_transposed,
                                      (-1, ) + k_t_shape[-2:])) / sqrt_d)),
                                             training=training),
                K.reshape(v, (-1, ) + v_shape[-2:])),
            (-1, self.num_heads, q_shape[-2], v_shape[-1]))
        # Put the sequence axis before the head axis again and flatten to
        # rows of size d_model for the output projection.
        attention_heads_merged = K.reshape(
            K.permute_dimensions(attention_heads, [0, 2, 1, 3]), (-1, d_model))
        attention_out = K.reshape(
            K.dot(attention_heads_merged, self.output_weights),
            (-1, out_seq_len, d_model))
        return attention_out
예제 #48
0
파일: model.py 프로젝트: sougata09/CNN-QA
    def get_gru_baseline(self):
        """Build, compile and return the GRU baseline model.

        A shared GRU encoder (the variables are named `lstm_*`, but the
        layer is a GRU) encodes every document sentence and every
        question+option sequence. Options attend over the document
        sentences, and the attended document vector of each option is
        compared with that option's own encoding to score it.

        Returns:
            The compiled Keras model taking the question, document and
            option inputs and producing the output of `MaskedSoftmax`.
            The model summary is printed as a side effect.
        """
        # Shared sequence encoder returning only its last output (size 100).
        lstm_qo = GRU(100, return_sequences=False)
        # Keeps the diagonal of the last two axes: multiply by an identity
        # matrix and sum over axis 2.
        # NOTE(review): `T` is presumably theano.tensor -- confirm.
        get_diag = Lambda(
            lambda xin: K.sum(xin * T.eye(self.max_opt_count), axis=2),
            output_shape=(self.max_opt_count, ))
        # Swaps axes 1 and 2.
        transp_out = Lambda(lambda xin: K.permute_dimensions(xin, (0, 2, 1)),
                            output_shape=(self.max_opt_count, 100))
        # Weighted sum over axis 1 of xin[0], using the weights in xin[1].
        apply_weights = Lambda(lambda xin: (K.expand_dims(xin[
            0], axis=-1) * K.expand_dims(xin[1], axis=2)).sum(axis=1),
                               output_shape=(100, self.max_opt_count))
        # Repeats the question once per option along axis 1.
        tile_q = Lambda(lambda xin: K.tile(xin, (1, self.max_opt_count, 1, 1)),
                        output_shape=(self.max_opt_count, self.max_q_length,
                                      self.word_vec_size))
        # Inserts a new axis 1 (to be tiled by `tile_q`).
        exp_dims = Lambda(lambda xin: K.expand_dims(xin, 1),
                          output_shape=(1, self.max_q_length,
                                        self.word_vec_size))
        # The next three layers together form a softmax over axis 1 that
        # ignores entries equal to 0: mask to -inf, exponentiate,
        # normalise by the sum.
        exp_layer = Lambda(lambda xin: K.exp(xin),
                           output_shape=(self.max_sent_para,
                                         self.max_opt_count))
        # NOTE(review): `np.NINF` was removed in NumPy 2.0 (`-np.inf` is
        # the replacement) -- confirm the NumPy version this runs under.
        mask_weights = Lambda(lambda xin: T.switch(T.eq(xin, 0), np.NINF, xin),
                              output_shape=(self.max_sent_para,
                                            self.max_opt_count))
        final_weights = Lambda(lambda xin: xin / K.cast(
            K.sum(xin, axis=1, keepdims=True), K.floatx()),
                               output_shape=(self.max_sent_para,
                                             self.max_opt_count))

        # Question: add an option axis and repeat it for every option.
        q_input = Input(shape=(self.max_q_length, self.word_vec_size),
                        name='question_input')
        q_exp = exp_dims(q_input)
        q_rep = tile_q(q_exp)
        option_input = Input(shape=(self.max_opt_count, self.max_option_length,
                                    self.word_vec_size),
                             name='option_input')
        # Question words followed by option words, per option.
        opt_q = Concatenate(axis=2)([q_rep, option_input])

        # Wrap the shared encoder (with zero-masking) in its own model so
        # it can be applied through TimeDistributed.
        lstm_input = Input(shape=(None, self.word_vec_size), name='lstm_input')
        lstm_mask = Masking(mask_value=0.)(lstm_input)
        lstm_out = lstm_qo(lstm_mask)

        lstm_model = Model(inputs=lstm_input, outputs=lstm_out)
        lstm_td_opt = TimeDistributed(lstm_model)(opt_q)

        doc_input = Input(shape=(self.max_sent_para, self.max_words_sent,
                                 self.word_vec_size),
                          name='doc_input')
        lstm_doc = TimeDistributed(lstm_model)(doc_input)
        # Normalised dot product between every document sentence and
        # every option encoding, turned into attention weights.
        att_wts = Dot(axes=2, normalize=True)([lstm_doc, lstm_td_opt])
        att_wts = mask_weights(att_wts)
        att_wts = exp_layer(att_wts)
        att_wts = final_weights(att_wts)
        # Attention-weighted document representation per option.
        out = apply_weights([lstm_doc, att_wts])

        out = transp_out(out)
        # Compare each option's document summary with each option
        # encoding; the diagonal keeps only the matching pairs.
        dp = Dot(axes=2, normalize=True)([out, lstm_td_opt])
        out = get_diag(dp)
        probs = MaskedSoftmax()([out, option_input])
        main_model = Model(inputs=[q_input, doc_input, option_input],
                           outputs=probs)
        # Plain SGD: no decay, no momentum.
        sgd = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
        main_model.compile(loss='categorical_crossentropy',
                           optimizer=sgd,
                           metrics=['accuracy'])
        main_model.summary()
        return main_model
예제 #49
0
# Shared convolutional encoder: a width-4 Conv1D with 300 filters.
# NOTE(review): `input` is defined before the visible part of this script
# (and shadows the builtin of the same name).
conv_4 = Conv1D(300,
                4,
                padding='same',
                activation='relu',
                strides=1)(input)
shared = Model(input, conv_4)

# Two variable-length sequences of 300-dimensional vectors.
input_1 =  Input(shape=(None, 300), dtype='float32')
input_2 =  Input(shape=(None, 300), dtype='float32')

# Both inputs go through the same encoder (shared weights).
out_1 = shared(input_1)
out_2 = shared(input_2)

# Attention matrix between the two encoded sequences.
attention = AttentionLayer()([out_1,out_2])

# out_1 column wise
att_1 = GlobalMaxPooling1D()(attention)
att_1 = Activation('softmax')(att_1)
out_1 = dot([att_1, out_1], axes=1)

# out_2 row wise
# Swap the last two axes so the same pooling runs over the other axis.
attention_transposed = Lambda(lambda x: K.permute_dimensions(x, (0,2,1)))(attention)
att_2 = GlobalMaxPooling1D()(attention_transposed)
att_2 = Activation('softmax')(att_2)
out_2 = dot([att_2, out_2], axes=1)

# Distance between the two attention-weighted representations.
distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([out_1, out_2])

# NOTE(review): uses the `input=` / `output=` keywords, while `shared`
# above is built positionally -- confirm the installed Keras version
# accepts these keyword names.
model = Model(input=[input_1, input_2], output=distance)
예제 #50
0
 def call(self, inputs):
     """Select the entries `self.idxs` along the last axis of `inputs`.

     The backend gather works on the first axis, so the axis order is
     reversed before gathering and reversed back afterwards.
     """
     rank = K.ndim(inputs)
     flipped = K.permute_dimensions(inputs, range(rank)[::-1])
     picked = K.gather(flipped, self.idxs)
     return K.permute_dimensions(picked, range(rank)[::-1])
예제 #51
0
    def _call(self, inputs, **kwargs):
        """Keep, for every capsule, the prototype with the smallest dissimilarity.

        # Arguments
            inputs: indexable pair; `inputs[0]` holds the signals
                (batch x protos x dim1 x ... x dimN) and `inputs[1]` the
                dissimilarities (batch x protos).

        # Returns
            `inputs` unchanged when `self.proto_number` equals
            `self.capsule_number`. Otherwise a dict `{0: signals, 1: diss}`
            with signals reshaped to (batch x capsule x dim1 x ... x dimN)
            and dissimilarities to (batch x capsule), each holding the
            winning prototype of every capsule.
        """
        # Nothing to compete over when prototype and capsule counts match.
        if self.proto_number == self.capsule_number:
            return inputs
        else:
            signals = inputs[0]
            diss = inputs[1]
            signal_shape = mixed_shape(signals)

            if self.use_for_loop:
                # Variant 1: handle the capsules one after another.
                diss_stack = []
                signals_stack = []
                sub_idx = None
                with K.name_scope('for_loop'):
                    # NOTE(review): each `p` is presumably a contiguous,
                    # ascending run of prototype indices for one capsule --
                    # the slices below rely on p[0], p[-1] and len(p)
                    # agreeing; confirm against how _proto_distrib is built.
                    for p in self._proto_distrib:
                        with K.name_scope('compute_slices'):
                            diss_ = diss[:, p[0]:(p[-1] + 1)]
                            # Fold the capsule's prototypes into the
                            # leading axis so one flat gather can pick
                            # the winners.
                            signals_ = K.reshape(
                                signals[:, p[0]:(p[-1] + 1), :],
                                [signal_shape[0] * len(p)] +
                                list(signal_shape[2:]))
                        with K.name_scope('competition'):
                            if len(p) > 1:
                                with K.name_scope('competition_indices'):
                                    argmin_idx = K.argmin(diss_, axis=-1)
                                    # Created once and reused for all
                                    # capsules.
                                    if sub_idx is None:
                                        sub_idx = K.arange(
                                            0,
                                            signal_shape[0],
                                            dtype=argmin_idx.dtype)
                                    # Turn the per-sample argmin into an
                                    # index into the flattened layout.
                                    argmin_idx = argmin_idx + len(p) * sub_idx

                                with K.name_scope('dissimilarity_competition'):
                                    diss_stack.append(
                                        K.expand_dims(
                                            K.gather(K.flatten(diss_),
                                                     argmin_idx), -1))

                                with K.name_scope('signal_competition'):
                                    signals_stack.append(
                                        K.gather(signals_, argmin_idx))
                            else:
                                # A single prototype wins by default.
                                diss_stack.append(diss_)
                                signals_stack.append(signals_)

                diss = K.concatenate(diss_stack, 1)

                with K.name_scope('signal_concatenation'):
                    signals = K.concatenate(signals_stack, 1)
                    signals = K.reshape(
                        signals, [signal_shape[0], self.capsule_number] +
                        list(signal_shape[2:]))

            else:
                # Variant 2: bring all capsules into a block layout with
                # `_max_proto_number_in_capsule` columns and resolve the
                # competition with a single argmin / gather.
                with K.name_scope('dissimilarity_preprocessing'):
                    # extend if it is not equally distributed
                    if not self._equally_distributed:
                        # permute to first dimension is prototype (protos x batch)
                        diss = K.permute_dimensions(diss, [1, 0])
                        # gather regarding extension (preparing for reshape to block)
                        diss = K.gather(diss, self._proto_extension)
                        # permute back so that batch is the first
                        # dimension again
                        diss = K.permute_dimensions(diss, [1, 0])

                    # reshape to block form
                    diss = K.reshape(diss, [
                        signal_shape[0] * self.capsule_number,
                        self._max_proto_number_in_capsule
                    ])

                with K.name_scope('competition_indices'):
                    # get minimal idx in each class and batch for element selection in diss and signals
                    argmin_idx = K.argmin(diss, axis=-1)
                    # offset each row's argmin so it indexes the flattened
                    # block
                    argmin_idx = argmin_idx + self._max_proto_number_in_capsule * \
                                 K.arange(0, signal_shape[0] * self.capsule_number, dtype=argmin_idx.dtype)

                with K.name_scope('dissimilarity_competition'):
                    # get minimal values in the form (batch x capsule)
                    diss = K.gather(K.flatten(diss), argmin_idx)
                    diss = K.reshape(diss,
                                     [signal_shape[0], self.capsule_number])

                with K.name_scope('signal_preprocessing'):
                    # apply the same steps as above for signals
                    # get signals in: (batch x protos x dim1 x ... x dimN) --> out: (batch x capsule x dim1 x ... x dimN)
                    # extend if is not equally distributed
                    if not self._equally_distributed:
                        # swap batch and prototype axes, gather the
                        # extension, then swap back
                        signals = K.permute_dimensions(
                            signals,
                            [1, 0] + list(range(2, len(signal_shape))))
                        signals = K.gather(signals, self._proto_extension)
                        signals = K.permute_dimensions(
                            signals,
                            [1, 0] + list(range(2, len(signal_shape))))

                    # one row per (batch, capsule, prototype slot)
                    signals = K.reshape(signals, [
                        signal_shape[0] * self.capsule_number *
                        self._max_proto_number_in_capsule
                    ] + list(signal_shape[2:]))

                with K.name_scope('signal_competition'):
                    signals = K.gather(signals, argmin_idx)
                    signals = K.reshape(
                        signals, [signal_shape[0], self.capsule_number] +
                        list(signal_shape[2:]))

            return {0: signals, 1: diss}
def integrate_vec(vec, time_dep=False, method='ss', **kwargs):
    """
    Integrate (stationary or time-dependent) vector field (N-D Tensor) in tensorflow

    Aside from directly using tensorflow's numerical integration odeint(), also implements
    "scaling and squaring", and quadrature. Note that the diff. equation given to odeint
    is the one used in quadrature.

    Parameters:
        vec: the Tensor field to integrate.
            If vol_size is the size of the intrinsic volume, and vol_ndim = len(vol_size),
            then vector shape (vec_shape) should be
            [vol_size, vol_ndim] (if stationary)
            [vol_size, vol_ndim, nb_time_steps] (if time dependent)
        time_dep: bool whether vector is time dependent
        method: 'scaling_and_squaring' or 'ss' or 'ode' or 'quadrature'

        if using 'scaling_and_squaring': currently only supports integrating to time point 1.
            nb_steps: int number of steps (required, >= 0). Note that this means the vec
            field gets broken down to 2**nb_steps. so nb_steps of 0 means integral = vec.
            With time_dep=True the number of time steps must equal 2**nb_steps.

        if using 'quadrature':
            nb_steps: int number of steps (required, >= 1).

        if using 'ode':
            out_time_pt (optional): a time point or list of time points at which to evaluate
                Default: 1
            init (optional): if using 'ode', the initialization method.
                Currently only supporting 'zero'. Default: 'zero'
            ode_args (optional): dictionary of all other parameters for
                tf.contrib.integrate.odeint()

    Returns:
        int_vec: integral of vector field.
        Same shape as the input if method is 'scaling_and_squaring', 'ss', 'quadrature',
        or 'ode' with out_time_pt not a list. Will have shape [*vec_shape, len(out_time_pt)]
        if method is 'ode' with out_time_pt being a list.

    Raises:
        ValueError: if `method` is not one of the supported names, or if a non-'zero'
            `init` is requested for the 'ode' method.
        KeyError: if `nb_steps` is missing for 'ss'/'scaling_and_squaring'/'quadrature'.

    Todo:
        quadrature for more than just intrinsically out_time_pt = 1
    """

    if method not in ['ss', 'scaling_and_squaring', 'ode', 'quadrature']:
        raise ValueError(
            "method has to be 'scaling_and_squaring' (or 'ss'), 'ode' or "
            "'quadrature'. found: %s" % method)

    if method in ['ss', 'scaling_and_squaring']:
        nb_steps = kwargs['nb_steps']
        assert nb_steps >= 0, 'nb_steps should be >= 0, found: %d' % nb_steps

        if time_dep:
            # Bring the trailing time axis to the front. The permutation has to
            # cover every axis of `vec`, so it is built from the tensor rank
            # (not from the size of the last axis).
            ndim = len(vec.shape)
            svec = K.permute_dimensions(vec, [ndim - 1, *range(ndim - 1)])
            assert 2**nb_steps == svec.shape[
                0], "2**nb_steps and vector shape don't match"

            svec = svec / (2**nb_steps)
            for _ in range(nb_steps):
                # Compose neighbouring time steps pairwise, halving the number
                # of remaining steps. map_fn hands each (odd, even) pair to
                # `transform`; dtype is required because the output structure
                # (one tensor) differs from the input structure (a pair).
                svec = svec[0::2] + tf.map_fn(
                    lambda pair: transform(pair[0], pair[1]),
                    (svec[1::2], svec[0::2]),
                    dtype=svec.dtype)

            disp = svec[0, :]

        else:
            vec = vec / (2**nb_steps)
            for _ in range(nb_steps):
                vec += transform(vec, vec)
            disp = vec

    elif method == 'quadrature':
        # TODO: could output more than a single timepoint!
        nb_steps = kwargs['nb_steps']
        assert nb_steps >= 1, 'nb_steps should be >= 1, found: %d' % nb_steps

        vec = vec / nb_steps

        if time_dep:
            disp = vec[..., 0]
            for si in range(nb_steps - 1):
                disp += transform(vec[..., si + 1], disp)
        else:
            disp = vec
            for _ in range(nb_steps - 1):
                disp += transform(vec, disp)

    else:
        assert not time_dep, "odeint not implemented with time-dependent vector field"
        fn = lambda disp, _: transform(vec, disp)

        # process time point.
        out_time_pt = kwargs.get('out_time_pt', 1)
        single_out_time_pt = not isinstance(out_time_pt, (list, tuple))
        if single_out_time_pt:
            out_time_pt = [out_time_pt]
        # time 0 is prepended as the integration start point
        K_out_time_pt = K.variable([0, *out_time_pt])

        # process initialization
        if kwargs.get('init', 'zero') != 'zero':
            raise ValueError('non-zero init for ode method not implemented')
        disp0 = vec * 0

        # compute integration with tf.contrib.integrate.odeint
        ode_args = kwargs.get('ode_args', {})
        disp = tf.contrib.integrate.odeint(fn, disp0, K_out_time_pt,
                                           **ode_args)
        # drop the t=0 entry and move the time axis to the end
        disp = K.permute_dimensions(disp[1:len(out_time_pt) + 1, :],
                                    [*range(1, len(disp.shape)), 0])

        # return
        if single_out_time_pt:
            disp = disp[..., 0]

    return disp
예제 #53
0
파일: model.py 프로젝트: sougata09/CNN-QA
    def get_cnn_model2(self):
        """Build, compile and return the CNN question/option/document model.

        A shared sentence encoder (three parallel Conv2D branches with filter
        heights 2, 3 and 4, each mean-pooled and concatenated) is applied via
        TimeDistributed to (a) every option with the question prepended and
        (b) every document sentence. Document sentences are weighted per option
        by a masked, exponentiated and normalised similarity, and the weighted
        document vector is compared again with each option to produce one
        score per option, which goes through MaskedSoftmax.

        The model is compiled with plain SGD (lr=0.1) and categorical
        cross-entropy, its summary is printed, and the model is returned.

        NOTE(review): `T` is presumably `theano.tensor` -- confirm against the
        file's imports.
        """
        n_opt = self.max_opt_count
        vec_dim = self.word_vec_size
        att_shape = (self.max_sent_para, n_opt)

        # --- helper Lambda layers (creation order kept stable) -------------
        # keeps only the diagonal of an (n_opt x n_opt) score matrix
        diag_layer = Lambda(
            lambda xin: K.sum(xin * T.eye(self.max_opt_count), axis=2),
            output_shape=(n_opt, ))
        swap_last_axes = Lambda(
            lambda xin: K.permute_dimensions(xin, (0, 2, 1)),
            output_shape=(n_opt, vec_dim))
        # weighted sum over axis 1 of vectors xin[0] using weights xin[1]
        weighted_sum = Lambda(
            lambda xin: (K.expand_dims(xin[0], axis=-1) *
                         K.expand_dims(xin[1], axis=2)).sum(axis=1),
            output_shape=(vec_dim, n_opt))
        repeat_question = Lambda(
            lambda xin: K.tile(xin, (1, self.max_opt_count, 1, 1)),
            output_shape=(n_opt, self.max_q_length, vec_dim))
        add_option_axis = Lambda(
            lambda xin: K.expand_dims(xin, 1),
            output_shape=(1, self.max_q_length, vec_dim))
        add_channel_axis = Lambda(
            lambda xin: K.expand_dims(xin, 3),
            output_shape=(None, vec_dim, 1))
        exponentiate = Lambda(lambda xin: K.exp(xin), output_shape=att_shape)
        normalise = Lambda(
            lambda xin: xin / K.cast(K.sum(xin, axis=1, keepdims=True),
                                     K.floatx()),
            output_shape=att_shape)
        # entries equal to 0 become -inf so that the following exp yields 0
        mask_zeros = Lambda(
            lambda xin: T.switch(T.eq(xin, 0), np.NINF, xin),
            output_shape=att_shape)
        mean_pool = Lambda(lambda xin: K.mean(xin, axis=[1, 2]),
                           output_shape=(100, ))

        filter_sizes = [2, 3, 4]
        num_filters = 100

        # --- question tiled across options, then joined with each option ---
        question_in = Input(shape=(self.max_q_length, vec_dim),
                            name='question_input')
        question_tiled = repeat_question(add_option_axis(question_in))
        option_in = Input(shape=(n_opt, self.max_option_length, vec_dim),
                          name='option_input')
        question_with_option = Concatenate(axis=2)(
            [question_tiled, option_in])

        # --- shared sentence encoder ---------------------------------------
        sentence_in = Input(shape=(None, vec_dim), name='cnn_input')
        sentence_4d = add_channel_axis(sentence_in)

        conv_maps = [
            Conv2D(num_filters,
                   kernel_size=(height, vec_dim),
                   padding='valid',
                   kernel_initializer='normal',
                   activation='linear')(sentence_4d)
            for height in filter_sizes
        ]
        pooled_maps = [mean_pool(feature_map) for feature_map in conv_maps]
        sentence_vector = Concatenate(axis=1)(pooled_maps)

        sentence_encoder = Model(inputs=sentence_in, outputs=sentence_vector)
        encoded_options = TimeDistributed(sentence_encoder)(
            question_with_option)

        doc_in = Input(shape=(self.max_sent_para, self.max_words_sent,
                              vec_dim),
                       name='doc_input')
        encoded_doc = TimeDistributed(sentence_encoder)(doc_in)

        # --- attention of document sentences over options ------------------
        attention = Dot(axes=2, normalize=True)([encoded_doc, encoded_options])
        for step in (mask_zeros, exponentiate, normalise):
            attention = step(attention)
        attended_doc = swap_last_axes(weighted_sum([encoded_doc, attention]))

        # --- score each option against its attended document vector --------
        similarity = Dot(axes=2, normalize=True)(
            [attended_doc, encoded_options])
        option_scores = diag_layer(similarity)
        probs = MaskedSoftmax()([option_scores, option_in])

        main_model = Model(inputs=[question_in, doc_in, option_in],
                           outputs=probs)
        optimizer = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
        main_model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])
        main_model.summary()
        return main_model
예제 #54
0
파일: vin.py 프로젝트: panxipeng/vin-keras
 def ext_start(inputs):
     """Pick, per sample, the entries of `inputs[0]` addressed by `inputs[1]`.

     `inputs[1]` supplies two index columns that are combined into a flat
     index `col0 + l_s * col1`; that index is one-hot encoded over
     `l_s * l_s` positions and used to weight-and-sum the last axis of the
     (axis-0/axis-1 swapped) first input. `l_s` comes from the enclosing scope.
     """
     feature_map, position = inputs[0], inputs[1]
     flat_index = position[:, 0] + l_s * position[:, 1]
     selector = K.one_hot(flat_index, l_s * l_s)  # (None, l_s * l_s)
     weighted = selector * K.permute_dimensions(feature_map, (1, 0, 2))
     return K.transpose(K.sum(weighted, axis=2))
예제 #55
0
 def T(x):
     """Return `x` with axes 1 and 2 swapped (axis 0 stays in place)."""
     swapped = K.permute_dimensions(x, [0, 2, 1])
     return swapped
예제 #56
0
 def call(self, x, mask=None):
     """Multiply `x` by its mask, repeated `self.repeat_dim` times.

     The mask is cast to float32, repeated with `K.repeat`, and its last two
     axes are swapped before the element-wise product with `x`.
     NOTE(review): the mask is cast unconditionally; the `None` default is
     not special-cased here.
     """
     float_mask = K.cast(mask, 'float32')
     repeated = K.repeat(float_mask, self.repeat_dim)
     aligned = K.permute_dimensions(repeated, (0, 2, 1))
     return x * aligned
예제 #57
0
    def _call(self, inputs, **kwargs):
        """Reduce per-prototype signals/dissimilarities to one entry per capsule.

        `inputs[0]` holds the signals and `inputs[1]` the dissimilarities.
        When every capsule owns exactly one prototype
        (`proto_number == capsule_number`) the inputs are returned untouched.
        Otherwise the prototypes belonging to a capsule are combined with
        softmax-style weights derived from the negative dissimilarities scaled
        by `self.beta`, and both signals and dissimilarities are replaced by
        their weighted sums.

        Three code paths exist:
          * prototypes not equally distributed, `use_for_loop` set: one slice
            and one softmax per capsule, results concatenated;
          * prototypes not equally distributed, no loop: everything is padded
            to `_max_proto_number_in_capsule` via gather and handled as one block;
          * prototypes equally distributed: a plain reshape into
            (batch, capsule, protos-per-capsule) suffices.

        Returns:
            dict `{0: signals, 1: diss}` after competition, or `inputs`
            unchanged in the one-prototype-per-capsule case.
        """
        if self.proto_number == self.capsule_number:
            return inputs

        else:
            signals = inputs[0]
            diss = inputs[1]

            signal_shape = None

            # signal.shape: (batch, proto_num, caps_dim1, ..., caps_dimN)
            # collapse all capsule dimensions into one; restored before returning
            if self.input_spec[0].ndim > 3:
                signal_shape = mixed_shape(signals)
                signals = K.reshape(signals, signal_shape[0:2] + (-1, ))

            if not self._equally_distributed:
                if self.use_for_loop:
                    signals_stack = []
                    diss_stack = []
                    with K.name_scope('for_loop'):
                        for i, p in enumerate(self._proto_distrib):
                            # p lists the prototype indices of capsule i; only its
                            # first and last entries are used, as a contiguous range
                            with K.name_scope('compute_slices'):
                                diss_ = diss[:, p[0]:(p[-1] + 1)]
                                signals_ = signals[:, p[0]:(p[-1] + 1), :]

                            if len(p) > 1:
                                with K.name_scope('competition_probabilities'):
                                    coefficients = prob_trans.neg_softmax(
                                        diss_ * self.beta[i],
                                        axis=-1,
                                        max_stabilization=True)

                                with K.name_scope('signal_competition'):
                                    signals_stack.append(
                                        K.expand_dims(
                                            K.batch_dot(
                                                coefficients, signals_,
                                                [1, 1]), 1))

                                with K.name_scope('dissimilarity_competition'):
                                    diss_stack.append(
                                        K.batch_dot(coefficients, diss_,
                                                    [1, 1]))
                            else:
                                # a single prototype: nothing to compete against
                                signals_stack.append(signals_)
                                diss_stack.append(diss_)

                    signals = K.concatenate(signals_stack, axis=1)
                    diss = K.concatenate(diss_stack, axis=-1)
                else:
                    # Gather indices for the coefficients: the first occurrence of a
                    # prototype index is kept, every repeat is redirected to index
                    # max + 1, which addresses the zero column appended below so that
                    # padded slots get zero weight.
                    extension_idx = []
                    for i in self._proto_extension:
                        if i not in extension_idx:
                            extension_idx.append(i)
                        else:
                            extension_idx.append(
                                max(self._proto_extension) + 1)

                    batch_size = K.shape(
                        signals
                    )[0] if signal_shape is None else signal_shape[0]
                    # reshape to block
                    with K.name_scope('competition_probabilities'):
                        with K.name_scope('neg_softmax'):
                            with K.name_scope('coefficients'):
                                beta = K.gather(self.beta,
                                                self._capsule_extension)
                                coefficients = -diss * beta
                                # max stabilization
                                coefficients = coefficients - K.max(
                                    coefficients, axis=-1, keepdims=True)
                                coefficients = K.exp(coefficients)
                                # append one all-zero column (target of the padded slots)
                                coefficients = K.concatenate([
                                    coefficients,
                                    K.zeros_like(coefficients[:, 0:1])
                                ],
                                                             axis=-1)
                                # gather acts on axis 0, hence the transpose round trip
                                coefficients = K.transpose(coefficients)
                                coefficients = K.gather(
                                    coefficients, extension_idx)
                                coefficients = K.transpose(coefficients)
                                coefficients = K.reshape(
                                    coefficients, [
                                        batch_size, self.capsule_number,
                                        self._max_proto_number_in_capsule
                                    ])
                            # could never be a zero division
                            with K.name_scope('normalization_constant'):
                                constant = K.sum(coefficients,
                                                 axis=-1,
                                                 keepdims=True)

                            probs = coefficients / constant

                    with K.name_scope('dissimilarity_preprocessing'):
                        # bring diss into (batch, capsule, max protos per capsule)
                        diss = K.transpose(diss)
                        diss = K.gather(diss, self._proto_extension)
                        diss = K.transpose(diss)
                        diss = K.reshape(diss, [
                            batch_size, self.capsule_number,
                            self._max_proto_number_in_capsule
                        ])

                    with K.name_scope('dissimilarity_competition'):
                        diss = K.squeeze(
                            K.batch_dot(probs, K.expand_dims(diss), [2, 2]),
                            -1)

                    with K.name_scope('signal_preprocessing'):
                        # same gather-based padding as for diss, applied on the prototype axis
                        signals = K.permute_dimensions(signals, [1, 0, 2])
                        signals = K.gather(signals, self._proto_extension)
                        signals = K.permute_dimensions(signals, [1, 0, 2])
                        signals = K.reshape(signals, [
                            batch_size, self.capsule_number,
                            self._max_proto_number_in_capsule, -1
                        ])

                    with K.name_scope('signal_competition'):
                        signals = K.batch_dot(probs, signals, [2, 2])

            else:
                # equally distributed: a reshape alone groups prototypes per capsule
                batch_size = K.shape(
                    signals)[0] if signal_shape is None else signal_shape[0]
                diss = K.reshape(diss, [
                    batch_size, self.capsule_number,
                    self._max_proto_number_in_capsule
                ])

                with K.name_scope('competition_probabilities'):
                    coefficients = prob_trans.neg_softmax(
                        diss * K.expand_dims(self.beta, -1),
                        axis=-1,
                        max_stabilization=True)

                with K.name_scope('signal_competition'):
                    signals = K.reshape(signals, [
                        batch_size, self.capsule_number,
                        self._max_proto_number_in_capsule, -1
                    ])
                    signals = K.batch_dot(coefficients, signals, [2, 2])

                with K.name_scope('dissimilarity_competition'):
                    diss = K.squeeze(
                        K.batch_dot(coefficients, K.expand_dims(diss), [2, 2]),
                        -1)

            # undo the flattening of the capsule dimensions done at the top
            if self.input_spec[0].ndim > 3:
                signals = K.reshape(signals,
                                    [signal_shape[0], self.capsule_number] +
                                    list(signal_shape[2:]))

            return {0: signals, 1: diss}
예제 #58
0
    def call(self, inputs, mask=None, training=None):
        """Compute multi-head attention from content, relative and segment scores.

        `inputs` is a sequence of ten tensors, unpacked in this order: query
        inputs, content, memories, segment matrix, segment embeddings, relative
        encodings, context bias, relative bias, segment bias and permutation
        mask. Tensor shapes are given by the inline comments below.

        `self.kernel` (and `self.bias` when `use_bias` is set) is sliced along
        its last axis into query (1 x units), key/value (2 x units), relative
        (1 x units) and output (1 x units) parts.

        The attention logits are the sum of three terms (content-based,
        relative-position-based after `_relative_shift`, and segment-based),
        scaled by sqrt(units_head). The softmax is computed by hand so that
        the permutation mask and the optional Keras mask can zero out entries
        before normalisation.

        Args:
            inputs: the ten tensors described above.
            mask: optional sequence of masks; only `mask[0]` is used, and it
                is extended with ones for the memory positions.
            training: forwarded to the attention dropout layer, if any.

        Returns:
            The attended and output-projected tensor, (batch, seq_len, units).
        """
        (inputs, content, memories, segment_mat, segment_embed, relatives,
         bias_context, bias_relative, bias_segment, permutation) = inputs
        full = K.concatenate([memories, content],
                             axis=1)  # (batch, prev_len + seq_len, units)

        # column blocks of the fused kernel: [q | k, v | r | o]
        kernel_q = self.kernel[:, :self.units]
        kernel_kv = self.kernel[:, self.units:self.units * 3]
        kernel_r = self.kernel[:, self.units * 3:self.units * 4]
        kernel_o = self.kernel[:, self.units * 4:self.units * 5]

        bias_q, bias_kv, bias_r, bias_o = (None, ) * 4
        if self.use_bias:
            bias_q = self.bias[:self.units]
            bias_kv = self.bias[self.units:self.units * 3]
            bias_r = self.bias[self.units * 3:self.units * 4]
            bias_o = self.bias[self.units * 4:self.units * 5]

        w_q = K.dot(inputs, kernel_q)  # (batch, seq_len, units)
        w_kv = K.dot(full, kernel_kv)  # (batch, prev_len + seq_len, units * 2)
        w_r = K.dot(relatives, kernel_r)  # (batch, prev_len + seq_len, units)
        if self.use_bias:
            w_q = K.bias_add(w_q, bias_q)
            w_kv = K.bias_add(w_kv, bias_kv)
            w_r = K.bias_add(w_r, bias_r)
        if self.activation is not None:
            w_q = self.activation(w_q)
            w_kv = self.activation(w_kv)
            w_r = self.activation(w_r)

        w_k = w_kv[:, :, :self.units]  # (batch, prev_len + seq_len, units)
        w_v = w_kv[:, :, self.units:]  # (batch, prev_len + seq_len, units)
        batch_size, q_len, k_len = K.shape(inputs)[0], K.shape(
            w_q)[1], K.shape(w_k)[1]

        # --- content-based score ---
        w_qc = K.bias_add(w_q, bias_context)
        w_qc = self._reshape_to_batches(
            w_qc)  # (batch * n_head, seq_len, units_head)
        w_k = self._reshape_to_batches(
            w_k)  # (batch * n_head, prev_len + seq_len, units_head)
        a_context = K.batch_dot(
            w_qc, w_k, axes=2)  # (batch * n_head, seq_len, prev_len + seq_len)

        # --- relative-position score ---
        w_qr = K.bias_add(w_q, bias_relative)
        w_qr = self._reshape_to_batches(
            w_qr)  # (batch * n_head, seq_len, units_head)
        w_r = self._reshape_to_batches(
            w_r)  # (batch * n_head, prev_len + seq_len, units_head)
        a_relative = K.batch_dot(
            w_qr, w_r, axes=2)  # (batch * n_head, seq_len, prev_len + seq_len)
        a_relative = self._relative_shift(  # (batch * n_head, seq_len, prev_len + seq_len)
            a_relative,
            key_len_expected=K.shape(a_context)[-1],
        )

        # --- segment score ---
        w_qs = K.bias_add(w_q, bias_segment)
        w_qs = K.reshape(w_qs, (-1, q_len, self.num_head, self.units_head))
        w_qs = K.permute_dimensions(
            w_qs, (2, 0, 1, 3))  # (n_head, batch, seq_len, units_head)
        segment_embed = K.reshape(K.transpose(segment_embed),
                                  (self.num_head, 1, self.units_head, 2))
        segment_embed = K.tile(segment_embed, (1, batch_size, 1, 1))
        w_qs = K.reshape(w_qs, (-1, q_len, self.units_head))
        segment_embed = K.reshape(segment_embed, (-1, self.units_head, 2))
        a_segment = K.batch_dot(w_qs, segment_embed,
                                axes=(2, 1))  # (n_head * batch, seq_len, 2)
        a_segment = K.reshape(a_segment, (self.num_head, batch_size, q_len, 2))
        a_segment = K.permute_dimensions(
            a_segment, (1, 2, 3, 0))  # (batch, seq_len, 2, n_head)
        segment_mat = K.reshape(
            segment_mat,
            (-1, k_len, 2))  # (batch * seq_len, prev_len + seq_len, 2)
        a_segment = K.reshape(
            a_segment, (-1, 2, self.num_head))  # (batch * seq_len, 2, n_head)
        a_segment = K.batch_dot(
            segment_mat, a_segment,
            axes=(2, 1))  # (batch * seq_len, prev_len + seq_len, n_head)
        a_segment = K.reshape(a_segment, (-1, q_len, k_len, self.num_head))
        # bring heads next to batch so the layout matches a_context / a_relative
        a_segment = K.reshape(K.permute_dimensions(a_segment, (0, 3, 1, 2)),
                              (-1, q_len, k_len))

        # --- manual softmax: max-shifted exp, masked, then normalised ---
        att = (a_context + a_relative + a_segment) / K.sqrt(
            K.constant(self.units_head, dtype=K.floatx()))
        exp = K.exp(att - K.max(att, axis=-1, keepdims=True))

        # replicate the permutation mask for every head
        permutation = K.tile(K.expand_dims(permutation, axis=1),
                             [1, self.num_head, 1, 1])
        permutation = K.reshape(permutation, (-1, q_len, k_len))
        exp *= permutation
        if mask is not None and mask[0] is not None:
            mask = K.cast(mask[0], K.floatx())
            # memory positions get a mask value of one
            mask = K.concatenate([K.ones_like(memories[:, :, 0]), mask],
                                 axis=1)
            exp *= K.expand_dims(self._reshape_mask(mask), axis=1)

        # epsilon guards against a zero denominator when a whole row is masked out
        att = exp / (K.sum(exp, axis=-1, keepdims=True) + K.epsilon())
        if self.att_drop_layer is not None:
            att = self.att_drop_layer(att, training=training)
        w_v = self._reshape_to_batches(
            w_v)  # (batch * n_head, prev_len + seq_len, units_head)
        w_o = K.batch_dot(att, w_v)  # (batch * n_head, seq_len, units_head)

        # --- merge heads and apply the output projection ---
        w_o = self._reshape_from_batches(w_o)  # (batch, seq_len, units)
        w_o = K.dot(w_o, kernel_o)  # (batch, seq_len, units)
        if self.use_bias:
            w_o = K.bias_add(w_o, bias_o)
        if self.activation is not None:
            w_o = self.activation(w_o)

        if TF_KERAS:
            # Add shape information to tensor when using `tf.keras`
            input_shape = K.int_shape(inputs)
            if input_shape[1] is not None:
                w_o = K.reshape(w_o, (-1, ) + input_shape[1:])
        return w_o
예제 #59
0
 def transpose_3tensor(t):
     '''
     Lambda-layer helper: swap axes 1 and 2 of `t` (axis 0 is left in
     place) so that a subsequent dot product lines up the intended
     dimensions.
     '''
     axis_order = (0, 2, 1)
     return K.permute_dimensions(t, axis_order)
예제 #60
0
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255., x_test / 255.
print('X_train shape: {}'.format(x_train.shape))
print('X_test shape: {}'.format(x_test.shape))
print('y_train shape: {}'.format(y_train.shape))
print('y_test shape: {}'.format(y_test.shape))

D = 28
M = 15

inputs = Input(shape=(D, D))
x1 = Bidirectional(LSTM(M, return_sequences=True))(inputs)
x1 = GlobalMaxPooling1D()(x1)
permutor = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))
x2 = permutor(inputs)
x2 = Bidirectional(LSTM(M, return_sequences=True))(x2)
x2 = GlobalMaxPooling1D()(x2)
x = Concatenate(axis=1)([x1, x2])
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='Adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())

history = model.fit(x_train,
                    y_train,
                    batch_size=256,
                    epochs=5,