예제 #1
0
    def preprocess_input(self, x):
        '''
        Pre-compute the gate input projections while preserving the attention slot.

        This preprocessing step is overridden because, when running in 'cpu'
        mode, the weight - input multiplications inside the GRU are done as
        separate, smaller matrix multiplications (one per gate) that are
        concatenated afterwards. The attention value occupies feature index 0
        of x, so it is split off before the projections and put back in front
        of the result.

        Returns the concatenation [attention, x_z, x_r, x_h] along axis 2 when
        self.consume_less == 'cpu'; otherwise returns x unchanged.
        '''
        if self.consume_less != 'cpu':
            return x

        attention = x[:, :, 0]  # Shape:(samples, knowledge_length)
        features = x[:, :, 1:]  # Shape:(samples, knowledge_length, word_dim)

        spec_shape = self.input_spec[0].shape
        # One slot of the declared feature axis holds the attention value.
        input_dim = spec_shape[2] - 1
        timesteps = spec_shape[1]

        gate_params = ((self.W_z, self.b_z),
                       (self.W_r, self.b_r),
                       (self.W_h, self.b_h))
        projections = [
            time_distributed_dense(features, weight, bias, self.dropout_W,
                                   input_dim, self.output_dim, timesteps)
            for weight, bias in gate_params
        ]

        # Add attention back on to its original place.
        return K.concatenate([K.expand_dims(attention, 2)] + projections,
                             axis=2)
예제 #2
0
        def preprocess_input(self, x, train=False):
            """
            Pre-compute the four gate input projections in 'cpu' mode.

            x: input tensor handed to time_distributed_dense.
            train: when truthy and self.dropout_W lies strictly between 0 and 1,
                self.dropout_W is forwarded as the dropout rate; otherwise 0.

            Returns the concatenation [x_i, x_f, x_c, x_o] along axis 2 when
            self.consume_less == 'cpu'; otherwise returns x unchanged.
            """
            if self.consume_less != 'cpu':
                return x

            dropout = self.dropout_W if train and (0 < self.dropout_W < 1) else 0

            spec_shape = self.input_spec[0].shape
            input_dim = spec_shape[2]
            timesteps = spec_shape[1]

            gate_params = ((self.W_i, self.b_i),
                           (self.W_f, self.b_f),
                           (self.W_c, self.b_c),
                           (self.W_o, self.b_o))
            projections = [
                time_distributed_dense(x, weight, bias, dropout,
                                       input_dim, self.output_dim, timesteps)
                for weight, bias in gate_params
            ]
            return K.concatenate(projections, axis=2)
예제 #3
0
    def call(self, x, mask=None):
        '''
        Reduce x over its time axis with learned attention weights.

        x: batch_size * time_steps * input_dim
        mask: accepted for interface compatibility; not used in this method.

        Returns the weighted sum over axis 1 (batch_size * input_dim), passed
        through self.element_wise_output_transformer when that attribute is
        truthy.
        '''
        check_and_throw_if_fail(K.ndim(x) == 3, "x")

        input_dim = shape(x)[2]
        time_steps = shape(x)[1]

        projected = time_distributed_dense(
            x,
            self.Ws,
            self.bs,
            input_dim=input_dim,
            output_dim=self.attention_weight_vector_dim,
            timesteps=time_steps)
        # batch_size, time_steps, attention_weight_vector_dim
        hidden = K.tanh(projected)

        scores = time_distributed_dense(
            hidden,
            K.expand_dims(self.us, 1),
            input_dim=self.attention_weight_vector_dim,
            output_dim=1,
            timesteps=time_steps)
        unnormalized = K.exp(scores)  # batch_size, time_steps, 1

        # Normalise over the time axis so the weights of each sample sum to 1.
        normalizer = K.sum(unnormalized, 1, keepdims=True)  # batch_size 1 1
        weights = unnormalized / normalizer  # batch_size * time_steps * 1

        # batch_size * time_steps * input_dim -> batch_size * input_dim
        output = K.sum(weights * x, 1)

        if not self.element_wise_output_transformer:
            return output
        return self.element_wise_output_transformer(output)
예제 #4
0
    def preprocess_input(self, x):
        """
        Pre-compute the two input projections (W_f and W_h) in 'cpu' mode.

        The input dimension and number of timesteps are read from the static
        shape of x (K.int_shape), indices 2 and 1 respectively.

        Returns the concatenation [x_f, x_h] along axis 2 when
        self.consume_less == 'cpu'; otherwise returns x unchanged.
        """
        if self.consume_less != 'cpu':
            return x

        static_shape = K.int_shape(x)
        n_features = static_shape[2]
        n_steps = static_shape[1]

        projections = [
            time_distributed_dense(x, weight, bias, self.dropout_W,
                                   n_features, self.output_dim, n_steps)
            for weight, bias in ((self.W_f, self.b_f), (self.W_h, self.b_h))
        ]
        return K.concatenate(projections, axis=2)
예제 #5
0
    def step(self, x_input, states):
        """
        Run one parent LSTM step and turn its hidden state into pointer scores.

        x_input: step input, forwarded to the parent class' step.
        states: the last entry is taken as the encoder sequence; the
            remaining entries are forwarded to the parent class' step.

        Returns (pointer, [h, c]) where pointer is the softmax of the scores
        and h, c are the states returned by the parent step.
        """
        spec_shape = self.input_spec[0].shape
        encoder_seq = states[-1]
        _, [hidden, cell] = super(PointerLSTM, self).step(x_input, states[:-1])

        # vt*tanh(W1*e+W2*d)
        # The hidden state is repeated spec_shape[1] times so it can be added
        # to the encoder-side projection.
        decoder_seq = K.repeat(hidden, spec_shape[1])
        encoder_term = time_distributed_dense(encoder_seq, self.W1, output_dim=1)
        decoder_term = time_distributed_dense(decoder_seq, self.W2, output_dim=1)
        scores = K.squeeze(self.vt * tanh(encoder_term + decoder_term), 2)

        # make probability tensor
        return softmax(scores), [hidden, cell]
예제 #6
0
 def preprocess_input(self, x):
     """
     Apply the input projection (self.W, self.b) ahead of time in 'cpu' mode.

     The input dimension and timestep count come from indices 2 and 1 of
     self.input_spec[0].shape. Returns the result of time_distributed_dense
     when self.consume_less == 'cpu'; otherwise returns x unchanged.
     """
     if self.consume_less != 'cpu':
         return x

     spec_shape = self.input_spec[0].shape
     n_features = spec_shape[2]
     n_steps = spec_shape[1]
     return time_distributed_dense(
         x, self.W, self.b, self.dropout_W,
         n_features, self.output_dim, n_steps)
예제 #7
0
	def preprocess_input(self, x):
		"""
		Apply the input projection (self.W, self.b) ahead of time in 'cpu' mode.

		The input dimension and timestep count are read from the static shape
		of x (K.int_shape), and the projection width is
		self.hidden_recurrent_dim. Returns x unchanged when
		self.consume_less is not 'cpu'.
		"""
		if self.consume_less != 'cpu':
			return x

		static_shape = K.int_shape(x)
		n_features = static_shape[2]
		n_steps = static_shape[1]
		return time_distributed_dense(
			x, self.W, self.b, self.dropout_W,
			n_features, self.hidden_recurrent_dim, n_steps)
예제 #8
0
파일: layers.py 프로젝트: commaai/research
 def preprocess_input(self, x):
     """
     Project x through (self.W, self.b) before the recurrence in 'cpu' mode.

     Shape information is taken from self.input_spec[0].shape: index 2 is
     passed as the input dimension and index 1 as the number of timesteps.
     For any other consume_less setting x is handed back untouched.
     """
     use_precomputed_projection = self.consume_less == 'cpu'
     if not use_precomputed_projection:
         return x

     declared_shape = self.input_spec[0].shape
     feature_count = declared_shape[2]
     step_count = declared_shape[1]
     return time_distributed_dense(x, self.W, self.b, self.dropout_W,
                                   feature_count, self.output_dim,
                                   step_count)
예제 #9
0
    def step(self, x_input, states):
        """
        Run one parent LSTM step and turn its hidden state into pointer scores.

        x_input: step input, forwarded to the parent class' step.
        states: the last entry is taken as the encoder sequence; the
            remaining entries are forwarded to the parent class' step.

        Returns (pointer, [h, c]) where pointer is the softmax of the scores
        and h, c are the states returned by the parent step.

        This method only builds tensors; it writes nothing to stdout.
        """
        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

        # vt*tanh(W1*e+W2*d)
        # h is repeated input_shape[1] times so it can be added to the
        # encoder-side projection.
        dec_seq = K.repeat(h, input_shape[1])
        Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
        Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
        U = self.vt * tanh(Eij + Dij)
        U = K.squeeze(U, 2)

        # make probability tensor
        pointer = softmax(U)
        return pointer, [h, c]
예제 #10
0
 def step(self, x, states):
     """
     Compute an attention context over H and feed it to the parent step.

     x: not used here; the parent step receives the attention context
         instead.
     states: must unpack into exactly six entries. The second (c_tm1) and
         the last (H) are used here; all but the last are forwarded to the
         parent class' step.

     Returns the (y, new_states) pair produced by the parent step.
     """
     # Unpacking all six names keeps the length check on `states`.
     _h_prev, c_prev, _y_prev, _b, _u, encoded = states

     query = K.dot(c_prev, self.W_h) + self.b_h
     query = K.repeat(query, self.input_length)

     energy = K.squeeze(
         time_distributed_dense(query + encoded, self.W_a, self.b_a), 2)
     weights = K.softmax(energy)

     # Repeat the weights self.input_dim times, then swap the last two axes
     # before the element-wise product with H.
     weights = K.permute_dimensions(K.repeat(weights, self.input_dim),
                                    (0, 2, 1))
     context = K.sum(encoded * weights, axis=1)

     y, new_states = super(AttentionDecoder, self).step(context, states[:-1])
     return y, new_states
예제 #11
0
    def preprocess_input(self, x):
        """
        Pre-compute the gate projections (in 'cpu' mode) and prepend self.X.

        In 'cpu' mode the z, r and h projections of x are concatenated along
        axis 2; otherwise x is used as is. Either way the result is
        concatenated after self.X along the last axis and returned.

        NOTE(review): earlier revisions reset stack/pointer state here; that
        code was already disabled and is not part of this method.
        """
        processed = x
        if self.consume_less == 'cpu':
            static_shape = K.int_shape(x)
            n_features = static_shape[2]
            n_steps = static_shape[1]
            gate_params = ((self.W_z, self.b_z),
                           (self.W_r, self.b_r),
                           (self.W_h, self.b_h))
            projections = [
                time_distributed_dense(x, weight, bias, self.dropout_W,
                                       n_features, self.output_dim, n_steps)
                for weight, bias in gate_params
            ]
            processed = K.concatenate(projections, axis=2)
        return K.concatenate([self.X, processed], axis=-1)
예제 #12
0
 def step(self, x, states):
     """
     Build an attention-weighted summary of H and pass it to the parent step.

     x: ignored by this method.
     states: a sequence of exactly six entries; c_tm1 (second) drives the
         attention scores and H (last) is the sequence being attended over.
         The parent class' step receives every entry except the last.

     Returns (y, new_states) as produced by the parent class' step.
     """
     # The six-way unpack doubles as a length check on `states`.
     _prev_h, prev_c, _prev_y, _unused_b, _unused_u, attended = states

     projected_c = K.dot(prev_c, self.W_h) + self.b_h
     tiled_c = K.repeat(projected_c, self.input_length)

     raw_energy = time_distributed_dense(tiled_c + attended,
                                         self.W_a, self.b_a)
     alpha = K.softmax(K.squeeze(raw_energy, 2))

     # Tile the weights self.input_dim times and swap the last two axes so
     # they can be multiplied element-wise with H.
     tiled_alpha = K.repeat(alpha, self.input_dim)
     tiled_alpha = K.permute_dimensions(tiled_alpha, (0, 2, 1))

     summary = K.sum(attended * tiled_alpha, axis=1)
     y, new_states = super(AttentionDecoder, self).step(summary, states[:-1])
     return y, new_states
    def preprocess_input(self, x):
        """
        Pre-compute the four gate input projections (i, f, c, o) of x.

        Unlike variants that branch on consume_less, this version always
        performs the projections. self.dropout_W is forwarded as the dropout
        rate only when it lies strictly between 0 and 1; otherwise 0 is used.

        Returns the concatenation [x_i, x_f, x_c, x_o] along axis 2.
        """
        dropout = self.dropout_W if 0 < self.dropout_W < 1 else 0

        spec_shape = self.input_spec[0].shape
        input_dim = spec_shape[2]
        timesteps = spec_shape[1]

        gate_params = ((self.W_i, self.b_i),
                       (self.W_f, self.b_f),
                       (self.W_c, self.b_c),
                       (self.W_o, self.b_o))
        projections = [
            time_distributed_dense(x, weight, bias, dropout, input_dim,
                                   self.output_dim, timesteps)
            for weight, bias in gate_params
        ]
        return K.concatenate(projections, axis=2)
예제 #14
0
    def preprocess_input(self, x):
        """
        Apply the output projection (self.W_out, self.b_out) in 'cpu' mode.

        The input dimension and timestep count come from indices 2 and 1 of
        self.input_spec[0].shape. For any other consume_less setting x is
        returned unchanged.
        """
        if self.consume_less == 'cpu':
            spec_shape = self.input_spec[0].shape
            n_features = spec_shape[2]
            n_steps = spec_shape[1]
            x = time_distributed_dense(
                x, self.W_out, self.b_out, self.dropout_W,
                n_features, self.output_dim, n_steps)
        return x