def euclideanSqDistance(inputs):
    if len(inputs) != 2:
        # Raising a bare string is invalid in Python 3; raise a proper exception.
        raise ValueError('euclideanSqDistance expects exactly two input tensors')
    output = K.mean(K.square(inputs[1] - inputs[0]), axis=-1)
    output = K.expand_dims(output, 1)
    return output
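# A minimal usage sketch (hypothetical model, illustrative shapes): computing a
# per-pair squared Euclidean distance between two branches with a Lambda layer.
from keras.layers import Input, Lambda
from keras.models import Model

left = Input(shape=(64,))
right = Input(shape=(64,))
distance = Lambda(euclideanSqDistance)([left, right])  # shape [None, 1]
siamese_head = Model([left, right], distance)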
def call(self, x, mask=None):
    uit = dot_product(x, self.W)
    if self.bias:
        uit += self.b
    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)  # ait = K.dot(uit, self.u)
    a = K.exp(ait)
    # Apply the mask after the exp; the weights are re-normalized below.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in Theano.
        a *= K.cast(mask, K.floatx())
    # Especially in the early stages of training the sum may be almost zero,
    # which results in NaNs. A workaround is to add a very small positive
    # number epsilon to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
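# A hypothetical end-to-end use of the layer owning this call() (the class name
# AttentionWithContext is an assumption, as is supports_masking=True on it):
from keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from keras.models import Model

words = Input(shape=(100,))
h = Embedding(20000, 128, mask_zero=True)(words)      # mask flows into call()
h = Bidirectional(GRU(64, return_sequences=True))(h)  # [None, 100, 128]
sentence = AttentionWithContext()(h)                  # [None, 128] weighted sum
probs = Dense(2, activation='softmax')(sentence)
model = Model(words, probs)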
def call(self, x, mask=None):
    # x: [batch_size, sel_len, attention_dim]
    # self.u: [attention_dim, 1] context vector (a weight, not batch-sized)
    # uit = tanh(xW + b)
    uit = K.tile(K.expand_dims(self.W, axis=0), (K.shape(x)[0], 1, 1))
    uit = tf.matmul(x, uit)
    uit = K.tanh(K.bias_add(uit, self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in Theano.
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)
    return output
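# One plausible build() for the variant above (a sketch, not the original code):
# call() implies W: [att_dim, att_dim], b: [att_dim], and u: [att_dim, 1], since
# K.dot(uit, self.u) followed by K.squeeze(ait, -1) needs a trailing unit axis.
def build(self, input_shape):
    assert len(input_shape) == 3
    att_dim = input_shape[-1]
    self.W = self.add_weight(name='W', shape=(att_dim, att_dim),
                             initializer='glorot_uniform', trainable=True)
    self.b = self.add_weight(name='b', shape=(att_dim,),
                             initializer='zeros', trainable=True)
    self.u = self.add_weight(name='u', shape=(att_dim, 1),
                             initializer='glorot_uniform', trainable=True)
    super(AttLayer, self).build(input_shape)  # class name AttLayer is assumed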
def dot_product(x, kernel):
    """Wrapper for the dot product operation, compatible with both Theano
    and TensorFlow.

    Args:
        x: input tensor of shape [..., dim].
        kernel: 1-D weight tensor of shape [dim].

    Returns:
        Tensor with the trailing `dim` axis contracted away.
    """
    if K.backend() == 'tensorflow':
        # TF's K.dot cannot contract an nD tensor with a 1-D kernel directly,
        # so lift the kernel to [dim, 1] and squeeze the result.
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
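# Quick shape check of the wrapper on the TensorFlow backend: a 3-D input dotted
# with a 1-D kernel drops the trailing axis, matching Theano's K.dot behaviour.
import numpy as np
from keras import backend as K

x = K.constant(np.ones((2, 5, 4)))           # [batch, steps, dim]
kernel = K.constant(np.ones(4))              # [dim]
print(K.int_shape(dot_product(x, kernel)))   # (2, 5)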
def call(self, inputs, **kwargs):
    if type(inputs) is list:
        # The true label is provided with shape [None, n_classes], i.e. a one-hot code.
        assert len(inputs) == 2
        inputs, mask = inputs
    else:
        # If no true label is given, mask by the longest capsule.
        # Mainly used for prediction.
        # Compute the lengths of the capsules.
        x = K.sqrt(K.sum(K.square(inputs), -1))
        # Generate the mask, which is a one-hot code.
        # mask.shape=[None, n_classes]=[None, num_capsule]
        mask = K.one_hot(indices=K.argmax(x, 1),
                         num_classes=x.get_shape().as_list()[1])
    # inputs.shape=[None, num_capsule, dim_capsule]
    # mask.shape=[None, num_capsule]
    # masked.shape=[None, num_capsule * dim_capsule]
    masked = K.batch_flatten(inputs * K.expand_dims(mask, -1))
    return masked
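# A tiny numpy sketch of the prediction-mode branch above (hypothetical sizes:
# 3 capsules of dimension 2); only the longest capsule survives the mask.
import numpy as np

capsules = np.array([[[1.0, 0.0], [0.0, 2.0], [0.5, 0.5]]])  # [1, 3, 2]
lengths = np.sqrt((capsules ** 2).sum(-1))                   # [1, 3]
mask = np.eye(3)[lengths.argmax(1)]                          # one-hot [1, 3]
masked = (capsules * mask[..., None]).reshape(1, -1)         # [1, 6]
print(masked)  # [[0. 0. 0. 2. 0. 0.]]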
def atae_lstm_new(self):
    input_content = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(self.aspect_max_len,))
    # First embed each token and the aspect separately; then repeat the pooled
    # aspect embedding once per content token, and concatenate the repeated
    # aspect embedding with every token embedding.
    content_embed = Embedding(input_dim=self.max_content_vocab_size,
                              output_dim=self.content_embed_dim)
    aspect_embed = Embedding(input_dim=self.max_content_vocab_size,
                             output_dim=self.aspect_embed_dim)
    content_embedding = content_embed(input_content)
    content_embedding = SpatialDropout1D(0.2)(content_embedding)
    aspect_embedding = aspect_embed(input_aspect)
    # Pool the aspect token vectors into one vector, e.g. 60*128 => 1*128.
    aspect_embedding = AveragePooling1D(
        pool_size=self.aspect_max_len)(aspect_embedding)
    aspect_flatten = Flatten()(aspect_embedding)
    repeat_aspect_embedding = RepeatVector(self.max_len)(aspect_flatten)
    # Concatenate the repeated aspect embedding with the content tokens.
    input_concat = concatenate(
        [content_embedding, repeat_aspect_embedding], axis=-1)
    # Run the concatenation through an LSTM.
    if self.is_cudnn:
        hidden_vecs, state_h, _ = CuDNNLSTM(
            self.lstm_units, return_sequences=True,
            return_state=True)(input_concat)
    else:
        hidden_vecs, state_h, _ = LSTM(
            self.lstm_units, return_sequences=True,
            return_state=True)(input_concat)
    concat = concatenate([hidden_vecs, repeat_aspect_embedding], axis=-1)
    # Apply the attention mechanism.
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)
    attend_hidden_dense = Dense(self.lstm_units)(attend_hidden)
    last_hidden_dense = Dense(self.lstm_units)(state_h)
    final_output = Activation('tanh')(add(
        [attend_hidden_dense, last_hidden_dense]))
    dense_layer = Dense(self.dense_units, activation='relu')(final_output)
    output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)
    return Model([input_content, input_aspect], output_layer)
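# A minimal sketch of wiring this builder into training (the names
# model_builder, content_ids, aspect_ids, and labels are hypothetical):
model = model_builder.atae_lstm_new()
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.fit([content_ids, aspect_ids], labels, batch_size=64, epochs=5)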
def call(self, inputs, training=None):
    # inputs.shape=[None, input_num_capsule, input_dim_capsule]
    # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule]
    inputs_expand = K.expand_dims(inputs, 1)

    # Replicate the num_capsule dimension to prepare for multiplication by W.
    # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule]
    inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])

    # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
    # x.shape=[num_capsule, input_num_capsule, input_dim_capsule]
    # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
    # Regard the first two dimensions as the `batch` dimension, then
    # matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule].
    # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule]
    inputs_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, [2, 3]),
                          elems=inputs_tiled)

    # Begin: Routing algorithm ----------------------------------------------#
    # The prior for the coupling coefficients, initialized as zeros.
    # b.shape = [None, self.num_capsule, self.input_num_capsule].
    b = tf.zeros(shape=[K.shape(inputs_hat)[0],
                        self.num_capsule, self.input_num_capsule])

    assert self.routings > 0, 'The routings should be > 0.'
    for i in range(self.routings):
        # c.shape=[batch_size, num_capsule, input_num_capsule]
        c = tf.nn.softmax(b, axis=1)

        # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
        # Treat the first two dimensions as the `batch` dimension, then
        # matmul: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
        # outputs.shape=[None, num_capsule, dim_capsule]
        outputs = squash(K.batch_dot(c, inputs_hat, [2, 2]))  # [None, 10, 16]

        if i < self.routings - 1:
            # outputs.shape = [None, num_capsule, dim_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # Treat the first two dimensions as the `batch` dimension, then
            # matmul: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
            # b.shape=[batch_size, num_capsule, input_num_capsule]
            b += K.batch_dot(outputs, inputs_hat, [2, 3])
    # End: Routing algorithm ------------------------------------------------#

    return outputs
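# call() relies on a squash() helper defined elsewhere; a standard definition
# following Sabour et al. (2017), v = |s|^2 / (1 + |s|^2) * s / |s|:
from keras import backend as K

def squash(vectors, axis=-1):
    """Non-linearity that shrinks short vectors toward 0 and long vectors
    toward unit length, preserving direction."""
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors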
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim
    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    if mask is not None:
        a *= K.cast(mask, K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
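# A plausible build() matching the reshapes above (a sketch; the class name
# Attention is assumed): unlike the context-vector variant earlier, W here is
# a flat vector of length features_dim and the bias has one entry per step.
def build(self, input_shape):
    assert len(input_shape) == 3
    self.features_dim = input_shape[-1]
    self.step_dim = input_shape[1]
    self.W = self.add_weight(name='W', shape=(self.features_dim,),
                             initializer='glorot_uniform', trainable=True)
    if self.bias:
        self.b = self.add_weight(name='b', shape=(self.step_dim,),
                                 initializer='zeros', trainable=True)
    super(Attention, self).build(input_shape)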
def expand_label_input(x):
    # Broadcast a [batch, n] label vector over a 32x32 spatial grid,
    # producing a [batch, 32, 32, n] tensor.
    x = K.expand_dims(x, axis=1)
    x = K.expand_dims(x, axis=1)
    x = K.tile(x, [1, 32, 32, 1])
    return x
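# Hypothetical usage: conditioning a 32x32 feature map on a class-label vector
# (e.g. in a conditional GAN); all names and shapes here are illustrative.
from keras.layers import Input, Lambda, Concatenate

label_in = Input(shape=(6,))                      # [None, 6]
feat_in = Input(shape=(32, 32, 64))               # [None, 32, 32, 64]
label_map = Lambda(expand_label_input)(label_in)  # [None, 32, 32, 6]
conditioned = Concatenate(axis=-1)([feat_in, label_map])  # [None, 32, 32, 70]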
def call(self, inputs):
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    out = K.gather(self.embeddings, inputs)
    # Zero out the embedding of id 0, so index 0 behaves as a padding token.
    mask = K.expand_dims(K.clip(K.cast(inputs, 'float32'), 0, 1), axis=-1)
    return out * mask
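# A quick backend-level sketch of the padding behaviour above (toy embedding
# table with 4 ids of dimension 3; the values are illustrative):
import numpy as np
from keras import backend as K

emb = K.constant(np.arange(12.0).reshape(4, 3))
ids = K.constant([[0, 2]], dtype='int32')
out = K.gather(emb, ids)
mask = K.expand_dims(K.clip(K.cast(ids, 'float32'), 0, 1), axis=-1)
print(K.eval(out * mask))  # row for id 0 is zeroed: [[[0. 0. 0.] [6. 7. 8.]]]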
def splitter(y_true):
    # y_true packs two columns: column 0 is the label and column 1 is a
    # per-sample payoff; split them into two [batch, 1] tensors.
    payoffs = y_true[:, 1]
    payoffs = K.expand_dims(payoffs, 1)
    y_true = y_true[:, 0]
    y_true = K.expand_dims(y_true, 1)
    return y_true, payoffs
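# A hypothetical use inside a custom loss (the loss itself is an assumption):
# the payoff column weights the squared error of each sample.
def payoff_weighted_mse(y_true, y_pred):
    y_true, payoffs = splitter(y_true)
    return K.mean(payoffs * K.square(y_true - y_pred), axis=-1)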
def cabasc(self):
    def sequence_mask(sequence):
        return K.sign(K.max(K.abs(sequence), 2))

    def sequence_length(sequence):
        return K.cast(K.sum(sequence_mask(sequence), 1), tf.int32)

    input_text = Input(shape=(self.max_len,))
    input_text_l = Input(shape=(self.max_len,))
    input_text_r = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,))
    input_mask = Input(shape=(self.max_len,))

    word_embedding = Embedding(input_dim=self.max_content_vocab_size,
                               output_dim=self.content_embed_dim)
    text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))
    text_l_embed = SpatialDropout1D(0.2)(word_embedding(input_text_l))
    text_r_embed = SpatialDropout1D(0.2)(word_embedding(input_text_r))

    asp_embedding = Embedding(input_dim=self.max_aspect_vocab_size,
                              output_dim=self.aspect_embed_dim)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

    # Regard the aspect string as the first unit.
    hidden_l = GRU(self.lstm_units, go_backwards=True,
                   return_sequences=True)(text_l_embed)
    hidden_r = GRU(self.lstm_units, return_sequences=True)(text_r_embed)

    # Left-context attention.
    context_attend_l = TimeDistributed(Dense(1, activation='sigmoid'))(hidden_l)
    # Note: I couldn't find `reverse_sequence` in keras.
    context_attend_l = Lambda(lambda x: tf.reverse_sequence(
        x, sequence_length(x), 1, 0))(context_attend_l)
    context_attend_l = Lambda(lambda x: K.squeeze(x, -1))(context_attend_l)

    # Right-context attention.
    context_attend_r = TimeDistributed(Dense(1, activation='sigmoid'))(hidden_r)
    context_attend_r = Lambda(lambda x: K.squeeze(x, -1))(context_attend_r)

    # Combine the context attention.
    # aspect_text_embed = subtract([add([text_l_embed, text_r_embed]), text_embed])
    # aspect_text_mask = Lambda(lambda x: sequence_mask(x))(aspect_text_embed)
    # text_mask = Lambda(lambda x: sequence_mask(x))(text_embed)
    # context_mask = subtract([text_mask, aspect_text_mask])
    # aspect_text_mask_half = Lambda(lambda x: x*0.5)(aspect_text_mask)
    # combine_mask = add([context_mask, aspect_text_mask_half])  # 1 for context, 0.5 for aspect
    context_attend = multiply(
        [add([context_attend_l, context_attend_r]), input_mask])

    # Apply the context attention.
    context_attend_expand = Lambda(lambda x: K.expand_dims(x))(context_attend)
    memory = multiply([text_embed, context_attend_expand])

    # Sentence-level content attention.
    sentence = Lambda(lambda x: K.mean(x, axis=1))(memory)
    final_output = ContentAttention()([memory, aspect_embed, sentence])

    dense_layer = Dense(self.dense_units, activation='relu')(final_output)
    output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)
    return Model(
        [input_text, input_text_l, input_text_r, input_aspect, input_mask],
        output_layer)
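# A backend-level sketch of what the sequence_mask/sequence_length helpers
# compute, with a toy padded batch (padding steps are all-zero vectors):
import tensorflow as tf
from keras import backend as K

seq = K.constant([[[0.5, 0.1], [0.0, 0.0], [0.0, 0.0]]])  # 1 real step, 2 pads
mask = K.sign(K.max(K.abs(seq), 2))        # -> [[1. 0. 0.]]
length = K.cast(K.sum(mask, 1), tf.int32)  # -> [1]
print(K.eval(mask), K.eval(length))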