def test_add(self):
    inputs = np.ones((1, 5, 2))
    weights = np.random.random((10, 2))
    weights[1, :] = np.asarray([0.25, 0.1])
    weights[3, :] = np.asarray([0.6, -0.2])
    model = keras.models.Sequential()
    model.add(PositionEmbedding(
        input_dim=10,
        output_dim=2,
        mode=PositionEmbedding.MODE_ADD,
        input_shape=(None, 2),
        weights=[weights],
        name='Pos-Embd',
    ))
    model.compile('adam', keras.losses.mae, {})
    model_path = os.path.join(tempfile.gettempdir(),
                              'test_pos_embd_%f.h5' % random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects={'PositionEmbedding': PositionEmbedding})
    model.summary()
    predicts = model.predict(inputs)
    self.assertTrue(np.allclose([1.25, 1.1], predicts[0][1]), predicts[0])
    self.assertTrue(np.allclose([1.6, 0.8], predicts[0][3]), predicts[0])
def build_model(max_len: int,
                input_dim: int,
                embedding_dim: int,
                feed_forward_units: int,
                head_num=1,
                block_num=1,
                dropout_rate=0.5) -> Tuple[Model, Embedding]:
    inputs = Input(shape=(max_len,))
    emb = Embedding(input_dim=input_dim, output_dim=embedding_dim, mask_zero=True)
    x = emb(inputs)
    pos_emb = PositionEmbedding(input_dim=max_len,
                                output_dim=embedding_dim,
                                mode=PositionEmbedding.MODE_ADD,
                                mask_zero=True)(x)
    y = Dropout(dropout_rate)(pos_emb)
    for _ in range(block_num):
        y = block(y, head_num, feed_forward_units, dropout_rate)
    model = Model(inputs=inputs, outputs=y)
    return model, emb
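# Usage sketch (hedged, not part of the original source): the argument values below
# are illustrative assumptions, and `block`, `Model`, `Input`, `Embedding`, `Dropout`,
# and `PositionEmbedding` are assumed to be imported/defined as in the module above.
demo_model, demo_emb = build_model(max_len=128,
                                   input_dim=10000,
                                   embedding_dim=64,
                                   feed_forward_units=256,
                                   head_num=4,
                                   block_num=2,
                                   dropout_rate=0.1)
demo_model.summary()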
def get_model(n_vocab,
              n_ctx=1024,
              n_embd=768,
              n_head=12,
              n_layer=12,
              fixed_input_shape=False):
    """Get basic GPT-2 model.

    :param n_vocab: Number of vocabulary tokens.
    :param n_ctx: The length of each input.
    :param n_embd: The dimension of embeddings.
    :param n_head: Number of heads in transformer.
    :param n_layer: Number of transformer blocks.
    :param fixed_input_shape: Whether the length of input is fixed. (Needed for TPU training)
    :return: The model.
    """
    if fixed_input_shape:
        input_layer_shape = (n_ctx,)
    else:
        input_layer_shape = (None,)
    input_layer = keras.layers.Input(shape=input_layer_shape, name='Input')
    embed_token, embeddings = EmbeddingRet(
        input_dim=n_vocab,
        output_dim=n_embd,
        mask_zero=False,
        name='Embed-Token',
    )(input_layer)
    embed_token_pos = PositionEmbedding(
        input_dim=n_ctx,
        output_dim=n_embd,
        mode=PositionEmbedding.MODE_ADD,
        name='Embed-Token-Pos',
    )(embed_token)
    last_layer = embed_token_pos
    for i in range(n_layer):
        last_layer = _get_encoder_component(
            name='Encode-%d' % i,
            input_layer=last_layer,
            head_num=n_head,
            hidden_dim=n_embd * 4,
            attention_activation=None,
            feed_forward_activation=gelu,
        )
    norm_layer = LayerNormalization(name='Norm')(last_layer)
    output_layer = EmbeddingSim(
        use_bias=False,
        name='Output',
    )([norm_layer, embeddings])
    model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.sparse_categorical_crossentropy,
    )
    return model
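# Usage sketch (hedged, not part of the original source): builds a small GPT-2-style
# model with assumed toy sizes and runs a random token batch through it; the output
# should be per-position token probabilities of shape (batch, n_ctx, n_vocab).
import numpy as np

demo_gpt2 = get_model(n_vocab=1000, n_ctx=64, n_embd=128, n_head=4, n_layer=2)
demo_tokens = np.random.randint(0, 1000, size=(2, 64))
demo_probs = demo_gpt2.predict(demo_tokens)
print(demo_probs.shape)  # expected: (2, 64, 1000)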
def test_mask_zero(self):
    indices = np.asarray([[-4, 10, 100]])
    weights = np.random.random((21, 2))
    weights[6, :] = np.asarray([0.25, 0.1])
    weights[20, :] = np.asarray([0.6, -0.2])
    model = keras.models.Sequential()
    model.add(PositionEmbedding(
        input_dim=10,
        output_dim=2,
        mode=PositionEmbedding.MODE_EXPAND,
        mask_zero=100,
        input_shape=(None,),
        weights=[weights],
        name='Pos-Embd',
    ))
    model.build()
    model.compile('adam', keras.losses.mae, [keras.metrics.mae])
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_pos_embd_%f.h5' % random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects={'PositionEmbedding': PositionEmbedding})
    model.summary()
    predicts = model.predict(indices)
    expected = np.asarray([[
        [0.25, 0.1],
        [0.6, -0.2],
        [0.6, -0.2],
    ]])
    self.assertTrue(np.allclose(expected, predicts))
def get_embedding(inputs, token_num, pos_num, embed_dim, dropout_rate=0.1, trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=token_num,
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            name='Embedding-Token',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=2,
            output_dim=embed_dim,
            trainable=trainable,
            name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]
    embed_layer = keras.layers.Add(name='Embedding-Token-Segment')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    return embed_layer, embed_weights
def get_embedding(inputs, token_num, pos_num, embed_dim, dropout_rate=0.1, trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=token_num,
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            embeddings_regularizer=keras.regularizers.l2(),
            name='Embedding-Token',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=2,
            output_dim=embed_dim,
            trainable=trainable,
            embeddings_regularizer=keras.regularizers.l2(),
            name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]
    embed_layer = keras.layers.Add(name='Embedding-Token-Segment')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        embeddings_regularizer=keras.regularizers.l2(),
        name='Embedding-Position',
    )(embed_layer)
    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    norm_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)
    return norm_layer, embed_weights
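# Usage sketch (hedged, not part of the original source): shows how BERT-style
# token-id and segment-id inputs would typically be wired into get_embedding.
# The sequence length, vocabulary size, and dimensions below are illustrative
# assumptions; `keras` is assumed to be imported as in the module above.
demo_token_input = keras.layers.Input(shape=(128,), name='Input-Token')
demo_segment_input = keras.layers.Input(shape=(128,), name='Input-Segment')
demo_embed, demo_embed_weights = get_embedding(
    inputs=[demo_token_input, demo_segment_input],
    token_num=30000,
    pos_num=512,
    embed_dim=768,
    dropout_rate=0.1,
)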
def deprecated1_my_get_embedding(inputs, token_num, pos_num, embed_dim, dropout_rate=0.1, trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=66,  # number of valid APs
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            name='Embedding-AP',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=66,  # RSSI takes integer values in (-0, -128 dB]
            output_dim=embed_dim,
            trainable=trainable,
            name='Embedding-RSSI',
            # name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]
    embed_layer = keras.layers.Add(name='Embedding-AP-RSSI')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    return embed_layer, embed_weights
def build_albert(token_num,
                 pos_num=512,
                 seq_len=512,
                 embed_dim=128,
                 hidden_dim=768,
                 transformer_num=12,
                 head_num=12,
                 feed_forward_dim=3072,
                 dropout_rate=0.1,
                 attention_activation=None,
                 feed_forward_activation='gelu',
                 training=True,
                 trainable=None,
                 output_layers=None):
    """Get ALBERT model.

    See: https://arxiv.org/pdf/1909.11942.pdf

    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param seq_len: Maximum length of the input sequence or None.
    :param embed_dim: Dimensions of embeddings.
    :param hidden_dim: Dimensions of hidden layers.
    :param transformer_num: Number of transformers.
    :param head_num: Number of heads in multi-head attention in each transformer.
    :param feed_forward_dim: Dimension of the feed forward layer in each transformer.
    :param dropout_rate: Dropout rate.
    :param attention_activation: Activation for attention layers.
    :param feed_forward_activation: Activation for feed-forward layers.
    :param training: A built model with MLM and NSP outputs will be returned if it is `True`,
                     otherwise the input layers and the last feature extraction layer will be returned.
    :param trainable: Whether the model is trainable.
    :param output_layers: A list of indices of output layers.
    """
    if attention_activation == 'gelu':
        attention_activation = gelu
    if feed_forward_activation == 'gelu':
        feed_forward_activation = gelu
    if trainable is None:
        trainable = training

    def _trainable(_layer):
        if isinstance(trainable, (list, tuple, set)):
            for prefix in trainable:
                if _layer.name.startswith(prefix):
                    return True
            return False
        return trainable

    # Build inputs
    input_token = keras.layers.Input(shape=(seq_len,), name='Input-Token')
    input_segment = keras.layers.Input(shape=(seq_len,), name='Input-Segment')
    inputs = [input_token, input_segment]

    # Build embeddings
    embed_token, embed_weights, embed_projection = AdaptiveEmbedding(
        input_dim=token_num,
        output_dim=hidden_dim,
        embed_dim=embed_dim,
        mask_zero=True,
        trainable=trainable,
        return_embeddings=True,
        return_projections=True,
        name='Embed-Token',
    )(input_token)
    embed_segment = keras.layers.Embedding(
        input_dim=2,
        output_dim=hidden_dim,
        trainable=trainable,
        name='Embed-Segment',
    )(input_segment)
    embed_layer = keras.layers.Add(name='Embed-Token-Segment')([embed_token, embed_segment])
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=hidden_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    embed_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)

    # Build shared transformer
    attention_layer = MultiHeadAttention(
        head_num=head_num,
        activation=attention_activation,
        name='Attention',
    )
    attention_normal = LayerNormalization(name='Attention-Normal')
    feed_forward_layer = FeedForward(
        units=feed_forward_dim,
        activation=feed_forward_activation,
        name='Feed-Forward',
    )
    feed_forward_normal = LayerNormalization(name='Feed-Forward-Normal')

    transformed = embed_layer
    transformed_layers = []
    for i in range(transformer_num):
        attention_input = transformed
        transformed = attention_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Attention-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Attention-Add-{}'.format(i + 1),
        )([attention_input, transformed])
        transformed = attention_normal(transformed)
        feed_forward_input = transformed
        transformed = feed_forward_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Feed-Forward-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Feed-Forward-Add-{}'.format(i + 1),
        )([feed_forward_input, transformed])
        transformed = feed_forward_normal(transformed)
        transformed_layers.append(transformed)

    if training:
        # Build tasks
        mlm_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation=feed_forward_activation,
            name='MLM-Dense',
        )(transformed)
        mlm_norm_layer = LayerNormalization(name='MLM-Norm')(mlm_dense_layer)
        mlm_pred_layer = AdaptiveSoftmax(
            input_dim=hidden_dim,
            output_dim=token_num,
            embed_dim=embed_dim,
            bind_embeddings=True,
            bind_projections=True,
            name='MLM-Sim',
        )([mlm_norm_layer, embed_weights, embed_projection])
        masked_layer = Masked(name='MLM')([mlm_pred_layer, inputs[-1]])
        extract_layer = Extract(index=0, name='Extract')(transformed)
        nsp_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation='tanh',
            name='SOP-Dense',
        )(extract_layer)
        nsp_pred_layer = keras.layers.Dense(
            units=2,
            activation='softmax',
            name='SOP',
        )(nsp_dense_layer)
        model = keras.models.Model(inputs=inputs, outputs=[masked_layer, nsp_pred_layer])
        for layer in model.layers:
            layer.trainable = _trainable(layer)
        return model
    if output_layers is not None:
        if isinstance(output_layers, list):
            output_layers = [transformed_layers[index] for index in output_layers]
            output = keras.layers.Concatenate(name='Output')(output_layers)
        else:
            output = transformed_layers[output_layers]
        model = keras.models.Model(inputs=inputs, outputs=output)
        return model
    model = keras.models.Model(inputs=inputs, outputs=transformed)
    for layer in model.layers:
        layer.trainable = _trainable(layer)
    return inputs, transformed
def get_model(self,
              params,
              a=False,
              b=False,
              c=False,
              d=False,
              e=False,
              f=False,
              g=False,
              dropout=0.5):
    hash_input = layers.Input(shape=(params['max_words'],), dtype='int32')
    x = layers.Embedding(params['hash_mole'],
                         params['embed_size'],
                         input_length=params['max_words'],
                         name=self.embedding_name)(hash_input)
    x = layers.Dropout(dropout / 3)(x)
    if a:  # did not train; needs positional embedding?
        x = MultiHeadAttention(4)(x)
        x = layers.Dropout(dropout / 3)(x)
    if b:
        x = layers.Bidirectional(
            self.get_lstm(params['units'] // 2, return_sequences=True))(x)
        x = layers.Dropout(dropout)(x)
        x = layers.TimeDistributed(MultiHeadAttention(4))(x)
        # x = layers.Flatten()(x)
        # x = layers.Dropout(dropout)(x)
        # x = layers.Dense(params['embed_size'])(x)
        x = layers.Dropout(dropout / 3)(x)
    # if c:
    x = layers.Bidirectional(
        self.get_lstm(params['units'] // 2,
                      return_sequences=False,
                      name=self.bidirectional_name))(x)
    x = layers.Dropout(dropout)(x)
    x = layers.RepeatVector(params['num_sylls'])(x)
    x = layers.Dropout(dropout)(x)
    if d:
        x = PositionEmbedding(input_dim=params['embed_size'],
                              output_dim=params['num_sylls'] * 4,
                              mode=PositionEmbedding.MODE_CONCAT)(x)
        x = layers.Dropout(dropout)(x)
        x = MultiHeadAttention(4)(x)
        x = layers.Dropout(dropout)(x)
    x = self.get_lstm(params['units'],
                      return_sequences=True,
                      name=self.cu_dnnlstm_name)(x)
    if e:
        x = PositionEmbedding(input_dim=params['units'],
                              output_dim=params['units'],
                              mode=PositionEmbedding.MODE_ADD)(x)
        x = layers.Dropout(dropout)(x)
        # x = layers.Dense(params['units'])(x)
        # x = layers.Dropout(dropout)(x)
    if f:  # this was somewhat effective
        x = MultiHeadAttention(2)(x)
        x = layers.Dropout(dropout)(x)
    if g:
        x = layers.Dense(params['units'],
                         kernel_initializer='identity',
                         name='dense_identity',
                         activation='relu')(x)
        x = layers.Dropout(dropout)(x)
    output_layer = layers.Dense(params['max_features'],
                                activation='softmax',
                                name=self.dense_name)(x)
    model = Model(inputs=[hash_input], outputs=[output_layer])
    return model
def __build_model(self):
    print("Building the model...")
    vocab_size = self.preprocessor.get_vocab_size(self.tokenizer)
    if self.context:
        embedding_matrix = self.preprocessor.get_embedding_matrix(self.tokenizer)
        left_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='left_input')
        x1 = Embedding(
            output_dim=EMBEDDING_SIZE,
            input_dim=vocab_size,
            input_length=MAX_LENGTH,
            weights=[embedding_matrix],
            trainable=True,
        )(left_input)
        pos_left_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_left_input')
        x1p = keras.layers.Lambda(
            K.one_hot,
            arguments={'num_classes': pos_size},
            output_shape=(MAX_LENGTH, pos_size)
        )(pos_left_input)
        target_input = Input(shape=(10,), dtype='int32', name='target_input')
        x2 = Embedding(
            output_dim=EMBEDDING_SIZE,
            input_dim=vocab_size,
            input_length=10,
            weights=[embedding_matrix],
            trainable=True,
        )(target_input)
        pos_target_input = Input(shape=(10,), dtype='int32', name='pos_target_input')
        x2p = keras.layers.Lambda(
            K.one_hot,
            arguments={'num_classes': pos_size},
            output_shape=(10, pos_size)
        )(pos_target_input)
        right_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='right_input')
        x3 = Embedding(
            output_dim=EMBEDDING_SIZE,
            input_dim=vocab_size,
            input_length=MAX_LENGTH,
            weights=[embedding_matrix],
            trainable=True,
        )(right_input)
        pos_right_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_right_input')
        x3p = keras.layers.Lambda(
            K.one_hot,
            arguments={'num_classes': pos_size},
            output_shape=(MAX_LENGTH, pos_size)
        )(pos_right_input)
        aspect_input = Input(shape=(110,), dtype='int32', name='aspect_input')
        x4 = keras.layers.Lambda(
            K.one_hot,
            arguments={'num_classes': len(ASPECT_LIST)},
            output_shape=(110, len(ASPECT_LIST))
        )(aspect_input)
        x = keras.layers.concatenate([x1, x2, x3], axis=1)
        xp = keras.layers.concatenate([x1p, x2p, x3p], axis=1)
        x = keras.layers.concatenate([x, xp, x4])
        x = Bidirectional(LSTM(256, return_sequences=True))(x)
        x = GlobalMaxPool1D()(x)
        x = Dropout(0.5)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.5)(x)
        out = Dense(2, activation='softmax')(x)
        model = Model([left_input, target_input, right_input,
                       pos_left_input, pos_target_input, pos_right_input,
                       aspect_input], out)
        model.summary()
        model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['acc']
        )
    else:
        if self.embedding:
            embedding_matrix = self.preprocessor.get_embedding_matrix(self.tokenizer)
            main_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='main_input')
            x = Embedding(
                output_dim=EMBEDDING_SIZE,
                input_dim=vocab_size,
                input_length=MAX_LENGTH,
                weights=[embedding_matrix],
                trainable=self.trainable_embedding,
            )(main_input)
            aspect_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='aspect_input')
            x2 = keras.layers.Lambda(
                K.one_hot,
                arguments={'num_classes': len(ASPECT_LIST)},
                output_shape=(MAX_LENGTH, len(ASPECT_LIST))
            )(aspect_input)
            x = keras.layers.concatenate([x, x2])
            if self.position_embd:
                weights = np.random.random((201, 50))
                position_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='position_input')
                x2 = PositionEmbedding(
                    input_shape=(MAX_LENGTH,),
                    input_dim=100,
                    output_dim=50,
                    weights=[weights],
                    mode=PositionEmbedding.MODE_EXPAND,
                    name='position_embedding',
                )(position_input)
                x = keras.layers.concatenate([x, x2])
            if self.use_lexicon:
                lex_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='lex_input')
                x3 = keras.layers.Lambda(
                    K.one_hot,
                    arguments={'num_classes': 3},
                    output_shape=(MAX_LENGTH, 3)
                )(lex_input)
                x = keras.layers.concatenate([x, x3])
            if self.pos_tag == 'embedding':
                _, pos_size = self.preprocessor.get_pos_dict()
                pos_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_input')
                x4 = keras.layers.Lambda(
                    K.one_hot,
                    arguments={'num_classes': pos_size},
                    output_shape=(MAX_LENGTH, pos_size)
                )(pos_input)
                x = keras.layers.concatenate([x, x4])
        else:
            new_embedding_size = EMBEDDING_SIZE + 6
            if self.pos_tag == 'one_hot':
                new_embedding_size += 27
            if self.dependency is True:
                new_embedding_size += 2
            print('embedding size: ', new_embedding_size)
            main_input = Input(shape=(MAX_LENGTH, new_embedding_size), name='main_input')
        print("1. Input")
        if self.use_rnn is True:
            if self.embedding is True:
                if self.rnn_type == 'gru':
                    x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(x)
                else:
                    x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(x)
            else:
                if self.rnn_type == 'gru':
                    x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(main_input)
                else:
                    x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(main_input)
            # x = GlobalMaxPool1D()(x)
            x = GlobalAvgPool1D()(x)
            x = Dropout(self.dropout)(x)
        print("2. LSTM")
        if self.use_cnn is True:
            pass
        if self.n_dense != 0:
            for i in range(self.n_dense):
                x = Dense(self.n_neuron, activation='relu')(x)
                x = Dropout(self.dropout)(x)
        print("3. Dense")
        out = Dense(2, activation='softmax')(x)
        print("4. Out")
        x_input = list()
        x_input.append(main_input)
        x_input.append(aspect_input)
        if self.position_embd:
            x_input.append(position_input)
        if self.use_lexicon:
            x_input.append(lex_input)
        if self.pos_tag == 'embedding':
            x_input.append(pos_input)
        model = Model(x_input, out)
        print("5. Model")
        model.summary()
        model.compile(
            loss='categorical_crossentropy',
            optimizer=self.optimizer,
            metrics=['acc']
        )
        print("6. Done")
    return model
def get_model(n_vocab,
              n_ctx=1024,
              n_embd=768,
              n_head=12,
              n_layer=12,
              batch_size=None,
              fixed_input_shape=False):
    """Get basic GPT-2 model.

    :param n_vocab: Number of vocabulary tokens.
    :param n_ctx: The length of each input.
    :param n_embd: The dimension of embeddings.
    :param n_head: Number of heads in transformer.
    :param n_layer: Number of transformer blocks.
    :param batch_size: Batch size of the model.
    :param fixed_input_shape: Whether the length of input is fixed. (Needed for TPU training)
    :return: The model.
    """
    if fixed_input_shape:
        input_layer_shape = (batch_size, n_ctx)
    else:
        input_layer_shape = (batch_size, None)
    lm_input_layer = tf.keras.layers.Input(
        batch_shape=input_layer_shape,
        name='LMInput',
    )
    mc_input_layer = tf.keras.layers.Input(
        batch_shape=(batch_size,),
        name='MCInput',
    )
    embed_token, embeddings = EmbeddingRet(
        input_dim=n_vocab,
        output_dim=n_embd,
        mask_zero=False,
        name='Embed-Token',
    )(lm_input_layer)
    embed_token_pos = PositionEmbedding(
        input_dim=n_ctx,
        output_dim=n_embd,
        mode=PositionEmbedding.MODE_ADD,
        name='Embed-Token-Pos',
    )(embed_token)
    last_layer = embed_token_pos
    for i in range(n_layer):
        last_layer = _get_encoder_component(
            name='Encode-%d' % i,
            input_layer=last_layer,
            head_num=n_head,
            hidden_dim=n_embd * 4,
            attention_activation=None,
            feed_forward_activation=gelu,
        )
    norm_layer = LayerNormalization(name='Norm')(last_layer)
    lm_head = EmbeddingSim(
        use_bias=False,
        name='LMOutput',
    )([norm_layer, embeddings])
    mc_sequence_summary = SequenceSummary(name='MCSequenceSummary')([norm_layer, mc_input_layer])
    mc_linear = Dense(units=1, input_shape=(n_embd,), name='MCDense')(mc_sequence_summary)
    mc_head = Dropout(rate=0.1, name='MCOutput')(mc_linear)
    losses = {
        "LMOutput": lm_loss_function,
        "MCOutput": mc_loss_function,
    }
    loss_weights = {"LMOutput": 2.0, "MCOutput": 1.0}
    metrics = {"LMOutput": get_metrics(), "MCOutput": get_metrics(is_mc=True)}
    model = tf.keras.models.Model(inputs=[lm_input_layer, mc_input_layer],
                                  outputs=[lm_head, mc_head])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(clipnorm=1.),
        loss=losses,
        loss_weights=loss_weights,
        # metrics=metrics,
    )
    return model
def get_model(num_article,
              num_magazine,
              num_search_keyword,
              article_embedding_matrix,
              negative_sample_size,
              transformer_num=1,
              head_num=10,
              feed_forward_dim=100,
              dropout_rate=0.1,
              attention_activation=None,
              feed_forward_activation=tf.nn.leaky_relu,
              lr=1e-4,
              decay_rate=1e-5,
              inference=False,
              weight_path=None):
    if inference:
        trainable = None
    else:
        trainable = True
    user_inputs = get_user_inputs()
    pos_item_inputs, pos_user_item_inputs = get_item_inputs('pos')
    neg_item_inputs = []
    neg_user_item_inputs = []
    for i in range(negative_sample_size):
        item_inputs, user_item_inputs = get_item_inputs('neg{}'.format(i))
        neg_item_inputs.append(item_inputs)
        neg_user_item_inputs.append(user_item_inputs)
    if trainable:
        article_embedding = keras.layers.Embedding(
            input_dim=num_article,
            output_dim=200,
            weights=[article_embedding_matrix],
            trainable=False,
            name='E-Article',
        )
    else:
        article_embedding = keras.layers.Embedding(
            input_dim=num_article,
            output_dim=200,
            trainable=False,
            name='E-Article',
        )
    magazine_embedding = keras.layers.Embedding(
        input_dim=num_magazine,
        output_dim=43,
        trainable=trainable,
        name='E-Magazine',
    )
    author_embedding = keras.layers.Embedding(
        input_dim=19024,
        output_dim=50,
        trainable=trainable,
        name='E-Author',
    )
    embed_layer = Concatenate(axis=-1, name='UserConcat')([
        article_embedding(user_inputs[0]),
        magazine_embedding(user_inputs[1]),
        author_embedding(user_inputs[2]),
        user_inputs[3],
    ])
    embed_layer = PositionEmbedding(
        input_dim=MAX_USER_SEQUENCE_LEN,
        output_dim=300,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='E-Position',
    )(embed_layer)
    user_feature = get_transformer(
        encoder_num=transformer_num,
        input_layer=embed_layer,
        head_num=head_num,
        hidden_dim=feed_forward_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        name='User',
    )
    search_keyword_layer = keras.layers.Embedding(
        input_dim=num_search_keyword,
        output_dim=50,
        trainable=trainable,
        name='EMB-SearchKeyword',
    )(user_inputs[4])
    search_keyword_feature = get_transformer(
        encoder_num=transformer_num,
        input_layer=search_keyword_layer,
        head_num=head_num,
        hidden_dim=50,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        name='SK',
    )
    user_embedding = Concatenate(axis=-1, name='UserEmbedding1')([user_feature, search_keyword_feature])
    user_embedding = Dense(200, name='UserEmbedding2', activation=feed_forward_activation)(user_embedding)
    item_layer = Dense(200, name='ItemEmbedding', activation=feed_forward_activation)
    score0 = Dense(1024, name='Scorer0', activation=feed_forward_activation)
    score1 = Dense(512, name='Scorer1', activation=feed_forward_activation)
    score2 = Dense(256, name='Scorer2', activation=feed_forward_activation)
    '''
    if inference:
        final_activation = 'relu'
    else:
        final_activation = 'sigmoid'
    '''
    score3 = Dense(1, name='Scorer3', activation=None)

    def extract_item(inputs):
        target_article = Reshape(target_shape=(200,))(article_embedding(inputs[0]))
        target_magazine = Reshape(target_shape=(43,))(magazine_embedding(inputs[1]))
        target_author = Reshape(target_shape=(50,))(author_embedding(inputs[2]))
        item_feature = Concatenate(axis=-1)([target_article, target_magazine, target_author, inputs[3]])
        item_embedding = item_layer(item_feature)
        return item_embedding

    def scorer(user_embedding, item_embedding, inputs):
        merged = Concatenate(axis=-1)([user_embedding, item_embedding, inputs[0]])
        merged = score0(merged)
        merged = score1(merged)
        merged = score2(merged)
        output = score3(merged)
        return output

    pos_item_embedding = extract_item(pos_item_inputs)
    pos_score = scorer(user_embedding, pos_item_embedding, pos_user_item_inputs)
    neg_scores = []
    for i in range(negative_sample_size):
        neg_item_embedding = extract_item(neg_item_inputs[i])
        score = scorer(user_embedding, neg_item_embedding, neg_user_item_inputs[i])
        neg_scores.append(score)
    output = concatenate([pos_score] + neg_scores)
    inputs = list(user_inputs)
    inputs += pos_item_inputs
    inputs += pos_user_item_inputs
    for i in range(negative_sample_size):
        inputs += neg_item_inputs[i]
        inputs += neg_user_item_inputs[i]
    model = keras.models.Model(inputs=inputs, outputs=output)
    if inference:
        model.load_weights(weight_path)
        user_embed_input = keras.layers.Input(
            shape=(200,),
            name='I-UserEmbedding',
        )
        item_embed_input = keras.layers.Input(
            shape=(200,),
            name='I-ItemEmbedding',
        )
        scorer_inputs = [user_embed_input, item_embed_input] + pos_user_item_inputs
        scorer_output = scorer(user_embed_input, item_embed_input, pos_user_item_inputs)
        scorer_model = keras.models.Model(inputs=scorer_inputs, outputs=scorer_output)
        for layer in scorer_model.layers:
            if len(layer.get_weights()) == 0:
                continue
            try:
                layer.set_weights(model.get_layer(name=layer.name).get_weights())
            except Exception as e:
                print("Could not transfer weights for layer {}".format(layer.name))
                raise e
        user_model = keras.models.Model(inputs=user_inputs, outputs=user_embedding)
        item_model = keras.models.Model(inputs=pos_item_inputs, outputs=pos_item_embedding)
        return user_model, item_model, scorer_model
    else:
        def hinge_loss(y_true, y_pred):
            # hinge loss
            y_pos = y_pred[:, :1]
            y_neg = y_pred[:, 1:]
            loss = K.sum(K.maximum(0., 0.2 - y_pos + y_neg))
            return loss

        model.compile(loss=hinge_loss,
                      optimizer=Adam(lr=lr, decay=decay_rate),
                      metrics=['accuracy'])
        return model
def __build_model(self, embedding_matrix):
    print("Building the model...")
    vocab_size = self.preprocessor.get_vocab_size(self.tokenizer)
    if self.embedding:
        main_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='main_input')
        x = Embedding(
            output_dim=EMBEDDING_SIZE,
            input_dim=vocab_size,
            input_length=MAX_LENGTH,
            weights=[embedding_matrix],
            trainable=self.trainable_embedding,
        )(main_input)
        if self.position_embd == True:
            weights = np.random.random((201, 50))
            position_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='position_input')
            x2 = PositionEmbedding(
                input_shape=(MAX_LENGTH,),
                input_dim=100,
                output_dim=50,
                weights=[weights],
                mode=PositionEmbedding.MODE_EXPAND,
                name='position_embedding',
            )(position_input)
            x = keras.layers.concatenate([x, x2])
        if self.pos_tag == 'embedding':
            _, pos_size = self.preprocessor.get_pos_dict()
            pos_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_input')
            x3 = Lambda(K.one_hot,
                        arguments={'num_classes': pos_size},
                        output_shape=(MAX_LENGTH, pos_size))(pos_input)
            x = keras.layers.concatenate([x, x3])
    else:
        new_embedding_size = EMBEDDING_SIZE
        if self.pos_tag == 'one_hot':
            new_embedding_size += 27
        if self.dependency is True:
            new_embedding_size += 2
        print('embedding size: ', new_embedding_size)
        main_input = Input(shape=(MAX_LENGTH, new_embedding_size), name='main_input')
    print("1. Input")
    if self.use_rnn is True:
        if self.embedding is True:
            if self.rnn_type == 'gru':
                x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(x)
            else:
                x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(x)
        else:
            if self.rnn_type == 'gru':
                x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(main_input)
            else:
                x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(main_input)
        x = GlobalMaxPool1D()(x)
        # x = GlobalAvgPool1D()(x)
        x = Dropout(self.dropout)(x)
    print("2. LSTM")
    if self.use_cnn is True:
        pass
    if self.n_dense != 0:
        for i in range(self.n_dense):
            x = Dense(self.n_neuron, activation='relu')(x)
            x = Dropout(self.dropout)(x)
    print("3. Dense")
    out = Dense(len(self.aspects), activation='sigmoid')(x)
    print("4. Out")
    x_input = list()
    x_input.append(main_input)
    if self.position_embd == True:
        x_input.append(position_input)
    if self.pos_tag == 'embedding':
        x_input.append(pos_input)
    model = Model(x_input, out)
    print("5. Model")
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer=self.optimizer,
                  metrics=[f1])
    print("6. Done")
    return model