def _build_newsencoder(self, name):
    """Build the NRMS news (title) encoder over pre-computed BERT embeddings.

    Unlike the other encoders, this sub-model does NOT run BERT itself: it
    takes the BERT output for one title as its input and applies
    dropout -> multi-head self-attention -> dropout -> additive attention
    pooling to produce a single title vector.

    Args:
        name (str): name given to the returned keras.Model.

    Return:
        obj: keras.Model mapping (title_size, 768) BERT embeddings to one
        pooled title representation (size head_num * head_dim).
    """
    hparams = self.hparams
    # NOTE(review): 768 is hard-coded to the BERT-base hidden size —
    # confirm it matches self.bert_model's output dimension.
    embedded_sequences_title = keras.Input(
        shape=(
            hparams['title_size'],
            768,
        ),
        name='embedded_sequences_title')

    y = layers.Dropout(hparams['dropout'])(embedded_sequences_title)
    # NOTE(review): this uses MultiHeadAttention without a seed, unlike the
    # seeded SelfAttention used elsewhere — confirm reproducibility needs.
    y = MultiHeadAttention(hparams['head_num'],
                           hparams['head_dim'])([y, y, y])
    y = layers.Dropout(hparams['dropout'])(y)
    # Additive attention pools the word axis away: (title_size, D) -> (D,).
    pred_title = AttLayer2(hparams['attention_hidden_dim'],
                           seed=self.seed)(y)

    model = keras.Model(embedded_sequences_title, pred_title, name=name)
    return model
def _build_eneityencoder(self, embedding_layer, name):
    """Build an entity (or context) encoder for NRMS.

    Embeds a sequence of entity ids with the supplied embedding layer and
    pools it with dropout -> seeded self-attention -> dropout -> additive
    attention, mirroring the title encoder's structure.

    Args:
        embedding_layer (obj): embedding layer for the entity ids.
        name (str): name given to the returned keras.Model.

    Return:
        obj: keras.Model mapping (title_size,) int ids to one vector.
    """
    hparams = self.hparams
    entity_ids = keras.Input(shape=(hparams['title_size'], ),
                             dtype="int32",
                             name='sequences_input_title')
    hidden = embedding_layer(entity_ids)  # TODO: shape may be wrong
    hidden = layers.Dropout(hparams['dropout'])(hidden)
    hidden = SelfAttention(hparams['head_num'],
                           hparams['head_dim'],
                           seed=self.seed)([hidden] * 3)
    hidden = layers.Dropout(hparams['dropout'])(hidden)
    pooled = AttLayer2(hparams['attention_hidden_dim'],
                       seed=self.seed)(hidden)
    return keras.Model(entity_ids, pooled, name=name)
def _build_subvertencoder(self):
    """Build the subvert (sub-category) encoder of the NAML news encoder.

    Embeds the single subvert id and projects it to filter_num units so it
    can be concatenated with the other view representations.

    Return:
        obj: keras.Model mapping a (1,) int subvert id to (1, filter_num).
    """
    hparams = self.hparams
    subvert_id = keras.Input(shape=(1, ), dtype="int32")
    embedded = layers.Embedding(hparams['subvert_num'],
                                hparams['subvert_emb_dim'],
                                trainable=True)(subvert_id)
    projected = layers.Dense(
        hparams['filter_num'],
        activation=hparams['dense_activation'],
        bias_initializer=keras.initializers.Zeros(),
        kernel_initializer=keras.initializers.glorot_uniform(
            seed=self.seed),
    )(embedded)
    projected = layers.Reshape((1, hparams['filter_num']))(projected)
    return keras.Model(subvert_id, projected, name="subvert_encoder")
def _build_bodyencoder(self):
    """Build the body encoder: BERT embedding + self-attention pooling.

    Return:
        obj: keras.Model mapping (body_size,) int token ids to a
        (1, filter_num) body representation.
    """
    hparams = self.hparams
    # BUG FIX: the input length was hparams['title_size'], but the only
    # caller (_build_newsencoder) feeds this encoder a slice of length
    # hparams['body_size'] — the two only matched by coincidence.
    sequences_input_body = keras.Input(shape=(hparams['body_size'], ),
                                       dtype="int32")
    embedded_sequences_body = self.bert_model(sequences_input_body)
    y = layers.Dropout(hparams['dropout'])(embedded_sequences_body)
    y = SelfAttention(hparams['head_num'],
                      hparams['head_dim'],
                      seed=self.seed)([y, y, y])
    y = layers.Dropout(hparams['dropout'])(y)
    pred_body = AttLayer2(hparams['attention_hidden_dim'],
                          seed=self.seed)(y)
    # NOTE(review): assumes filter_num equals AttLayer2's output width
    # (head_num * head_dim) — confirm in hparams.
    pred_body = layers.Reshape((1, hparams['filter_num']))(pred_body)
    model = keras.Model(sequences_input_body,
                        pred_body,
                        name="body_encoder")
    return model
def _build_nrms(self):
    """The main function to create NRMS's logic.

    The core of NRMS is a user encoder and a news encoder; titles are run
    through an external BERT model, and entity / context encoders are
    concatenated in when the corresponding embedding files are configured.

    NOTE(review): a second `_build_nrms` is defined later in this file and
    will shadow this one inside the class — confirm which is intended.

    Returns:
        obj: a model used to train.
        obj: a model used to evaluate and inference.
    """
    hparams = self.hparams

    # Clicked-history inputs: (his_size, title_size) token ids plus the
    # matching BERT segment ids.
    his_input_title = keras.Input(shape=(
        hparams['his_size'],
        hparams['title_size'],
    ),
                                  dtype="int32",
                                  name='his_input_title')
    his_input_segment = keras.Input(
        shape=(
            hparams['his_size'],
            hparams['title_size'],
        ),
        dtype="int32",
        name="his_input_segment"  # LP add
    )
    # Candidate inputs: npratio negatives + 1 positive per impression.
    pred_input_title = keras.Input(shape=(
        hparams['npratio'] + 1,
        hparams['title_size'],
    ),
                                   dtype="int32",
                                   name='pred_input_title')
    pred_input_segment = keras.Input(shape=(
        hparams['npratio'] + 1,
        hparams['title_size'],
    ),
                                     dtype="int32",
                                     name='pred_input_segment')

    # # # Reshape for bert-use
    # # pred_input_segment_reshape = keras.layers.Reshape(((hparams['npratio'] + 1) * hparams['title_size'],))(
    #     pred_input_segment)
    # pred_input_title_reshape = keras.layers.Reshape(((hparams['npratio'] + 1) * hparams['title_size'],))(
    #     pred_input_title)
    # his_input_title = keras.Input(
    #     shape=[None,], dtype="int32", name='his_input_title'
    # )
    # his_input_segment = keras.Input(
    #     shape=[None,], dtype="int32", name="his_input_segment"  # LP add
    # )
    # pred_input_title = keras.Input(
    #     shape=[None,], dtype="int32", name='pred_input_title'
    # )
    # pred_input_segment = keras.Input(
    #     shape=[None,], dtype="int32", name='pred_input_segment'
    # )

    # Single-candidate inputs used by the scorer model.
    pred_input_title_one = keras.Input(shape=(1, hparams['title_size']),
                                       dtype="int32",
                                       name='pred_input_title_one')
    pred_title_one_reshape = layers.Reshape(
        (hparams['title_size'], ),
        name='pred_title_one_reshape')(pred_input_title_one)
    pred_input_title_segment_one = keras.Input(
        shape=(1, hparams['title_size']),
        dtype="int32",
        name='pred_input_title_segment_one'  # LP add
    )
    pred_title_segment_one_reshape = layers.Reshape(
        (hparams['title_size'], ),
        name='pred_title_segment_one_reshape')(
            pred_input_title_segment_one)  # LP add

    # embedding_layer = layers.Embedding(
    #     self.word2vec_embedding.shape[0],
    #     hparams.word_emb_dim,
    #     weights=[self.word2vec_embedding],
    #     trainable=True,
    # )
    # use bert_model instead of a word embedding layer
    entity_embedding_layer = None
    context_embedding_layer = None
    # Entity / context inputs and embeddings exist only when the embedding
    # files are configured in hparams.
    if hparams['entityEmb_file'] is not None:
        his_input_title_entity = keras.Input(shape=(hparams['his_size'],
                                                    hparams['title_size']),
                                             dtype="int32",
                                             name='his_input_title_entity')
        pred_input_title_entity = keras.Input(
            shape=(hparams['npratio'] + 1, hparams['title_size']),
            dtype="int32",
            name='pred_input_title_entity')
        pred_input_title_one_entity = keras.Input(
            shape=(
                1,
                hparams['title_size'],
            ),
            dtype="int32",
            name='pred_input_title_one_entity')
        pred_title_one_reshape_entity = layers.Reshape(
            (hparams['title_size'], ),
            name='pred_title_one_reshape_entity')(
                pred_input_title_one_entity)
        entity_embedding_layer = layers.Embedding(
            self.entity2vec_embedding.shape[0],
            self.entity2vec_embedding.shape[1],
            weights=[self.entity2vec_embedding],
            trainable=True,
            name='entity_embedding_layer')
        if hparams['contextEmb_file'] is not None:
            context_embedding_layer = layers.Embedding(
                self.context2vec_embedding.shape[0],
                self.context2vec_embedding.shape[1],
                weights=[self.context2vec_embedding],
                trainable=True,
                name='context_embedding_layer')

    titleencoder = self._build_newsencoder('news_encoder')
    if hparams['entityEmb_file']:
        entity_encoder = self._build_eneityencoder(entity_embedding_layer,
                                                   'entity_encoder')
        if hparams['contextEmb_file']:
            context_encoder = self._build_eneityencoder(
                context_embedding_layer, 'context_encoder')
        else:
            context_encoder = None
    else:
        entity_encoder = None
        context_encoder = None

    self.userencoder = self._build_userencoder(his_input_title,
                                               his_input_segment,
                                               titleencoder,
                                               entity_encoder,
                                               context_encoder)
    # from tensorflow.keras.utils import plot_model
    # plot_model(self.userencoder, to_file='userencoder_model.png',show_shapes=True,show_layer_names=True)
    self.newsencoder = titleencoder
    self.entityencoder = entity_encoder
    self.contextencoder = context_encoder

    if hparams['entityEmb_file'] is not None:
        # With entities: user vector from titles + entities; candidate
        # vectors are concatenations of title / entity (/ context) vectors.
        user_present = self.userencoder(
            [his_input_title, his_input_title_entity])
        pred_input_title_emb = layers.TimeDistributed(self.bert_model)(
            pred_input_title)  # TODO: shape may be wrong
        # pred_input_title_emb = keras.layers.Reshape((hparams.npratio + 1, hparams.title_size, 768))(
        #     pred_input_title_emb)
        news_present = layers.TimeDistributed(
            self.newsencoder)(pred_input_title_emb)
        pred_input_title_one_emb = self.bert_model(
            pred_title_one_reshape)  # TODO: shape may be wrong
        news_present_one = self.newsencoder(pred_input_title_one_emb)
        news_entity_present = layers.TimeDistributed(
            self.entityencoder)(pred_input_title_entity)
        news_entity_present_one = self.entityencoder(
            pred_title_one_reshape_entity)
        if hparams['contextEmb_file'] is not None:
            # Context encoder reuses the same entity-id inputs.
            news_context_present = layers.TimeDistributed(
                self.contextencoder)(pred_input_title_entity)
            news_context_present_one = self.contextencoder(
                pred_title_one_reshape_entity)
            news_present = layers.Concatenate()(
                [news_present, news_entity_present, news_context_present])
            news_present_one = layers.Concatenate()([
                news_present_one, news_entity_present_one,
                news_context_present_one
            ])
        else:
            news_present = layers.Concatenate()(
                [news_present, news_entity_present])
            news_present_one = layers.Concatenate()(
                [news_present_one, news_entity_present_one])
    else:
        # Title-only variant.
        user_present = self.userencoder([his_input_title])
        pred_input_title_emb = layers.TimeDistributed(self.bert_model)(
            pred_input_title)  # TODO: shape may be wrong; shape:(-1,5,30, 768)
        # Reshape after bert
        # pred_input_title_emb = keras.layers.Reshape((hparams['npratio'] + 1, hparams['title_size'], 768))(
        #     pred_input_title_emb)
        news_present = layers.TimeDistributed(self.newsencoder)(
            pred_input_title_emb)  # (-1, 5, 400)
        # NOTE(review): leftover debug print.
        print('news_present_shape:', news_present.shape)
        pred_input_title_one_emb = self.bert_model(
            pred_title_one_reshape)  # shape: (1,768)
        news_present_one = self.newsencoder(
            pred_input_title_one_emb)  # (-1,400)

    # Click probability: dot(user, candidate); softmax over the npratio+1
    # candidates for training, sigmoid for single-candidate scoring.
    preds = layers.Dot(axes=-1)([news_present, user_present])  # shape: (-1, 5)
    preds = layers.Activation(activation="softmax")(
        preds)  # shape: (-1, 5)
    pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
    pred_one = layers.Activation(activation="sigmoid")(pred_one)

    # NOTE(review): the segment inputs below are accepted but never
    # connected to the graph — confirm Keras tolerates the disconnected
    # inputs as intended.
    if hparams['entityEmb_file'] is not None:
        model = keras.Model([
            his_input_title, his_input_segment, pred_input_title,
            pred_input_segment, his_input_title_entity,
            pred_input_title_entity
        ],
                            preds,
                            name='NRMS')
        scorer = keras.Model([
            his_input_title, his_input_segment, pred_input_title_one,
            pred_input_title_segment_one, his_input_title_entity,
            pred_input_title_one_entity
        ],
                             pred_one,
                             name='scorer')
    else:
        model = keras.Model([
            his_input_title, his_input_segment, pred_input_title,
            pred_input_segment
        ],
                            preds,
                            name='NRMS')
        scorer = keras.Model([
            his_input_title, his_input_segment, pred_input_title_one,
            pred_input_title_segment_one
        ],
                             pred_one,
                             name='scorer')
    return model, scorer
def _build_userencoder(self, his_input_title, his_input_segment,
                       titleencoder, entityencoder, contextencoder):
    """The main function to create user encoder of NRMS.

    Encodes each clicked title with BERT + the title encoder, optionally
    concatenates entity / context views, and pools the history with
    additive attention into one user vector.

    Args:
        his_input_title (obj): (his_size, title_size) history token input.
        his_input_segment (obj): matching segment-id input.
            NOTE(review): accepted but never used in this graph — verify.
        titleencoder (obj): the news (title) encoder of NRMS.
        entityencoder (obj): entity encoder, or None to skip entities.
        contextencoder (obj): context encoder, or None to skip context.

    Return:
        obj: the user encoder of NRMS.
    """
    hparams = self.hparams
    # his_input_title = keras.Input(
    #     shape=(hparams.his_size, hparams.title_size), dtype="int32", name='ue_his_input_title'
    # )
    #
    # his_input_segment = keras.Input(
    #     shape=(hparams.his_size, hparams.title_size), dtype="int32", name='ue_his_input_segment'
    # )
    # Run BERT over each clicked title independently.
    embedded_sequences_title = layers.TimeDistributed(self.bert_model)(
        his_input_title)  # TODO: shape may be wrong; (-1, 50,30,768)
    embedded_sequences_title = keras.layers.Reshape(
        (hparams['his_size'], hparams['title_size'], 768),
        name='embedded_sequences_title_reshape')(embedded_sequences_title)
    # One pooled vector per clicked title.
    click_title_presents = layers.TimeDistributed(
        titleencoder, name='news_time_distributed')(embedded_sequences_title)
    # y = SelfAttention(hparams['head_num'], hparams['head_dim'], seed=self.seed)(
    #     [click_title_presents] * 3
    # )
    # NOTE(review): MultiHeadAttention here takes no seed, unlike the
    # seeded SelfAttention used for the entity/context branches below.
    y = MultiHeadAttention(hparams['head_num'],
                           hparams['head_dim'])([click_title_presents] * 3)

    if entityencoder is not None:
        # The entity input is created inside this builder (not passed in).
        his_input_title_entity = keras.Input(shape=(hparams['his_size'],
                                                    hparams['title_size']),
                                             dtype="int32",
                                             name='his_input_title_entity')
        click_title_entity_presents = layers.TimeDistributed(
            entityencoder,
            name='entity_time_distributed')(his_input_title_entity)
        entity_y = SelfAttention(hparams['head_num'],
                                 hparams['head_dim'],
                                 seed=self.seed)(
                                     [click_title_entity_presents] * 3)
        if contextencoder is not None:
            # Context view reuses the same entity-id input.
            click_title_context_presents = layers.TimeDistributed(
                contextencoder,
                name='context_time_distributed')(his_input_title_entity)
            context_y = SelfAttention(
                hparams['head_num'], hparams['head_dim'],
                seed=self.seed)([click_title_context_presents] * 3)
            y = layers.Concatenate()([y, entity_y, context_y])
        else:
            y = layers.Concatenate()([y, entity_y])

    # Additive attention pools the history axis into one user vector.
    user_present = AttLayer2(hparams['attention_hidden_dim'],
                             seed=self.seed)(y)

    if entityencoder is not None:
        model = keras.Model(
            inputs=[his_input_title, his_input_title_entity],
            outputs=user_present,
            name="user_encoder")
    else:
        model = keras.Model(his_input_title,
                            user_present,
                            name="user_encoder")
    return model
def _build_nrms(self):
    """The main function to create NRMS's logic (NAML-style multi-view).

    All per-news features (title, segment, entity, body, vert, subvert)
    are concatenated along the last axis into one int vector per news so
    a single news encoder can be TimeDistributed over history/candidates.

    NOTE(review): this redefines `_build_nrms` — if the 4-arg variant
    earlier in this file is in the same class, this later definition
    shadows it. Confirm which one is meant to be active.

    Returns:
        obj: a model used to train.
        obj: a model used to evaluate and inference.
    """
    hparams = self.hparams

    # History inputs, one per view.
    his_input_title = keras.Input(shape=(
        hparams['his_size'],
        hparams['title_size'],
    ),
                                  dtype="int32",
                                  name='his_input_title')
    his_input_segment = keras.Input(
        shape=(
            hparams['his_size'],
            hparams['title_size'],
        ),
        dtype="int32",
        name="his_input_segment"  # LP add
    )
    his_input_body = keras.Input(shape=(hparams['his_size'],
                                        hparams['body_size']),
                                 dtype="int32")
    his_input_vert = keras.Input(shape=(hparams['his_size'], 1),
                                 dtype="int32")
    his_input_subvert = keras.Input(shape=(hparams['his_size'], 1),
                                    dtype="int32")
    his_input_title_entity = keras.Input(shape=(hparams['his_size'],
                                                hparams['title_size']),
                                         dtype="int32",
                                         name='his_input_title_entity')

    # Candidate inputs (npratio negatives + 1 positive), one per view.
    pred_input_title = keras.Input(shape=(
        hparams['npratio'] + 1,
        hparams['title_size'],
    ),
                                   dtype="int32",
                                   name='pred_input_title')
    pred_input_segment = keras.Input(shape=(
        hparams['npratio'] + 1,
        hparams['title_size'],
    ),
                                     dtype="int32",
                                     name='pred_input_segment')
    pred_input_title_entity = keras.Input(shape=(hparams['npratio'] + 1,
                                                 hparams['title_size']),
                                          dtype="int32",
                                          name='pred_input_title_entity')
    pred_input_body = keras.Input(shape=(hparams['npratio'] + 1,
                                         hparams['body_size']),
                                  dtype="int32")
    pred_input_vert = keras.Input(shape=(hparams['npratio'] + 1, 1),
                                  dtype="int32")
    pred_input_subvert = keras.Input(shape=(hparams['npratio'] + 1, 1),
                                     dtype="int32")

    # Single-candidate inputs for the scorer model.
    pred_input_title_one = keras.Input(shape=(1, hparams['title_size']),
                                       dtype="int32",
                                       name='pred_input_title_one')
    # pred_title_one_reshape = layers.Reshape((hparams['title_size'],),
    #                                         name='pred_title_one_reshape')(pred_input_title_one)
    pred_input_title_segment_one = keras.Input(
        shape=(1, hparams['title_size']),
        dtype="int32",
        name='pred_input_title_segment_one'  # LP add
    )
    pred_input_entity_one = keras.Input(shape=(
        1,
        hparams['title_size'],
    ),
                                        dtype="int32",
                                        name='pred_input_title_one_entity')
    # pred_title_segment_one_reshape = layers.Reshape((hparams['title_size'],),
    #                                                 name='pred_title_segment_one_reshape')(
    #     pred_input_title_segment_one)  # LP add
    pred_input_body_one = keras.Input(shape=(
        1,
        hparams['body_size'],
    ),
                                      dtype="int32")
    pred_input_vert_one = keras.Input(shape=(1, 1), dtype="int32")
    pred_input_subvert_one = keras.Input(shape=(1, 1), dtype="int32")

    # Pack all views into one int vector per news. The order
    # [title | segment | entity | body | vert | subvert] must match the
    # slicing layout in _build_newsencoder.
    his_title_body_verts = layers.Concatenate(axis=-1)([
        his_input_title, his_input_segment, his_input_title_entity,
        his_input_body, his_input_vert, his_input_subvert
    ])
    pred_title_body_verts = layers.Concatenate(axis=-1)([
        pred_input_title, pred_input_segment, pred_input_title_entity,
        pred_input_body, pred_input_vert, pred_input_subvert
    ])
    pred_title_body_verts_one = layers.Concatenate(axis=-1)([
        pred_input_title_one,
        pred_input_title_segment_one,
        pred_input_entity_one,
        pred_input_body_one,
        pred_input_vert_one,
        pred_input_subvert_one,
    ])
    # Drop the leading length-1 candidate axis for the scorer path.
    pred_title_body_verts_one = layers.Reshape(
        (-1, ))(pred_title_body_verts_one)

    # embedding_layer = layers.Embedding(
    #     self.word2vec_embedding.shape[0],
    #     hparams.word_emb_dim,
    #     weights=[self.word2vec_embedding],
    #     trainable=True,
    # )
    # use bert_model instead of a word embedding layer
    # entity_embedding_layer = None
    # context_embedding_layer = None
    entity_embedding_layer = layers.Embedding(
        self.entity2vec_embedding.shape[0],
        self.entity2vec_embedding.shape[1],
        weights=[self.entity2vec_embedding],
        trainable=True,
        name='entity_embedding_layer')
    context_embedding_layer = layers.Embedding(
        self.context2vec_embedding.shape[0],
        self.context2vec_embedding.shape[1],
        weights=[self.context2vec_embedding],
        trainable=True,
        name='context_embedding_layer')

    self.newsencoder = self._build_newsencoder('news_encoder',
                                               entity_embedding_layer,
                                               context_embedding_layer)
    # NOTE(review): the _build_userencoder visible in this file takes five
    # arguments — this one-argument call suggests another (overriding)
    # definition exists elsewhere; verify.
    self.userencoder = self._build_userencoder(self.newsencoder)
    # from tensorflow.keras.utils import plot_model
    # plot_model(self.userencoder, to_file='userencoder_model.png',show_shapes=True,show_layer_names=True)

    user_present = self.userencoder(his_title_body_verts)
    news_present = layers.TimeDistributed(
        self.newsencoder)(pred_title_body_verts)
    news_present_one = self.newsencoder(pred_title_body_verts_one)

    # Training head: softmax over candidates; scoring head: sigmoid.
    preds = layers.Dot(axes=-1)([news_present, user_present])
    preds = layers.Activation(activation="softmax")(preds)
    pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
    pred_one = layers.Activation(activation="sigmoid")(pred_one)

    model = keras.Model(
        [
            his_input_title,
            his_input_segment,
            his_input_title_entity,
            his_input_body,
            his_input_vert,
            his_input_subvert,
            pred_input_title,
            pred_input_segment,
            pred_input_title_entity,
            pred_input_body,
            pred_input_vert,
            pred_input_subvert,
        ],
        preds,
    )
    scorer = keras.Model(
        [
            his_input_title,
            his_input_segment,
            his_input_title_entity,
            his_input_body,
            his_input_vert,
            his_input_subvert,
            pred_input_title_one,
            pred_input_title_segment_one,
            pred_input_entity_one,
            pred_input_body_one,
            pred_input_vert_one,
            pred_input_subvert_one,
        ],
        pred_one,
    )
    return model, scorer
def _build_newsencoder(self, name, entity_embedding_layer,
                       context_embedding_layer):
    """Create the multi-view news encoder (NAML-style) of NRMS.

    Takes one flat int vector per news laid out as
    [title | segment | entity | body | vert | subvert]
    (each of the first three is title_size wide, body is body_size,
    vert/subvert are 1 each), slices out each view, encodes it with its
    dedicated sub-encoder, and pools the view vectors with additive
    attention.

    NOTE(review): this redefines `_build_newsencoder` — if the 1-arg
    variant earlier in this file is in the same class, this later
    definition shadows it. Confirm which is intended.

    Args:
        name (str): unused here; the model is always named 'news_encoder'.
        entity_embedding_layer (obj): embedding layer for entity ids.
        context_embedding_layer (obj): embedding layer for context ids.

    Return:
        obj: the news encoder of NRMS.
    """
    hparams = self.hparams
    # Flat input width = 3 * title_size + body_size + 2 (vert + subvert).
    input_title_entity_body_verts = keras.Input(
        shape=(hparams['title_size'] * 3 + hparams['body_size'] + 2),
        dtype="int32",
        name='ue_his_input')

    # Slice each view back out of the packed vector. The segment block at
    # [title_size : 2*title_size] is deliberately never sliced/used here.
    sequences_input_title = layers.Lambda(lambda x: x[:, :hparams[
        'title_size']])(input_title_entity_body_verts)
    sequences_input_entity = layers.Lambda(lambda x: x[:, hparams[
        'title_size'] * 2:hparams['title_size'] * 3])(
            input_title_entity_body_verts)
    sequences_input_body = layers.Lambda(
        lambda x: x[:, hparams['title_size'] * 3:hparams['title_size'] * 3 +
                    hparams['body_size']])(input_title_entity_body_verts)
    input_vert = layers.Lambda(lambda x: x[:, hparams[
        'title_size'] * 3 + hparams['body_size']:hparams[
            'title_size'] * 3 + hparams['body_size'] + 1, ])(
                input_title_entity_body_verts)
    input_subvert = layers.Lambda(lambda x: x[:, hparams[
        'title_size'] * 3 + hparams['body_size'] + 1:])(
            input_title_entity_body_verts)

    # sequences_input_segment = keras.Input(shape=(hparams.title_size,), dtype="int32",
    #                                       name='sequences_input_segment')
    # embedded_sequences_title = layers.TimeDistributed(self.bert_model)(
    #     sequences_input_title)  # TODO: shape may be wrong
    # embedded_sequences_title = keras.Input(shape=(hparams['title_size'],
    #                                               768,),
    #                                        name='embedded_sequences_title')  # TODO: shape may be wrong (?, 30, 768)

    # One representation per view; entity and context share the entity ids.
    title_repr = self._build_titleencoder()(sequences_input_title)
    body_repr = self._build_bodyencoder()(sequences_input_body)
    entity_repr = self._build_eneityencoder(
        entity_embedding_layer, 'entity_encoder')(sequences_input_entity)
    context_repr = self._build_eneityencoder(
        context_embedding_layer, 'context_encoder')(sequences_input_entity)
    vert_repr = self._build_vertencoder()(input_vert)
    subvert_repr = self._build_subvertencoder()(input_subvert)

    # Stack the view vectors along axis -2 and attend over views.
    concate_repr = layers.Concatenate(axis=-2)([
        title_repr, entity_repr, context_repr, body_repr, vert_repr,
        subvert_repr
    ])
    news_repr = AttLayer2(hparams['attention_hidden_dim'],
                          seed=self.seed)(concate_repr)

    model = keras.Model(input_title_entity_body_verts,
                        news_repr,
                        name='news_encoder')
    return model