def __init__(self, embs, encoder=None, pooling=None, combiner='sum', **kwargs):
  super(Encoders, self).__init__(**kwargs)
  self.embs = embs
  self.encoder = util.get_encoder(encoder)
  pooling = pooling or 'att'
  self.pooling = mt.layers.Pooling(pooling)
  # 'concat' stacks the per-field embeddings, so the output dim grows with the number of fields
  self.output_dim = embs[0].output_dim if combiner != 'concat' else embs[0].output_dim * len(embs)
  self.combiner = combiner
  assert self.combiner == 'sum'
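# Illustration only, not part of the model: a minimal sketch of the 'sum' vs 'concat'
# combiner logic used by Encoders above, with plain tf ops standing in for the real layers
# and toy shapes made up for the example.
import tensorflow as tf

def combine_embs(emb_list, combiner='sum'):
  # emb_list: tensors of shape [batch, seq_len, dim], one per field
  if combiner == 'concat':
    return tf.concat(emb_list, axis=-1)  # output dim = dim * len(emb_list)
  return tf.add_n(emb_list)              # output dim = dim

x1 = tf.random.normal([2, 5, 8])
x2 = tf.random.normal([2, 5, 8])
assert combine_embs([x1, x2], 'sum').shape == (2, 5, 8)
assert combine_embs([x1, x2], 'concat').shape == (2, 5, 16)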
def __init__(self, encoder, seqs_encoder=None, pooling=None, use_seq_position=False, **kwargs):
  super(SeqsEncoder, self).__init__(**kwargs)
  # apply the per-item encoder to every step of the history sequence
  self.encoder = keras.layers.TimeDistributed(encoder, name=encoder.name)
  self.seqs_encoder = util.get_encoder(seqs_encoder)
  pooling = pooling or 'din'
  self.pooling_name = pooling
  self.pooling = util.get_att_pooling(pooling)
  # position emb does not seem to help
  self.use_seq_position = use_seq_position or FLAGS.use_seq_position
  if self.use_seq_position:
    self.position_emb = keras.layers.Embedding(500, FLAGS.emb_size)
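# Illustration only: how keras.layers.TimeDistributed makes a per-item encoder run over
# every element of a history sequence, which is what SeqsEncoder relies on above. The
# Dense "encoder" and the toy shapes below are placeholders, not the real title encoder.
import tensorflow as tf
from tensorflow import keras

item_encoder = keras.layers.Dense(4, name='toy_item_encoder')
his_encoder = keras.layers.TimeDistributed(item_encoder)

# [batch=2, history_len=3, item_dim=8] -> [2, 3, 4]: one encoded vector per history item
his = tf.random.normal([2, 3, 8])
print(his_encoder(his).shape)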
def __init__(self, emb=None, encoder=None, pooling=None, **kwargs):
  super(Encoder, self).__init__(**kwargs)
  self.emb = emb
  pooling = pooling or 'att'
  self.pooling = mt.layers.Pooling(pooling)
  self.output_dim = None
  if emb is not None:
    self.output_dim = emb.output_dim
  self.encoder = util.get_encoder(encoder) if isinstance(encoder, str) or encoder is None else encoder
  if not self.output_dim:
    self.output_dim = self.encoder.output_dim
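# Illustration only: a minimal additive attention pooling layer, sketching what a
# pooling='att' option typically computes; the real mt.layers.Pooling implementation
# may differ, and the hidden size here is arbitrary.
import tensorflow as tf
from tensorflow import keras

class ToyAttPooling(keras.layers.Layer):
  def __init__(self, hidden=64, **kwargs):
    super().__init__(**kwargs)
    self.proj = keras.layers.Dense(hidden, activation='tanh')
    self.score = keras.layers.Dense(1, use_bias=False)

  def call(self, x, mask=None):
    # x: [batch, seq_len, dim] -> attention-weighted sum over seq_len -> [batch, dim]
    logits = self.score(self.proj(x))  # [batch, seq_len, 1]
    if mask is not None:
      logits += (1.0 - tf.cast(mask[..., tf.newaxis], x.dtype)) * -1e9
    weights = tf.nn.softmax(logits, axis=1)
    return tf.reduce_sum(weights * x, axis=1)

print(ToyAttPooling()(tf.random.normal([2, 5, 8])).shape)  # (2, 8)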
def __init__(self):
  super(Model, self).__init__()
  self.mode = 'train'
  self.input_ = {}

  def _emb(vocab_name, emb_name=None):
    return util.create_emb(vocab_name, emb_name)

  # id / feature embeddings
  self.uemb = _emb('uid')
  self.demb = _emb('did')
  self.cat_emb = _emb('cat')
  self.scat_emb = _emb('sub_cat')
  self.entity_emb = _emb('entity')
  self.entity_type_emb = _emb('entity_type')
  self.word_emb = _emb('word')

  # time and position embeddings
  self.hour_emb = Embedding(24, FLAGS.emb_size, name='hour_emb')
  self.weekday_emb = Embedding(7, FLAGS.emb_size, name='weekday_emb')
  self.fresh_hour_emb = Embedding(300, FLAGS.emb_size, name='fresh_hour_emb')  # 7 * 24
  self.fresh_day_emb = Embedding(50, FLAGS.emb_size, name='fresh_day_emb')
  self.position_emb = Embedding(300, FLAGS.emb_size, name='position_emb')

  self.title_lookup = melt.layers.LookupArray(FLAGS.title_lookup)
  self.doc_lookup = melt.layers.LookupArray(FLAGS.doc_lookup)

  self.title_encoder = TitleEncoder(self.word_emb)
  self.titles_encoder = TitlesEncoder(self.title_encoder)

  # pooling layers
  self.sum_pooling = melt.layers.SumPooling()
  self.mean_pooling = melt.layers.MeanPooling()
  self.pooling = melt.layers.Pooling(FLAGS.pooling)
  self.feat_pooling = melt.layers.Pooling(FLAGS.feat_pooling)
  self.his_simple_pooling = melt.layers.Pooling(FLAGS.his_simple_pooling)

  self.dense = Dense(1) if not FLAGS.use_multi_dropout else melt.layers.MultiDropout(1, drop_rate=0.3)
  self.batch_norm = BatchNormalization()
  self.dropout = keras.layers.Dropout(FLAGS.dropout)

  # --arch-mlp-bot="13-512-256-64-16" --arch-mlp-top="512-256-1"
  activation = FLAGS.activation
  mlp_dims = [FLAGS.emb_size * 2, FLAGS.emb_size] if not FLAGS.big_mlp else [
      FLAGS.emb_size * 4, FLAGS.emb_size * 2, FLAGS.emb_size
  ]
  self.dense_mlp = melt.layers.MLP(mlp_dims,
                                   activation=activation,
                                   drop_rate=FLAGS.mlp_dropout,
                                   name='dense_mlp')
  mlp_dims = [512, 256, 64] if not FLAGS.big_mlp else [1024, 512, 256]
  self.mlp = melt.layers.MLP(mlp_dims,
                             activation=activation,
                             drop_rate=FLAGS.mlp_dropout,
                             name='mlp')

  # history encoding
  self.his_encoder = util.get_encoder(FLAGS.his_encoder)
  self.his_dense = keras.layers.Dense(FLAGS.hidden_size)
  self.his_pooling = util.get_att_pooling(FLAGS.his_pooling)
  self.his_pooling2 = util.get_att_pooling(FLAGS.his_pooling2)
  self.cur_dense = keras.layers.Dense(FLAGS.hidden_size)

  if FLAGS.his_strategy.startswith('bst'):
    self.transformer = melt.layers.transformer.Encoder(
        num_layers=1,
        d_model=FLAGS.hidden_size,
        num_heads=FLAGS.num_heads,
        dff=FLAGS.hidden_size,
        maximum_position_encoding=None,
        activation=FLAGS.transformer_activation,
        rate=FLAGS.transformer_dropout)

  self.fusion = melt.layers.SemanticFusion(drop_rate=0.1)

  if FLAGS.feat_pooling == 'cin':
    from deepctr.layers.interaction import CIN
    self.cin = CIN((128, 128), 'relu', True, 0, 1024)
    self.feat_pooling = self.cin

  # sampled-softmax auxiliary loss
  if FLAGS.aux_loss_rate or FLAGS.lm_target:
    vsize = gezi.get('vocab_sizes')['vid'][0]
    # hidden_size = FLAGS.hidden_size if FLAGS.his_encoder in ['lstm', 'gru'] else int(FLAGS.hidden_size / 2)
    hidden_size = int(FLAGS.hidden_size / 2)
    self.sampled_weight = self.add_weight(
        name='sampled_weight',
        shape=(vsize, hidden_size),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.sampled_bias = self.add_weight(
        name='sampled_bias',
        shape=(vsize,),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.softmax_loss_function = melt.seq2seq.gen_sampled_softmax_loss_function(
        5,
        vsize,
        weights=self.sampled_weight,
        biases=self.sampled_bias,
        log_uniform_sample=True,
        is_predict=False,
        sample_seed=1234)
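# Illustration only: the sampled-softmax auxiliary loss above presumably wraps something
# like tf.nn.sampled_softmax_loss; this standalone sketch shows that core call. The vocab
# size, hidden size and batch below are toy values; only num_sampled=5 mirrors the code above.
import tensorflow as tf

vsize, hidden_size, batch = 1000, 64, 4
sampled_weight = tf.Variable(tf.random.normal([vsize, hidden_size]))
sampled_bias = tf.Variable(tf.zeros([vsize]))

hidden = tf.random.normal([batch, hidden_size])  # per-example encoder output
labels = tf.random.uniform([batch, 1], maxval=vsize, dtype=tf.int64)

loss = tf.nn.sampled_softmax_loss(
    weights=sampled_weight, biases=sampled_bias,
    labels=labels, inputs=hidden,
    num_sampled=5, num_classes=vsize)
print(loss.shape)  # (4,), one loss value per example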
def __init__(self, **kwargs):
  super(Model, self).__init__(**kwargs)
  self.mode = 'train'
  self.input_ = {}

  def _emb(vocab_name, emb_name=None):
    return util.create_emb(vocab_name, emb_name)

  # id / feature embeddings
  self.uemb = _emb('uid')
  self.demb = _emb('did')
  self.cat_emb = _emb('cat')
  self.scat_emb = _emb('sub_cat')
  self.entity_emb = _emb('entity')
  self.entity_type_emb = _emb('entity_type')
  if not FLAGS.bert_dir or not FLAGS.bert_only:
    self.word_emb = _emb('word')

  # time and position embeddings
  self.hour_emb = Embedding(24, FLAGS.emb_size, name='hour_emb')
  self.weekday_emb = Embedding(7, FLAGS.emb_size, name='weekday_emb')
  self.fresh_hour_emb = Embedding(300, FLAGS.emb_size, name='fresh_hour_emb')  # 7 * 24
  self.fresh_day_emb = Embedding(50, FLAGS.emb_size, name='fresh_day_emb')
  self.position_emb = Embedding(300, FLAGS.emb_size, name='position_emb')

  # self.title_lookup = mt.layers.LookupArray(FLAGS.title_lookup, name='title_lookup')
  self.doc_lookup = mt.layers.LookupArray(FLAGS.doc_lookup, name='doc_lookup')

  # entity encoders
  if _is_ok('enti'):
    self.entities_encoder = Encoders([self.entity_emb, self.entity_type_emb],
                                     None,
                                     FLAGS.pooling,
                                     name='entities_encoder')
    self.his_entities_encoder = SeqsEncoder(self.entities_encoder,
                                            None,
                                            FLAGS.seqs_pooling,
                                            name='his_entities_encoder')

  # word-based content encoders (title / abstract / body)
  if not FLAGS.bert_dir or not FLAGS.bert_only:
    if _is_ok('^cur_title&') or _is_ok('abstract') or _is_ok('body'):
      if FLAGS.share_words_encoder:
        words_encoder = Encoder(self.word_emb,
                                FLAGS.seqs_encoder,
                                FLAGS.pooling,
                                name='words_encoder')
      else:
        words_encoder = None

    if _is_ok('^cur_title&'):
      self.title_encoder = words_encoder or Encoder(self.word_emb,
                                                    FLAGS.seqs_encoder,
                                                    FLAGS.pooling,
                                                    name='title_encoder')
      self.titles_encoder = SeqsEncoder(self.title_encoder,
                                        FLAGS.seqs_encoder,
                                        FLAGS.seqs_pooling,
                                        name='titles_encoder')
      self.titles_encoder2 = SeqsEncoder(self.title_encoder,
                                         FLAGS.seqs_encoder,
                                         FLAGS.seqs_pooling2,
                                         name='titles_encoder2')

    if _is_ok('^abstract&'):
      self.abstract_encoder = words_encoder or Encoder(self.word_emb,
                                                       FLAGS.seqs_encoder,
                                                       name='abstract_encoder')
      self.abstracts_encoder = SeqsEncoder(self.abstract_encoder,
                                           FLAGS.seqs_encoder,
                                           FLAGS.seqs_pooling,
                                           name='abstracts_encoder')

    if _is_ok('^body&'):
      self.body_encoder = words_encoder or Encoder(self.word_emb,
                                                   None,
                                                   FLAGS.pooling,
                                                   name='body_encoder')
      self.bodies_encoder = SeqsEncoder(self.body_encoder,
                                        FLAGS.seqs_encoder,
                                        FLAGS.seqs_pooling,
                                        name='bodies_encoder')

  # bert-based content encoders
  if FLAGS.bert_dir:
    # On TPU static checking fails, complaining bert_encoder has no weights:
    # "Weights for model bert_encoder have not yet been created. Weights are created when
    #  the Model is first called on inputs or `build()` is called with an `input_shape`."
    # max_input_length = None if not gezi.get('tpu') else FLAGS.max_bert_input_length
    max_input_length = None
    bert_encoder = mt.models.Bert(FLAGS.bert_dir,
                                  FLAGS.emb_size,
                                  max_input_length=max_input_length,
                                  return_sequences=FLAGS.bert_pooling_seqs,
                                  name='bert_encoder')
    self.bert_title_encoder = bert_encoder
    self.bert_abstract_encoder = bert_encoder
    self.bert_body_encoder = bert_encoder

    if FLAGS.bert_pooling_seqs:
      if FLAGS.share_words_encoder:
        bert_words_encoder = Encoder(None,
                                     bert_encoder,
                                     FLAGS.pooling,
                                     name='words_encoder')
      else:
        bert_words_encoder = None

      if _is_ok('bert_title'):
        self.bert_title_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_title_encoder')
      if _is_ok('bert_abstract'):
        self.bert_abstract_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_abstract_encoder')
      if _is_ok('bert_body'):
        self.bert_body_encoder = bert_words_encoder or Encoder(
            None, bert_encoder, FLAGS.pooling, name='bert_body_encoder')

    if _is_ok('bert_title'):
      self.bert_titles_encoder = SeqsEncoder(self.bert_title_encoder,
                                             FLAGS.seqs_encoder,
                                             FLAGS.seqs_pooling,
                                             name='bert_titles_encoder')
    if _is_ok('bert_abstract'):
      self.bert_abstracts_encoder = SeqsEncoder(self.bert_abstract_encoder,
                                                FLAGS.seqs_encoder,
                                                FLAGS.seqs_pooling,
                                                name='bert_abstracts_encoder')
    if _is_ok('bert_body'):
      self.bert_bodies_encoder = SeqsEncoder(self.bert_body_encoder,
                                             FLAGS.seqs_encoder,
                                             FLAGS.seqs_pooling,
                                             name='bert_bodies_encoder')

  # pooling layers
  self.sum_pooling = mt.layers.SumPooling()
  self.mean_pooling = mt.layers.MeanPooling()
  self.pooling = mt.layers.Pooling(FLAGS.pooling)
  self.feat_pooling = mt.layers.Pooling(FLAGS.feat_pooling, name='feat_pooling')
  self.his_simple_pooling = mt.layers.Pooling(FLAGS.his_simple_pooling)
  # self.his_entity_pooling = mt.layers.Pooling('att', name='his_entity_pooling')
  self.his_entity_pooling = util.get_att_pooling('din', name='his_entity_pooling')
  self.his_cat_pooling = mt.layers.Pooling('att', name='his_cat_pooling')
  self.his_scat_din_pooling = util.get_att_pooling('din', name='his_scat_din_pooling')

  self.dense = Dense(1) if not FLAGS.use_multi_dropout else mt.layers.MultiDropout(1, drop_rate=0.3)
  self.batch_norm = BatchNormalization()
  self.dropout = keras.layers.Dropout(FLAGS.dropout)

  # --arch-mlp-bot="13-512-256-64-16" --arch-mlp-top="512-256-1"
  activation = FLAGS.activation
  mlp_dims = [FLAGS.emb_size * 2, FLAGS.emb_size] if not FLAGS.big_mlp else [
      FLAGS.emb_size * 4, FLAGS.emb_size * 2, FLAGS.emb_size
  ]
  self.dense_mlp = mt.layers.MLP(mlp_dims,
                                 activation=activation,
                                 drop_rate=FLAGS.mlp_dropout,
                                 name='dense_mlp')
  mlp_dims = [512, 256, 64] if not FLAGS.big_mlp else [1024, 512, 256]
  self.mlp = mt.layers.MLP(mlp_dims,
                           activation=activation,
                           drop_rate=FLAGS.mlp_dropout,
                           batch_norm=FLAGS.mlp_bn,
                           name='mlp')

  # history encoding
  self.his_encoder = util.get_encoder(FLAGS.his_encoder)
  self.his_dense = keras.layers.Dense(FLAGS.hidden_size)
  self.his_pooling = util.get_att_pooling(FLAGS.his_pooling)
  self.his_pooling2 = util.get_att_pooling(FLAGS.his_pooling2)
  self.cur_dense = keras.layers.Dense(FLAGS.hidden_size)

  if FLAGS.his_strategy.startswith('bst'):
    self.transformer = mt.layers.transformer.Encoder(
        num_layers=1,
        d_model=FLAGS.hidden_size,
        num_heads=FLAGS.num_heads,
        dff=FLAGS.hidden_size,
        maximum_position_encoding=None,
        activation=FLAGS.transformer_activation,
        rate=FLAGS.transformer_dropout)

  self.fusion = mt.layers.SemanticFusion(drop_rate=0.1)

  if FLAGS.feat_pooling == 'cin':
    from deepctr.layers.interaction import CIN
    self.cin = CIN((128, 128), 'relu', True, 0, 1024)
    self.feat_pooling = self.cin

  # sampled-softmax auxiliary loss
  if FLAGS.aux_loss_rate or FLAGS.lm_target:
    vsize = gezi.get('vocab_sizes')['vid'][0]
    # hidden_size = FLAGS.hidden_size if FLAGS.his_encoder in ['lstm', 'gru'] else int(FLAGS.hidden_size / 2)
    hidden_size = int(FLAGS.hidden_size / 2)
    self.sampled_weight = self.add_weight(
        name='sampled_weight',
        shape=(vsize, hidden_size),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.sampled_bias = self.add_weight(
        name='sampled_bias',
        shape=(vsize,),
        # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
        dtype=tf.float32,
        trainable=True)
    self.softmax_loss_function = mt.seq2seq.gen_sampled_softmax_loss_function(
        5,
        vsize,
        weights=self.sampled_weight,
        biases=self.sampled_bias,
        log_uniform_sample=True,
        is_predict=False,
        sample_seed=1234)
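# Illustration only: a minimal DIN-style attention pooling sketch, i.e. attention over the
# user's history conditioned on the candidate item, which is roughly what the 'din' poolings
# above do; util.get_att_pooling's real implementation may differ, and all sizes are toy values.
import tensorflow as tf
from tensorflow import keras

class ToyDinPooling(keras.layers.Layer):
  def __init__(self, hidden=64, **kwargs):
    super().__init__(**kwargs)
    self.mlp = keras.Sequential([
        keras.layers.Dense(hidden, activation='relu'),
        keras.layers.Dense(1)])

  def call(self, his, query):
    # his: [batch, his_len, dim], query: [batch, dim] (candidate item)
    q = tf.repeat(query[:, tf.newaxis, :], tf.shape(his)[1], axis=1)
    feats = tf.concat([his, q, his * q, his - q], axis=-1)  # DIN-style interaction features
    weights = tf.nn.softmax(self.mlp(feats), axis=1)        # [batch, his_len, 1]
    return tf.reduce_sum(weights * his, axis=1)             # [batch, dim]

print(ToyDinPooling()(tf.random.normal([2, 6, 8]), tf.random.normal([2, 8])).shape)  # (2, 8)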