def translation(self, params, src_language, tgt_language):
    if params.model.type == "rnn":
        encoder = rnn_encoder(word_vec_size=params.rnn.word_vec_size,
                              hidden_size=params.rnn.hidden_size,
                              layers=params.model.encoder_layers,
                              rnn_dropout=params.rnn.rnn_dropout,
                              bidirectional=params.rnn.bidirectional,
                              decoder_layers=params.model.decoder_layers)
        decoder = rnn_decoder(attn_model=params.rnn.attention,
                              word_vec_size=params.rnn.word_vec_size,
                              hidden_size=params.rnn.hidden_size,
                              output_size=len(tgt_language),
                              layers=params.model.decoder_layers,
                              rnn_dropout=params.rnn.rnn_dropout,
                              attn_dropout=params.rnn.attn_dropout,
                              input_feed=params.rnn.input_feed)
        embedding = EmbeddingLayer(src_lang=src_language,
                                   tgt_lang=tgt_language,
                                   word_vec_size=params.rnn.word_vec_size,
                                   shared=params.model.shared_embedding)
        model = seq2seq(encoder, decoder, embedding, src_language,
                        tgt_language, params.model.max_length)
    elif params.model.type == "transformer":
        encoder = tr_encoder(
            d_model=params.transformer.d_model,
            n_head=params.transformer.heads,
            dim_ff=params.transformer.dim_feedforward,
            attn_dropout=params.transformer.attn_dropout,
            residual_dropout=params.transformer.residual_dropout,
            num_layers=params.model.encoder_layers,
            max_len=params.model.max_length)
        decoder = tr_decoder(
            d_model=params.transformer.d_model,
            n_head=params.transformer.heads,
            dim_ff=params.transformer.dim_feedforward,
            attn_dropout=params.transformer.attn_dropout,
            residual_dropout=params.transformer.residual_dropout,
            num_layers=params.model.decoder_layers,
            vocab_size=len(tgt_language),
            max_len=params.model.max_length)
        embedding = EmbeddingLayer(
            src_lang=src_language,
            tgt_lang=tgt_language,
            word_vec_size=params.transformer.d_model,
            shared=params.model.shared_embedding)
        model = Transformer(encoder, decoder, embedding, src_language,
                            tgt_language, params.model.max_length)
    return model
def lm(self, params, language):
    if params.model.task == "language generation":
        # GPT style language model
        lm_type = "generator"
    elif params.model.task == "language encoding":
        # BERT style language model
        lm_type = "encoder"
    if params.model.encoder_layers != params.model.decoder_layers:
        raise DimensionError(
            "In language models the number of layers in the "
            "encoder and decoder must match")
    encoder = tr_encoder(
        d_model=params.transformer.d_model,
        n_head=params.transformer.heads,
        dim_ff=params.transformer.dim_feedforward,
        attn_dropout=params.transformer.attn_dropout,
        residual_dropout=params.transformer.residual_dropout,
        num_layers=params.model.encoder_layers,
        max_len=params.model.max_length)
    # force shared embedding
    embedding = EmbeddingLayer(src_lang=language,
                               tgt_lang=language,
                               word_vec_size=params.transformer.d_model,
                               shared=True)
    model = LanguageModel(encoder, embedding, language,
                          params.model.max_length, lm_type)
    return model
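# The lm_type flag above only tags the model as GPT style ("generator") or
# BERT style ("encoder"); in practice the key difference between the two is
# the self-attention mask. Below is a minimal, self-contained numpy sketch of
# that distinction. It is an illustration only, not this repository's actual
# masking code.
import numpy as np


def attention_mask(seq_len: int, lm_type: str) -> np.ndarray:
    """A "generator" may only attend to current and earlier positions
    (causal mask); an "encoder" attends to the whole sequence."""
    if lm_type == "generator":
        return np.tril(np.ones((seq_len, seq_len), dtype=bool))
    return np.ones((seq_len, seq_len), dtype=bool)


print(attention_mask(4, "generator").astype(int))  # lower-triangular
print(attention_mask(4, "encoder").astype(int))    # all ones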
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        region_radius = int(config.RegionEmbedding.region_size / 2)
        sequence_length = data_processor.max_sequence_length[index] + \
            region_radius * 2
        vocab_ids = features["fixed_len_" + feature_name]
        padding_id = \
            data_processor.dict_list[index][data_processor.VOCAB_PADDING]
        vocab_ids = tf.pad(vocab_ids,
                           tf.constant([[0, 0],
                                        [region_radius, region_radius]]),
                           constant_values=padding_id)
        region_emb = embedding_layer.get_region_embedding(
            feature_name, vocab_ids, len(data_processor.dict_list[index]),
            params["epoch"], sequence_length,
            config.RegionEmbedding.region_size,
            config.RegionEmbedding.region_embedding_mode, mode,
            data_processor.pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index])
        # which words have a corresponding region embedding
        trimmed_seq = \
            vocab_ids[..., region_radius: sequence_length - region_radius]

        def mask(x):
            return tf.cast(
                tf.not_equal(tf.cast(x, tf.int32),
                             tf.constant(padding_id)), tf.float32)

        # remove padding (set its weight to zero)
        weight = tf.map_fn(mask, trimmed_seq, dtype=tf.float32,
                           back_prop=False)
        weight = tf.expand_dims(weight, -1)
        weighted_emb = region_emb * weight
        # document embedding
        hidden_layer = tf.reduce_sum(weighted_emb, 1)

        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(RegionEmbeddingEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
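# The mask-and-weight step above zeroes out padded positions before summing
# over the sequence to get a document embedding. A compact numpy sketch of
# the same pattern (an illustration, not the estimator code):
import numpy as np

padding_id = 0
vocab_ids = np.array([[5, 7, 2, 0, 0]])           # (batch, seq_len)
region_emb = np.random.rand(1, 5, 3)              # (batch, seq_len, dim)

# 1.0 where the token is real, 0.0 where it is padding.
weight = (vocab_ids != padding_id).astype(np.float32)[..., None]
doc_embedding = (region_emb * weight).sum(axis=1)  # (batch, dim)
assert doc_embedding.shape == (1, 3)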
def __init__(self, vocab_size_src: int, dim_embed_src: int,
             num_neurons_encoder: int, optim: object):
    self.embedding_layer = EmbeddingLayer(dim_in=vocab_size_src,
                                          embed_dim=dim_embed_src,
                                          optim=optim)
    self.rnn_cell = RecurrentNeuralNetwork(
        dim_in=dim_embed_src,
        num_neurons=num_neurons_encoder,
        optim=optim,
        embedding_layer=self.embedding_layer)
def __init__(self, data_processor, model_params):
    config = data_processor.config
    logger = data_processor.logger
    embedding_layer = EmbeddingLayer(config, logger=logger)
    model_helper = ModelHelper(config, logger=logger)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor)
        input_layer = []
        len_list = []
        for feature_name in params["feature_names"]:
            index = data_processor.dict_names.index(feature_name)
            input_layer.append(embedding_layer.get_vocab_embedding_sparse(
                feature_name, features["var_len_" + feature_name],
                len(data_processor.dict_list[index]), params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode))
            len_list.append(features[feature_name + "_var_real_len"])
            if data_processor.ngram_list[index] > 1:
                ngram_name = feature_name + "_ngram"
                index = data_processor.dict_names.index(ngram_name)
                input_layer.append(
                    embedding_layer.get_vocab_embedding_sparse(
                        ngram_name, features["var_len_" + ngram_name],
                        len(data_processor.dict_list[index]),
                        params["epoch"], mode=mode))
                len_list.append(features[ngram_name + "_var_real_len"])

        hidden_layer = input_layer[0]
        total_len = len_list[0]
        for i in range(1, len(input_layer)):
            hidden_layer = hidden_layer + input_layer[i]
            total_len = total_len + len_list[i]
        hidden_layer = tf.div(hidden_layer, total_len)
        hidden_layer = tf.contrib.layers.fully_connected(
            inputs=hidden_layer, num_outputs=256, activation_fn=tf.nn.relu)
        hidden_layer = tf.contrib.layers.fully_connected(
            inputs=hidden_layer,
            num_outputs=config.embedding_layer.embedding_dimension,
            activation_fn=tf.nn.relu)
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)

        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(FastTextEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
def __init__(self, vocab_size_trg: int, dim_embed_trg: int,
             num_neurons_decoder: int, optim: object):
    self.embedding_layer = EmbeddingLayer(vocab_size_trg, dim_embed_trg,
                                          optim)
    # For the decoder, we tie the weights of the embedding layer and the
    # linear projection before the softmax activation. If vocab_size_src
    # and vocab_size_trg are the same, it is also possible to tie all the
    # weights, but that is not done here to keep the implementation
    # simple. See: https://arxiv.org/abs/1608.05859
    self.rnn_cell = RecurrentNeuralNetwork(dim_embed_trg,
                                           num_neurons_decoder,
                                           optim,
                                           self.embedding_layer,
                                           predict=True,
                                           costFunction=crossEntropy)
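# The weight tying referenced above (Press & Wolf,
# https://arxiv.org/abs/1608.05859) amounts to reusing the embedding matrix
# as the pre-softmax output projection. A minimal numpy sketch of the idea,
# independent of the classes in this file:
import numpy as np

vocab_size, embed_dim = 10, 4
rng = np.random.default_rng(0)

# One shared parameter matrix: rows are input embeddings, and its transpose
# serves as the output projection.
E = rng.normal(size=(vocab_size, embed_dim))

token_id = 3
hidden = np.tanh(E[token_id])                 # stand-in for the decoder state
logits = hidden @ E.T                         # tied output projection
probs = np.exp(logits) / np.exp(logits).sum()
assert probs.shape == (vocab_size,)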
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _convolutional_block(inputs, num_layers, num_filters, name, mode):
        """Convolutional block of VDCNN.
        A convolutional block contains 2 conv layers and can be repeated:
        Temp Conv --> Batch Norm --> ReLU --> Temp Conv --> Batch Norm --> ReLU
        """
        with tf.variable_scope("conv_block_%s" % name):
            is_training = False
            if mode == tf.estimator.ModeKeys.TRAIN:
                is_training = True
            hidden_layer = inputs
            initializer_normal = tf.random_normal_initializer(stddev=0.1)
            initializer_const = tf.constant_initializer(0.0)
            for i in range(0, num_layers):
                filter_shape = [
                    3, 1, hidden_layer.get_shape()[3], num_filters
                ]
                w = tf.get_variable(name='W_' + str(i),
                                    shape=filter_shape,
                                    initializer=initializer_normal)
                b = tf.get_variable(name='b_' + str(i),
                                    shape=[num_filters],
                                    initializer=initializer_const)
                conv = tf.nn.conv2d(hidden_layer, w, strides=[1, 1, 1, 1],
                                    padding="SAME")
                conv = tf.nn.bias_add(conv, b)
                batch_norm = tf.layers.batch_normalization(
                    conv, center=True, scale=True, training=is_training)
                hidden_layer = tf.nn.relu(batch_norm)
        return hidden_layer

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        """VDCNN architecture
        1. text (char level is recommended)
        2. embedding lookup
        3. conv layer (64 feature maps)
        4. conv blocks (each contains 2 conv layers and can be repeated)
        5. fc1
        6. fc2
        7. fc3 (softmax)
        Pooling is important and the shortcut is optional.
        """
        sequence_length = data_processor.max_sequence_length[index]
        # embedding shape: [batch_size, sequence_length, embedding_dimension]
        embedding = embedding_layer.get_vocab_embedding(
            feature_name, features["fixed_len_" + feature_name],
            len(data_processor.dict_list[index]), params["epoch"],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index],
            mode=mode)
        embedding = tf.reshape(embedding, [
            -1, sequence_length, config.embedding_layer.embedding_dimension
        ])
        embedding = tf.expand_dims(embedding, -1)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embedding = model_helper.dropout(
                embedding,
                config.embedding_layer.embedding_dropout_keep_prob)
        initializer = tf.random_normal_initializer(stddev=0.1)

        # first conv layer (filter_size=3, #feature_map=64)
        with tf.variable_scope("first_conv") as scope:
            filter_shape = [
                3, config.embedding_layer.embedding_dimension, 1, 64
            ]
            w = tf.get_variable(name='W_1', shape=filter_shape,
                                initializer=initializer)
            """
            argv1: input = [batch_size, in_height, in_width, in_channels]
            argv2: filter = [filter_height, filter_width,
                             in_channels, out_channels]
            argv3: strides
            return: feature_map
            note:
                1. out_channels = num_filters = #feature map
                2. for padding="SAME",
                   new_height = new_width = ceil(input_size / stride)
                   for padding="VALID",
                   new_height = new_width =
                       ceil((input_size - filter_size + 1) / stride)
            """
            conv = tf.nn.conv2d(
                embedding, w,
                strides=[1, 1,
                         config.embedding_layer.embedding_dimension, 1],
                padding="SAME")
            b = tf.get_variable(name='b_1', shape=[64],
                                initializer=tf.constant_initializer(0.0))
            out = tf.nn.bias_add(conv, b)
            first_conv = tf.nn.relu(out)

        """All convolutional blocks:
        4 kinds of conv blocks, whose #feature_map are 64, 128, 256, 512.
        Layers per block by depth:
            Depth:             9   17   29   49
            -----------------------------------
            conv block 512:    2    4    4    6
            conv block 256:    2    4    4   10
            conv block 128:    2    4   10   16
            conv block 64:     2    4   10   16
            First conv layer:  1    1    1    1
        """
        vdcnn_depth = {}
        vdcnn_depth[9] = [2, 2, 2, 2]
        vdcnn_depth[17] = [4, 4, 4, 4]
        vdcnn_depth[29] = [10, 10, 4, 4]
        vdcnn_depth[49] = [16, 16, 10, 6]
        max_pool_ksize = [1, 3, 1, 1]
        max_pool_strides = [1, 2, 1, 1]
        num_filters = [64, 128, 256, 512]
        conv_block = first_conv
        for i in range(0, 4):
            conv_block = _convolutional_block(
                conv_block,
                num_layers=vdcnn_depth[config.TextVDCNN.vdcnn_depth][i],
                num_filters=num_filters[i],
                name="cb_" + str(i),
                mode=mode)
            pool = tf.nn.max_pool(conv_block,
                                  ksize=max_pool_ksize,
                                  strides=max_pool_strides,
                                  padding='SAME',
                                  name="pool_" + str(i))
        pool_shape = int(np.prod(pool.get_shape()[1:]))
        pool = tf.reshape(pool, (-1, pool_shape))

        # fc1
        fc1 = tf.contrib.layers.fully_connected(inputs=pool,
                                                num_outputs=2048,
                                                activation_fn=tf.nn.relu)
        if mode == tf.estimator.ModeKeys.TRAIN:
            fc1 = model_helper.dropout(
                fc1, config.train.hidden_layer_dropout_keep_prob)
        # fc2
        hidden_layer = tf.contrib.layers.fully_connected(
            inputs=fc1, num_outputs=2048, activation_fn=tf.nn.relu)
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)
        # fc3 (softmax)
        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"])

    super(TextVDCNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        padding_id = \
            data_processor.dict_list[index][data_processor.VOCAB_PADDING]
        window_size = config.TextDRNN.drnn_window_size
        vocab_ids = tf.pad(features["fixed_len_" + feature_name],
                           tf.constant([[0, 0], [window_size - 1, 0]]),
                           constant_values=padding_id)
        embedding_lookup_table = embedding_layer.get_lookup_table(
            feature_name, len(data_processor.dict_list[index]),
            config.embedding_layer.embedding_dimension, params["epoch"],
            dict_map=data_processor.dict_list[index],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            mode=mode)
        sequence_length = \
            data_processor.max_sequence_length[index] + window_size - 1
        aligned_seq = \
            [tf.slice(vocab_ids, [0, i], [-1, window_size])
             for i in range(0, sequence_length - window_size + 1)]
        aligned_seq = \
            tf.reshape(tf.concat(list(aligned_seq), 1),
                       [-1, sequence_length - window_size + 1, window_size])
        embedding = tf.nn.embedding_lookup(embedding_lookup_table,
                                           aligned_seq)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embedding = model_helper.dropout(
                embedding,
                config.embedding_layer.embedding_dropout_keep_prob)
        embedding = tf.reshape(
            embedding,
            [-1, window_size, config.embedding_layer.embedding_dimension])
        _, state = model_layer.recurrent(
            embedding,
            config.TextDRNN.drnn_rnn_dimension,
            cell_type=config.TextDRNN.drnn_cell_type,
            cell_hidden_keep_prob=config.TextDRNN.
            drnn_cell_hidden_keep_prob,
            mode=mode,
            use_bidirectional=False,
            name="drnn",
            reuse=None)
        state = tf.reshape(state, [
            -1, sequence_length - window_size + 1,
            config.TextDRNN.drnn_rnn_dimension
        ])
        if mode == tf.estimator.ModeKeys.TRAIN:
            state = model_layer.batch_norm(state,
                                           tf.constant(True, dtype=tf.bool),
                                           name="bn")
        else:
            state = model_layer.batch_norm(state,
                                           tf.constant(False,
                                                       dtype=tf.bool),
                                           name="bn")
        state = tf.contrib.layers.fully_connected(
            state, config.embedding_layer.embedding_dimension,
            biases_initializer=None)

        def _mask_no_padding(x):
            return tf.cast(
                tf.not_equal(tf.cast(x, tf.int32),
                             tf.constant(padding_id)), tf.float32)

        def _mask_padding(x):
            return tf.cast(
                tf.equal(tf.cast(x, tf.int32),
                         tf.constant(padding_id)), tf.float32)

        trim_seq = vocab_ids[..., window_size - 1:]
        weight = tf.map_fn(_mask_no_padding, trim_seq, dtype=tf.float32,
                           back_prop=False)
        weight = tf.expand_dims(weight, -1)
        weighted_emb = state * weight
        neg = tf.map_fn(_mask_padding, trim_seq, dtype=tf.float32,
                        back_prop=False)
        neg = tf.expand_dims(neg, -1) * tf.float32.min
        weighted_emb = weighted_emb + neg
        hidden_layer = tf.reduce_max(weighted_emb, axis=1)
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)

        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(TextDRNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        sequence_length = data_processor.max_sequence_length[index] + \
            config.fixed_len_feature.token_padding_begin + \
            config.fixed_len_feature.token_padding_end
        padding_value = \
            data_processor.token_map[data_processor.VOCAB_PADDING]
        embedding = embedding_layer.get_vocab_embedding(
            feature_name, features["fixed_len_" + feature_name],
            len(data_processor.dict_list[index]), params["epoch"],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index],
            mode=mode,
            begin_padding_size=config.fixed_len_feature.
            token_padding_begin,
            end_padding_size=config.fixed_len_feature.token_padding_end,
            padding_id=padding_value)
        embedding = tf.expand_dims(embedding, -1)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embedding = model_helper.dropout(
                embedding,
                config.embedding_layer.embedding_dropout_keep_prob)

        filter_sizes = config.TextCNN.filter_sizes
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("convolution-max_pooling-%d" % filter_size):
                filter_shape = \
                    [filter_size,
                     config.embedding_layer.embedding_dimension, 1,
                     config.TextCNN.num_filters]
                W = tf.Variable(tf.random_uniform(filter_shape,
                                                  minval=-0.01,
                                                  maxval=0.01),
                                name="W-%d" % filter_size)
                b = tf.get_variable("b-%d" % filter_size,
                                    [config.TextCNN.num_filters])
                # Strides is set to [1, 1, 1, 1]:
                # the convolution slides one token at a time.
                convolution = tf.nn.conv2d(embedding, W,
                                           strides=[1, 1, 1, 1],
                                           padding="VALID",
                                           name="convolution")
                h = tf.nn.relu(tf.nn.bias_add(convolution, b), name="relu")
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="max_pooling")
                pooled_outputs.append(pooled)
        num_filters_total = config.TextCNN.num_filters * len(filter_sizes)
        # pooled_outputs contains tensors
        # with shape [batch_size, 1, 1, num_filters]
        h_pool = tf.concat(pooled_outputs, 3)
        hidden_layer = tf.reshape(h_pool, [-1, num_filters_total])
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)
        # To reproduce the result in the paper, the following code
        # should be added:
        # hidden_layer *= FLAGS.hidden_layer_dropout_keep_prob * (
        #     1 - FLAGS.hidden_layer_dropout_keep_prob)

        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(TextCNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
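# To make the shape bookkeeping in the comments above concrete, a small
# self-contained numpy sketch (not the estimator's code) of how per-filter-
# size max pooling and concatenation yield a [batch_size, num_filters_total]
# feature vector:
import numpy as np

batch_size, seq_len, num_filters = 2, 10, 4
filter_sizes = [2, 3, 4]

pooled_outputs = []
for filter_size in filter_sizes:
    # Stand-in for the ReLU feature map after a VALID convolution:
    # shape [batch_size, seq_len - filter_size + 1, 1, num_filters].
    feature_map = np.random.rand(batch_size, seq_len - filter_size + 1, 1,
                                 num_filters)
    # Max pooling over the whole time axis leaves
    # [batch_size, 1, 1, num_filters].
    pooled_outputs.append(feature_map.max(axis=1, keepdims=True))

h_pool = np.concatenate(pooled_outputs, axis=3)
hidden_layer = h_pool.reshape(batch_size, num_filters * len(filter_sizes))
assert hidden_layer.shape == (2, 12)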
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        embedding = embedding_layer.get_vocab_embedding(
            feature_name, features["fixed_len_" + feature_name],
            len(data_processor.dict_list[index]), params["epoch"],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index],
            mode=mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embedding = model_helper.dropout(
                embedding,
                config.embedding_layer.embedding_dropout_keep_prob)

        rnn_fw_cell, rnn_bw_cell = None, None
        if config.TextRNN.cell_type == "lstm":
            rnn_fw_cell = rnn.BasicLSTMCell(config.TextRNN.rnn_dimension)
            rnn_bw_cell = rnn.BasicLSTMCell(config.TextRNN.rnn_dimension)
        elif config.TextRNN.cell_type == "gru":
            rnn_fw_cell = rnn.GRUCell(config.TextRNN.rnn_dimension)
            rnn_bw_cell = rnn.GRUCell(config.TextRNN.rnn_dimension)

        if config.TextRNN.use_bidirectional:
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                rnn_fw_cell, rnn_bw_cell, embedding, dtype=tf.float32,
                sequence_length=tf.reshape(
                    features[feature_name + "_fixed_real_len"], [-1]))
            text_embedding = tf.concat(outputs, 2)
        else:
            text_embedding, _ = tf.nn.dynamic_rnn(rnn_fw_cell, embedding,
                                                  dtype=tf.float32)

        if config.model_common.use_self_attention:
            hidden_layer = model_helper.self_attention(
                text_embedding, config.model_common.attention_dimension)
        else:
            sum_layer = tf.reduce_sum(text_embedding, axis=1)
            hidden_layer = sum_layer / tf.cast(
                features[feature_name + "_fixed_real_len"],
                dtype=tf.float32)
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)

        return model_helper.get_softmax_estimator_spec(
            hidden_layer, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(TextRNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
def _model(self):
    # Input
    embedding = EmbeddingLayer(name="Input-embedding-0",
                               zero_padding=True,
                               scale=True,
                               word_count=self.source_word_count,
                               model_dimension=self.model_dimension,
                               network=self)
    positional_encoder = PositionalEncodingLayer(
        name="Input-positional_encoder-0",
        zero_padding=True,
        scale=True,
        model_dimension=self.model_dimension,
        network=self)
    add_embedding_position = AdditionLayer(
        name="Input-add-0",
        input_list=[embedding, positional_encoder],
        network=self)
    self.add_layer(add_embedding_position)

    # Encoder
    for i in range(1, 2):
        self.add_layer(
            MultiheadAttentionLayer(
                name="Encoder-multihead_attention_1-{0}".format(i),
                batch_size=self.batch_size,
                model_dimension=self.model_dimension,
                network=self))
        self.add_layer(NormalizeLayer(
            name="Encoder-normalize_1-{0}".format(i), network=self))
        self.add_layer(
            FeedForwardLayer(name="Encoder-feedforward_1-{0}".format(i),
                             batch_size=self.batch_size,
                             dimension_inner=self.dimension_inner,
                             model_dimension=self.model_dimension,
                             network=self))
        self.add_layer(NormalizeLayer(
            name="Encoder-normalize_2-{0}".format(i), network=self))

    # Output
    embedding = EmbeddingLayer(name="Output-embedding-0",
                               zero_padding=True,
                               scale=True,
                               word_count=self.target_word_count,
                               model_dimension=self.model_dimension,
                               network=self)
    positional_encoder = PositionalEncodingLayer(
        name="Output-positional_encoder-0",
        zero_padding=True,
        scale=True,
        model_dimension=self.model_dimension,
        network=self)
    add_embedding_position = AdditionLayer(
        name="Output-add-0",
        input_list=[embedding, positional_encoder],
        network=self)
    self.add_layer(add_embedding_position)

    # Decoder
    for j in range(1, 2):
        self.add_layer(
            MultiheadAttentionLayer(
                name="Decoder-multihead_attention_1-{0}".format(j),
                batch_size=self.batch_size,
                model_dimension=self.model_dimension,
                network=self))
        self.add_layer(NormalizeLayer(
            name="Decoder-normalize_1-{0}".format(j), network=self))
        self.add_layer(
            MultiheadAttentionLayer(
                name="Decoder-multihead_attention_2-{0}".format(j),
                batch_size=self.batch_size,
                model_dimension=self.model_dimension,
                network=self))
        self.add_layer(NormalizeLayer(
            name="Decoder-normalize_2-{0}".format(j), network=self))
        self.add_layer(
            FeedForwardLayer(name="Decoder-feedforward_1-{0}".format(j),
                             batch_size=self.batch_size,
                             dimension_inner=self.dimension_inner,
                             model_dimension=self.model_dimension,
                             network=self))
        self.add_layer(NormalizeLayer(
            name="Decoder-normalize_3-{0}".format(j), network=self))

    # Final Output
    self.add_layer(FinalLayer(name="Final",
                              model_dimension=self.model_dimension,
                              word_count=self.target_word_count,
                              network=self))
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        sequence_length = data_processor.max_sequence_length[index]
        embedding = embedding_layer.get_vocab_embedding(
            feature_name, features["fixed_len_" + feature_name],
            len(data_processor.dict_list[index]), params["epoch"],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index],
            mode=mode)
        dimension = config.embedding_layer.embedding_dimension
        hidden_size = config.AttentiveConvNet.attentive_hidden_size

        # first fully connected matrix
        mat_hidden1 = tf.get_variable(
            "mat_hidden1",
            shape=[dimension, hidden_size],
            initializer=tf.random_uniform_initializer(
                -1.0 * pow(6.0 / (dimension + hidden_size), 0.5),
                pow(6.0 / (hidden_size + dimension), 0.5)))
        bias_hidden1 = tf.get_variable("bias_hidden1", shape=[hidden_size])
        # second fully connected matrix
        mat_hidden2 = tf.get_variable(
            "mat_hidden2",
            shape=[hidden_size, hidden_size],
            initializer=tf.random_uniform_initializer(
                -1.0 * pow(3.0 / hidden_size, 0.5),
                pow(3.0 / hidden_size, 0.5)))
        bias_hidden2 = tf.get_variable("bias_hidden2", shape=[hidden_size])

        def _gconv(context, filter_width, name):
            """Compute equations 7, 8 and 9."""
            bias_ha = tf.get_variable(name + "_bias_ha", shape=[dimension])
            bias_ga = tf.get_variable(name + "_bias_ga", shape=[dimension])
            embedded_context = tf.expand_dims(context, -1)
            filter_shape = [filter_width, dimension, 1, dimension]
            filter_o = tf.Variable(tf.truncated_normal(filter_shape,
                                                       stddev=0.1),
                                   name=name + "_filter_Wo")
            filter_g = tf.Variable(tf.truncated_normal(filter_shape,
                                                       stddev=0.1),
                                   name=name + "_filter_Wg")
            conv_o = tf.nn.conv2d(embedded_context, filter_o,
                                  strides=[1, 1, dimension, 1],
                                  padding="SAME",
                                  name=name + "_convolution_Wo")
            conv_g = tf.nn.conv2d(embedded_context, filter_g,
                                  strides=[1, 1, dimension, 1],
                                  padding="SAME",
                                  name=name + "_convolution_Wg")
            conv_o = tf.keras.backend.permute_dimensions(
                conv_o, (0, 1, 3, 2))
            conv_g = tf.keras.backend.permute_dimensions(
                conv_g, (0, 1, 3, 2))
            o_context = tf.tanh(tf.nn.bias_add(tf.squeeze(conv_o, [-1]),
                                               bias_ha),
                                name=name + "_Wo_tanh")
            g_context = tf.sigmoid(tf.nn.bias_add(tf.squeeze(conv_g, [-1]),
                                                  bias_ga),
                                   name=name + "_Wg_sigmoid")
            return g_context * context + (1 - g_context) * o_context

        def _attentive_context(source, focus, name="context_generate"):
            if config.AttentiveConvNet.attentive_version == 'advanced':
                mat_dimension = 2 * dimension
            else:
                mat_dimension = dimension
            mat_tx = tf.get_variable(
                name + "mat_tx",
                shape=[mat_dimension, mat_dimension],
                initializer=tf.random_uniform_initializer(
                    -1.0 * pow(3.0 / dimension, 0.5),
                    pow(3.0 / dimension, 0.5)))
            mat_ta = tf.get_variable(
                name + "mat_ta",
                shape=[dimension, mat_dimension],
                initializer=tf.random_uniform_initializer(
                    -1.0 * pow(3.0 / dimension, 0.5),
                    pow(3.0 / dimension, 0.5)))
            # use dot and batch_dot in keras to compute equation 2
            embedding_conv = tf.keras.backend.dot(source, mat_tx)
            scores = tf.keras.backend.batch_dot(
                embedding_conv,
                tf.keras.backend.permute_dimensions(focus, (0, 2, 1)))
            scores_softmax = tf.keras.activations.softmax(scores, axis=1)
            # compute the context feature map as in equation 4
            res = tf.matmul(scores_softmax, focus)
            # weight the output as in equation 6
            context = tf.keras.backend.permute_dimensions(
                tf.keras.backend.dot(
                    mat_ta,
                    tf.keras.backend.permute_dimensions(res, (0, 2, 1))),
                (1, 2, 0))
            return context

        def _attentive_convolution(beneficiary, attentive_context,
                                   name="attentive_convolution"):
            """Compute equation 6."""
            bias = tf.get_variable(name + "bias", shape=[dimension])
            embedded_text = tf.expand_dims(beneficiary, -1)
            filter_shape = [
                config.AttentiveConvNet.attentive_width, dimension, 1,
                dimension
            ]
            conv_filter = tf.Variable(tf.truncated_normal(filter_shape,
                                                          stddev=0.1),
                                      name=name + "filter")
            convolution = tf.nn.conv2d(embedded_text, conv_filter,
                                       strides=[1, 1, dimension, 1],
                                       padding="SAME",
                                       name=name + "convolution")
            convolution = tf.keras.backend.permute_dimensions(
                convolution, (0, 1, 3, 2))
            conv_text = tf.squeeze(convolution, [-1])
            merge_text = tf.add(attentive_context, conv_text)
            merge_text = tf.nn.bias_add(merge_text, bias)
            tanh_out = tf.tanh(merge_text, name=name + "tanh")
            tanh_out = tf.expand_dims(tanh_out, -1)
            return tanh_out

        if config.AttentiveConvNet.attentive_version == "advanced":
            # generate source
            source_x_uni = _gconv(embedding, 1, "source_uni")
            source_x_tri = _gconv(embedding, 3, "source_tri")
            x_mgran = tf.concat([source_x_uni, source_x_tri], -1)
            # generate focus
            focus_a_uni = _gconv(embedding, 1, "focus_uni")
            focus_a_tri = _gconv(embedding, 3, "focus_tri")
            a_mgran = tf.concat([focus_a_uni, focus_a_tri], -1)
            # generate beneficiary
            x_beneficiary = _gconv(embedding, 1, "beni_uni")
        else:  # light version
            x_mgran, a_mgran, x_beneficiary = \
                embedding, embedding, embedding

        context = _attentive_context(x_mgran, a_mgran)
        attentive_embedding = _attentive_convolution(x_beneficiary,
                                                     context)
        pooled = tf.nn.max_pool(attentive_embedding,
                                ksize=[1, sequence_length, 1, 1],
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="max_pooling")
        hidden_layer = tf.reshape(
            pooled, [-1, config.embedding_layer.embedding_dimension])
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer = model_helper.dropout(
                hidden_layer, config.train.hidden_layer_dropout_keep_prob)
        hidden_layer1 = tf.nn.relu(
            tf.matmul(hidden_layer, mat_hidden1) + bias_hidden1,
            name="relu_hidden1")
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer1 = model_helper.dropout(
                hidden_layer1, config.train.hidden_layer_dropout_keep_prob)
        hidden_layer2 = tf.nn.relu(
            tf.matmul(hidden_layer1, mat_hidden2) + bias_hidden2,
            name="relu_hidden2")
        if mode == tf.estimator.ModeKeys.TRAIN:
            hidden_layer2 = model_helper.dropout(
                hidden_layer2, config.train.hidden_layer_dropout_keep_prob)
        # concat max pooling, hidden layer 1 output and hidden layer 2 output
        output = tf.concat([hidden_layer, hidden_layer1, hidden_layer2], -1)

        return model_helper.get_softmax_estimator_spec(
            output, mode, labels, params["label_size"],
            params["static_embedding"])

    super(AttentiveConvNetEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
def test_embed_forward(self):
    inp_seq = np.array([[3, 2, 1, 4, 0], [0, 0, 1, 1, 2]])
    obj = EmbeddingLayer(5, 3, GradientDescentMomentum)
    output = obj.forward(inp_seq)
    self.assertEqual(output.shape, (2, 5, 3))
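# The shape asserted above is (batch_size, sequence_length, embedding_dim).
# A minimal numpy sketch of an embedding lookup with that contract, shown
# only for context and independent of the EmbeddingLayer under test:
import numpy as np


def embed_forward(inp_seq: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Row lookup: token ids of shape (batch, seq_len) index into a
    (vocab_size, embed_dim) weight matrix."""
    return weights[inp_seq]


weights = np.random.default_rng(0).normal(size=(5, 3))  # vocab=5, dim=3
inp_seq = np.array([[3, 2, 1, 4, 0], [0, 0, 1, 1, 2]])
assert embed_forward(inp_seq, weights).shape == (2, 5, 3)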
def __init__(self, data_processor, model_params):
    config = data_processor.config
    embedding_layer = EmbeddingLayer(config)
    model_helper = ModelHelper(config)

    def _convolution(inputs, num_filters, name):
        """Two layers of convolution."""
        with tf.variable_scope("two_conv-%s" % name):
            initializer_normal = tf.random_normal_initializer(stddev=0.01)
            filter_shape = [3, 1, num_filters, num_filters]
            W1 = tf.get_variable(name="W1-%s" % name,
                                 shape=filter_shape,
                                 initializer=initializer_normal)
            b1 = tf.get_variable(name="b1-%s" % name, shape=[num_filters])
            # pre-activation, before convolution
            relu1 = tf.nn.relu(inputs, name="relu1-%s" % name)
            conv1 = tf.nn.conv2d(relu1, W1, strides=[1, 1, 1, 1],
                                 padding="SAME",
                                 name="convolution1-%s" % name)
            conv1 = tf.nn.bias_add(conv1, b1)

            W2 = tf.get_variable(name="W2-%s" % name,
                                 shape=filter_shape,
                                 initializer=initializer_normal)
            b2 = tf.get_variable(name="b2-%s" % name, shape=[num_filters])
            # pre-activation
            relu2 = tf.nn.relu(conv1, name="relu2-%s" % name)
            conv2 = tf.nn.conv2d(relu2, W2, strides=[1, 1, 1, 1],
                                 padding="SAME",
                                 name="convolution2-%s" % name)
            conv2 = tf.nn.bias_add(conv2, b2)
            # return shortcut connection with identity mapping
            return inputs + conv2

    def _convolution_block(inputs, num_filters, name):
        """DPCNN block architecture
        1. pooling (strides=2, sequence halved)
        2. relu
        3. conv1 layer
        4. relu
        5. conv2 layer
        6. return pooling output + conv2 layer output
        """
        with tf.variable_scope("pooling-%s" % name):
            pooled = tf.nn.max_pool(inputs,
                                    ksize=[1, 3, 1, 1],
                                    strides=[1, 2, 1, 1],
                                    padding='SAME',
                                    name="max-pooling-%s" % name)
            return _convolution(pooled, num_filters, name)

    def _model_fn(features, labels, mode, params):
        self._check(params["feature_names"], data_processor, config)
        feature_name = params["feature_names"][0]
        index = data_processor.dict_names.index(feature_name)
        num_filters = config.TextDPCNN.num_filters
        sequence_length = data_processor.max_sequence_length[index]
        embedding = embedding_layer.get_vocab_embedding(
            feature_name, features["fixed_len_" + feature_name],
            len(data_processor.dict_list[index]), params["epoch"],
            pretrained_embedding_file=data_processor.
            pretrained_embedding_files[index],
            dict_map=data_processor.dict_list[index],
            mode=mode)
        embedding_dims = config.embedding_layer.embedding_dimension
        embedding = tf.reshape(embedding,
                               [-1, sequence_length, embedding_dims])
        embedding = tf.expand_dims(embedding, -1)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embedding = model_helper.dropout(
                embedding,
                config.embedding_layer.embedding_dropout_keep_prob)
        initializer = tf.random_normal_initializer(stddev=0.01)

        with tf.variable_scope("dpcnn") as scope:
            filter_shape = [3, embedding_dims, 1, num_filters]
            W0 = tf.get_variable(name="W0", shape=filter_shape,
                                 initializer=initializer)
            b0 = tf.get_variable(name="b0", shape=[num_filters])
            conv0 = tf.nn.conv2d(embedding, W0,
                                 strides=[1, 1, embedding_dims, 1],
                                 padding="SAME")
            conv0 = tf.nn.bias_add(conv0, b0)
            conv = _convolution(conv0, num_filters, "conv-1-2")
            for i in range(config.TextDPCNN.dpcnn_blocks):
                conv = _convolution_block(conv, num_filters,
                                          "convolution-block-%d" % i)
            outputs_shape = int(np.prod(conv.get_shape()[1:]))
            outputs = tf.reshape(conv, (-1, outputs_shape))

        return model_helper.get_softmax_estimator_spec(
            outputs, mode, labels, params["label_size"],
            params["static_embedding"], data_processor.label_dict_file)

    super(TextDPCNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=config.model_common.checkpoint_dir,
        config=model_helper.get_run_config(),
        params=model_params)
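# Step 1 of the DPCNN block above (max pooling with window 3, stride 2 and
# SAME padding) halves the sequence length at every block. A small numpy
# illustration of just that downsampling step, shown for context and not
# taken from the estimator code:
import numpy as np

seq_len, num_filters = 16, 4
x = np.random.rand(1, seq_len, 1, num_filters)

# Pad one position on each side (SAME padding for a window of 3), then take
# the max over windows of 3 starting every 2 positions.
pad = np.pad(x, ((0, 0), (1, 1), (0, 0), (0, 0)), constant_values=-np.inf)
pooled = np.stack([pad[:, i:i + 3].max(axis=1)
                   for i in range(0, seq_len, 2)], axis=1)
assert pooled.shape == (1, seq_len // 2, 1, num_filters)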