Example #1
    def translation(self, params, src_language, tgt_language):
        if params.model.type == "rnn":
            encoder = rnn_encoder(word_vec_size=params.rnn.word_vec_size,
                                  hidden_size=params.rnn.hidden_size,
                                  layers=params.model.encoder_layers,
                                  rnn_dropout=params.rnn.rnn_dropout,
                                  bidirectional=params.rnn.bidirectional,
                                  decoder_layers=params.model.decoder_layers)

            decoder = rnn_decoder(attn_model=params.rnn.attention,
                                  word_vec_size=params.rnn.word_vec_size,
                                  hidden_size=params.rnn.hidden_size,
                                  output_size=len(tgt_language),
                                  layers=params.model.decoder_layers,
                                  rnn_dropout=params.rnn.rnn_dropout,
                                  attn_dropout=params.rnn.attn_dropout,
                                  input_feed=params.rnn.input_feed)

            embedding = EmbeddingLayer(src_lang=src_language,
                                       tgt_lang=tgt_language,
                                       word_vec_size=params.rnn.word_vec_size,
                                       shared=params.model.shared_embedding)

            model = seq2seq(encoder, decoder, embedding, src_language,
                            tgt_language, params.model.max_length)

        elif params.model.type == "transformer":
            encoder = tr_encoder(
                d_model=params.transformer.d_model,
                n_head=params.transformer.heads,
                dim_ff=params.transformer.dim_feedforward,
                attn_dropout=params.transformer.attn_dropout,
                residual_dropout=params.transformer.residual_dropout,
                num_layers=params.model.encoder_layers,
                max_len=params.model.max_length)

            decoder = tr_decoder(
                d_model=params.transformer.d_model,
                n_head=params.transformer.heads,
                dim_ff=params.transformer.dim_feedforward,
                attn_dropout=params.transformer.attn_dropout,
                residual_dropout=params.transformer.residual_dropout,
                num_layers=params.model.decoder_layers,
                vocab_size=len(tgt_language),
                max_len=params.model.max_length)

            embedding = EmbeddingLayer(
                src_lang=src_language,
                tgt_lang=tgt_language,
                word_vec_size=params.transformer.d_model,
                shared=params.model.shared_embedding)

            model = Transformer(encoder, decoder, embedding, src_language,
                                tgt_language, params.model.max_length)

        else:
            raise ValueError("unknown model type: %s" % params.model.type)

        return model
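A minimal sketch of how the factory above might be invoked. The nested attribute access (params.model.type, params.rnn.hidden_size, ...) can be mimicked with types.SimpleNamespace; every field name and value below is illustrative, mirroring the accesses in the code rather than any real config schema.

    from types import SimpleNamespace

    # Hypothetical config object with the attribute paths the factory reads.
    params = SimpleNamespace(
        model=SimpleNamespace(type="rnn", encoder_layers=2, decoder_layers=2,
                              shared_embedding=False, max_length=100),
        rnn=SimpleNamespace(word_vec_size=256, hidden_size=512,
                            rnn_dropout=0.2, bidirectional=True,
                            attention="general", attn_dropout=0.1,
                            input_feed=True))
    assert params.model.type in ("rnn", "transformer")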
Example #2
    def lm(self, params, language):
        if params.model.task == "language generation":
            # GPT style language model
            lm_type = "generator"

        elif params.model.task == "language encoding":
            # BERT style language model
            lm_type = "encoder"

        else:
            raise ValueError("unknown task: %s" % params.model.task)

        if params.model.encoder_layers != params.model.decoder_layers:
            raise DimensionError(
                "In language models the number of layers in the "
                "encoder and decoder must match")

        encoder = tr_encoder(
            d_model=params.transformer.d_model,
            n_head=params.transformer.heads,
            dim_ff=params.transformer.dim_feedforward,
            attn_dropout=params.transformer.attn_dropout,
            residual_dropout=params.transformer.residual_dropout,
            num_layers=params.model.encoder_layers,
            max_len=params.model.max_length)

        # force shared embedding
        embedding = EmbeddingLayer(src_lang=language,
                                   tgt_lang=language,
                                   word_vec_size=params.transformer.d_model,
                                   shared=True)

        model = LanguageModel(encoder, embedding, language,
                              params.model.max_length, lm_type)

        return model
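Because a language model reads and predicts the same vocabulary, the embedding is forced to be shared (src_lang == tgt_lang). A toy numpy sketch of what sharing buys, with made-up shapes rather than the real EmbeddingLayer API:

    import numpy as np

    vocab, d_model = 10, 8
    shared_table = np.random.randn(vocab, d_model)   # one table for both ends
    token_ids = np.array([1, 4, 9])
    input_vectors = shared_table[token_ids]          # (3, d_model) lookup
    output_logits = input_vectors @ shared_table.T   # (3, vocab) projection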
Example #3
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)
            region_radius = config.RegionEmbedding.region_size // 2
            sequence_length = data_processor.max_sequence_length[index] + \
                              region_radius * 2
            vocab_ids = features["fixed_len_" + feature_name]
            padding_id = \
                data_processor.dict_list[index][data_processor.VOCAB_PADDING]
            vocab_ids = tf.pad(vocab_ids,
                               tf.constant([[0, 0],
                                            [region_radius, region_radius]]),
                               constant_values=padding_id)
            region_emb = embedding_layer.get_region_embedding(
                feature_name,
                vocab_ids,
                len(data_processor.dict_list[index]),
                params["epoch"],
                sequence_length,
                config.RegionEmbedding.region_size,
                config.RegionEmbedding.region_embedding_mode,
                mode,
                data_processor.pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index])

            # which words have a corresponding region embedding
            trimmed_seq = \
                vocab_ids[..., region_radius: sequence_length - region_radius]

            def mask(x):
                return tf.cast(
                    tf.not_equal(tf.cast(x, tf.int32),
                                 tf.constant(padding_id)), tf.float32)

            # remove padding (set padded positions to zero)
            weight = tf.map_fn(mask,
                               trimmed_seq,
                               dtype=tf.float32,
                               back_prop=False)
            weight = tf.expand_dims(weight, -1)
            weighted_emb = region_emb * weight
            # document embedding
            hidden_layer = tf.reduce_sum(weighted_emb, 1)

            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(RegionEmbeddingEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
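The pad-then-trim bookkeeping above is easy to check in isolation: padding by region_radius on both sides and then slicing region_radius off each end returns exactly the original ids. A self-contained numpy sketch with toy values:

    import numpy as np

    padding_id = 0
    region_size = 5
    region_radius = region_size // 2
    vocab_ids = np.array([[3, 7, 2], [5, 1, 4]])
    padded = np.pad(vocab_ids, ((0, 0), (region_radius, region_radius)),
                    constant_values=padding_id)
    trimmed = padded[..., region_radius: padded.shape[1] - region_radius]
    assert (trimmed == vocab_ids).all()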
Example #4
 def __init__(self, vocab_size_src: int, dim_embed_src: int,
              num_neurons_encoder: int, optim: object):
     self.embedding_layer = EmbeddingLayer(dim_in=vocab_size_src,
                                           embed_dim=dim_embed_src,
                                           optim=optim)
     self.rnn_cell = RecurrentNeuralNetwork(
         dim_in=dim_embed_src,
         num_neurons=num_neurons_encoder,
         optim=optim,
         embedding_layer=self.embedding_layer)
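For intuition, the embedding layer's forward pass is an index into a (vocab_size_src, dim_embed_src) weight matrix. The internals of this particular EmbeddingLayer are not shown here, but the shape contract matches the unit test in Example #13; a numpy sketch:

    import numpy as np

    vocab_size_src, dim_embed_src = 5, 3
    weights = np.random.randn(vocab_size_src, dim_embed_src)
    inp_seq = np.array([[3, 2, 1, 4, 0], [0, 0, 1, 1, 2]])
    embedded = weights[inp_seq]                 # lookup, no matmul needed
    assert embedded.shape == (2, 5, dim_embed_src)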
Example #5
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        logger = data_processor.logger
        embedding_layer = EmbeddingLayer(config, logger=logger)
        model_helper = ModelHelper(config, logger=logger)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor)
            input_layer = []
            len_list = []
            for feature_name in params["feature_names"]:
                index = data_processor.dict_names.index(feature_name)
                input_layer.append(embedding_layer.get_vocab_embedding_sparse(
                    feature_name, features["var_len_" + feature_name],
                    len(data_processor.dict_list[index]), params["epoch"],
                    pretrained_embedding_file=
                    data_processor.pretrained_embedding_files[index],
                    dict_map=data_processor.dict_list[index],
                    mode=mode))
                len_list.append(features[feature_name + "_var_real_len"])
                if data_processor.ngram_list[index] > 1:
                    ngram_name = feature_name + "_ngram"
                    index = data_processor.dict_names.index(ngram_name)
                    input_layer.append(
                        embedding_layer.get_vocab_embedding_sparse(
                            ngram_name, features["var_len_" + ngram_name],
                            len(data_processor.dict_list[index]),
                            params["epoch"],
                            mode=mode))
                    len_list.append(features[ngram_name + "_var_real_len"])
            hidden_layer = input_layer[0]
            total_len = len_list[0]
            for i in range(1, len(input_layer)):
                hidden_layer = hidden_layer + input_layer[i]
                total_len = total_len + len_list[i]
            hidden_layer = tf.div(hidden_layer, total_len)
            hidden_layer = tf.contrib.layers.fully_connected(
                inputs=hidden_layer, num_outputs=256, activation_fn=tf.nn.relu)
            hidden_layer = tf.contrib.layers.fully_connected(
                inputs=hidden_layer,
                num_outputs=config.embedding_layer.embedding_dimension,
                activation_fn=tf.nn.relu)

            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(FastTextEstimator, self).__init__(
            model_fn=_model_fn, model_dir=config.model_common.checkpoint_dir,
            config=model_helper.get_run_config(), params=model_params)
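The body above is the fastText recipe: sum the token (and n-gram) embeddings, divide by the real token count, then apply fully connected layers. A toy numpy sketch of the averaging step with padding excluded:

    import numpy as np

    # Toy version of hidden_layer = sum(embeddings) / total_len.
    emb = np.array([[1.0, 2.0], [3.0, 4.0], [0.0, 0.0]])  # last row = padding
    real_len = 2.0                                        # unpadded length
    doc_vector = emb.sum(axis=0) / real_len               # -> [2.0, 3.0]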
Example #6
 def __init__(self, vocab_size_trg: int, dim_embed_trg: int,
              num_neurons_decoder: int, optim: object):
     self.embedding_layer = EmbeddingLayer(vocab_size_trg, dim_embed_trg,
                                           optim)
     # for the decoder, we're going to tie the weights of the embedding
     # layer and the linear projection before softmax activation. If
     # vocab_size_src and vocab_size_trg are same as well, its possible to
     # tie all the weights but not done here for simplicity of
     # implementation. See: https://arxiv.org/abs/1608.05859
     self.rnn_cell = RecurrentNeuralNetwork(dim_embed_trg,
                                            num_neurons_decoder,
                                            optim,
                                            self.embedding_layer,
                                            predict=True,
                                            costFunction=crossEntropy)
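The comment cites Press & Wolf (https://arxiv.org/abs/1608.05859); mechanically, tying just reuses the embedding matrix as the pre-softmax projection. A toy numpy sketch, not this library's actual wiring:

    import numpy as np

    E = np.random.randn(7, 4)        # shared (vocab_size_trg, dim_embed_trg)
    hidden = np.random.randn(1, 4)   # decoder hidden state
    logits = hidden @ E.T            # (1, vocab): no separate output matrix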
Example #7
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _convolutional_block(inputs, num_layers, num_filters, name, mode):
            """Convolutional Block of VDCNN
            Convolutional block contains 2 conv layers, and can be repeated
            Temp Conv-->Batch Norm-->ReLU-->Temp Conv-->Batch Norm-->ReLU
            """
            with tf.variable_scope("conv_block_%s" % name):
                is_training = False
                if mode == tf.estimator.ModeKeys.TRAIN:
                    is_training = True
                hidden_layer = inputs
                initializer_normal = tf.random_normal_initializer(stddev=0.1)
                initializer_const = tf.constant_initializer(0.0)
                for i in range(0, num_layers):
                    filter_shape = [
                        3, 1, hidden_layer.get_shape()[3], num_filters
                    ]
                    w = tf.get_variable(name='W_' + str(i),
                                        shape=filter_shape,
                                        initializer=initializer_normal)
                    b = tf.get_variable(name='b_' + str(i),
                                        shape=[num_filters],
                                        initializer=initializer_const)
                    conv = tf.nn.conv2d(hidden_layer,
                                        w,
                                        strides=[1, 1, 1, 1],
                                        padding="SAME")
                    conv = tf.nn.bias_add(conv, b)
                    batch_norm = tf.layers.batch_normalization(
                        conv, center=True, scale=True, training=is_training)
                    hidden_layer = tf.nn.relu(batch_norm)
                return hidden_layer

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)
            """VDCNN architecture
              1. text(char is recommended)
              2. embedding lookup
              3. conv layer(64 feature maps)
              4. conv blocks(contains 2 conv layers, and can be repeated)
              5. fc1
              6. fc2
              7. fc3(softmax)
              pooling is important and shortcut is optional
            """
            sequence_length = data_processor.max_sequence_length[index]
            # embedding shape [batch_size, sequence_length, embedding_dimension]
            embedding = embedding_layer.get_vocab_embedding(
                feature_name,
                features["fixed_len_" + feature_name],
                len(data_processor.dict_list[index]),
                params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode)
            embedding = tf.reshape(embedding, [
                -1, sequence_length, config.embedding_layer.embedding_dimension
            ])
            embedding = tf.expand_dims(embedding, -1)
            if mode == tf.estimator.ModeKeys.TRAIN:
                embedding = model_helper.dropout(
                    embedding,
                    config.embedding_layer.embedding_dropout_keep_prob)

            initializer = tf.random_normal_initializer(stddev=0.1)
            # first conv layer (filter_size=3, #feature_map=64)
            with tf.variable_scope("first_conv") as scope:
                filter_shape = [
                    3, config.embedding_layer.embedding_dimension, 1, 64
                ]
                w = tf.get_variable(name='W_1',
                                    shape=filter_shape,
                                    initializer=initializer)
                """
                  argv1: input = [batch_size, in_height, in_width, in_channels]
                  argv2: filter = [filter_height, filter_width, in_channels,
                                   out_channels]
                  argv3: strides
                  return: feature_map
                  note:
                    1. out_channels = num_filters = #feature map
                    2. for padding="SAME", new_height=new_width=
                           ceil(input_size/stride)
                       for padding="VALID", new_height=new_width=
                           ceil((input_size-filter_size+1)/stride)
                """
                conv = tf.nn.conv2d(
                    embedding,
                    w,
                    strides=[
                        1, 1, config.embedding_layer.embedding_dimension, 1
                    ],
                    padding="SAME")
                b = tf.get_variable(name='b_1',
                                    shape=[64],
                                    initializer=tf.constant_initializer(0.0))
                out = tf.nn.bias_add(conv, b)
                first_conv = tf.nn.relu(out)
            """all convolutional blocks
            4 kinds of conv blocks, which #feature_map are 64,128,256,512
            Depth:             9  17 29 49
            ------------------------------
            conv block 512:    2  4  4  6
            conv block 256:    2  4  4  10
            conv block 128:    2  4  10 16
            conv block 64:     2  4  10 16
            First conv. layer: 1  1  1  1
            """
            vdcnn_depth = {
                9: [2, 2, 2, 2],
                17: [4, 4, 4, 4],
                29: [10, 10, 4, 4],
                49: [16, 16, 10, 6],
            }
            max_pool_ksize = [1, 3, 1, 1]
            max_pool_strides = [1, 2, 1, 1]
            num_filters = [64, 128, 256, 512]
            conv_block = first_conv
            for i in range(0, 4):
                conv_block = _convolutional_block(
                    conv_block,
                    num_layers=vdcnn_depth[config.TextVDCNN.vdcnn_depth][i],
                    num_filters=num_filters[i],
                    name="cb_" + str(i),
                    mode=mode)
                pool = tf.nn.max_pool(conv_block,
                                      ksize=max_pool_ksize,
                                      strides=max_pool_strides,
                                      padding='SAME',
                                      name="pool_" + str(i))

            pool_shape = int(np.prod(pool.get_shape()[1:]))
            pool = tf.reshape(pool, (-1, pool_shape))

            # fc1
            fc1 = tf.contrib.layers.fully_connected(inputs=pool,
                                                    num_outputs=2048,
                                                    activation_fn=tf.nn.relu)
            if mode == tf.estimator.ModeKeys.TRAIN:
                fc1 = model_helper.dropout(
                    fc1, config.train.hidden_layer_dropout_keep_prob)
            # fc2
            hidden_layer = tf.contrib.layers.fully_connected(
                inputs=fc1, num_outputs=2048, activation_fn=tf.nn.relu)
            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
            # fc3(softmax)
            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"])

        super(TextVDCNNEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
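The padding note in the first_conv docstring can be verified with a line of arithmetic (toy sizes, not the config's values):

    import math

    in_size, filter_size, stride = 128, 3, 2
    same_out = math.ceil(in_size / stride)                       # SAME: 64
    valid_out = math.ceil((in_size - filter_size + 1) / stride)  # VALID: 63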
Example #8
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)
            padding_id = \
                data_processor.dict_list[index][data_processor.VOCAB_PADDING]
            window_size = config.TextDRNN.drnn_window_size
            vocab_ids = tf.pad(features["fixed_len_" + feature_name],
                               tf.constant([[0, 0], [window_size - 1, 0]]),
                               constant_values=padding_id)
            embedding_lookup_table = embedding_layer.get_lookup_table(
                feature_name,
                len(data_processor.dict_list[index]),
                config.embedding_layer.embedding_dimension,
                params["epoch"],
                dict_map=data_processor.dict_list[index],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                mode=mode)
            sequence_length = \
                data_processor.max_sequence_length[index] + window_size - 1
            aligned_seq = \
                [tf.slice(vocab_ids, [0, i], [-1, window_size])
                 for i in range(0, sequence_length - window_size + 1)]
            aligned_seq = \
                tf.reshape(tf.concat(list(aligned_seq), 1),
                           [-1, sequence_length - window_size + 1, window_size])
            embedding = tf.nn.embedding_lookup(embedding_lookup_table,
                                               aligned_seq)
            if mode == tf.estimator.ModeKeys.TRAIN:
                embedding = model_helper.dropout(
                    embedding,
                    config.embedding_layer.embedding_dropout_keep_prob)
            embedding = tf.reshape(
                embedding,
                [-1, window_size, config.embedding_layer.embedding_dimension])
            _, state = model_layer.recurrent(
                embedding,
                config.TextDRNN.drnn_rnn_dimension,
                cell_type=config.TextDRNN.drnn_cell_type,
                cell_hidden_keep_prob=config.TextDRNN.
                drnn_cell_hidden_keep_prob,
                mode=mode,
                use_bidirectional=False,
                name="drnn",
                reuse=None)
            state = tf.reshape(state, [
                -1, sequence_length - window_size + 1,
                config.TextDRNN.drnn_rnn_dimension
            ])

            is_training = tf.constant(mode == tf.estimator.ModeKeys.TRAIN,
                                      dtype=tf.bool)
            state = model_layer.batch_norm(state, is_training, name="bn")
            state = tf.contrib.layers.fully_connected(
                state,
                config.embedding_layer.embedding_dimension,
                biases_initializer=None)

            def _mask_no_padding(x):
                return tf.cast(
                    tf.not_equal(tf.cast(x, tf.int32),
                                 tf.constant(padding_id)), tf.float32)

            def _mask_padding(x):
                return tf.cast(
                    tf.equal(tf.cast(x, tf.int32), tf.constant(padding_id)),
                    tf.float32)

            trim_seq = vocab_ids[..., window_size - 1:]
            weight = tf.map_fn(_mask_no_padding,
                               trim_seq,
                               dtype=tf.float32,
                               back_prop=False)
            weight = tf.expand_dims(weight, -1)
            weighted_emb = state * weight
            neg = tf.map_fn(_mask_padding,
                            trim_seq,
                            dtype=tf.float32,
                            back_prop=False)
            neg = tf.expand_dims(neg, -1) * tf.float32.min
            weighted_emb = weighted_emb + neg
            hidden_layer = tf.reduce_max(weighted_emb, axis=1)

            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(TextDRNNEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
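The masking before reduce_max is worth spelling out: padded positions are zeroed by the weight mask and then pushed to the most negative float, so the max over time can never select them. A numpy sketch with toy values:

    import numpy as np

    state = np.array([[1.0, 5.0, 2.0]])
    is_pad = np.array([[0.0, 0.0, 1.0]])           # last position is padding
    masked = state * (1 - is_pad) + is_pad * np.finfo(np.float32).min
    doc_vector = masked.max(axis=1)                # 5.0; padding ignored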
Example #9
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)
            sequence_length = data_processor.max_sequence_length[index] + \
                              config.fixed_len_feature.token_padding_begin + \
                              config.fixed_len_feature.token_padding_end
            padding_value = \
                data_processor.token_map[data_processor.VOCAB_PADDING]
            embedding = embedding_layer.get_vocab_embedding(
                feature_name,
                features["fixed_len_" + feature_name],
                len(data_processor.dict_list[index]),
                params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode,
                begin_padding_size=config.fixed_len_feature.
                token_padding_begin,
                end_padding_size=config.fixed_len_feature.token_padding_end,
                padding_id=padding_value)
            embedding = tf.expand_dims(embedding, -1)

            if mode == tf.estimator.ModeKeys.TRAIN:
                embedding = model_helper.dropout(
                    embedding,
                    config.embedding_layer.embedding_dropout_keep_prob)

            filter_sizes = config.TextCNN.filter_sizes
            pooled_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.name_scope("convolution-max_pooling-%d" % filter_size):
                    filter_shape = \
                        [filter_size,
                         config.embedding_layer.embedding_dimension,
                         1, config.TextCNN.num_filters]
                    W = tf.Variable(tf.random_uniform(filter_shape,
                                                      minval=-0.01,
                                                      maxval=0.01),
                                    name="W-%d" % filter_size)
                    b = tf.get_variable("b-%d" % filter_size,
                                        [config.TextCNN.num_filters])
                    # Strides are set to [1, 1, 1, 1]:
                    # the convolution slides one token at a time.
                    convolution = tf.nn.conv2d(embedding,
                                               W,
                                               strides=[1, 1, 1, 1],
                                               padding="VALID",
                                               name="convolution")
                    h = tf.nn.relu(tf.nn.bias_add(convolution, b), name="relu")
                    pooled = tf.nn.max_pool(
                        h,
                        ksize=[1, sequence_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name="max_pooling")
                    pooled_outputs.append(pooled)

            num_filters_total = config.TextCNN.num_filters * len(filter_sizes)
            # pooled_outputs contains
            # tensor with shape [batch_size, 1, 1, num_filters]
            h_pool = tf.concat(pooled_outputs, 3)
            hidden_layer = tf.reshape(h_pool, [-1, num_filters_total])

            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
                # when repeating the result in the paper, the following code
                # should be added.
                # hidden_layer *= FLAGS.hidden_layer_dropout_keep_prob * (
                #         1 - FLAGS.hidden_layer_dropout_keep_prob)

            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(TextCNNEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
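Shape bookkeeping for the pooling above, with toy numbers rather than the config's real values: each filter size contributes a [batch, 1, 1, num_filters] tensor, so concatenating on the last axis gives num_filters * len(filter_sizes) features per document:

    filter_sizes, num_filters, batch_size = [2, 3, 4], 100, 8
    num_filters_total = num_filters * len(filter_sizes)
    # h_pool: [8, 1, 1, 300]; hidden_layer after reshape: [8, 300]
    assert num_filters_total == 300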
Example #10
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]

            index = data_processor.dict_names.index(feature_name)
            embedding = embedding_layer.get_vocab_embedding(
                feature_name,
                features["fixed_len_" + feature_name],
                len(data_processor.dict_list[index]),
                params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode)
            if mode == tf.estimator.ModeKeys.TRAIN:
                embedding = model_helper.dropout(
                    embedding,
                    config.embedding_layer.embedding_dropout_keep_prob)
            rnn_fw_cell, rnn_bw_cell = None, None
            if config.TextRNN.cell_type == "lstm":
                rnn_fw_cell = rnn.BasicLSTMCell(config.TextRNN.rnn_dimension)
                rnn_bw_cell = rnn.BasicLSTMCell(config.TextRNN.rnn_dimension)
            elif config.TextRNN.cell_type == "gru":
                rnn_fw_cell = rnn.GRUCell(config.TextRNN.rnn_dimension)
                rnn_bw_cell = rnn.GRUCell(config.TextRNN.rnn_dimension)
            if config.TextRNN.use_bidirectional:
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    rnn_fw_cell,
                    rnn_bw_cell,
                    embedding,
                    dtype=tf.float32,
                    sequence_length=tf.reshape(
                        features[feature_name + "_fixed_real_len"], [-1]))
                text_embedding = tf.concat(outputs, 2)
            else:
                text_embedding, _ = tf.nn.dynamic_rnn(rnn_fw_cell,
                                                      embedding,
                                                      dtype=tf.float32)

            if config.model_common.use_self_attention:
                hidden_layer = model_helper.self_attention(
                    text_embedding, config.model_common.attention_dimension)
            else:
                sum_layer = tf.reduce_sum(text_embedding, axis=1)
                hidden_layer = sum_layer / tf.cast(
                    features[feature_name + "_fixed_real_len"],
                    dtype=tf.float32)

            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
            return model_helper.get_softmax_estimator_spec(
                hidden_layer, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(TextRNNEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
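model_helper.self_attention is opaque here; one plausible reading is additive self-attention pooling, sketched below in numpy under that assumption (the helper's actual math may differ):

    import numpy as np

    T, d, a = 4, 6, 3                              # time, hidden, attn dims
    H = np.random.randn(T, d)                      # RNN outputs
    W, v = np.random.randn(d, a), np.random.randn(a)
    scores = np.tanh(H @ W) @ v                    # (T,) unnormalized scores
    alpha = np.exp(scores) / np.exp(scores).sum()  # softmax over time
    pooled = alpha @ H                             # (d,) weighted sum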
Example #11
    def _model(self):
        # Input
        embedding = EmbeddingLayer(name="Input-embedding-0",
                                   zero_padding=True,
                                   scale=True,
                                   word_count=self.source_word_count,
                                   model_dimension=self.model_dimension,
                                   network=self)
        positional_encoder = PositionalEncodingLayer(name="Input-positional_encoder-0",
                                                     zero_padding=True,
                                                     scale=True,
                                                     model_dimension=self.model_dimension,
                                                     network=self)
        add_embdeding_postion = AdditionLayer(name="Input-add-0",
                                              input_list=[embedding, positional_encoder],
                                              network=self)
        self.add_layer(add_embdeding_postion)

        # Encoder (a single block here; widen the range for a deeper stack)
        for i in range(1, 2):
            self.add_layer(
                MultiheadAttentionLayer(name="Encoder-multihead_attention_1-{0}".format(i),
                                        batch_size=self.batch_size,
                                        model_dimension=self.model_dimension,
                                        network=self)
            )
            self.add_layer(NormalizeLayer(name="Encoder-normalize_1-{0}".format(i),
                                          network=self))
            self.add_layer(
                FeedForwardLayer(name="Encoder-feedforward_1-{0}".format(i),
                                 batch_size=self.batch_size,
                                 dimension_inner=self.dimension_inner,
                                 model_dimension=self.model_dimension,
                                 network=self)
            )
            self.add_layer(NormalizeLayer(name="Encoder-normalize_2-{0}".format(i),
                                          network=self))

        # Output
        embedding = EmbeddingLayer(name="Output-embedding-0",
                                   zero_padding=True,
                                   scale=True,
                                   word_count=self.target_word_count,
                                   model_dimension=self.model_dimension,
                                   network=self)
        positional_encoder = PositionalEncodingLayer(name="Output-positional_encoder-0",
                                                     zero_padding=True,
                                                     scale=True,
                                                     model_dimension=self.model_dimension,
                                                     network=self)
        add_embdeding_postion = AdditionLayer(name="Output-add-0",
                                              input_list=[embedding, positional_encoder],
                                              network=self)
        self.add_layer(add_embdeding_postion)

        # Decoder (a single block here; widen the range for a deeper stack)
        for j in range(1, 2):
            self.add_layer(
                MultiheadAttentionLayer(name="Decoder-multihead_attention_1-{0}".format(j),
                                        batch_size=self.batch_size,
                                        model_dimension=self.model_dimension,
                                        network=self)
            )
            self.add_layer(NormalizeLayer(name="Decoder-normalize_1-{0}".format(j),
                                          network=self))
            self.add_layer(
                MultiheadAttentionLayer(name="Decoder-multihead_attention_2-{0}".format(j),
                                        batch_size=self.batch_size,
                                        model_dimension=self.model_dimension,
                                        network=self),
            )
            self.add_layer(NormalizeLayer(name="Decoder-normalize_2-{0}".format(j),
                                          network=self))
            self.add_layer(
                FeedForwardLayer(name="Decoder-feedforward_1-{0}".format(j),
                                 batch_size=self.batch_size,
                                 dimension_inner=self.dimension_inner,
                                 model_dimension=self.model_dimension,
                                 network=self)
            )
            self.add_layer(NormalizeLayer(name="Decoder-normalize_3-{0}".format(j),
                                          network=self))

        # Final Output
        self.add_layer(FinalLayer(name="Final",
                                  model_dimension=self.model_dimension,
                                  word_count=self.target_word_count,
                                  network=self))
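PositionalEncodingLayer's internals are not shown; the usual choice for such a layer is the sinusoidal encoding from the Transformer paper, sketched here in numpy under that assumption:

    import numpy as np

    def positional_encoding(max_len, model_dimension):
        # pe[pos, 2i] = sin(pos / 10000^(2i/d)); pe[pos, 2i+1] = cos(...)
        pos = np.arange(max_len)[:, None]
        i = np.arange(model_dimension)[None, :]
        angles = pos / np.power(10000.0, (2 * (i // 2)) / model_dimension)
        pe = np.zeros((max_len, model_dimension))
        pe[:, 0::2] = np.sin(angles[:, 0::2])
        pe[:, 1::2] = np.cos(angles[:, 1::2])
        return pe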
Example #12
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)
            sequence_length = data_processor.max_sequence_length[index]
            embedding = embedding_layer.get_vocab_embedding(
                feature_name,
                features["fixed_len_" + feature_name],
                len(data_processor.dict_list[index]),
                params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode)
            dimension = config.embedding_layer.embedding_dimension
            hidden_size = config.AttentiveConvNet.attentive_hidden_size

            # first fully connected matrix
            mat_hidden1 = tf.get_variable(
                "mat_hidden1",
                shape=[dimension, hidden_size],
                initializer=tf.random_uniform_initializer(
                    -1.0 * pow(6.0 / (dimension + hidden_size), 0.5),
                    pow(6.0 / (hidden_size + dimension), 0.5)))
            bias_hidden1 = tf.get_variable("bias_hidden1", shape=[hidden_size])
            # second fully connected matrix
            mat_hidden2 = tf.get_variable(
                "mat_hidden2",
                shape=[hidden_size, hidden_size],
                initializer=tf.random_uniform_initializer(
                    -1.0 * pow(3.0 / hidden_size, 0.5),
                    pow(3.0 / hidden_size, 0.5)))
            bias_hidden2 = tf.get_variable("bias_hidden2", shape=[hidden_size])

            def _gconv(context, filter_width, name):
                """ compute equations 7,8,9
                """
                bias_ha = tf.get_variable(name + "_bias_ha", shape=[dimension])
                bias_ga = tf.get_variable(name + "_bias_ga", shape=[dimension])
                embedded_context = tf.expand_dims(context, -1)
                filter_shape = [filter_width, dimension, 1, dimension]
                filter_o = tf.Variable(tf.truncated_normal(filter_shape,
                                                           stddev=0.1),
                                       name=name + "_filter_Wo")
                filter_g = tf.Variable(tf.truncated_normal(filter_shape,
                                                           stddev=0.1),
                                       name=name + "_filter_Wg")
                conv_o = tf.nn.conv2d(embedded_context,
                                      filter_o,
                                      strides=[1, 1, dimension, 1],
                                      padding="SAME",
                                      name=name + "_convolution_Wo")
                conv_g = tf.nn.conv2d(embedded_context,
                                      filter_g,
                                      strides=[1, 1, dimension, 1],
                                      padding="SAME",
                                      name=name + "_convolution_Wg")
                conv_o = tf.keras.backend.permute_dimensions(
                    conv_o, (0, 1, 3, 2))
                conv_g = tf.keras.backend.permute_dimensions(
                    conv_g, (0, 1, 3, 2))
                o_context = tf.tanh(tf.nn.bias_add(tf.squeeze(conv_o, [-1]),
                                                   bias_ha),
                                    name=name + "_Wo_tanh")
                g_context = tf.sigmoid(tf.nn.bias_add(tf.squeeze(conv_g, [-1]),
                                                      bias_ga),
                                       name=name + "_Wg_sigmoid")
                return g_context * context + (1 - g_context) * o_context

            def _attentive_context(source, focus, name="context_generate"):
                if config.AttentiveConvNet.attentive_version == 'advanced':
                    mat_dimension = 2 * dimension
                else:
                    mat_dimension = dimension
                mat_tx = tf.get_variable(
                    name + "mat_tx",
                    shape=[mat_dimension, mat_dimension],
                    initializer=tf.random_uniform_initializer(
                        -1.0 * pow(3.0 / dimension, 0.5),
                        pow(3.0 / dimension, 0.5)))
                mat_ta = tf.get_variable(
                    name + "mat_ta",
                    shape=[dimension, mat_dimension],
                    initializer=tf.random_uniform_initializer(
                        -1.0 * pow(3.0 / dimension, 0.5),
                        pow(3.0 / dimension, 0.5)))
                # use keras dot and batch_dot to compute equation 2
                embedding_conv = tf.keras.backend.dot(source, mat_tx)
                scores = tf.keras.backend.batch_dot(
                    embedding_conv,
                    tf.keras.backend.permute_dimensions(focus, (0, 2, 1)))
                scores_softmax = tf.keras.activations.softmax(scores, axis=1)
                # compute the context feature map as in equation 4
                res = tf.matmul(scores_softmax, focus)
                # weight the output as in equation 6
                context = tf.keras.backend.permute_dimensions(
                    tf.keras.backend.dot(
                        mat_ta,
                        tf.keras.backend.permute_dimensions(res, (0, 2, 1))),
                    (1, 2, 0))
                return context

            def _attentive_convolution(beneficiary,
                                       attentive_context,
                                       name="attentive_convolution"):
                """ compute equation 6
                """
                bias = tf.get_variable(name + "bias", shape=[dimension])
                embedded_text = tf.expand_dims(beneficiary, -1)
                filter_shape = [
                    config.AttentiveConvNet.attentive_width, dimension, 1,
                    dimension
                ]
                conv_filter = tf.Variable(tf.truncated_normal(filter_shape,
                                                              stddev=0.1),
                                          name=name + "filter")
                convolution = tf.nn.conv2d(embedded_text,
                                           conv_filter,
                                           strides=[1, 1, dimension, 1],
                                           padding="SAME",
                                           name=name + "convolutioin")
                convolution = tf.keras.backend.permute_dimensions(
                    convolution, (0, 1, 3, 2))
                conv_text = tf.squeeze(convolution, [-1])
                merge_text = tf.add(attentive_context, conv_text)
                merge_text = tf.nn.bias_add(merge_text, bias)
                tanh_out = tf.tanh(merge_text, name=name + "tanh")
                tanh_out = tf.expand_dims(tanh_out, -1)

                return tanh_out

            if config.AttentiveConvNet.attentive_version == "advanced":
                # generate source
                source_x_uni = _gconv(embedding, 1, "source_uni")
                source_x_tri = _gconv(embedding, 3, "source_tri")
                x_mgran = tf.concat([source_x_uni, source_x_tri], -1)
                # generate focus
                focus_a_uni = _gconv(embedding, 1, "focus_uni")
                focus_a_tri = _gconv(embedding, 3, "focus_tri")
                a_mgran = tf.concat([focus_a_uni, focus_a_tri], -1)
                # generate beneficiary
                x_beneficiary = _gconv(embedding, 1, "beni_uni")
            else:
                # light version
                x_mgran, a_mgran, x_beneficiary = \
                        embedding, embedding, embedding

            context = _attentive_context(x_mgran, a_mgran)
            attentive_embedding = _attentive_convolution(
                x_beneficiary, context)
            pooled = tf.nn.max_pool(attentive_embedding,
                                    ksize=[1, sequence_length, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="max_pooling")
            hidden_layer = tf.reshape(
                pooled, [-1, config.embedding_layer.embedding_dimension])

            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer = model_helper.dropout(
                    hidden_layer, config.train.hidden_layer_dropout_keep_prob)
            hidden_layer1 = tf.nn.relu(tf.matmul(hidden_layer, mat_hidden1) +
                                       bias_hidden1,
                                       name="relu_hidden1")
            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer1 = model_helper.dropout(
                    hidden_layer1, config.train.hidden_layer_dropout_keep_prob)
            hidden_layer2 = tf.nn.relu(tf.matmul(hidden_layer1, mat_hidden2) +
                                       bias_hidden2,
                                       name="relu_hidden2")
            if mode == tf.estimator.ModeKeys.TRAIN:
                hidden_layer2 = model_helper.dropout(
                    hidden_layer2, config.train.hidden_layer_dropout_keep_prob)
            # concat max pooling, hidden layer 1 output, hidden layer 2 output
            output = tf.concat([hidden_layer, hidden_layer1, hidden_layer2],
                               -1)

            return model_helper.get_softmax_estimator_spec(
                output, mode, labels, params["label_size"],
                params["static_embedding"])

        super(AttentiveConvNetEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
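The scoring step inside _attentive_context reduces to two matrix products; a numpy sketch of equation 2 with toy shapes (batch 1, T positions, dimension d):

    import numpy as np

    T, d = 5, 8
    source = np.random.randn(1, T, d)
    focus = np.random.randn(1, T, d)
    mat_tx = np.random.randn(d, d)
    scores = (source @ mat_tx) @ focus.transpose(0, 2, 1)   # (1, T, T)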
Example #13
 def test_embed_forward(self):
     inp_seq = np.array([[3, 2, 1, 4, 0], [0, 0, 1, 1, 2]])
     obj = EmbeddingLayer(5, 3, GradientDescentMomentum)
     output = obj.forward(inp_seq)
     self.assertEqual(output.shape, (2, 5, 3))
Example #14
    def __init__(self, data_processor, model_params):
        config = data_processor.config
        embedding_layer = EmbeddingLayer(config)
        model_helper = ModelHelper(config)

        def _convolution(inputs, num_filters, name):
            """two layers of convolution
            """

            with tf.variable_scope("two_conv-%s" % name):
                initializer_normal = tf.random_normal_initializer(stddev=0.01)
                filter_shape = [3, 1, num_filters, num_filters]

                W1 = tf.get_variable(name="W1-%s" % name,
                                     shape=filter_shape,
                                     initializer=initializer_normal)
                b1 = tf.get_variable(name="b1-%s" % name, shape=[num_filters])
                # pre-activation, before convolution
                relu1 = tf.nn.relu(inputs, name="relu1-%s" % name)
                conv1 = tf.nn.conv2d(relu1,
                                     W1,
                                     strides=[1, 1, 1, 1],
                                     padding="SAME",
                                     name="convolution1-%s" % name)
                conv1 = tf.nn.bias_add(conv1, b1)

                W2 = tf.get_variable(name="W2-%s" % name,
                                     shape=filter_shape,
                                     initializer=initializer_normal)
                b2 = tf.get_variable(name="b2-%s" % name, shape=[num_filters])
                # pre-activation
                relu2 = tf.nn.relu(conv1, name="relu2-%s" % name)
                conv2 = tf.nn.conv2d(relu2,
                                     W2,
                                     strides=[1, 1, 1, 1],
                                     padding="SAME",
                                     name="convolution2-%s" % name)
                conv2 = tf.nn.bias_add(conv2, b2)
            # return shortcut connections with identity mapping
            return inputs + conv2

        def _convolution_block(inputs, num_filters, name):
            """DPCNN Block architecture
              1. pooling (strides=2, sequence halved)
              2. relu
              3. conv1 layer
              4. relu
              5. conv2 layer
              6. return pooling output + conv2 layer output
            """

            with tf.variable_scope("pooling-%s" % name):
                pooled = tf.nn.max_pool(inputs,
                                        ksize=[1, 3, 1, 1],
                                        strides=[1, 2, 1, 1],
                                        padding='SAME',
                                        name="max-pooling-%s" % name)

            return _convolution(pooled, num_filters, name)

        def _model_fn(features, labels, mode, params):
            self._check(params["feature_names"], data_processor, config)
            feature_name = params["feature_names"][0]
            index = data_processor.dict_names.index(feature_name)

            num_filters = config.TextDPCNN.num_filters
            sequence_length = data_processor.max_sequence_length[index]
            embedding = embedding_layer.get_vocab_embedding(
                feature_name,
                features["fixed_len_" + feature_name],
                len(data_processor.dict_list[index]),
                params["epoch"],
                pretrained_embedding_file=data_processor.
                pretrained_embedding_files[index],
                dict_map=data_processor.dict_list[index],
                mode=mode)
            embedding_dims = config.embedding_layer.embedding_dimension
            embedding = tf.reshape(embedding,
                                   [-1, sequence_length, embedding_dims])
            embedding = tf.expand_dims(embedding, -1)
            if mode == tf.estimator.ModeKeys.TRAIN:
                embedding = model_helper.dropout(
                    embedding,
                    config.embedding_layer.embedding_dropout_keep_prob)

            initializer = tf.random_normal_initializer(stddev=0.01)
            with tf.variable_scope("dpcnn") as scope:
                filter_shape = [3, embedding_dims, 1, num_filters]
                W0 = tf.get_variable(name="W0",
                                     shape=filter_shape,
                                     initializer=initializer)
                b0 = tf.get_variable(name="b0", shape=[num_filters])
                conv0 = tf.nn.conv2d(embedding,
                                     W0,
                                     strides=[1, 1, embedding_dims, 1],
                                     padding="SAME")
                conv0 = tf.nn.bias_add(conv0, b0)

                conv = _convolution(conv0, num_filters, "conv-1-2")

                for i in range(config.TextDPCNN.dpcnn_blocks):
                    conv = _convolution_block(conv, num_filters,
                                              "convolution-block-%d" % i)

                outputs_shape = int(np.prod(conv.get_shape()[1:]))
                outputs = tf.reshape(conv, (-1, outputs_shape))

            return model_helper.get_softmax_estimator_spec(
                outputs, mode, labels, params["label_size"],
                params["static_embedding"], data_processor.label_dict_file)

        super(TextDPCNNEstimator,
              self).__init__(model_fn=_model_fn,
                             model_dir=config.model_common.checkpoint_dir,
                             config=model_helper.get_run_config(),
                             params=model_params)
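Each DPCNN block halves the time dimension with its stride-2 max-pool, so after k blocks the sequence length is roughly sequence_length / 2**k. A quick check with toy numbers:

    import math

    sequence_length, dpcnn_blocks = 256, 4
    final_len = sequence_length
    for _ in range(dpcnn_blocks):
        final_len = math.ceil(final_len / 2)   # 'SAME' pooling, stride 2
    assert final_len == 16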