Example #1
  def encode(self, inputs, attention_bias, training):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      training: boolean, whether in training mode or not.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.
      embedded_inputs = self.embedding_softmax_layer(inputs)
      embedded_inputs = tf.cast(embedded_inputs, self.params["dtype"])
      inputs_padding = model_utils.get_padding(inputs)
      attention_bias = tf.cast(attention_bias, self.params["dtype"])

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(embedded_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params["hidden_size"])
        pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
        encoder_inputs = embedded_inputs + pos_encoding

      if training:
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, rate=self.params["layer_postprocess_dropout"])

      return self.encoder_stack(
          encoder_inputs, attention_bias, inputs_padding, training=training)
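Every example on this page calls model_utils.get_position_encoding(length, hidden_size). For reference, here is a minimal sketch of the sinusoidal signal this helper typically computes, assuming the standard Transformer formulation (the timescale defaults below are an assumption, not taken from any of the repositories shown); it returns a float tensor of shape [length, hidden_size] that is broadcast-added to the embeddings as above.

import math

import tensorflow as tf


def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1.0e4):
  """Sinusoidal position encoding of shape [length, hidden_size] (sketch)."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  # Geometric progression of wavelengths between min_timescale and max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (float(num_timescales) - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  # Outer product: one row per position, one column per frequency.
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # First half of the channels is sin, second half is cos.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)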
Example #2
    def build_embed(self, inputs, encoder=True, reuse=False):
        with tf.variable_scope(
                "Embeddings",
                reuse=reuse,
                initializer=tf.contrib.layers.xavier_initializer()):
            # Word Embedding
            self.shared_weights = tf.get_variable(
                'shared_weights', [self.vocab, self.hidden_dim],
                dtype=tf.float32)

            if encoder:
                max_seq_length = self.max_enc_len
            else:
                max_seq_length = self.max_dec_len

            # Positional Encoding
            with tf.variable_scope("positional-encoding"):
                positional_encoded = model_utils.get_position_encoding(
                    max_seq_length, self.hidden_dim)
            batch_size = tf.shape(inputs)[0]
            mask = tf.to_float(tf.not_equal(inputs, 0))
            # Look up word embeddings, zero out padded positions, and scale.
            word_emb = tf.nn.embedding_lookup(
                self.shared_weights, inputs)  # shape: [batch_size, length, dim]
            word_emb *= tf.expand_dims(mask, -1)  # zero out padded positions
            word_emb *= self.hidden_dim**0.5  # scale by sqrt of the hidden size
            position_inputs = tf.tile(tf.range(0, max_seq_length),
                                      [batch_size])
            position_inputs = tf.reshape(position_inputs,
                                         [batch_size, max_seq_length])
            position_emb = tf.nn.embedding_lookup(positional_encoded,
                                                  position_inputs)
            encoded_inputs = tf.add(word_emb, position_emb)
            return tf.nn.dropout(encoded_inputs, 1.0 - self.dropout)
Example #3
    def Embedding(self, x):
        """Embed inputs and add positional encoding.

        Args:
          x: int tensor with shape [batch_size, length].
        Returns:
          float tensor with shape [batch_size, length, hidden_size].
        """
        hparams = self.hparams
        if hparams['embedding_model'] == 'transformer':

            self.embedding_layer = embedding_layer.EmbeddingSharedWeights(
                hparams["vocab_size"], hparams["hidden_size"])

            embedded_inputs = self.embedding_layer(x)
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, hparams["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if self.hparams['train']:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    rate=self.hparams["layer_postprocess_dropout"])

            self.inputs_padding = model_utils.get_padding(x)
            self.attention_bias = model_utils.get_padding_bias(x)
            return encoder_inputs
Example #4
    def _get_symbols_to_logits_fn(self, max_decode_length, training):
        """Returns a decoding function that calculates logits of the next tokens."""
        timing_signal = model_utils.get_position_encoding(max_decode_length + 1, self.params['hidden_size'])
        timing_signal = tf.cast(timing_signal, self.params['dtype'])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length, dtype=self.params['dtype'])

        def symbols_to_logits(ids, i, cache):
            """Generate logits for next potential IDs.
            ids: [batch_size * beam_size, i + 1]
            i: Loop index
            return: [batch_size * beam_size, vocab_size]
            """
            decoder_input = ids[:, -1:]
            decoder_input = self.target_embedding_layer(decoder_input)
            decoder_input += timing_signal[i: i+1]
            self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get('encoder_outputs'),
                self_attention_bias,
                cache.get('encoder_decoder_attention_bias'),
                training=training,
                cache=cache)
            logits = self.target_embedding_layer(decoder_outputs, mode='linear')
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits
Example #5
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)  # triangular matrix of shape (1, 1, length, length)

        def symbols_to_logits_fn(ids, i, cache):
            # Set decoder input to the last generated IDs
            if i == 0:
                decoder_input = tf.zeros(
                    [ids.shape[0], 1, self.params["hidden_size"]])
            else:
                decoder_input = ids[:, -1:]  # (batch, 1)
                decoder_input = self.embedding_softmax_layer_decoder(
                    decoder_input)  # (batch, 1, 256)

            decoder_input += timing_signal[i:i + 1]

            # At decode step i this bias slice is an all-zero vector over the first i + 1
            # positions, so adding it to the attention logits leaves them unchanged.
            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]

            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"), self_attention_bias,
                cache.get("encoder_decoder_attention_bias"), cache)
            logits = self.embedding_softmax_layer_decoder.linear(
                decoder_outputs)
            return logits, cache

        return symbols_to_logits_fn
Example #6
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.
        
        流程: Embedding -> 位置编码 -> dropout -> encoder_stack

        inputs: 原始的输入句子, shape=[batch_size, input_length].
        attention_bias: padding的位置标记为-1e9,其余位置标记为0. shape=[batch_size, 1, 1, input_length]
        返回encoder提取的特征: shape=[batch_size, input_length, hidden_size]
        """
        with tf.name_scope("encode"):
            # shape=(batch_size, length, embedding_dim)
            # During embedding, padded positions produce all-zero vectors.
            embedded_inputs = self.embedding_layer_encoder(inputs)  # embedding
            length = tf.shape(embedded_inputs)[1]

            # Mark padded positions with 1 and all other positions with 0. shape=(batch_size, length)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                # Positional encoding, shape=(length, hidden_size)
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])

                # Added together; broadcasts to shape=(batch_size, length, hidden_size)
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    rate=self.params["layer_postprocess_dropout"])

            # Final step: run the encoder stack.
            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #7
def train():
    #config
    batch_size = 4
    lr = 0.0005
    model_dir = 'model2/'
    logfile = 'second.log'
    fop = open(logfile, 'w')
    #prepare data

    dataline = open('data/train.txt').readlines()
    datalength = len(dataline)
    traindata = dataline[int(datalength / 5):]
    print(len(traindata))
    vecmodel = word2vec.sentence2vec('sgns.weibo.bigram-char')

    a = dataloader(traindata, vecmodel, batch_size)
    a.start()

    #build model
    inputdata = tf.placeholder(tf.float32, [batch_size, None, 300])
    inputpadding = tf.placeholder(tf.float32, [batch_size, None])
    pos = tf.placeholder(tf.float32, [None, 32])
    inputlabel = tf.placeholder(tf.int32, [batch_size])
    classifier = model(True, batch_size)
    outlogit = classifier(inputdata, inputpadding, pos)
    loss = tf.losses.softmax_cross_entropy(tf.one_hot(inputlabel, 2), outlogit)
    print(1)
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(max_to_keep=0)

    print('build finished')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    model_file = tf.train.latest_checkpoint(model_dir)
    saver.restore(sess, model_file)

    #sess.run(tf.global_variables_initializer())
    #sess.run(tf.local_variables_initializer())

    #train step
    for step in tqdm(range(26000, 400000)):
        data, label, padding = a.getdata()
        #data=np.zeros((3,20,300))
        #padding=np.ones((3,20))
        length = data.shape[1]
        trainloss, _ = sess.run(
            [loss, train_op],
            feed_dict={
                inputdata: data,
                inputpadding: padding,
                pos: model_utils.get_position_encoding(length, 32),
                inputlabel: label
            })
        if step % 100 == 0:
            print('loss:' + str(trainloss))
            fop.write('loss:' + str(trainloss) + '\n')
        if step % 1000 == 0:
            saver.save(sess, model_dir + '/transform.ckpt', global_step=step)
Example #8
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.
    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params.hidden_size)
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, 1 - self.params.layer_postprocess_dropout)

                # with tf.variable_scope(
                #         self.scope, initializer=tf.truncated_normal_initializer(stddev=0.01), reuse=tf.AUTO_REUSE):
                #     self.embeddings = tf.layers.dense(
                #         self.out1, self.params.output_dim, bias_initializer=tf.constant_initializer(0.1), name="dense")

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #9
    def decode(self, targets, encoder_outputs, attention_bias):
        """Generate logits for each value in the target sequence.
    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
        with tf.name_scope("decode"):
            # Prepare inputs to decoder layers by shifting targets, adding positional
            # encoding and applying dropout.
            decoder_inputs = self.embedding_softmax_layer(targets)
            with tf.name_scope("shift_targets"):
                # Shift targets to the right, and remove the last element
                decoder_inputs = tf.pad(decoder_inputs,
                                        [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                decoder_inputs += model_utils.get_position_encoding(
                    length, self.params.hidden_size)
            if self.train:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs, 1 - self.params.layer_postprocess_dropout)

            # Run values
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                length)
            outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                         decoder_self_attention_bias,
                                         attention_bias)
            logits = self.embedding_softmax_layer.linear(outputs)
            return logits
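The shift_targets block above is the usual teacher-forcing shift: a zero vector is prepended along the time axis and the final step is dropped, so position t of the decoder input carries the embedding of target t - 1. A toy illustration with made-up values (not the author's code):

import tensorflow as tf

# Hypothetical decoder inputs: [batch_size=1, target_length=3, hidden_size=2].
decoder_inputs = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])

# Prepend zeros along the time axis, then drop the final step.
shifted = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
# shifted is [[[0., 0.], [1., 1.], [2., 2.]]]: each position now sees the previous target.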
Example #10
    def encode(self, inputs, attention_bias, input_types=None):
        """Generate continuous representation for inputs.
        Args:
          inputs: int tensor with shape [batch_size, input_length].
          attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
        Returns:
          float tensor with shape [batch_size, input_length, hidden_size]
        """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding
                if input_types is not None:
                    input_types = model_utils.get_input_types(
                        input_types,
                        self.params["hidden_size"],
                        num_types=self.params["num_types"])
                    encoder_inputs = encoder_inputs + input_types
            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #11
    def decode(self, targets, encoder_outputs, attention_bias, training):
        """Generate logits for each value in the target sequence
        targets: [batch_size, target_length]
        encoder_outputs: [batch_size, input_length, hidden_size]
        attention_bias: [batch_size, 1, 1, input_length]
        return: [batch_size, target_length, vocab_size]
        """
        with tf.name_scope('decode'):
            decoder_inputs = self.target_embedding_layer(targets)
            decoder_inputs = tf.cast(decoder_inputs, self.params['dtype'])
            attention_bias = tf.cast(attention_bias, self.params['dtype'])
            with tf.name_scope('shift_targets'):
                decoder_inputs = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope('add_pos_encoding'):
                length = tf.shape(decoder_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params['hidden_size'])
                pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
                decoder_inputs += pos_encoding
            if training:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs, rate=self.params['layer_postprocess_dropout'])

            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                length, dtype=self.params['dtype'])
            outputs = self.decoder_stack(decoder_inputs,
                                         encoder_outputs,
                                         decoder_self_attention_bias,
                                         attention_bias,
                                         training=training)
            logits = self.target_embedding_layer(outputs, mode='linear')
            logits = tf.cast(logits, tf.float32)
            return logits
Example #12
  def _get_symbols_to_logits_fn(self, max_decode_length, training):
    """Returns a decoding function that calculates logits of the next tokens."""

    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])
    timing_signal = tf.cast(timing_signal, self.params["dtype"])
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length, dtype=self.params["dtype"])

    # TODO(b/139770046): Refactor code with better naming of i.
    def symbols_to_logits_fn(ids, i, cache):
      """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences. int tensor with shape [batch_size *
          beam_size, i + 1].
        i: Loop index.
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
      # Set decoder input to the last generated IDs
      decoder_input = ids[:, -1:]

      # Preprocess decoder input by getting embeddings and adding timing signal.
      decoder_input = self.embedding_softmax_layer(decoder_input)

      if self.params["padded_decode"]:
        timing_signal_shape = timing_signal.shape.as_list()
        decoder_input += tf.slice(timing_signal, [i, 0],
                                  [1, timing_signal_shape[1]])

        bias_shape = decoder_self_attention_bias.shape.as_list()
        self_attention_bias = tf.slice(
            decoder_self_attention_bias, [0, 0, i, 0],
            [bias_shape[0], bias_shape[1], 1, bias_shape[3]])
      else:
        decoder_input += timing_signal[i:i + 1]

        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]

      decoder_outputs = self.decoder_stack(
          decoder_input,
          cache.get("encoder_outputs"),
          self_attention_bias,
          cache.get("encoder_decoder_attention_bias"),
          training=training,
          cache=cache,
          decode_loop_step=i if self.params["padded_decode"] else None)
      logits = self.embedding_softmax_layer(decoder_outputs, mode="linear")
      logits = tf.squeeze(logits, axis=[1])
      return logits, cache

    return symbols_to_logits_fn
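Several of the decoding functions above slice decoder_self_attention_bias[:, :, i:i + 1, :i + 1] at loop step i. Below is a minimal sketch of the helper they rely on, assuming the standard causal mask described in the comments of Examples #5 and #15 (0 on and below the diagonal, a large negative value above it), returned with shape [1, 1, length, length]; the exact implementation in model_utils may differ.

import tensorflow as tf


def get_decoder_self_attention_bias(length):
  """Causal self-attention bias of shape [1, 1, length, length] (sketch)."""
  # 1s on and below the diagonal: the positions each decode step may attend to.
  valid_locs = tf.linalg.band_part(tf.ones([length, length]), -1, 0)
  valid_locs = tf.reshape(valid_locs, [1, 1, length, length])
  # Attendable positions get bias 0; future positions get -1e9 before the softmax.
  return -1e9 * (1.0 - valid_locs)

With this layout, the step-i slice taken above covers only positions 0..i and is therefore all zeros, which matches the comment in Example #5 that adding it leaves the attention logits unchanged.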
Example #13
 def inferenceAPI(self, data):
     vec = np.array([self.vecmodel(data)])
     padnum = np.ones((1, vec.shape[0]))
     logit = self.sess.run(self.outlabel,
                           feed_dict={
                               self.inputdata:
                               vec,
                               self.inputpadding:
                               padnum,
                               self.pos:
                               model_utils.get_position_encoding(
                                   len(vec), 32)
                           })[0]
     return logit  # just return result
Example #14
  def decode(self, targets, encoder_outputs, attention_bias, training):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence. int tensor with shape
        [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence. float tensor
        with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
      training: boolean, whether in training mode or not.

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
      # Prepare inputs to decoder layers by shifting targets, adding positional
      # encoding and applying dropout.
      decoder_inputs = self.embedding_softmax_layer(targets)
      decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
      attention_bias = tf.cast(attention_bias, self.params["dtype"])
      with tf.name_scope("shift_targets"):
        # Shift targets to the right, and remove the last element
        decoder_inputs = tf.pad(decoder_inputs,
                                [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(decoder_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params["hidden_size"])
        pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
        decoder_inputs += pos_encoding
      if training:
        decoder_inputs = tf.nn.dropout(
            decoder_inputs, rate=self.params["layer_postprocess_dropout"])

      # Run values
      decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
          length, dtype=self.params["dtype"])
      outputs = self.decoder_stack(
          decoder_inputs,
          encoder_outputs,
          decoder_self_attention_bias,
          attention_bias,
          training=training)
      logits = self.embedding_softmax_layer(outputs, mode="linear")
      logits = tf.cast(logits, tf.float32)
      return logits
Example #15
    def decode(self, targets, encoder_outputs, attention_bias):
        """Generate logits for each value in the target sequence.

        targets: 目标语言. shape=[batch_size, target_length].用于计算损失
        encoder_outputs: encoder的输出,在decoder中要对其进行attention操作.[batch_size, input_length, hidden_size]
        attention_bias: padding的位置标记为-1e9,其余位置标记为0. shape=[batch_size, 1, 1, input_length]

        返回 shape = [batch_size, target_length, vocab_size]. 最后一维与词表长度相等
        """
        with tf.name_scope("decode"):
            # After embedding: shape=(batch_size, length, embedding_dim)
            decoder_inputs = self.embedding_softmax_layer_decoder(targets)
            # print("decoder_inputs.shape =", decoder_inputs)

            # Pad a zero vector at the first position along the length dimension; the shape is unchanged.
            with tf.name_scope("shift_targets"):
                decoder_inputs = tf.pad(decoder_inputs,
                                        [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            # print("&&", decoder_inputs[0, 0:2, :10])

            # Add the positional encoding
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                decoder_inputs += model_utils.get_position_encoding(
                    length, self.params["hidden_size"])

            if self.train:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs,
                    rate=self.params["layer_postprocess_dropout"])

            # shape=(1, 1, length, length). The main diagonal and lower triangle are 0; all other entries are -1e9 (effectively -inf).
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                length)

            # Decode. Two biases are passed in here:
            # decoder_self_attention_bias is a triangular matrix encoding the self-attention dependency structure.
            # attention_bias marks padded positions of the encoder's source sentence with -1e9. Input shape=(batch, length_decoder, dim).
            outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                         decoder_self_attention_bias,
                                         attention_bias)

            # The output layer shares its weights with the embedding layer. shape=(batch, length_decoder, vocab_size)
            logits = self.embedding_softmax_layer_decoder.linear(outputs)
            return logits
Example #16
 def __call__(self, data):
     vec = np.array([self.vecmodel(data)])
     padnum = np.ones((1, vec.shape[0]))
     logit = self.sess.run(self.outlabel,
                           feed_dict={
                               self.inputdata:
                               vec,
                               self.inputpadding:
                               padnum,
                               self.pos:
                               model_utils.get_position_encoding(
                                   len(vec), 32)
                           })[0]
     print(logit)
     if logit == 0:
         print('Not spam!')
     elif logit == 1:
         print('Is spam')
Example #17
def test():
    model_dir = 'model/'
    logfile = 'test2.log'
    fop = open(logfile, 'w')
    #prepare data

    dataline = open('data/train.txt').readlines()
    datalength = len(dataline)
    testdata = dataline[:int(datalength / 5)]
    vecmodel = word2vec.sentence2vec('sgns.weibo.bigram-char')

    inputdata = tf.placeholder(tf.float32, [1, None, 300])
    inputpadding = tf.placeholder(tf.float32, [1, None])
    pos = tf.placeholder(tf.float32, [None, 32])
    inputlabel = tf.placeholder(tf.int32, [1])
    classifier = model(True, 1)
    outlabel = tf.argmax(classifier(inputdata, inputpadding, pos), 1)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    saver = tf.train.Saver(max_to_keep=4)
    model_file = tf.train.latest_checkpoint(model_dir)
    saver.restore(sess, model_file)
    sum = 0
    accu = 0
    for i in range(len(testdata)):
        vec = np.array([vecmodel(''.join(testdata[i].split()[1:]))])
        padnum = np.ones((1, vec.shape[0]))
        logit = sess.run(outlabel,
                         feed_dict={
                             inputdata: vec,
                             inputpadding: padnum,
                             pos:
                             model_utils.get_position_encoding(len(vec), 32)
                         })[0]
        label = testdata[i].split()[0]
        print(logit, label, int(logit) == int(label))
        fop.write(str(logit) + ' ' + str(label) + '\n')
        sum += 1
        if int(logit) == int(label):
            accu += 1
    print(accu / sum)
Example #18
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.
            Args:
              ids: Current decoded sequences.
                int tensor with shape [batch_size * beam_size, i + 1]
              i: Loop index
              cache: dictionary of values storing the encoder output, encoder-decoder
                attention bias, and previous decoder attention values.
            Returns:
              Tuple of
                (logits with shape [batch_size * beam_size, vocab_size],
                 updated cache values)
            """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.embedding_softmax_layer(decoder_input)
            decoder_input += timing_signal[i:i + 1]

            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]
            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"), self_attention_bias,
                cache.get("encoder_decoder_attention_bias"), cache)
            logits = self.embedding_softmax_layer.linear(decoder_outputs)
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
Example #19
    def encode(self, inputs, attention_bias, training):
        """Generate continuous representation for inputs.
        inputs: [batch_size, input_length]
        attention_bias: [batch_size, 1, 1, input_length]
        return: [batch_size, input_length, hidden_size]
        """
        with tf.name_scope('encode'):
            embedded_inputs = self.input_embedding_layer(inputs)
            embedded_inputs = tf.cast(embedded_inputs, self.params['dtype'])
            attention_bias = tf.cast(attention_bias, self.params['dtype'])

            with tf.name_scope('add_pos_encoding'):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params['hidden_size'])
                pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
                encoder_inputs = embedded_inputs + pos_encoding

            if training:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, rate=self.params['layer_postprocess_dropout'])

            return self.encoder_stack(encoder_inputs, attention_bias, training=training)
Example #20
    tf_encoder_outputs = tf_transformer.encode(tf_input_x_raw,
                                               tf_attention_bias)

    tf_pred = tf_transformer(tf_input_x_raw)["outputs"]
    tf_pred_res = tf_sess.run(tf_pred,
                              feed_dict={tf_input_x_raw: my_input_x_raw})
    print("tf prediction:")
    with printoptions(threshold=2000):
        print(tf_pred_res)

    k_transformer = KTransformer(params)
    k_input_x_raw = Input(shape=(_seq_len_x, ))
    k_input_y_raw = Input(shape=(_seq_len_y, ))

    k_embedded_inputs = k_transformer.embedding_softmax_layer(k_input_x_raw)
    k_pos_encoding = k_model_utils.get_position_encoding(
        seq_len_x, k_transformer.params.hidden_size)
    k_embedding_inputs = k_embedded_inputs + k_pos_encoding

    k_attention_bias = k_model_utils.get_padding_bias(k_input_x_raw)
    k_encoder_outputs = k_transformer.encode(k_input_x_raw,
                                             k_attention_bias,
                                             train=False)

    k_output = k_transformer([k_input_x_raw, k_input_y_raw], train=False)

    tf_sess.run(tf.global_variables_initializer())
    tf_sess.run(get_assign_list(k_transformer))

    k_run = K.function([k_input_x_raw, k_input_y_raw], [k_output])
    k_res = k_run([my_input_x_raw, my_input_y_raw])[0]
    print("k output:")