Example #1
    def calculate_loss(self, outputs):
        """
        Calculate the loss.
        :param outputs: if the model is in training mode, the raw decoder outputs;
                        otherwise the decoder outputs already projected through matrix w with bias v added
        :return: loss
        """
        with tf.variable_scope('loss'), tf.name_scope('loss'):

            def sampled_loss_func(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(
                    weights=self.w_t,
                    biases=self.v,
                    labels=labels,
                    inputs=inputs,
                    num_sampled=self.model_config.num_softmax_samples,
                    num_classes=self.words_dict_len)

            if self.model_config.num_softmax_samples != 0 and self.model_config.model == 'train':
                loss = seq2seq_lib.sampled_sequence_loss(
                    outputs, self.targets, self.loss_weights,
                    sampled_loss_func)
            else:
                loss = tf.contrib.legacy_seq2seq.sequence_loss(
                    outputs, self.targets, self.loss_weights)

        return loss
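The switch above (sampled softmax for training, full softmax otherwise) can be tried in isolation. Below is a minimal sketch with toy, made-up shapes, assuming the TF 1.x API; `w_t`, `v` and all dimensions here are illustrative, not the model's actual values.

import tensorflow as tf

vocab_size, hidden_size, batch_size, num_samples = 10000, 256, 4, 64
w = tf.get_variable('w', [hidden_size, vocab_size])        # output projection
w_t = tf.transpose(w)                                       # sampled loss expects [num_classes, dim]
v = tf.get_variable('v', [vocab_size])                      # output bias
inputs = tf.random_normal([batch_size, hidden_size])        # one decoder time step
labels = tf.reshape(tf.constant([1, 2, 3, 4]), [-1, 1])     # target ids, shape [batch, 1]

# Training: approximate the softmax over the full vocabulary with a sampled subset.
sampled_loss = tf.nn.sampled_softmax_loss(
    weights=w_t, biases=v, labels=labels, inputs=inputs,
    num_sampled=num_samples, num_classes=vocab_size)

# Eval/decoding: the full softmax over logits = inputs @ w + v.
logits = tf.nn.xw_plus_b(inputs, w, v)
full_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=tf.reshape(labels, [-1]), logits=logits)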
Example #2
    def _add_seq2seq_old(self, sess):
        hps = self._hps
        vsize = self._vocab.NumIds()
        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unpack(tf.transpose(self._articles))
            decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
            targets = tf.unpack(tf.transpose(self._targets))
            loss_weights = tf.unpack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens
            with tf.variable_scope('Embedding'), tf.device('/gpu:0'):
                vsize = self._vocab.NumIds()
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    trainable=False,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                sess.run(tf.initialize_all_variables())
                if FLAGS.word2vec:
                    # initial matrix with random uniform
                    initW = np.random.uniform(-0.25, 0.25,
                                              (vsize, hps.emb_dim))
                    # load any vectors from the word2vec
                    print("Load word2vec file {}\n".format(FLAGS.word2vec))
                    with open(FLAGS.word2vec, "rb") as f:
                        header = f.readline()
                        vocab_size, layer1_size = map(int, header.split())
                        binary_len = np.dtype('float32').itemsize * layer1_size
                        for line in xrange(vocab_size):
                            word = []
                            while True:
                                ch = f.read(1)
                                if ch == ' ':
                                    word = ''.join(word)
                                    break
                                if ch != '\n':
                                    word.append(ch)
                            idx = data.GetWordIds(word, self._vocab)
                            if idx is not None:
                                initW[idx] = np.fromstring(f.read(binary_len),
                                                           dtype='float32')
                            else:
                                f.read(binary_len)

                print "to test ... .. . . embedding first loaded:"
                print(sess.run(tf.nn.embedding_lookup(embedding, 2)))
                sess.run(embedding.assign(initW))
                print "to test ... .. .. . function loaded:"
                print(sess.run(tf.nn.embedding_lookup(embedding, 2)))
                #===============================================================================

                # Embedding shared by the input and outputs.
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]

            # Matrix factorization (experimental, commented out):
            ## s, u, v = tf.svd(emb_encoder_inputs, compute_uv=True)
            ## eigenSum = tf.reduce_sum(s)
            ## eigen = 0
            ## threshold = 0
            ## for i in range(len(s)):
            ##     eigen = s[i]
            ##     if (eigen / eigenSum) > threshold:
            ##         break
            # Rebuild the eigenvalue matrix with length i.
            ## new_eigenMatrix = tf.Variable(tf.zeros([i, i]))
            ## for j in range(i):
            ##     new_eigenMatrix[j, j] = s[j]
            # Decrease the embedding dim to [vsize, 64]:
            ## emb_encoder_inputs = tf.batch_matmul(u[:, :j], new_eigenMatrix)
            # new_embedding = u * s
            # Or decrease the word length to [N, 128]:
            # new_embedding = v * s

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    #bidirectional rnn cell
                    cell_fw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_fw,
                        input_keep_prob=hps.input_dropout,
                        output_keep_prob=hps.output_dropout)
                    cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_bw,
                        input_keep_prob=hps.input_dropout,
                        output_keep_prob=hps.output_dropout)
                    (emb_encoder_inputs, fw_state,
                     _) = tf.nn.bidirectional_rnn(cell_fw,
                                                  cell_bw,
                                                  emb_encoder_inputs,
                                                  dtype=tf.float32,
                                                  sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs
            print "fw_state:", fw_state
            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)
                cell = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    cell,
                    input_keep_prob=hps.input_dropout,
                    output_keep_prob=hps.output_dropout)
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                self._enc_top_states = tf.concat(1, encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

                print "====emb_decoder_inputs:", emb_decoder_inputs
                print "====self._dec_in_state:", self._dec_in_state
                print "====self._enc_top_states:", self._enc_top_states
                print "====decoder_outputs:", decoder_outputs
                print "====self._dec_out_state:", self._dec_out_state
            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/gpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(1, [
                        tf.reshape(x, [hps.batch_size, 1])
                        for x in best_outputs
                    ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/gpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        tf.logging.info('num_sampled%s',
                                        hps.num_softmax_samples)
                        return tf.nn.sampled_softmax_loss(
                            w_t, v, inputs, labels, hps.num_softmax_samples,
                            vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.nn.seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
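The byte-by-byte word2vec reader above is fragile (manual header parsing and offsets). A hedged alternative sketch of the same initialization, assuming the gensim package and a standard word2vec binary file; `vocab.WordToId` is an assumed lookup that returns a negative id for out-of-vocabulary words.

import numpy as np
from gensim.models import KeyedVectors

def build_initial_embedding(word2vec_path, vocab, vsize, emb_dim):
    """Return a [vsize, emb_dim] matrix, random where no pretrained vector exists."""
    init_w = np.random.uniform(-0.25, 0.25, (vsize, emb_dim)).astype(np.float32)
    vectors = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)
    for word in vectors.index2word:          # index_to_key in gensim >= 4
        idx = vocab.WordToId(word)           # assumed: negative id means OOV
        if 0 <= idx < vsize:
            init_w[idx] = vectors[word]
    return init_w

# The result would then replace initW before sess.run(embedding.assign(initW)).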
Example #3
  def _add_seq2seq(self):
    hps = self._hps
    vsize = self._vocab.NumIds()

    with tf.variable_scope('seq2seq'):
      encoder_inputs = tf.unpack(tf.transpose(self._articles))
      decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
      targets = tf.unpack(tf.transpose(self._targets))
      loss_weights = tf.unpack(tf.transpose(self._loss_weights))
      article_lens = self._article_lens

      # Embedding shared by the input and outputs.
      with tf.variable_scope('embedding'), tf.device('/cpu:0'):
        embedding = tf.get_variable(
            'embedding', [vsize, hps.emb_dim], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
        emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in encoder_inputs]
        emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in decoder_inputs]

      for layer_i in xrange(hps.enc_layers):
        with tf.variable_scope('encoder%d'%layer_i), tf.device(
            self._next_device()):
          cell_fw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123),
              state_is_tuple=False)
          cell_bw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
              state_is_tuple=False)
          (emb_encoder_inputs, fw_state, _) = tf.nn.bidirectional_rnn(
              cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32,
              sequence_length=article_lens)
      encoder_outputs = emb_encoder_inputs

      with tf.variable_scope('output_projection'):
        w = tf.get_variable(
            'w', [hps.num_hidden, vsize], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
        w_t = tf.transpose(w)
        v = tf.get_variable(
            'v', [vsize], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))

      with tf.variable_scope('decoder'), tf.device(self._next_device()):
        # When decoding, use model output from the previous step
        # for the next step.
        loop_function = None
        if hps.mode == 'decode':
          loop_function = _extract_argmax_and_embed(
              embedding, (w, v), update_embedding=False)

        cell = tf.nn.rnn_cell.LSTMCell(
            hps.num_hidden,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
            state_is_tuple=False)

        encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, 2*hps.num_hidden])
                           for x in encoder_outputs]
        self._enc_top_states = tf.concat(1, encoder_outputs)
        self._dec_in_state = fw_state
        # During decoding, follow up _dec_in_state are fed from beam_search.
        # dec_out_state are stored by beam_search for next step feeding.
        initial_state_attention = (hps.mode == 'decode')
        decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
            emb_decoder_inputs, self._dec_in_state, self._enc_top_states,
            cell, num_heads=1, loop_function=loop_function,
            initial_state_attention=initial_state_attention)

      with tf.variable_scope('output'), tf.device(self._next_device()):
        model_outputs = []
        for i in xrange(len(decoder_outputs)):
          if i > 0:
            tf.get_variable_scope().reuse_variables()
          model_outputs.append(
              tf.nn.xw_plus_b(decoder_outputs[i], w, v))

      if hps.mode == 'decode':
        with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
          best_outputs = [tf.argmax(x, 1) for x in model_outputs]
          tf.logging.info('best_outputs%s', best_outputs[0].get_shape())
          self._outputs = tf.concat(
              1, [tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs])

          self._topk_log_probs, self._topk_ids = tf.nn.top_k(
              tf.log(tf.nn.softmax(model_outputs[-1])), hps.batch_size*2)

      with tf.variable_scope('loss'), tf.device(self._next_device()):
        def sampled_loss_func(inputs, labels):
          with tf.device('/cpu:0'):  # Try gpu.
            labels = tf.reshape(labels, [-1, 1])
            return tf.nn.sampled_softmax_loss(w_t, v, inputs, labels,
                                              hps.num_softmax_samples, vsize)

        if hps.num_softmax_samples != 0 and hps.mode == 'train':
          self._loss = seq2seq_lib.sampled_sequence_loss(
              decoder_outputs, targets, loss_weights, sampled_loss_func)
        else:
          self._loss = tf.nn.seq2seq.sequence_loss(
              model_outputs, targets, loss_weights)
        tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
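`_extract_argmax_and_embed` is referenced but not shown in these examples. A hedged sketch of what such a loop function usually does (greedy argmax over the projected logits of the previous step, then an embedding lookup, with the gradient optionally stopped), assuming the TF 1.x API:

import tensorflow as tf

def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Build a loop_function that feeds the previous prediction back to the decoder."""
    def loop_function(prev, _):
        if output_projection is not None:
            w, v = output_projection
            prev = tf.nn.xw_plus_b(prev, w, v)            # project hidden state to vocab logits
        prev_symbol = tf.argmax(prev, 1)                  # greedy choice of the previous word
        emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = tf.stop_gradient(emb_prev)         # do not backprop into the embedding
        return emb_prev
    return loop_function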
Example #4
    def _add_seq2seq(self):
        vocab_size = self._vocab.num_ids()

        hyper_params = self._hyper_params
        embedding_size = hyper_params.emb_dim
        enc_layers = hyper_params.enc_layers

        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))

            article_lens = self._article_lens

            # TODO: initialize using pre-trained embedding
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vocab_size, embedding_size], dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=1e-4)
                )

                emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x) for x in encoder_inputs]
                emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x) for x in decoder_inputs]

            for layer_i in xrange(enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(self._next_device()):
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hyper_params.num_hidden,
                        initializer=tf.contrib.layers.xavier_initializer(),
                        state_is_tuple=False
                    )

                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hyper_params.num_hidden,
                        initializer=tf.contrib.layers.xavier_initializer(),
                        state_is_tuple=False
                    )

                    (emb_encoder_inputs, fw_state, _) = tf.contrib.rnn.static_bidirectional_rnn(
                        cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32, sequence_length=article_lens
                    )

            encoder_outputs = emb_encoder_inputs

            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hyper_params.num_hidden, vocab_size], dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4)
                )

                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vocab_size], dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4)
                )

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # when decoding, use model output from the previous step for the next step
                loop_function = None
                if hyper_params.mode == 'decode':
                    loop_function = self._extract_argmax_and_embed(embedding, (w, v), update_embedding=False)

                cell = tf.contrib.rnn.LSTMCell(
                    hyper_params.num_hidden,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    state_is_tuple=False
                )

                encoder_outputs = [
                    tf.reshape(x, [hyper_params.batch_size, 1, 2 * hyper_params.num_hidden]) for x in encoder_outputs
                ]

                self._enc_top_states = tf.concat(axis=1, values=encoder_outputs)
                self._dec_in_state = fw_state

                # During decoding, follow up _dec_in_state are fed from beam_search, dec_out_state are stored by
                # beam_search for next feeding.
                initial_state_attention = (hyper_params.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs, self._dec_in_state, self._enc_top_states,
                    cell, num_heads=1, loop_function=loop_function,
                    initial_state_attention=initial_state_attention
                )

            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()

                    model_outputs.append(tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hyper_params.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]

                    tf.logging.info('best_outputs%s', best_outputs[0].get_shape())

                    self._outputs = tf.concat(
                        axis=1, values=[tf.reshape(x, [hyper_params.batch_size, 1]) for x in best_outputs]
                    )

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])), hyper_params.batch_size * 2
                    )

            with tf.variable_scope('loss'), tf.device(self._next_device()):
                def sample_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # TODO: Try gpu
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t, biases=v, labels=labels, inputs=inputs,
                            num_sampled=hyper_params.num_softmax_samples, num_classes=vocab_size
                        )

                if hyper_params.num_softmax_samples != 0 and hyper_params.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights, sample_loss_func
                    )
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights
                    )

                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
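Every variant starts by turning the [batch_size, timesteps] id matrices into per-time-step lists with a transpose followed by unstack. The same step in isolation, as a small sketch with toy shapes (TF 1.x):

import tensorflow as tf

batch_size, enc_timesteps = 4, 6
articles = tf.placeholder(tf.int32, [batch_size, enc_timesteps])

# [batch, time] -> [time, batch] -> a list of `enc_timesteps` tensors, each of shape [batch].
encoder_inputs = tf.unstack(tf.transpose(articles))
assert len(encoder_inputs) == enc_timesteps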
Example #5
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()
        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unpack(tf.transpose(self._articles))
            decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
            targets = tf.unpack(tf.transpose(self._targets))
            loss_weights = tf.unpack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            emb_encoder_inputs = None
            emb_decoder_inputs = None
            #with tf.variable_scope('Embedding'), tf.device('/gpu:0'):
            # Embedding shared by the input and outputs.
            if FLAGS.word2vec is None:
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    trainable=False,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]
            else:
                # Embedding shared by the input and outputs.
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(self._embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(self._embedding, x)
                    for x in decoder_inputs
                ]

            if FLAGS.svd_dim:
                #svd factorization
                svd_dim = FLAGS.svd_dim
                emb_encoder_matrix = tf.pack(
                    [tf.transpose(x) for x in emb_encoder_inputs])
                emb_encoder_matrix = tf.transpose(emb_encoder_matrix)
                s, u, v = tf.svd(emb_encoder_matrix, compute_uv=True)
                b = [tf.gather(x, range(svd_dim)) for x in tf.unpack(s)]
                b = tf.pack(b)
                #100:eigen values  300:embedding
                c = [
                    tf.slice(tf.transpose(x), [0, 0], [svd_dim, hps.emb_dim])
                    for x in tf.unpack(u)
                ]
                c = tf.pack(c)
                d = [tf.diag(x) for x in tf.unpack(b)]
                d = tf.pack(d)
                e = tf.batch_matmul(d, c)
                emb_decoder_inputs = tf.unpack(tf.transpose(e, perm=[1, 0, 2]))

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    #bidirectional rnn cell
                    cell_fw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_fw,
                        input_keep_prob=hps.input_dropout,
                        output_keep_prob=hps.output_dropout)
                    cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_bw,
                        input_keep_prob=hps.input_dropout,
                        output_keep_prob=hps.output_dropout)
                    (emb_encoder_inputs, fw_state,
                     _) = tf.nn.bidirectional_rnn(cell_fw,
                                                  cell_bw,
                                                  emb_encoder_inputs,
                                                  dtype=tf.float32,
                                                  sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs
            print "fw_state:", fw_state
            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    if FLAGS.word2vec is None:
                        loop_function = _extract_argmax_and_embed(
                            embedding, (w, v), update_embedding=False)
                    else:
                        loop_function = _extract_argmax_and_embed(
                            self._embedding, (w, v), update_embedding=False)
                cell = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    cell,
                    input_keep_prob=hps.input_dropout,
                    output_keep_prob=hps.output_dropout)
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                self._enc_top_states = tf.concat(1, encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

                print "====emb_decoder_inputs:", emb_decoder_inputs
                print "====self._dec_in_state:", self._dec_in_state
                print "====self._enc_top_states:", self._enc_top_states
                print "====decoder_outputs:", decoder_outputs
                print "====self._dec_out_state:", self._dec_out_state
            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/gpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(1, [
                        tf.reshape(x, [hps.batch_size, 1])
                        for x in best_outputs
                    ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/gpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        tf.logging.info('num_sampled%s',
                                        hps.num_softmax_samples)
                        return tf.nn.sampled_softmax_loss(
                            w_t, v, inputs, labels, hps.num_softmax_samples,
                            vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.nn.seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
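The FLAGS.svd_dim branch above truncates the embedded encoder inputs with an SVD. A hedged numpy sketch of the general idea on a single [timesteps, emb_dim] matrix (toy shapes; the example's exact slicing and batching are not reproduced):

import numpy as np

timesteps, emb_dim, svd_dim = 120, 128, 32
emb = np.random.randn(timesteps, emb_dim).astype(np.float32)

# Keep only the top svd_dim singular values/vectors.
u, s, vt = np.linalg.svd(emb, full_matrices=False)   # u: [T, k], s: [k], vt: [k, E]
compressed = u[:, :svd_dim] * s[:svd_dim]            # [T, svd_dim] reduced representation
approx = compressed @ vt[:svd_dim, :]                # rank-svd_dim approximation of emb, [T, E]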
Example #6
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            #Xinchun: add encoder for stock price sequence
            encoderPrice_inputs = tf.unstack(tf.transpose(self._anomPrices))
            pricelist_lens = self._pricelist_lens

            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]

                #Xinchun: added price encoder embedding
                embeddingPrice = tf.get_variable(
                    'embeddingPrice', [vsize, 0],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                emb_encoderPrice_inputs = [
                    tf.nn.embedding_lookup(embeddingPrice, x)
                    for x in encoderPrice_inputs
                ]

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    (emb_encoder_inputs, fw_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)
                encoder_outputs = emb_encoder_inputs

        #Xinchun: add another encoder for anomPrice

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoderPrice%d' % layer_i), tf.device(
                        self._next_device()):
                    cellPrice_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cellPrice_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    (emb_encoderPrice_inputs, fwPrice_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cellPrice_fw,
                         cellPrice_bw,
                         emb_encoderPrice_inputs,
                         dtype=tf.float32,
                         sequence_length=pricelist_lens)
                    encoderPrice_outputs = emb_encoderPrice_inputs

            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    print("calling loop function")
                    print("----------------------------")
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)

                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                #Xinchun: added encoderPrice_outputs
                encoderPrice_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoderPrice_outputs
                ]
                """
        Jenkai: modified the shape of self._enc_top_state, shape=(4, 240, 512)
        and self._dec_in_state, shape=(4, 1024)
        need to verify it's correctness
        """
                #Xinchun: modified _enc_top_states and _dec_in_state
                print("111111111111111")
                selfConcatEncoder1 = tf.concat((encoder_outputs), 1)
                selfConcatEncoder2 = tf.concat((encoderPrice_outputs), 1)
                self._enc_top_states = tf.concat(
                    [selfConcatEncoder1, selfConcatEncoder2], 1
                )  #tf.concat(axis = 1, values = tf.concat((encoder_outputs, encoderPrice_outputs), 1))
                self._dec_in_state = tf.add(fw_state, fwPrice_state)
                print("222222222222222")
                # self._enc_top_states = tf.concat(axis=1, values=encoder_outputs)
                # self._dec_in_state = fw_state

                # print(emb_decoder_inputs)
                # print(self._dec_in_state)
                # print(self._enc_top_states)

                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)
                print("3333333333333333")
            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
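The dual-encoder variant above fuses the article and price encoders by concatenating their top states along the time axis and summing their final states. The two fusion ops in isolation, as a toy sketch (illustrative shapes, TF 1.x):

import tensorflow as tf

batch, t_text, t_price, dim = 4, 120, 30, 512
text_top = tf.zeros([batch, t_text, dim])       # stand-in for the article encoder's top states
price_top = tf.zeros([batch, t_price, dim])     # stand-in for the price encoder's top states
text_state = tf.zeros([batch, 2 * dim])
price_state = tf.zeros([batch, 2 * dim])

enc_top_states = tf.concat([text_top, price_top], axis=1)   # [batch, t_text + t_price, dim]
dec_in_state = tf.add(text_state, price_state)              # element-wise sum of final states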
Example #7
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                # Each element of emb_encoder_inputs has shape [batch_size, emb_dim].
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    cell_fw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=True)

                    cell_bw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=True)
                    # After the bidirectional RNN, each element of emb_encoder_inputs has shape [batch_size, 2*num_hidden].
                    (emb_encoder_inputs, fw_state,
                     _) = tf.nn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)

            encoder_outputs = emb_encoder_inputs

            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=True)
                # Expand the dimensions: each 2-D tensor becomes 3-D.
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]

                # _enc_top_states has shape [batch_size, len(encoder_outputs), 2*hps.num_hidden].
                self._enc_top_states = tf.concat(axis=1,
                                                 values=encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                # decoder_outputs is a list whose elements have shape [batch_size x output_size];
                # since output_size is not specified, it defaults to cell.output_size, i.e. hps.num_hidden.
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        # tf.get_variable_scope() returns only the variable_scope, not the name_scope,
                        # so tf.get_variable_scope().reuse_variables() can ignore the name_scope.
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])
                    # Take the last element, model_outputs[-1]: at test time, beam_search fetches the top-k
                    # of the last attention output at every step to drive the search.
                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
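At decode time, each variant exposes the top-k log-probabilities of the last step so the external beam search can expand hypotheses. That single op in isolation, as a small sketch (toy logits, TF 1.x; the examples use k = batch_size * 2):

import tensorflow as tf

batch_size, vsize, k = 4, 10, 8
logits = tf.random_normal([batch_size, vsize])         # stands in for model_outputs[-1]

log_probs = tf.log(tf.nn.softmax(logits))              # per-word log-probabilities
topk_log_probs, topk_ids = tf.nn.top_k(log_probs, k)   # each of shape [batch_size, k]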
Example #8
  def _add_seq2seq(self):
    hps = self._hps
    vsize = self._vocab.NumIds()

    with tf.variable_scope('seq2seq'):
      encoder_inputs = tf.unpack(tf.transpose(self._articles))
      decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
      targets = tf.unpack(tf.transpose(self._targets))
      loss_weights = tf.unpack(tf.transpose(self._loss_weights))
      article_lens = self._article_lens

      # Embedding shared by the input and outputs.
      with tf.variable_scope('embedding'), tf.device('/cpu:0'):
        embedding = tf.get_variable(
            'embedding', [vsize, hps.emb_dim], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
        emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in encoder_inputs]
        emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in decoder_inputs]

      for layer_i in xrange(hps.enc_layers):
        with tf.variable_scope('encoder%d'%layer_i), tf.device(
            self._next_device()):
          cell_fw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
          cell_bw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113))
          (emb_encoder_inputs, fw_state, _) = tf.nn.bidirectional_rnn(
              cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32,
              sequence_length=article_lens)
      encoder_outputs = emb_encoder_inputs

      with tf.variable_scope('output_projection'):
        w = tf.get_variable(
            'w', [hps.num_hidden, vsize], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
        w_t = tf.transpose(w)
        v = tf.get_variable(
            'v', [vsize], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))

      with tf.variable_scope('decoder'), tf.device(self._next_device()):
        # When decoding, use model output from the previous step
        # for the next step.
        loop_function = None
        if hps.mode == 'decode':
          loop_function = _extract_argmax_and_embed(
              embedding, (w, v), update_embedding=False)

        cell = tf.nn.rnn_cell.LSTMCell(
            hps.num_hidden,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113))

        encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, 2*hps.num_hidden])
                           for x in encoder_outputs]
        self._enc_top_states = tf.concat(1, encoder_outputs)
        self._dec_in_state = fw_state
        # During decoding, follow up _dec_in_state are fed from beam_search.
        # dec_out_state are stored by beam_search for next step feeding.
        initial_state_attention = (hps.mode == 'decode')
        decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
            emb_decoder_inputs, self._dec_in_state, self._enc_top_states,
            cell, num_heads=1, loop_function=loop_function,
            initial_state_attention=initial_state_attention)

      with tf.variable_scope('output'), tf.device(self._next_device()):
        model_outputs = []
        for i in xrange(len(decoder_outputs)):
          if i > 0:
            tf.get_variable_scope().reuse_variables()
          model_outputs.append(
              tf.nn.xw_plus_b(decoder_outputs[i], w, v))

      if hps.mode == 'decode':
        with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
          best_outputs = [tf.argmax(x, 1) for x in model_outputs]
          tf.logging.info('best_outputs%s', best_outputs[0].get_shape())
          self._outputs = tf.concat(
              1, [tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs])

          self._topk_log_probs, self._topk_ids = tf.nn.top_k(
              tf.log(tf.nn.softmax(model_outputs[-1])), hps.batch_size*2)

      with tf.variable_scope('loss'), tf.device(self._next_device()):
        def sampled_loss_func(inputs, labels):
          with tf.device('/cpu:0'):  # Try gpu.
            labels = tf.reshape(labels, [-1, 1])
            return tf.nn.sampled_softmax_loss(w_t, v, inputs, labels,
                                              hps.num_softmax_samples, vsize)

        if hps.num_softmax_samples != 0 and hps.mode == 'train':
          self._loss = seq2seq_lib.sampled_sequence_loss(
              decoder_outputs, targets, loss_weights, sampled_loss_func)
        else:
          self._loss = tf.nn.seq2seq.sequence_loss(
              model_outputs, targets, loss_weights)
        tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
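The examples disagree on state_is_tuple: some build LSTMCells with a single concatenated [c; h] state (False), others with an LSTMStateTuple (True). A short hedged sketch of what that flag changes, assuming the TF 1.x API:

import tensorflow as tf

num_hidden = 256

# Concatenated state: one tensor of size 2 * num_hidden per example (c and h stacked).
cell_flat = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=False)
print(cell_flat.state_size)    # 512

# Tuple state: an LSTMStateTuple(c, h), each part of size num_hidden.
cell_tuple = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)
print(cell_tuple.state_size)   # LSTMStateTuple(c=256, h=256)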
Example #9
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            '''
            tf.transpose: transpose.
            tf.stack() concatenates tensors into a matrix; tf.unstack() splits one apart, by rows by default.
            self._articles has shape [hps.batch_size, hps.enc_timesteps].
            After the transpose, each entry of encoder_inputs is one row made of the i-th word of every article in the batch,
            so encoder_inputs becomes a list of 120 tensors of shape [64, 1].
            '''
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            # Expand each word id into its word-embedding dimension.
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                # emb_encoder_inputs becomes a list of 120 tensors of shape [64, 1, 128].
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]
            # Stacked (deep) bidirectional RNN.
            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    # Forward RNN cell.
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,  #256
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    # Backward RNN cell.
                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    #static_bidirectional_rnn builds a bidirectional RNN so each timestep can use both past and future context.
                    #cell_fw is the forward cell and cell_bw the backward cell; they do not share weights, and at every
                    #timestep the two directions are concatenated into a single output.
                    #The outputs overwrite emb_encoder_inputs so they feed the next layer, and fw_state keeps the final
                    #state of the forward RNN.
                    #emb_encoder_inputs enters as a list of 120 tensors of shape [64, 128] (later layers receive [64, 512]);
                    #the new emb_encoder_inputs is a list of 120 tensors of shape [64, 2*256] = [64, 512].
                    #hps.enc_layers such bidirectional layers are stacked (4 in this configuration).
                    (emb_encoder_inputs, fw_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs

            with tf.variable_scope('output_projection'):
                w = tf.get_variable(  #num_hidden: 256; vsize is the vocabulary size
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                #w_t is [vsize, num_hidden]: after transposing, each row corresponds to one vocabulary word
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,  #256
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)
                #encoder_outputs starts as a list of 120 tensors of shape [64, 2*256] = [64, 512];
                #each timestep is reshaped to [64, 1, 512],
                #so encoder_outputs becomes a list of 120 tensors of shape [64, 1, 512].
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                #Concatenate encoder_outputs into a single [64, 120, 512] tensor.
                #_enc_top_states feeds the attention mechanism in the decoder, _dec_in_state is the decoder LSTM's initial state,
                #and fw_state is the final state of the forward encoder RNN.
                self._enc_top_states = tf.concat(axis=1,
                                                 values=encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                #emb_decoder_inputs is a list of per-step tensors of shape [64, 128], _dec_in_state is the final forward
                #encoder state, and _enc_top_states holds all encoder outputs.
                #decoder_outputs are the per-step decoder outputs; _dec_out_state is the decoder LSTM's final state.
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    #tf.get_variable() checks whether a variable with the same name already exists; to share it you must
                    #explicitly call reuse_variables(), as done below.
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    #w is [num_hidden, vsize] (num_hidden: 256, vsize: vocabulary size), so each entry of model_outputs has shape [64, vsize]
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    #Pick the most likely word out of the vsize vocabulary.
                    #model_outputs is a list of [64, vsize] logits; taking argmax over each gives best_outputs, a list of [64] id tensors.
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    #self._outputs has shape [hps.batch_size, dec_timesteps]
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])
                    #From the last step's [64, vsize] logits, take the top-k entries.
                    #tf.nn.top_k(input, k, name=None) returns the k largest values in each row of input, together with their indices.
                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                #targets and loss_weights were unstacked from [hps.batch_size, hps.dec_timesteps] tensors into per-step lists.
                #model_outputs is a list of [hps.batch_size, vsize] logits.
                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
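
The comments in the function above describe how a [batch_size, timesteps] id matrix is turned into a per-timestep list before embedding. As a minimal standalone sketch of that shape plumbing (toy sizes and variable names are assumptions, not part of the original repo):

import tensorflow as tf

# toy sizes for illustration only
batch_size, timesteps, emb_dim, vsize = 4, 6, 8, 100

ids = tf.placeholder(tf.int32, [batch_size, timesteps], name='toy_ids')
# transpose to [timesteps, batch_size], then unstack into one [batch_size] tensor per step
per_step_ids = tf.unstack(tf.transpose(ids))
embedding = tf.get_variable('toy_embedding', [vsize, emb_dim], dtype=tf.float32)
# each list element becomes [batch_size, emb_dim]
per_step_emb = [tf.nn.embedding_lookup(embedding, x) for x in per_step_ids]
# re-assemble into a single [batch_size, timesteps, emb_dim] tensor when one is needed
stacked = tf.stack(per_step_emb, axis=1)
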
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            # unstacks the articles, abstracts, targets, etc into a list of len=time_steps.
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            # embeds words in the encoder and decoder
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]
            # stack n layers of lstms for encoder
            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    (emb_encoder_inputs, fw_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs

            # define a weight matrix to project the hidden state output onto the vocabulary (w = [num_hidden x vocab_size], biases = v)
            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step. In training just use the direct inputs
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)

                # reshape encoder_outputs
                # encoder_outputs is a list of tensors of shape [batch_size x 2*num_hidden]
                # (2*num_hidden because the bidirectional RNN concatenates the forward and backward outputs).
                # We want to turn that list into a single tensor whose second dimension is time_steps,
                # so first add a new second dimension: [batch_size, 1, 2*num_hidden].
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                # then concat all the time_steps along that axis: shape=[batch_size, time_steps, 2*num_hidden]
                self._enc_top_states = tf.concat(axis=1,
                                                 values=encoder_outputs)
                # last step of the fw rnn for decoder input
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                # During decoding, the RNN can look up information in the additional tensor attention_states.
                # Next, decode using attention.
                # decoder_outputs is a list of tensors of shape [batch_size x output_size].
                # TODO: check how `attention_decoder` actually works
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    decoder_inputs=emb_decoder_inputs,  # a list of 2D tensors [batch_size x embedding_size]
                    initial_state=self._dec_in_state,  # 2D tensor [batch_size, cell.state_size]
                    attention_states=self._enc_top_states,  # 3D tensor [batch_size, attn_length, attn_size]; attn_length is time_steps, attn_size is the RNN output size (2*num_hidden)
                    cell=cell,
                    num_heads=1,  # number of attention heads that read from attention_states
                    loop_function=loop_function,  # applied to the i-th output to generate the (i+1)-th input; decoder_inputs are then ignored except for the first element (GO symbol). It can also be used during training to emulate scheduled sampling.
                    initial_state_attention=initial_state_attention)

            # get the decoder outputs and project them onto the vocabulary (logits = decoder_output * w + v)
            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    # get the most probable word from the vocabulary.
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            # define loss, using sampled loss instead of full softmax
            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
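
The decode_output branch above projects the last decoder step onto the vocabulary and keeps the 2*batch_size best candidates for beam search. A minimal sketch of just that step (toy sizes assumed, independent of the model above):

import tensorflow as tf

batch_size, num_hidden, vsize = 4, 16, 50  # toy sizes
last_decoder_output = tf.placeholder(tf.float32, [batch_size, num_hidden])
w = tf.get_variable('toy_w', [num_hidden, vsize], dtype=tf.float32)
v = tf.get_variable('toy_v', [vsize], dtype=tf.float32)

logits = tf.nn.xw_plus_b(last_decoder_output, w, v)  # [batch_size, vsize]
log_probs = tf.log(tf.nn.softmax(logits))             # per-word log-probabilities
# keep the 2*batch_size most likely extensions for beam search to rank
topk_log_probs, topk_ids = tf.nn.top_k(log_probs, batch_size * 2)
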
    def _add_seq2seq(self):
        hps = self._hps
        # vocab size
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            # a list of max_steps tensors; each one holds a batch-sized slice of the inputs
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            # embedding (learned during training)
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                # the embedding matrix has shape vsize x emb_dim
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                # look up the embedded inputs
                emb_encoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]
            # stack hps.enc_layers encoder layers
            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    # forward and backward cells
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    # stacking simply feeds each layer's outputs into the next layer
                    (emb_encoder_inputs, fw_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs

            with tf.variable_scope('output_projection'):
                # add an output projection layer
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    # only used in decode mode
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)
                # decoder cell
                cell = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)
                # reshape each encoder output to [batch_size, 1, 2*num_hidden]
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                # keep the encoder outputs and the decoder's initial state (only the forward state)
                self._enc_top_states = tf.concat(axis=1,
                                                 values=encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                # call attention_decoder directly
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)
            # output layer
            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))
            # in decode mode, take the top-k directly for beam search
            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)
            # loss
            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                # use the sampled loss during training
                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
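
During training these models replace the full softmax with tf.nn.sampled_softmax_loss, which only evaluates a random subset of the vocabulary per step. A minimal sketch of a single decoder step under that loss (toy sizes assumed; in the models above w_t and v come from the output_projection scope):

import tensorflow as tf

batch_size, num_hidden, vsize, num_sampled = 4, 16, 1000, 64  # toy sizes
decoder_step = tf.placeholder(tf.float32, [batch_size, num_hidden])
target_ids = tf.placeholder(tf.int32, [batch_size])

w = tf.get_variable('toy_proj_w', [num_hidden, vsize], dtype=tf.float32)
v = tf.get_variable('toy_proj_v', [vsize], dtype=tf.float32)

# sampled_softmax_loss expects weights of shape [num_classes, dim], hence the transpose
per_example_loss = tf.nn.sampled_softmax_loss(
    weights=tf.transpose(w),
    biases=v,
    labels=tf.reshape(target_ids, [-1, 1]),
    inputs=decoder_step,
    num_sampled=num_sampled,
    num_classes=vsize)  # shape [batch_size]
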
    def _add_seq2seq(self):
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            # Embedding shared by the input and outputs.
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                emb_encoder_inputs_1 = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in encoder_inputs
                ]
                #-----------------------------------------------------------------------
                #!!! ADD
                # emb_encoder_inputs_1: [enc_timesteps, batch_size, word_emb]
                # enc_timesteps = num_words in article, and we should convert it to
                # num_sentences in article.
                # So emb_encoder_inputs_2 should be [num_sentences, batch_size, sentence_emb]
                num_sentences = 300  #max
                num_word_in_sent = np.load(
                    "/home/dell-u/Spyder/textsum/num_sent_num_words_matrix.npy"
                )
                sentence_emb = 300
                gru_size = 200  # note: the sentence vectors built below have size gru_size, not sentence_emb

                gru = tf.contrib.rnn.GRUCell(gru_size)
                batch_size = hps.batch_size

                # Stack the per-timestep list into a single tensor so it can be
                # indexed as [enc_timesteps, batch_size, word_emb].
                emb_encoder_words = tf.stack(emb_encoder_inputs_1)

                #len_sentences is a list holding the end-of-sentence word index of each sentence
                batch_sent_embs = []
                for ib in range(batch_size):
                    #!!! TODO: use the sentence boundaries of sample ib instead of row 0
                    len_sentences = num_word_in_sent[0]
                    sent_embs = []
                    # For each sentence in the sample, run the GRU over its words
                    # and keep the final state as the sentence embedding.
                    for lo, hi in zip(np.append([0], len_sentences[:-1]),
                                      len_sentences):
                        # Initial state of the GRU memory.
                        state = tf.zeros([1, gru.state_size])
                        for j in range(int(lo), int(hi)):
                            word = tf.reshape(emb_encoder_words[j, ib, :],
                                              [1, hps.emb_dim])
                            output, state = gru(word, state)
                        sent_embs.append(state)  # final_state is a vector

                    batch_sent_embs.append(tf.concat(axis=0, values=sent_embs))

                # emb_encoder_inputs_2: [num_sentences_in_article, batch_size, gru_size]
                emb_encoder_inputs_2 = tf.stack(batch_sent_embs, axis=1)
                # Assumed intent of this ADD block: the bidirectional encoder below now
                # consumes the sentence-level sequence (note: article_lens passed to it
                # still counts words, not sentences).
                emb_encoder_inputs = tf.unstack(emb_encoder_inputs_2)
                #-----------------------------------------------------------------------
                #-----------------------------------------------------------------------
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]

            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    cell_fw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.contrib.rnn.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    (emb_encoder_inputs, fw_state,
                     _) = tf.contrib.rnn.static_bidirectional_rnn(
                         cell_fw,
                         cell_bw,
                         emb_encoder_inputs,
                         dtype=tf.float32,
                         sequence_length=article_lens)
            encoder_outputs = emb_encoder_inputs

            with tf.variable_scope('output_projection'):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)

                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                self._enc_top_states = tf.concat(axis=1,
                                                 values=encoder_outputs)
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

            with tf.variable_scope('output'), tf.device(self._next_device()):
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode':
                with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(axis=1,
                                              values=[
                                                  tf.reshape(
                                                      x, [hps.batch_size, 1])
                                                  for x in best_outputs
                                              ])

                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            weights=w_t,
                            biases=v,
                            labels=labels,
                            inputs=inputs,
                            num_sampled=hps.num_softmax_samples,
                            num_classes=vsize)

                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
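
The "!!! ADD" block in the function above builds sentence embeddings by running a GRU over the words of each sentence and keeping its final state. A hedged standalone sketch of that idea using tf.nn.dynamic_rnn (toy sizes assumed, not the repo's actual preprocessing):

import tensorflow as tf

words_in_sentence, emb_dim, gru_size = 7, 8, 5  # toy sizes
# one sentence: [batch=1, words_in_sentence, emb_dim]
sentence = tf.placeholder(tf.float32, [1, words_in_sentence, emb_dim])
gru = tf.contrib.rnn.GRUCell(gru_size)
# dynamic_rnn returns (outputs, final_state); the final state serves as the sentence vector
_, sentence_embedding = tf.nn.dynamic_rnn(gru, sentence, dtype=tf.float32)  # [1, gru_size]
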
Exemple #13
0
    def build_model(self):
        image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
        image_emb = tf.matmul(image, self.encode_img_W) + \
            self.encode_img_b
        captions = tf.placeholder(tf.int32,
                                  [self.batch_size, self.n_lstm_steps],
                                  name='captions')
        articles = tf.placeholder(tf.int32, [self.batch_size, None],
                                  name='articles')  # self.enc_timesteps])
        news_len = tf.placeholder(tf.int32, [self.batch_size], name='news_len')

        mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        state = self.lstm.zero_state(self.batch_size, tf.float32)

        loss = 0.0
        with tf.variable_scope("encoder"):
            # Dealing with news text
            current_emb = tf.nn.embedding_lookup(self.Wemb,
                                                 articles) + self.bemb
            current_emb = tf.concat(  #for image
                1, [tf.expand_dims(image_emb, 1), current_emb])
            encoder_outputs, state = tf.nn.bidirectional_dynamic_rnn(
                self.lstm,
                self.back_lstm,
                current_emb,
                news_len,
                dtype=tf.float32)
            state = state[0]
            encoder_outputs = tf.concat(1, encoder_outputs)

        with tf.variable_scope("decoder"):

            current_emb = tf.nn.embedding_lookup(self.Wemb,
                                                 captions) + self.bemb
            current_emb = unpack_sequence(current_emb)
            cell = tf.nn.rnn_cell.LSTMCell(
                FLAGS.dim_hidden,
                state_is_tuple=True,
                initializer=tf.random_uniform_initializer(-0.1, 0.1,
                                                          seed=113))  #,
            cell = rnn_cell.DropoutWrapper(self.lstm,
                                           output_keep_prob=FLAGS.dropout)
            decoder_outputs, dec_out_state = tf.nn.seq2seq.attention_decoder(
                decoder_inputs=current_emb,
                initial_state=state,
                attention_states=encoder_outputs,
                cell=cell,
                output_size=None,
                num_heads=1,
                dtype=None,
                scope=None,
                initial_state_attention=False)

        with tf.variable_scope('loss'):

            def sampled_loss_func(inputs, labels):
                with tf.device('/cpu:0'):  # Try gpu.
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(
                        tf.transpose(self.embed_word_W), self.embed_word_b,
                        inputs, labels, 4096, self.n_words)  #4096

            decoder_outputs = decoder_outputs[:-1]
            sentence_modif = tf.slice(captions, [0, 1], [-1, -1])
            mask_modif = tf.slice(mask, [0, 0], [-1, self.n_lstm_steps - 1],
                                  name='mask')
            loss = seq2seq_lib.sampled_sequence_loss(
                decoder_outputs, unpack_sequence(sentence_modif),
                unpack_sequence(mask_modif), sampled_loss_func)
            variable_summaries("loss", loss)

        with tf.variable_scope('output'):
            model_outputs = []
            for i in range(len(decoder_outputs)):
                model_outputs.append(
                    tf.nn.xw_plus_b(decoder_outputs[i], self.embed_word_W,
                                    self.embed_word_b))

        with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
            best_outputs = [tf.argmax(x, 1) for x in model_outputs]
            best_outputs = tf.transpose(best_outputs)

        return loss, image, captions, mask, articles, news_len
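
In the loss scope above, the caption tensor is shifted by one position so that each decoder step is trained to predict the next word: tf.slice drops the first column of the captions and the last column of the mask. A small self-contained illustration (toy values, not the original data):

import tensorflow as tf

captions = tf.constant([[1, 2, 3, 4],
                        [5, 6, 7, 8]])  # [batch_size, n_lstm_steps]
mask = tf.ones([2, 4], dtype=tf.float32)
n_lstm_steps = 4

# labels: everything from the second word on -> [[2, 3, 4], [6, 7, 8]]
sentence_modif = tf.slice(captions, [0, 1], [-1, -1])
# weights: keep the first n_lstm_steps - 1 columns so lengths match the labels
mask_modif = tf.slice(mask, [0, 0], [-1, n_lstm_steps - 1])
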
Exemple #14
0
    def _add_seq2seq(self):
        hps = self._hps
        vsize = hps.vocab_size

        with tf.variable_scope('seq2seq'):
            encoder_inputs = tf.unstack(tf.transpose(self._articles))
            decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens
        # Embedding shared by the input and outputs.
        with tf.variable_scope('embedding'), tf.device('/cpu:0'):
            W = tf.Variable(tf.constant(0.0,
                                        shape=[hps.vocab_size, hps.emb_dim]),
                            trainable=True,
                            name="W")
            embedding = W.assign(self._embedding_placeholder)
            emb_encoder_inputs = [
                tf.nn.embedding_lookup(embedding, x) for x in encoder_inputs
            ]
            emb_decoder_inputs = [
                tf.nn.embedding_lookup(embedding, x) for x in decoder_inputs
            ]
            #embedding = tf.get_variable('embedding', [vsize, hps.emb_dim], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=1e-4))
            #emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x) for x in encoder_inputs]
            #emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x) for x in decoder_inputs]

        for layer_i in xrange(hps.enc_layers):
            with tf.variable_scope('encoder%d' % layer_i), tf.device(
                    self._next_device()):
                cell_fw = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=123),
                    state_is_tuple=True)
                cell_bw = tf.contrib.rnn.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=True)
                (emb_encoder_inputs, fw_state,
                 _) = tf.contrib.rnn.static_bidirectional_rnn(
                     cell_fw,
                     cell_bw,
                     emb_encoder_inputs,
                     dtype=tf.float32,
                     sequence_length=article_lens)
        encoder_outputs = emb_encoder_inputs

        with tf.variable_scope('output_projection'):
            # TODO: change the output vocabulary to use only the word set coming from this batch
            #       rather than the whole dictionary
            # REFERTO: (Abstractive Text Summarization using Sequence-to-sequence RNNS and Beyond)
            w = tf.get_variable(
                'w', [hps.num_hidden, vsize],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            w_t = tf.transpose(w)
            v = tf.get_variable(
                'v', [vsize],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))

        with tf.variable_scope('decoder'), tf.device(self._next_device()):
            # When decoding, use model output from the previous step
            # for the next step.
            loop_function = None
            if hps.mode == 'decode':
                loop_function = _extract_argmax_and_embed(
                    embedding, (w, v), update_embedding=False)
            cell = tf.contrib.rnn.LSTMCell(
                hps.num_hidden,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                state_is_tuple=True)

            encoder_outputs = [
                tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                for x in encoder_outputs
            ]
            self._enc_top_states = tf.concat(axis=1, values=encoder_outputs)
            self._dec_in_state = fw_state
            # During decoding, follow up _dec_in_state are fed from beam_search.
            # dec_out_state are stored by beam_search for next step feeding.
            initial_state_attention = (hps.mode == 'decode')
            decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
                emb_decoder_inputs,
                self._dec_in_state,
                self._enc_top_states,
                cell,
                num_heads=1,
                loop_function=loop_function,
                initial_state_attention=initial_state_attention)

        with tf.variable_scope('output'), tf.device(self._next_device()):
            model_outputs = []
            for i in xrange(len(decoder_outputs)):
                if i > 0: tf.get_variable_scope().reuse_variables()
                model_outputs.append(tf.nn.xw_plus_b(decoder_outputs[i], w, v))

        if hps.mode == 'decode':
            with tf.variable_scope('decode_output'), tf.device('/cpu:0'):
                best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                tf.logging.info('best_outputs%s', best_outputs[0].get_shape())
                self._outputs = tf.concat(axis=1,
                                          values=[
                                              tf.reshape(
                                                  x, [hps.batch_size, 1])
                                              for x in best_outputs
                                          ])
                self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                    tf.log(tf.nn.softmax(model_outputs[-1])),
                    hps.batch_size * 2)

        with tf.variable_scope('loss'), tf.device(self._next_device()):

            def sampled_loss_func(inputs, labels):
                with tf.device('/cpu:0'):  # Try gpu.
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(
                        weights=w_t,
                        biases=v,
                        labels=labels,
                        inputs=inputs,
                        num_sampled=hps.num_softmax_samples,
                        num_classes=vsize)

            if hps.num_softmax_samples != 0 and hps.mode == 'train':
                self._loss = seq2seq_lib.sampled_sequence_loss(
                    decoder_outputs, targets, loss_weights, sampled_loss_func)
            else:
                self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
                    model_outputs, targets, loss_weights)
            tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
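
The last example initialises its embedding matrix from pretrained vectors by assigning a placeholder to a variable rather than relying on an initializer. A minimal sketch of that pattern (names and sizes are illustrative assumptions):

import numpy as np
import tensorflow as tf

vocab_size, emb_dim = 10, 4  # toy sizes
W = tf.Variable(tf.constant(0.0, shape=[vocab_size, emb_dim]), trainable=True, name='W')
embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, emb_dim])
embedding_init = W.assign(embedding_placeholder)

# pretrained vectors would normally come from word2vec; random values stand in here
pretrained = np.random.uniform(-0.25, 0.25, (vocab_size, emb_dim)).astype(np.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(embedding_init, feed_dict={embedding_placeholder: pretrained})
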