Code example #1
0
    def encode_v2(self, question_embeddings, document_embeddings,
                  question_mask, context_mask, encoderb_state_input,
                  dropout_keep_prob):
        """Coattention-style encoder producing a fused document representation.

        Args:
            question_embeddings: question tensor; the transposes below imply a
                (batch, embed, q_len) layout -- TODO confirm.
            document_embeddings: paragraph tensor in the same layout.
            question_mask: 0/1 mask used to derive per-example question lengths.
            context_mask: 0/1 mask used to derive per-example context lengths.
            encoderb_state_input: unused in this method -- TODO confirm/remove.
            dropout_keep_prob: unused in this method -- TODO confirm/remove.

        Returns:
            final_D: concatenation (axis 1) of the document encoding D with the
            document attention context C_D.
        """
        with vs.variable_scope("encoder"):
            # Question -> LSTM -> Q
            lstm_cell = tf.nn.rnn_cell.LSTMCell(self.embedding_size)
            question_length = tf.reduce_sum(tf.cast(question_mask, tf.int32),
                                            reduction_indices=1)
            print("Question length: ", question_length)
            # Transpose to (batch, time, features) for dynamic_rnn, then back.
            Q_prime, _ = dynamic_rnn(lstm_cell,
                                     tf.transpose(question_embeddings,
                                                  [0, 2, 1]),
                                     sequence_length=question_length,
                                     time_major=False,
                                     dtype=tf.float32)
            Q_prime = tf.transpose(Q_prime, [0, 2, 1])
            print("Q_prime: ", Q_prime)

            # Non-linear projection layer on top of the question encoding
            W_Q = tf.get_variable("W_Q",
                                  (self.embedding_size, self.embedding_size))
            b_Q = tf.get_variable("b_Q", (self.embedding_size, 1))
            # NOTE(review): the projection is applied to the raw
            # question_embeddings rather than Q_prime computed above --
            # verify this is intended.
            Q = tf.tanh(
                matrix_multiply_with_batch(matrix=W_Q,
                                           batch=question_embeddings,
                                           matrixByBatch=True) + b_Q)
            print("Q: ", Q)

            # Paragraph -> LSTM -> D
            # Reuse the question LSTM's weights for the document pass.
            tf.get_variable_scope().reuse_variables()
            print("Context mask: ", context_mask)
            context_length = tf.reduce_sum(tf.cast(context_mask, tf.int32),
                                           reduction_indices=1)
            D, _ = dynamic_rnn(lstm_cell,
                               tf.transpose(document_embeddings, [0, 2, 1]),
                               sequence_length=context_length,
                               time_major=False,
                               dtype=tf.float32)
            D = tf.transpose(D, [0, 2, 1])
            print("D: ", D)

            # Affinity matrix between document and question positions,
            # normalized both ways to get the two attention maps.
            L = tf.matmul(tf.transpose(D, [0, 2, 1]), Q)
            A_Q = tf.nn.softmax(L)
            A_D = tf.nn.softmax(tf.transpose(L, [0, 2, 1]))
            print("A_Q: ", A_Q)
            print("A_D: ", A_D)

            # Attention contexts; tf.concat(1, ...) is the pre-TF-1.0 call
            # signature (axis first).
            C_Q = batch_matmul(D, A_Q)
            print("C_Q: ", C_Q)
            concat = tf.concat(1, [Q, C_Q])
            print("concat: ", concat)
            C_D = batch_matmul(tf.concat(1, [Q, C_Q]), A_D)
            print("C_D: ", C_D)

            final_D = tf.concat(1, [D, C_D])
            print("final D: ", final_D)
            return final_D
Code example #2
0
    def decode_v2(self, final_D, W, W_prime, context_mask):
        """Decode answer start/end scores from the fused document encoding.

        Args:
            final_D: fused document representation (output of encode_v2).
            W: weight matrix producing the answer-start scores.
            W_prime: weight matrix producing the answer-end scores.
            context_mask: 0/1 mask used to derive per-example context lengths.

        Returns:
            (a_s, a_e): squeezed start and end score tensors.
        """
        with vs.variable_scope("answer_start"):
            # a_s = final_D * W
            a_s = tf.squeeze(
                matrix_multiply_with_batch(
                    matrix=W,
                    batch=tf.transpose(final_D, [0, 2, 1]),
                    matrixByBatch=False))
            print("a_s: ", a_s)

        with vs.variable_scope("answer_end"):
            # Run one more LSTM over the fused representation before scoring
            # the end position.
            lstm_cell = tf.nn.rnn_cell.LSTMCell(self.output_size)
            context_length = tf.reduce_sum(tf.cast(context_mask, tf.int32),
                                           reduction_indices=1)
            print("Context length: ", context_length)
            final_D_prime, _ = dynamic_rnn(lstm_cell,
                                           final_D,
                                           sequence_length=context_length,
                                           time_major=False,
                                           dtype=tf.float32)
            print("final D prime: ", final_D_prime)
            # BUG FIX: a_e was computed twice (first without tf.squeeze, then
            # discarded and recomputed with it); keep only the squeezed version.
            a_e = tf.squeeze(
                matrix_multiply_with_batch(matrix=W_prime,
                                           batch=tf.transpose(
                                               final_D_prime, [0, 2, 1]),
                                           matrixByBatch=False))
            print("a_e: ", a_e)

        return (a_s, a_e)
Code example #3
0
File: ctrl_vae_helper.py  Project: ricoshin/TextVAE
    def encoder(self, x_enc_onehot, len_enc, reuse=False):
        """VAE encoder: RNN over soft-embedded input, then a reparameterized
        latent sample.

        Args:
            x_enc_onehot: (soft) one-hot encoded input sequence.
            len_enc: per-example sequence lengths for dynamic_rnn.
            reuse: whether to reuse variables in the "encoder" scope.

        Returns:
            (z, mu, logvar): sampled latent vector, its mean, and log-variance.
        """
        with tf.variable_scope("encoder", reuse=reuse):

            in_enc = self._soft_embedding_lookup(self.embed, x_enc_onehot)
            # NOTE(review): self.cell() is invoked twice (zero_state here, the
            # RNN cell below); if each call constructs a fresh cell the two may
            # be distinct instances -- confirm self.cell() caches/reuses.
            initial_state = self.cell().zero_state(self.config.batch_size,
                                                   tf.float32)

            out_tuple = dynamic_rnn(cell=self.cell(reuse),
                                    inputs=in_enc,
                                    sequence_length=len_enc,
                                    initial_state=initial_state)
            (_, encoder_hidden) = out_tuple

            # linear layers for mu and log(var)
            latent_dim = hidden_size = self.config.hidden_size
            W_mu = tf.get_variable("W_mu", [hidden_size, latent_dim])
            b_mu = tf.get_variable("b_mu", [latent_dim])
            W_logvar = tf.get_variable("W_logvar", [hidden_size, latent_dim])
            b_logvar = tf.get_variable("b_logvar", [latent_dim])
            #l2_loss = tf.nn.l2_loss(W_mu) + tf.nn.l2_loss(W_logvar)

            mu = tf.matmul(encoder_hidden, W_mu) + b_mu
            logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

            # sample epsilon
            epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

            # sample latent variable via the reparameterization trick
            stddev = tf.exp(0.5 * logvar)  # standard
            z = mu + tf.multiply(stddev, epsilon)
            return z, mu, logvar
Code example #4
0
File: ctrl_vae_helper.py  Project: ricoshin/TextVAE
 def discriminator(self, inputs, inputs_length, reuse=False):
     """Classify a (soft) one-hot sequence: soft embedding lookup, RNN over
     the sequence, then a dense projection to per-class logits.

     Returns (logits, predicted_class_indices).
     """
     with tf.variable_scope('discriminator', reuse=reuse):
         embedded = self._soft_embedding_lookup(self.embed, inputs)
         _, final_state = dynamic_rnn(cell=self.cell(reuse),
                                      inputs=embedded,
                                      sequence_length=inputs_length,
                                      dtype=tf.float32)
         projection = Dense(self.config.vocab_num)
         logits = projection(final_state)
         # argmax over the class dimension gives the hard prediction
         return logits, tf.argmax(logits, 1)
Code example #5
0
    def add_prediction_op(self):
        """Predict per-frame MFCC features with an LSTM plus a linear layer.

        The sequence length fed to the LSTM is derived from the input masks;
        the masks are (batch, max_num_frames, 13) but the last dimension is
        redundant for length computation, so only channel 0 is summed. Each
        LSTM output frame is projected to n_mfcc_features with a shared weight
        matrix W and bias b.

        Returns:
            mfcc_preds: tensor of shape
            (batch_size, max_num_frames, n_mfcc_features).
        """
        lstm_cell = tf.contrib.rnn.LSTMCell(self.config.state_size)
        lstm_cell = tf.contrib.rnn.DropoutWrapper(
            lstm_cell, input_keep_prob=self.config.dropout_keep_prob)

        # Per-example frame counts from the first mask channel only.
        source_num_frames = tf.reduce_sum(tf.cast(
            self.input_masks_placeholder[:, :, 0], tf.int32),
                                          reduction_indices=1)
        outputs, final_state = dynamic_rnn(lstm_cell,
                                           self.input_placeholder,
                                           sequence_length=source_num_frames,
                                           dtype=tf.float32)

        xavier = tf.contrib.layers.xavier_initializer()
        W = tf.get_variable("W",
                            shape=(self.config.state_size,
                                   self.config.n_mfcc_features),
                            initializer=xavier)
        b = tf.get_variable("b", shape=(1, self.config.n_mfcc_features))

        # BUG FIX: `print tf.shape(...)` was Python-2-only statement syntax,
        # inconsistent with the print() calls used elsewhere; the function form
        # works on both 2 and 3. Note this prints the shape *op*, not concrete
        # dimensions, since it runs at graph-construction time.
        print(tf.shape(outputs))
        # Flatten (batch, time, state) -> (batch*time, state) so a single
        # matmul applies the projection to every frame.
        outputs = tf.reshape(outputs, [-1, self.config.state_size])
        print(tf.shape(outputs))
        mfcc_preds = tf.matmul(outputs, W)
        mfcc_preds = tf.reshape(
            mfcc_preds,
            [-1, self.config.max_num_frames, self.config.n_mfcc_features])
        mfcc_preds += b  # bias broadcasts over batch and time

        self.mfcc = mfcc_preds
        return mfcc_preds
Code example #6
0
    def build_graph(self):
        """Build the query classifier: embedding -> LSTM -> CNN -> softmax.

        Creates placeholders, the LSTM/CNN feature extractor, the 4-way output
        layer, the cross-entropy loss, and the clipped-gradient Adam train op.
        """
        self.input_query = tf.placeholder(tf.int32, shape=[None, 50], name="index_query")
        self.query_length = tf.placeholder(dtype=tf.int32, shape=[None], name="query_length")
        self.label = tf.placeholder(dtype=tf.int32, shape=[None, 4], name="label")
        self.dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout")
        self.lr_pl = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        _word_embeddings = tf.Variable(self.embeddings, dtype=tf.float32,
                                       trainable=self.update_embedding,
                                       name="word_embeddings")
        # NOTE(review): self.dropout is assumed to be set elsewhere on the
        # instance; consider feeding self.dropout_pl instead so dropout can be
        # disabled at inference time.
        self.embed_query = tf.nn.dropout(
            tf.nn.embedding_lookup(params=_word_embeddings, ids=self.input_query,
                                   name="query_embeddings"),
            self.dropout)

        with tf.variable_scope('lstm'):
            self.cell_q = SimpleLSTMCell(self.hidden_dim)
            self.cell_q = tf.nn.rnn_cell.DropoutWrapper(self.cell_q,
                                                        output_keep_prob=self.dropout)
            # BUG FIX: was `self,query_length` (comma typo passed a tuple);
            # pass the sequence-length placeholder.
            output, _ = dynamic_rnn(self.cell_q, self.embed_query,
                                    self.query_length, dtype=tf.float32)

        with tf.variable_scope('cnn'):  # BUG FIX: tf.variabel_scope typo
            outputs = tf.expand_dims(output, -1)  # [batch, 50, hidden_dim, 1]
            pooled_outputs = []
            for i, filter_size in enumerate(self.filters_size):
                # BUG FIX: conv2d requires a rank-4 filter
                # [height, width, in_channels, out_channels]; the original
                # rank-3 shape would fail at graph construction.
                filter_shape = [filter_size, self.hidden_dim, 1, self.num_filters]
                w = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='w')
                b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='b')
                conv = tf.nn.conv2d(outputs, w, strides=[1, 1, 1, 1],
                                    padding='VALID', name='conv')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                # Max-pool over all valid positions (query length is fixed at 50).
                pooled = tf.nn.max_pool(h, ksize=[1, 50 - filter_size + 1, 1, 1],
                                        strides=[1, 1, 1, 1], padding='VALID', name='pool')
                pooled_outputs.append(pooled)
            outputs_ = tf.concat(pooled_outputs, 3)
            # Generalized from the hard-coded 3 to len(self.filters_size)
            # (identical when three filter sizes are configured).
            self.output = tf.reshape(
                outputs_, shape=[-1, len(self.filters_size) * self.num_filters])

        with tf.variable_scope('output'):  # BUG FIX: tf.variabel_scope typo
            # BUG FIX: self.out was never defined; the CNN features live in
            # self.output.
            out_final = tf.nn.dropout(self.output, keep_prob=self.dropout)
            o_w = tf.Variable(
                tf.truncated_normal([len(self.filters_size) * self.num_filters, 4],
                                    stddev=0.1), name='o_w')
            o_b = tf.Variable(tf.constant(0.1, shape=[4]), name='o_b')
            self.logits = tf.matmul(out_final, o_w) + o_b  # BUG FIX: tf.matmal typo
            self.pred_y = tf.argmax(tf.nn.softmax(self.logits), 1, name="pred")
            # BUG FIX: label_y was argmax of pred_y (a rank-1 tensor); the
            # ground truth comes from the one-hot label placeholder.
            self.label_y = tf.argmax(self.label, 1)
            self.pred = tf.equal(self.pred_y, self.label_y)
            # NOTE(review): attribute name `accuray` kept (typo) to avoid
            # breaking external callers.
            self.accuray = tf.reduce_mean(tf.cast(self.pred, tf.float32),
                                          name="accuracy")

        # BUG FIX: softmax_cross_entropy_with_logits needs float labels; the
        # placeholder is int32 one-hot, so cast it.
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.cast(self.label, tf.float32)),
            name="loss")

        self.global_step = tf.Variable(0, trainable=False)
        opt = tf.train.AdamOptimizer(learning_rate=self.lr_pl)
        # BUG FIX: zip(*...) is required to unzip (gradient, variable) pairs,
        # and the original mixed up the names `grads`/`gradients`.
        gradients, variables = zip(*opt.compute_gradients(self.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, self.clip_grad)
        self.train_op = opt.apply_gradients(zip(gradients, variables),
                                            global_step=self.global_step)

        tf.summary.scalar("loss", self.loss)
Code example #7
0
    def _build_encoder(self, graph_embed_input):
        """Encode posts with a stacked GRU.

        Each step's input is e(x_t) = [w(x_t); g_i]: the post word embedding
        concatenated with the graph embedding.

        Returns (encoder_output, encoder_state), where encoder_output has
        shape [batch_size, max_time, cell.output_size].
        """
        word_vecs = tf.nn.embedding_lookup(
            self.word_embed, self.posts_word_id)  # batch*len*unit
        stacked_cells = MultiRNNCell(
            [GRUCell(self.num_units) for _ in range(self.num_layers)])
        rnn_input = tf.concat([word_vecs, graph_embed_input], axis=2)
        outputs, final_state = dynamic_rnn(stacked_cells,
                                           rnn_input,
                                           self.posts_length,
                                           dtype=tf.float32,
                                           scope="encoder")
        return outputs, final_state
Code example #8
0
    def __init__(self,
                 cfg,
                 word_embd,
                 max_ques_len,
                 input_producer,
                 generated=None):
        """Question discriminator: GRU over the question, dense projection to
        answer logits, mean softmax cross-entropy loss.

        When `generated` is provided, questions are soft one-hot distributions
        from a generator; otherwise they are fed as token-id placeholders.
        """
        batch_size = cfg.batch_size
        vocab_size = len(word_embd)
        with tf.variable_scope('disc'):
            embd_var = tf.get_variable(
                'word_embd',
                shape=word_embd.shape,
                initializer=tf.constant_initializer(word_embd))
            if not generated:
                # Real questions: integer token ids looked up normally.
                self.ques = tf.placeholder(tf.int32,
                                           shape=[None, max_ques_len],
                                           name='question')
                self.ques_len = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='question_length')
                ques_vecs = embedding_lookup(embd_var, self.ques)
            else:
                # Generated questions: soft distributions over the vocab, so
                # the embedding lookup becomes a matmul.
                self.ques = generated['ques']
                self.ques_len = generated['ques_len']
                flat = tf.reshape(self.ques, [-1, vocab_size])
                ques_vecs = tf.reshape(tf.matmul(flat, embd_var),
                                       [batch_size, -1, cfg.embed_dim])
            self.answ = input_producer.answ_disc
            _, final_state = dynamic_rnn(GRUCell(cfg.hidden_size),
                                         ques_vecs,
                                         sequence_length=self.ques_len,
                                         dtype=tf.float32)
            logits = Dense(vocab_size)(final_state)
            self.pred = tf.argmax(logits, 1)
            self.loss = tf.reduce_mean(
                softmax_cross_entropy_with_logits(
                    labels=tf.one_hot(self.answ, vocab_size),
                    logits=logits))
Code example #9
0
File: model.py  Project: ccmeffeng/Enzymatic_reaction
    def __init__(self,
            num_symbols,        # 18430, vocabulary size.
            num_embed_units,    # 300, Size of word embedding.
            num_units,          # 512, Size of each model layer.
            num_layers,         # 1, Number of layers in the model.
            num_labels,         # 5, Number of labels.
            embed,              # (18430, 300), word2vector list.
            learning_rate=0.5,
            max_gradient_norm=5.0):
        """Text classifier: string-to-index lookup -> embedding -> GRU ->
        two dense layers -> softmax loss, trained with clipped SGD.
        """
        # Placeholders.
        self.texts = tf.placeholder(dtype=tf.string, shape=[None, None], name='texts')  # shape: batch*len
        self.texts_length = tf.placeholder(dtype=tf.int64, shape=[None], name='texts_length')  # shape: batch
        self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')  # shape: batch

        self.symbol2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=UNK_ID,
                shared_name="in_table",
                name="in_table",
                checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)   # batch*len

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(self.embed, self.index_input) #batch*len*embed_unit

        # BUG FIX: `cell` was only defined when num_layers == 1, so any other
        # value crashed later with an opaque NameError. The downstream layers
        # consume the single GRU state directly, so fail fast and explicitly
        # for unsupported depths instead.
        if num_layers != 1:
            raise ValueError(
                "only num_layers == 1 is supported; got %d" % num_layers)
        cell = GRUCell(num_units)

        # NOTE(review): dropout is applied unconditionally (also at eval time);
        # consider feeding keep_prob as a placeholder.
        keep_prob = 0.95
        dropped_input = tf.nn.dropout(self.embed_input, keep_prob=keep_prob)
        outputs, states = dynamic_rnn(cell, dropped_input, self.texts_length, dtype=tf.float32, scope="rnn")

        # Classifier head: dropout -> dense(256, relu) -> dropout -> dense(labels).
        l1 = tf.nn.dropout(states, keep_prob=keep_prob)
        inner_layer = tf.layers.dense(inputs=l1, units=256, activation=tf.nn.relu)
        l2 = tf.nn.dropout(inner_layer, keep_prob=keep_prob)
        logits = tf.layers.dense(inputs=l2, units=num_labels)

        # Summed (not mean) loss; mean_loss is used for gradients below.
        self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        # Count (not fraction) of correct predictions in the batch.
        self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, predict_labels), tf.int32), name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2, 
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
Code example #10
0
File: environment.py  Project: Gauraviiitian/IRecGAN
    def __init__(self,
                 num_items,
                 num_embed_units,
                 num_units,
                 num_layers,
                 vocab=None,
                 embed=None,
                 learning_rate=5e-4,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 use_lstm=True):
        """Environment (user-behavior) model: an RNN over item sessions that
        predicts which recommended item is clicked next and whether a purchase
        occurs, with parallel training and inference (scope-reused) graphs and
        both pretraining and adversarial (score-weighted) update ops.
        """
        self.epoch = tf.Variable(0, trainable=False, name='env/epoch')
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        # Inputs: item-id sessions, per-step recommendation lists and masks,
        # index of the clicked ("aim") item within each list, session lengths,
        # and per-step purchase indicators.
        self.sessions_input = tf.placeholder(tf.int32, shape=(None, None))
        self.rec_lists = tf.placeholder(tf.int32, shape=(None, None, None))
        self.rec_mask = tf.placeholder(tf.float32, shape=(None, None, None))
        self.aims_idx = tf.placeholder(tf.int32, shape=(None, None))
        # NOTE(review): shape=(None) is just None (unconstrained rank), not
        # the 1-D shape (None,) -- confirm intent.
        self.sessions_length = tf.placeholder(tf.int32, shape=(None))
        self.purchase = tf.placeholder(tf.int32, shape=(None, None))

        if embed is None:
            self.embed = tf.get_variable(
                'env/embed', [num_items, num_embed_units],
                tf.float32,
                initializer=tf.truncated_normal_initializer(0, 1))
        else:
            self.embed = tf.get_variable('env/embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        # Dynamic dimensions resolved at run time.
        batch_size, encoder_length, rec_length = tf.shape(
            self.sessions_input)[0], tf.shape(
                self.sessions_input)[1], tf.shape(self.rec_lists)[2]

        # Reverse-cumsum of a one-hot at (length - 2) yields a 0/1 mask over
        # valid steps -- presumably excluding the final step; TODO confirm the
        # off-by-two against the data pipeline.
        encoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.sessions_length - 2, encoder_length),
                      reverse=True,
                      axis=1), [-1, encoder_length])

        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.sessions_input)  #batch*len*unit
        self.aims = tf.one_hot(self.aims_idx, rec_length)
        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # Training
        with tf.variable_scope("env"):
            # [batch_size, length, num_units]
            encoder_output, _ = dynamic_rnn(cell,
                                            self.encoder_input,
                                            self.sessions_length,
                                            dtype=tf.float32,
                                            scope="encoder")

            # [batch_size, length, embed_units]
            preference = tf.layers.dense(encoder_output,
                                         num_embed_units,
                                         name="pref_output")
            # [batch_size, length, rec_length, embed_units]
            # gather_nd over the embedding table acts as an embedding lookup
            # for every candidate in every recommendation list.
            self.candidate = tf.reshape(
                tf.gather_nd(self.embed, tf.expand_dims(self.rec_lists, 3)),
                [batch_size, encoder_length, rec_length, num_embed_units])

            # [batch_size, length, rec_length]
            # Score each candidate by (mean-scaled) dot product with the
            # predicted preference vector.
            logits = tf.reduce_mean(
                tf.multiply(tf.expand_dims(preference, 2), self.candidate), 3)
            mul_prob = tf.nn.softmax(logits) * self.rec_mask

            # [batch_size, length, rec_length]
            # Renormalize after masking out padded candidates.
            self.norm_prob = mul_prob / (
                tf.expand_dims(tf.reduce_sum(mul_prob, 2), 2) + 1e-20)
            # [batch_size, length, metric_num]
            _, self.argmax_index = tf.nn.top_k(self.norm_prob,
                                               k=FLAGS['metric'].value + 1)
            # Masked cross-entropy against the one-hot clicked item.
            local_predict_loss = tf.reduce_sum(
                -self.aims * tf.log(self.norm_prob + 1e-20), 2) * encoder_mask
            self.predict_loss = tf.reduce_sum(
                local_predict_loss) / tf.reduce_sum(encoder_mask)

            # [batch_size, length, embed_units]
            # Embedding of the actually-clicked item at each step.
            aim_embed = tf.reduce_sum(
                tf.expand_dims(self.aims, 3) * self.candidate, 2)
            if FLAGS['use_simulated_data'].value:
                # Simulated data has no purchase signal: zero out the branch.
                self.purchase_prob, local_purchase_loss, self.purchase_loss = tf.zeros(
                    [batch_size, encoder_length, 2],
                    dtype=tf.float32), tf.zeros([batch_size, encoder_length],
                                                dtype=tf.float32), tf.constant(
                                                    0., dtype=tf.float32)
            else:
                # [batch_size, length, 2]
                # Purchase head: trained on stop_gradient'd features so it
                # does not back-propagate into the encoder.
                self.purchase_prob = tf.nn.softmax(
                    tf.layers.dense(tf.multiply(
                        tf.layers.dense(tf.stop_gradient(encoder_output),
                                        num_units,
                                        name="purchase_layer"),
                        tf.layers.dense(tf.stop_gradient(aim_embed),
                                        num_units,
                                        name="purchase_aim")),
                                    2,
                                    name="purchase_projection"))
                # NOTE(review): tf.pow(purchase + 1, 5.3) upweights purchase
                # steps by a magic constant -- presumably tuned for class
                # imbalance; confirm/centralize this hyperparameter.
                local_purchase_loss = tf.reduce_sum(
                    -tf.one_hot(self.purchase, 2) *
                    tf.log(self.purchase_prob + 1e-20),
                    2) * encoder_mask * tf.pow(
                        tf.cast(self.purchase, tf.float32) + 1, 5.3)
                self.purchase_loss = tf.reduce_sum(
                    local_purchase_loss) / tf.reduce_sum(encoder_mask)
            self.decoder_loss = self.predict_loss + self.purchase_loss

            # Adversarial reward weights for the score-weighted loss below.
            self.score = tf.placeholder(tf.float32, (None, None))
            self.score_loss = tf.reduce_sum(
                self.score *
                (local_predict_loss +
                 local_purchase_loss)) / tf.reduce_sum(encoder_mask)

        # Inference
        with tf.variable_scope("env", reuse=True):
            # tf.get_variable_scope().reuse_variables()
            # [batch_size, 1, embed_units]
            # Same layers as training (reused weights), but only the last step.
            inf_preference = tf.expand_dims(
                tf.layers.dense(encoder_output[:, -1, :],
                                num_embed_units,
                                name="pref_output"), 1)
            # [batch_size, 1, rec_length, embed_units]
            self.inf_candidate = tf.reshape(
                tf.gather_nd(self.embed, tf.expand_dims(self.rec_lists, 3)),
                [batch_size, 1, rec_length, num_embed_units])

            # [batch_size, 1, rec_length]
            inf_logits = tf.reduce_mean(
                tf.multiply(tf.expand_dims(inf_preference, 2),
                            self.inf_candidate), 3)
            inf_mul_prob = tf.nn.softmax(inf_logits) * self.rec_mask

            self.inf_norm_prob = inf_mul_prob / (
                tf.expand_dims(tf.reduce_sum(inf_mul_prob, 2), 2) + 1e-20)
            # [batch_size, 1, metric_num]
            _, self.inf_argmax_index = tf.nn.top_k(self.inf_norm_prob,
                                                   k=FLAGS['metric'].value)
            _, self.inf_all_argmax_index = tf.nn.top_k(
                self.inf_norm_prob, k=tf.shape(self.inf_norm_prob)[-1])

            def gumbel_max(inp, alpha, beta):
                # Gumbel perturbation for stochastic sampling from a
                # categorical distribution via top-k.
                # assert len(tf.shape(inp)) == 2
                g = tf.random_uniform(tf.shape(inp), 0.0001, 0.9999)
                g = -tf.log(-tf.log(g))
                inp_g = tf.nn.softmax(
                    (tf.nn.log_softmax(inp / 1.0) + g * alpha) * beta)
                return inp_g

            # [batch_size, action_num]
            _, self.inf_random_index = tf.nn.top_k(gumbel_max(
                tf.log(self.inf_norm_prob + 1e-20), 1, 1),
                                                   k=FLAGS['metric'].value)
            _, self.inf_all_random_index = tf.nn.top_k(
                gumbel_max(tf.log(self.inf_norm_prob + 1e-20), 1, 1),
                k=tf.shape(self.inf_norm_prob)[-1])

            # Embedding of the top predicted item (greedy choice).
            inf_aim_embed = tf.reduce_sum(
                tf.cast(
                    tf.reshape(
                        tf.one_hot(self.inf_argmax_index[:, :, 0], rec_length),
                        [batch_size, 1, rec_length, 1]), tf.float32) *
                self.inf_candidate, 2)

            if FLAGS['use_simulated_data'].value:
                self.inf_purchase_prob = tf.zeros([batch_size, 1, 2],
                                                  dtype=tf.float32)
            else:
                # [batch_size, 1, 2]
                self.inf_purchase_prob = tf.nn.softmax(
                    tf.layers.dense(tf.multiply(
                        tf.layers.dense(tf.stop_gradient(encoder_output),
                                        num_units,
                                        name="purchase_layer"),
                        tf.layers.dense(tf.stop_gradient(inf_aim_embed),
                                        num_units,
                                        name="purchase_aim")),
                                    2,
                                    name="purchase_projection"))

        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        self.global_step = tf.Variable(0, trainable=False)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        self.params = tf.trainable_variables()

        # For pretraining
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # For adversarial training
        score_gradients = tf.gradients(self.score_loss, self.params)
        score_clipped_gradients, self.score_gradient_norm = tf.clip_by_global_norm(
            score_gradients, max_gradient_norm)
        self.score_update = opt.apply_gradients(zip(score_clipped_gradients,
                                                    self.params),
                                                global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=100,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
Code example #11
0
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_labels,
                 batch_size,
                 embed,
                 learning_rate=0.001,
                 max_gradient_norm=5.0,
                 learning_rate_decay_factor=0.9):
        # todo: implement placeholders
        self.texts1 = tf.placeholder(tf.string, [batch_size, None],
                                     name='texts1')
        self.texts2 = tf.placeholder(tf.string, [batch_size, None],
                                     name='texts2')  # shape: batch*len
        self.texts_length1 = tf.placeholder(
            tf.int32, [batch_size], name='texts_length1')  # shape: batch
        self.texts_length2 = tf.placeholder(tf.int32, [batch_size],
                                            name='texts_length2')
        self.max_length = tf.placeholder(tf.int32, name='max_length')
        self.labels = tf.placeholder(tf.int64, [batch_size],
                                     name='labels')  # shape: batch
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.embed_units = num_embed_units
        self.num_units = num_units
        self.batch_size = batch_size
        self._initializer = tf.truncated_normal_initializer(stddev=0.1)
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.index_input1 = self.symbol2index.lookup(self.texts1)  # batch*len
        self.index_input2 = self.symbol2index.lookup(self.texts2)
        self.long_length = tf.maximum(self.texts_length1, self.texts_length2)
        print self.long_length.get_shape()
        self.mask_table = tf.sequence_mask(self.long_length, dtype=tf.float32)
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input1 = tf.nn.embedding_lookup(
            self.embed, self.index_input1)  # batch*len*embed_unit
        self.embed_input2 = tf.nn.embedding_lookup(self.embed,
                                                   self.index_input2)

        with tf.variable_scope('lstm_s'):
            self.lstm_s = tf.contrib.rnn.LSTMCell(
                num_units=num_units,
                initializer=tf.orthogonal_initializer,
                forget_bias=0)

        with tf.variable_scope('lstm_r'):
            self.lstm_r = tf.contrib.rnn.LSTMCell(
                num_units=num_units,
                initializer=tf.orthogonal_initializer,
                forget_bias=0)

        out_s1, state_s1 = dynamic_rnn(self.lstm_s,
                                       self.embed_input1,
                                       self.texts_length1,
                                       dtype=tf.float32,
                                       scope='rnn')
        out_s2, state_s2 = dynamic_rnn(self.lstm_s,
                                       self.embed_input2,
                                       self.texts_length2,
                                       dtype=tf.float32,
                                       scope='rnn')

        self.h_s1 = out_s1
        self.h_s2 = out_s2

        reshaped_s1 = tf.reshape(self.h_s1, [-1, self.num_units])
        reshaped_s2 = tf.reshape(self.h_s2, [-1, self.num_units])
        with tf.variable_scope('Attn_'):
            W_s = tf.get_variable(shape=[self.num_units, self.num_units],
                                  initializer=self._initializer,
                                  name='W_s')
        self.s_1 = tf.matmul(reshaped_s1, W_s)
        self.s_2 = tf.matmul(reshaped_s2, W_s)
        self.s_1 = tf.transpose(
            tf.reshape(self.s_1, [self.batch_size, -1, self.num_units]),
            [1, 2, 0])
        self.s_2 = tf.transpose(
            tf.reshape(self.s_2, [self.batch_size, -1, self.num_units]),
            [1, 2, 0])
        i = tf.constant(0)

        state_r = self.lstm_r.zero_state(batch_size=batch_size,
                                         dtype=tf.float32)

        def c(t, sr):
            return tf.less(t, self.max_length)

        def b(t, sr):
            return self.attention(t, sr)

        i, state_r = tf.while_loop(cond=c, body=b, loop_vars=(i, state_r))

        with tf.variable_scope('fully_connect'):
            w_fc = tf.get_variable(shape=[self.num_units, num_labels],
                                   initializer=self._initializer,
                                   name='w_fc')
            b_fc = tf.get_variable(shape=[num_labels],
                                   initializer=self._initializer,
                                   name='b_fc')
        logits = tf.matmul(state_r.h, w_fc) + b_fc

        #logits = tf.layers.dense(outputs, num_labels)

        # todo: implement unfinished networks

        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / \
            tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int64),
                                      name='accuracy')

        self.params = tf.trainable_variables()
        # calculate the gradient of parameters
        for item in tf.global_variables():
            print item
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        #self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(mean_loss, global_step=self.global_step,
        #var_list=self.params)
        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #12
0
    def __init__(self, data, args, embed):
        """Build a knowledge-grounded hierarchical seq2seq dialogue graph.

        Constructs the full TF1 graph: input placeholders, masked
        knowledge-triple embedding averages, a turn-level utterance
        encoder plus a context RNN, a Bahdanau-attention GRU decoder
        (training and inference variants), a knowledge-selection loss,
        and an Adam training op with global-norm gradient clipping.

        :param data: dataset object providing vocab_size, go_id, eos_id
        :param args: hyper-parameter namespace (lr, lr_decay, eh_size,
            ch_size, dh_size, embedding_size, softmax_samples,
            max_sent_length, grad_clip, name, checkpoint_max_to_keep)
        :param embed: pre-trained embedding matrix, or None to
            initialize the embedding randomly
        """
        # ---- input placeholders ------------------------------------------
        self.init_states = tf.placeholder(tf.float32, (None, args.ch_size),
                                          'ctx_inps')  # batch*ch_size
        self.posts = tf.placeholder(tf.int32, (None, None),
                                    'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None, ),
                                           'enc_lens')  # batch
        self.prev_posts = tf.placeholder(tf.int32, (None, None),
                                         'enc_prev_inps')
        self.prev_posts_length = tf.placeholder(tf.int32, (None, ),
                                                'enc_prev_lens')

        # Knowledge triples and their prefix lengths: head (h),
        # head+relation (hr), head+relation+tail (hrt).
        self.kgs = tf.placeholder(tf.int32, (None, None, None),
                                  'kg_inps')  # batch*kg_num*kg_len
        self.kgs_h_length = tf.placeholder(tf.int32, (None, None),
                                           'kg_h_lens')  # batch*kg_num
        self.kgs_hr_length = tf.placeholder(tf.int32, (None, None),
                                            'kg_hr_lens')  # batch*kg_num
        self.kgs_hrt_length = tf.placeholder(tf.int32, (None, None),
                                             'kg_hrt_lens')  # batch*kg_num
        self.kgs_index = tf.placeholder(tf.float32, (None, None),
                                        'kg_indices')  # batch*kg_num

        self.origin_responses = tf.placeholder(tf.int32, (None, None),
                                               'dec_inps')  # batch*len
        self.origin_responses_length = tf.placeholder(tf.int32, (None, ),
                                                      'dec_lens')  # batch
        self.context_length = tf.placeholder(tf.int32, (None, ), 'ctx_lens')
        self.is_train = tf.placeholder(tf.bool)

        # posts stacks every past turn along the batch axis, so this ratio
        # recovers how many past turns accompany each response.
        num_past_turns = tf.shape(self.posts)[0] // tf.shape(
            self.origin_responses)[0]

        # deal with original data to adapt encoder and decoder
        batch_size, decoder_len = tf.shape(self.origin_responses)[0], tf.shape(
            self.origin_responses)[1]
        self.responses = tf.split(self.origin_responses, [1, decoder_len - 1],
                                  1)[1]  # no go_id
        self.responses_length = self.origin_responses_length - 1
        self.responses_input = tf.split(self.origin_responses,
                                        [decoder_len - 1, 1],
                                        1)[0]  # no eos_id
        self.responses_target = self.responses
        decoder_len = decoder_len - 1
        self.posts_input = self.posts  # batch*len
        # Reversed cumsum of a one-hot at the last valid position gives a
        # 1/0 mask over valid decoder time steps.
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])
        kg_len = tf.shape(self.kgs)[2]
        # Same reversed-cumsum trick builds token masks for each triple
        # prefix (h / hr / hrt), shaped batch*kg_num*kg_len*1.
        kg_h_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.kgs_h_length - 1, kg_len),
                      reverse=True,
                      axis=2), [batch_size, -1, kg_len, 1])
        kg_hr_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.kgs_hr_length - 1, kg_len),
                      reverse=True,
                      axis=2), [batch_size, -1, kg_len, 1])
        kg_hrt_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.kgs_hrt_length - 1, kg_len),
                      reverse=True,
                      axis=2), [batch_size, -1, kg_len, 1])
        # Key = head+relation tokens; value = tail tokens only (hrt - hr).
        kg_key_mask = kg_hr_mask
        kg_value_mask = kg_hrt_mask - kg_hr_mask

        # initialize the training process
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build the embedding table and embedding input
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable(
                'embed', [data.vocab_size, args.embedding_size], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.posts)
        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)
        self.kg_input = tf.nn.embedding_lookup(self.embed, self.kgs)
        #self.knowledge_max = tf.reduce_max(tf.where(tf.cast(tf.tile(knowledge_mask, [1, 1, args.embedding_size]), tf.bool), self.knowledge_input, -mask_value), axis=1)
        #self.knowledge_min = tf.reduce_max(tf.where(tf.cast(tf.tile(knowledge_mask, [1, 1, args.embedding_size]), tf.bool), self.knowledge_input, mask_value), axis=1)
        # Masked mean over token embeddings; tf.maximum guards against
        # division by zero for triples with no valid tokens.
        self.kg_key_avg = tf.reduce_sum(
            self.kg_input * kg_key_mask, axis=2) / tf.maximum(
                tf.reduce_sum(kg_key_mask, axis=2),
                tf.ones_like(tf.expand_dims(self.kgs_hrt_length, -1),
                             dtype=tf.float32))
        self.kg_value_avg = tf.reduce_sum(
            self.kg_input * kg_value_mask, axis=2) / tf.maximum(
                tf.reduce_sum(kg_value_mask, axis=2),
                tf.ones_like(tf.expand_dims(self.kgs_hrt_length, -1),
                             dtype=tf.float32))

        #self.encoder_input = tf.cond(self.is_train,
        #							 lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.embed, self.posts_input), 0.8),
        #							 lambda: tf.nn.embedding_lookup(self.embed, self.posts_input))  # batch*len*unit
        #self.decoder_input = tf.cond(self.is_train,
        #							 lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.embed, self.responses_input), 0.8),
        #							 lambda: tf.nn.embedding_lookup(self.embed, self.responses_input))

        # build rnn_cell
        cell_enc = tf.nn.rnn_cell.GRUCell(args.eh_size)
        cell_ctx = tf.nn.rnn_cell.GRUCell(args.ch_size)
        cell_dec = tf.nn.rnn_cell.GRUCell(args.dh_size)

        # build encoder
        with tf.variable_scope('encoder'):
            encoder_output, encoder_state = dynamic_rnn(cell_enc,
                                                        self.encoder_input,
                                                        self.posts_length,
                                                        dtype=tf.float32,
                                                        scope="encoder_rnn")

        # Re-apply the same encoder weights (AUTO_REUSE) to the previous post.
        with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
            prev_output, _ = dynamic_rnn(cell_enc,
                                         tf.nn.embedding_lookup(
                                             self.embed, self.prev_posts),
                                         self.prev_posts_length,
                                         dtype=tf.float32,
                                         scope="encoder_rnn")

        # Context RNN over the per-turn encoder states.
        with tf.variable_scope('context'):
            encoder_state_reshape = tf.reshape(
                encoder_state, [-1, num_past_turns, args.eh_size])
            _, self.context_state = dynamic_rnn(cell_ctx,
                                                encoder_state_reshape,
                                                self.context_length,
                                                dtype=tf.float32,
                                                scope='context_rnn')

        # get output projection function
        output_fn = MyDense(data.vocab_size, use_bias=True)
        sampled_sequence_loss = output_projection_layer(
            args.dh_size, data.vocab_size, args.softmax_samples)

        # construct attention
        '''
		encoder_len = tf.shape(encoder_output)[1]
		attention_memory = tf.reshape(encoder_output, [batch_size, -1, args.eh_size])
		attention_mask = tf.reshape(tf.sequence_mask(self.posts_length, encoder_len), [batch_size, -1])
		attention_mask = tf.concat([tf.ones([batch_size, 1], tf.bool), attention_mask[:,1:]], axis=1)
		attn_mechanism = MyAttention(args.dh_size, attention_memory, attention_mask)
		'''
        attn_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            args.dh_size,
            prev_output,
            memory_sequence_length=tf.maximum(self.prev_posts_length, 1))
        cell_dec_attn = tf.contrib.seq2seq.AttentionWrapper(
            cell_dec, attn_mechanism, attention_layer_size=args.dh_size)
        # Project the context state into the decoder's hidden size and use
        # it as the decoder's initial cell state.
        ctx_state_shaping = tf.layers.dense(self.context_state,
                                            args.dh_size,
                                            activation=None)
        dec_start = cell_dec_attn.zero_state(
            batch_size, dtype=tf.float32).clone(cell_state=ctx_state_shaping)

        # calculate kg embedding
        with tf.variable_scope('knowledge'):
            query = tf.reshape(
                tf.layers.dense(tf.concat(self.context_state, axis=-1),
                                args.embedding_size,
                                use_bias=False),
                [batch_size, 1, args.embedding_size])
        # Dot-product score per triple; empty triples (hrt length 0) get
        # -inf so softmax assigns them zero probability.
        kg_score = tf.reduce_sum(query * self.kg_key_avg, axis=2)
        kg_score = tf.where(tf.greater(self.kgs_hrt_length, 0), kg_score,
                            -tf.ones_like(kg_score) * np.inf)
        kg_alignment = tf.nn.softmax(kg_score)
        kg_max = tf.argmax(kg_alignment, axis=-1)
        kg_max_onehot = tf.one_hot(kg_max,
                                   tf.shape(kg_alignment)[1],
                                   dtype=tf.float32)
        # Fraction of samples whose argmax triple is marked in kgs_index.
        self.kg_acc = tf.reduce_sum(
            kg_max_onehot * self.kgs_index) / tf.maximum(
                tf.reduce_sum(tf.reduce_max(self.kgs_index, axis=-1)),
                tf.constant(1.0))
        # Negative log-likelihood of the marked triples, normalised per
        # sample and averaged over the batch; clip avoids log(0).
        self.kg_loss = tf.reduce_sum(
            -tf.log(tf.clip_by_value(kg_alignment, 1e-12, 1.0)) *
            self.kgs_index,
            axis=1) / tf.maximum(tf.reduce_sum(self.kgs_index, axis=1),
                                 tf.ones([batch_size], dtype=tf.float32))
        self.kg_loss = tf.reduce_mean(self.kg_loss)

        # NOTE(review): kg_num_mask is not defined anywhere in this method;
        # unless it is a module-level name this raises NameError at graph
        # build time. It presumably masks valid triples — TODO confirm.
        self.knowledge_embed = tf.reduce_sum(
            tf.expand_dims(kg_alignment, axis=-1) * self.kg_value_avg *
            tf.cast(kg_num_mask, tf.float32),
            axis=1)
        #self.knowledge_embed = tf.Print(self.knowledge_embed, ['acc', self.kg_acc, 'loss', self.kg_loss])
        # Concatenate the attended knowledge vector to every decoder input.
        knowledge_embed_extend = tf.tile(
            tf.expand_dims(self.knowledge_embed, axis=1), [1, decoder_len, 1])
        self.decoder_input = tf.concat(
            [self.decoder_input, knowledge_embed_extend], axis=2)
        # construct helper
        train_helper = tf.contrib.seq2seq.TrainingHelper(
            self.decoder_input, tf.maximum(self.responses_length, 1))
        infer_helper = MyInferenceHelper(self.embed,
                                         tf.fill([batch_size], data.go_id),
                                         data.eos_id, self.knowledge_embed)
        #infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embed, tf.fill([batch_size], data.go_id), data.eos_id)

        # build decoder (train)
        with tf.variable_scope('decoder'):
            decoder_train = tf.contrib.seq2seq.BasicDecoder(
                cell_dec_attn, train_helper, dec_start)
            train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder_train, impute_finished=True, scope="decoder_rnn")
            self.decoder_output = train_outputs.rnn_output
            #self.decoder_output = tf.nn.dropout(self.decoder_output, 0.8)
            self.decoder_distribution_teacher, self.decoder_loss, self.decoder_all_loss = \
             sampled_sequence_loss(self.decoder_output, self.responses_target, self.decoder_mask)

        # build decoder (test)
        with tf.variable_scope('decoder', reuse=True):
            decoder_infer = tf.contrib.seq2seq.BasicDecoder(
                cell_dec_attn, infer_helper, dec_start, output_layer=output_fn)
            infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder_infer,
                impute_finished=True,
                maximum_iterations=args.max_sent_length,
                scope="decoder_rnn")
            self.decoder_distribution = infer_outputs.rnn_output
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, data.vocab_size - 2],
                         2)[1], 2) + 2  # for removing UNK

        # calculate the gradient of parameters and update
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        opt = tf.train.AdamOptimizer(self.learning_rate)
        self.loss = self.decoder_loss + self.kg_loss
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)
コード例 #13
0
File: model.py  Project: jcnlp/dssm-lstm
    def __init__(self,
                 num_lstm_units,
                 embed,
                 neg_num=4,
                 gradient_clip_threshold=5.0):
        """Build a DSSM-style LSTM ranking graph.

        One query is scored against ``neg_num + 1`` candidate documents
        (row 0 is the positive one) via the cosine similarity of their
        final LSTM hidden states; training maximises the softmax
        probability of the positive document.

        :param num_lstm_units: hidden size of both LSTMs
        :param embed: pre-trained embedding matrix
        :param neg_num: number of negative documents per query
        :param gradient_clip_threshold: global-norm clipping threshold
        """
        # Raw token-string inputs; ids come from the hash-table lookup below.
        self.queries = tf.placeholder(shape=[None, None],
                                      dtype=tf.string)  # batch * len
        self.queries_length = tf.placeholder(shape=[None],
                                             dtype=tf.int32)  # batch
        self.docs = tf.placeholder(shape=[neg_num + 1, None, None],
                                   dtype=tf.string)  # (neg_num+1) * batch * len
        self.docs_length = tf.placeholder(shape=[neg_num + 1, None],
                                          dtype=tf.int32)  # (neg_num+1) * batch

        # Token -> index table, checkpointed alongside the model weights.
        self.word2index = MutableHashTable(key_dtype=tf.string,
                                           value_dtype=tf.int64,
                                           default_value=UNK_ID,
                                           shared_name="in_table",
                                           name="in_table",
                                           checkpoint=True)

        # Non-trainable bookkeeping variables.
        self.learning_rate = tf.Variable(0.001,
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.momentum = tf.Variable(0.9, trainable=False, dtype=tf.float32)

        self.index_queries = self.word2index.lookup(self.queries)  # batch*len
        self.index_docs = []
        for doc_slice in tf.unstack(self.docs):
            self.index_docs.append(self.word2index.lookup(doc_slice))

        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)
        self.embed_queries = tf.nn.embedding_lookup(self.embed,
                                                    self.index_queries)
        self.embed_docs = []
        for doc_indices in self.index_docs:
            self.embed_docs.append(
                tf.nn.embedding_lookup(self.embed, doc_indices))

        # One LSTM for queries, a second one shared by all documents.
        with tf.variable_scope('query_lstm'):
            self.cell_q = SimpleLSTMCell(num_lstm_units)
        with tf.variable_scope('doc_lstm'):
            self.cell_d = SimpleLSTMCell(num_lstm_units)

        # [1][1] picks the hidden component h out of the final (c, h) state.
        self.states_q = dynamic_rnn(
            self.cell_q,
            self.embed_queries,
            self.queries_length,
            dtype=tf.float32,
            scope="simple_lstm_cell_query")[1][1]  # batch * num_units
        self.states_d = []
        for k in range(neg_num + 1):
            self.states_d.append(
                dynamic_rnn(self.cell_d,
                            self.embed_docs[k],
                            self.docs_length[k],
                            dtype=tf.float32,
                            scope="simple_lstm_cell_doc")[1][1])

        # Cosine similarity between the query and every candidate document.
        self.queries_norm = tf.sqrt(
            tf.reduce_sum(tf.square(self.states_q), axis=1))
        self.docs_norm = [
            tf.sqrt(tf.reduce_sum(tf.square(state), axis=1))
            for state in self.states_d
        ]
        self.prods = [
            tf.reduce_sum(tf.multiply(self.states_q, state), axis=1)
            for state in self.states_d
        ]
        self.sims = [
            prod / (self.queries_norm * norm)
            for prod, norm in zip(self.prods, self.docs_norm)
        ]  # (neg_num + 1) * batch
        self.sims = tf.convert_to_tensor(self.sims)

        # Learnable smoothing factor gamma, per the DSSM paper.
        self.gamma = tf.Variable(initial_value=1.0,
                                 expected_shape=[],
                                 dtype=tf.float32)
        self.sims = self.sims * self.gamma
        self.prob = tf.nn.softmax(self.sims, dim=0)  # (neg_num + 1) * batch
        # Row 0 holds the positive document's probability for each query.
        self.hit_prob = tf.transpose(self.prob[0])

        self.loss = -tf.reduce_mean(tf.log(self.hit_prob))

        # Nesterov momentum SGD with global-norm gradient clipping.
        self.params = tf.trainable_variables()
        opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate,
                                         momentum=self.momentum,
                                         use_nesterov=True)
        grads = tf.gradients(self.loss, self.params)
        clipped, self.gradient_norm = tf.clip_by_global_norm(
            grads, gradient_clip_threshold)
        self.update = opt.apply_gradients(zip(clipped, self.params),
                                          global_step=self.global_step)
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #14
0
File: qa_model.py  Project: abearman/cs224n-project
    def encode(self, question_embeddings, context_embeddings, question_mask,
               context_mask, encoder_state_input, dropout_keep_prob,
               batch_size):
        """Encode the question and the context paragraph with LSTMs.

        :param question_embeddings: embedded question tokens,
            batch x q_len x embed_dim
        :param context_embeddings: embedded paragraph tokens,
            batch x c_len x embed_dim
        :param question_mask: mask over question positions; summed per row
            to recover each question's true length
        :param context_mask: mask over context positions, used the same way
        :param encoder_state_input: optional initial hidden state — unused
            by this implementation
        :param dropout_keep_prob: keep probability applied to both LSTMs'
            outputs via DropoutWrapper
        :param batch_size: batch size — unused by this implementation
        :return: (h_q, h_p), one hidden vector per question and per
            paragraph (sliced from the full output sequences H_q / H_p)
        """
        with vs.variable_scope("encoder", True):

            # Encode question
            with vs.variable_scope("question", True):
                lstm_cell = tf.nn.rnn_cell.LSTMCell(
                    self.state_size)  # Should be 1 at first, then 200
                lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                    lstm_cell, output_keep_prob=dropout_keep_prob)
                # True lengths from the mask so dynamic_rnn stops at padding.
                question_length = tf.reduce_sum(tf.cast(
                    question_mask, tf.int32),
                                                reduction_indices=1)
                print("Question length: ", question_length)
                #(fw_out, bw_out), _ = bidirectional_dynamic_rnn(lstm_cell, lstm_cell,
                #					question_embeddings, sequence_length=question_length,
                #					time_major=False, dtype=tf.float64, swap_memory=True)  #TODO: time_major=True was causing seg faults
                #self.H_q = tf.concat(2, [fw_out, bw_out])
                self.H_q, _ = dynamic_rnn(lstm_cell,
                                          question_embeddings,
                                          sequence_length=question_length,
                                          time_major=False,
                                          dtype=tf.float64,
                                          swap_memory=True)

                #last_h_q_indices = question_length - 1
                #last_h_q_indices = tf.stack([tf.range(batch_size), last_h_q_indices], axis=1)
                #self.h_q = tf.gather_nd(self.H_q, last_h_q_indices)
                # NOTE(review): this takes the output at time step index 1,
                # not the last valid step; the commented-out gather above
                # suggests the final state was intended — confirm.
                self.h_q = self.H_q[:, 1, :]
                print("H_q: ", self.H_q)
                print("h_q: ", self.h_q)

            with vs.variable_scope("context", True):
                # Encode context paragraph
                context_length = tf.reduce_sum(tf.cast(context_mask, tf.int32),
                                               reduction_indices=1)
                print("Context length: ", context_length)
                #attn_cell = GRUAttnCell(2* self.state_size, self.H_q)		# TODO: 2* because fw_out and bw_out are concatenated
                #self.H_p, _ = dynamic_rnn(attn_cell, context_embeddings, dtype=tf.float64)#, sequence_length=context_length, dtype=tf.float64)
                context_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.state_size)
                context_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                    context_lstm_cell, output_keep_prob=dropout_keep_prob)
                #(fw_out, bw_out), _ = bidirectional_dynamic_rnn(context_lstm_cell, context_lstm_cell,
                #					context_embeddings, sequence_length=context_length,
                #					time_major=False, dtype=tf.float64, swap_memory=True)  #TODO: time_major=True was causing seg faults
                #self.H_p = tf.concat(2, [fw_out, bw_out])
                self.H_p, _ = dynamic_rnn(context_lstm_cell,
                                          context_embeddings,
                                          sequence_length=context_length,
                                          time_major=False,
                                          dtype=tf.float64,
                                          swap_memory=True)

                #self.last_h_p_indices = context_length - 1
                #self.last_h_p_indices = tf.stack([tf.range(batch_size), self.last_h_p_indices], axis=1)
                #self.h_p = tf.gather_nd(self.H_p, self.last_h_p_indices)
                # NOTE(review): same time-step-1 slice as h_q above — confirm.
                self.h_p = self.H_p[:, 1, :]
                print("H_p: ", self.H_p)
                print("h_p: ", self.h_p)

            return self.h_q, self.h_p
コード例 #15
0
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            num_layers,
            num_labels,
            embed,
            learning_rate=0.005,
            max_gradient_norm=5.0):
        """Build a multi-layer RNN text classifier.

        Fixes the template blanks that made this block a syntax error:
        the two missing placeholders, the embedding lookup, and the
        final linear projection to logits.

        :param num_symbols: vocabulary size
        :param num_embed_units: word-embedding dimension
        :param num_units: hidden units in each RNN layer
        :param num_layers: number of stacked RNN layers
        :param num_labels: number of target classes
        :param embed: pre-trained embedding matrix, or None for random init
        :param learning_rate: initial SGD learning rate
        :param max_gradient_norm: global-norm gradient clipping threshold
        """
        self.texts = tf.placeholder(tf.string, (None, None), 'texts')  # shape: [batch, length]

        self.texts_length = tf.placeholder(tf.int32, (None, ), 'texts_length')  # shape: [batch]
        # int64 so the labels compare directly with tf.argmax's int64 output.
        self.labels = tf.placeholder(tf.int64, (None, ), 'labels')  # shape: [batch]

        self.symbol2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=UNK_ID,
                shared_name="in_table",
                name="in_table",
                checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)


        self.index_input = self.symbol2index.lookup(self.texts)   # shape: [batch, length]

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(self.embed, self.index_input)  # shape: [batch, length, num_embed_units]

        cell = MultiRNNCell([BasicRNNCell(num_units) for _ in range(num_layers)])

        outputs, states = dynamic_rnn(cell, self.embed_input,
                self.texts_length, dtype=tf.float32, scope="rnn")

        # Final hidden state of the top RNN layer; for cells with composite
        # states (e.g. LSTM) this would need the .h component instead.
        vectors = states[-1]

        with tf.variable_scope('logits'):
            weight = tf.get_variable("weights", [num_units, num_labels])
            bias = tf.get_variable("biases", [num_labels])
            # linear projection: [batch, num_units] -> [batch, num_labels]
            logits = tf.matmul(vectors, weight) + bias

        self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, predict_labels), tf.int32), name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients,
                max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                global_step=self.global_step)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                max_to_keep=5, pad_step_number=True)
コード例 #16
0
File: model.py  Project: thu-coai/LM-tensorflow
    def __init__(self, data, args, embed):
        """Build the TF1 graph of an LSTM language model.

        Wires up: embedding table + lookup, an LSTM over the input sentence,
        a dense projection to vocabulary logits, a padding-masked
        cross-entropy loss, a Momentum training op with global-norm gradient
        clipping, and two checkpoint savers.

        Args:
            data: dataset object; only `frequent_vocab_size` is read here.
            args: hyper-parameter namespace; reads embedding_size, lr,
                lr_decay, dh_size, grad_clip, momentum, name, and
                checkpoint_max_to_keep.
            embed: pre-trained embedding matrix used as initializer, or
                None to initialize the embedding table randomly.
        """

        with tf.variable_scope("input"):
            with tf.variable_scope("embedding"):
                # build the embedding table and embedding input
                if embed is None:
                    # initialize the embedding randomly
                    self.embed = tf.get_variable(
                        'embed',
                        [data.frequent_vocab_size, args.embedding_size],
                        tf.float32)
                else:
                    # initialize the embedding by pre-trained word vectors
                    self.embed = tf.get_variable('embed',
                                                 dtype=tf.float32,
                                                 initializer=embed)

            # input placeholders
            self.sentence = tf.placeholder(tf.int32, (None, None),
                                           'sen_inps')  # batch*len
            self.sentence_length = tf.placeholder(tf.int32, (None, ),
                                                  'sen_lens')  # batch
            self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

            batch_size, batch_len = tf.shape(self.sentence)[0], tf.shape(
                self.sentence)[1]
            # NOTE(review): "scentence" is a typo for "sentence"; kept as-is
            # because the attribute name may be referenced elsewhere.
            self.scentence_max_len = batch_len - 1

            # data processing: input drops the last token, target drops the
            # first, so the model predicts token t+1 from tokens <= t
            LM_input = tf.split(self.sentence, [self.scentence_max_len, 1],
                                1)[0]  # no eos_id
            self.LM_input = tf.nn.embedding_lookup(
                self.embed, LM_input)  # batch*(len-1)*unit
            self.LM_target = tf.split(self.sentence,
                                      [1, self.scentence_max_len],
                                      1)[1]  # no go_id, batch*(len-1)
            self.input_len = self.sentence_length - 1
            self.input_mask = tf.sequence_mask(
                self.input_len, self.scentence_max_len,
                dtype=tf.float32)  # 0 for <pad>, batch*(len-1)

        # initialize the training process: decayable learning rate + step
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build LSTM NN
        basic_cell = tf.nn.rnn_cell.LSTMCell(args.dh_size)
        with tf.variable_scope('rnnlm'):
            LM_output, _ = dynamic_rnn(basic_cell,
                                       self.LM_input,
                                       self.input_len,
                                       dtype=tf.float32,
                                       scope="rnnlm")
        # fullly connected layer projecting hidden states to vocab logits
        LM_output = tf.layers.dense(
            inputs=LM_output, units=data.frequent_vocab_size
        )  # shape of LM_output: (batch_size, batch_len-1, vocab_size)

        # loss
        # NOTE(review): the orthogonal initializer only affects variables
        # created inside this scope; the dense layer above is outside it.
        with tf.variable_scope("loss",
                               initializer=tf.orthogonal_initializer()):
            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=LM_output, labels=self.LM_target)
            crossent = tf.reduce_sum(crossent *
                                     self.input_mask)  # to ignore <pad>s

            # per-sentence loss (used for training) and per-word loss (ppl)
            self.sen_loss = crossent / tf.to_float(batch_size)
            self.ppl_loss = crossent / tf.reduce_sum(
                self.input_mask)  # crossent per word.
            # self.ppl_loss = tf.Print(self.ppl_loss, [self.ppl_loss] )

            self.decoder_distribution_teacher = tf.nn.log_softmax(LM_output)
        with tf.variable_scope("decode", reuse=True):
            self.decoder_distribution = LM_output  # (batch_size, batch_len-1, vocab_size)
            # for inference: argmax over ids >= 2, then shift back by 2
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution,
                         [2, data.frequent_vocab_size - 2], 2)[1],
                2) + 2  # for removing UNK. 0 for <pad> and 1 for <unk>

        self.loss = self.sen_loss

        # calculate the gradient of parameters and update
        # (only variables whose name contains args.name are trained)
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate,
                                         momentum=args.momentum)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint: rolling "latest" saver plus a single "best" saver
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)
コード例 #17
0
    def __init__(self,
                 num_items,
                 num_embed_units,
                 num_units,
                 num_layers,
                 vocab=None,
                 embed=None,
                 learning_rate=1e-4,
                 learning_rate_decay_factor=0.95,
                 beam_size=5,
                 max_gradient_norm=5.0,
                 num_samples=512,
                 max_length=30,
                 use_lstm=True):
        """Build a session-level discriminator graph (variables under "dis/").

        Encodes a session of item ids with a stacked LSTM/GRU, scores how
        well the softmax-attended candidate embedding matches the clicked
        "aim" embedding at each step, pools the scores over the session into
        a single logit, and trains a sigmoid classifier against `self.label`
        with Adam and global-norm gradient clipping.

        Args:
            num_items: number of item ids (rows of the embedding table).
            num_embed_units: item embedding dimension.
            num_units: RNN hidden size (also the projection size).
            num_layers: number of stacked RNN layers.
            vocab: not used in this graph; kept for interface compatibility.
            embed: optional pre-trained item embedding matrix.
            learning_rate: initial learning rate; decayable via
                `learning_rate_decay_op`.
            learning_rate_decay_factor: multiplicative decay factor.
            beam_size, num_samples, max_length: not used in this graph.
            max_gradient_norm: global-norm clipping threshold.
            use_lstm: if True use LSTMCell layers, else GRUCell layers.
        """

        # epoch counter, incremented externally via epoch_add_op
        self.epoch = tf.Variable(0, trainable=False, name='dis/epoch')
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        # [batch, session_len] clicked item ids
        self.sessions_input = tf.placeholder(tf.int32, shape=(None, None))
        # NOTE(review): shape=(None) is just None (fully unconstrained
        # shape); (None,) was probably intended for the 1-D placeholders.
        self.sessions_length = tf.placeholder(tf.int32, shape=(None))
        # [batch, session_len, rec_len] recommended candidates per step
        self.rec_lists = tf.placeholder(tf.int32, shape=(None, None, None))
        # [batch, session_len, rec_len] 1 for valid candidates, 0 for padding
        self.rec_mask = tf.placeholder(tf.float32, shape=(None, None, None))
        # [batch, session_len] index of the clicked item inside rec_lists
        self.aims_idx = tf.placeholder(tf.int32, shape=(None, None))
        # [batch] binary classification target
        self.label = tf.placeholder(tf.int32, shape=(None))
        # [batch, session_len] purchase indicator per step
        self.purchase = tf.placeholder(tf.int32, shape=(None, None))

        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('dis/embed',
                                         [num_items, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained vectors
            self.embed = tf.get_variable('dis/embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        encoder_length, rec_length = tf.shape(
            self.sessions_input)[1], tf.shape(self.rec_lists)[2]

        # [batch, session_len]; reverse cumsum of a one-hot at position
        # (length - 2) yields 1 for steps before that position, 0 after —
        # i.e. a mask over the steps used for scoring
        encoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.sessions_length - 2, encoder_length),
                      reverse=True,
                      axis=1), [-1, encoder_length])

        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.sessions_input)  #batch*len*unit

        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # rnn encoder
        encoder_output, _ = dynamic_rnn(cell,
                                        self.encoder_input,
                                        self.sessions_length,
                                        dtype=tf.float32,
                                        scope="dis/encoder")

        #[batch_size, length, embed_units]
        self.preference = tf.layers.dense(encoder_output,
                                          num_units,
                                          name="dis/out2preference")
        #[batch_size, length, rec_len, num_units]
        self.candidate = tf.layers.dense(tf.nn.embedding_lookup(
            self.embed, self.rec_lists),
                                         num_units,
                                         name="dis/rec2candidate")
        #[batch_size, length, rec_len] dot product of preference x candidate
        self.pre_mul_can = tf.reduce_sum(
            tf.expand_dims(self.preference, 2) * self.candidate, 3)

        # soft attention over candidates (temperature 0.1), then pooled
        self.max_embed = tf.reduce_sum(
            tf.expand_dims(tf.nn.softmax(self.pre_mul_can / 0.1), 3) *
            self.candidate, 2)
        # embedding of the actually-clicked candidate at each step
        self.aim_embed = tf.reduce_sum(
            tf.expand_dims(tf.one_hot(self.aims_idx, rec_length), 3) *
            self.candidate, 2)
        if FLAGS['use_simulated_data'].value:
            # simulated data carries no purchase signal: constant weight
            purchase_weight = tf.constant(1.0, dtype=tf.float32)
        else:
            # learned scalar affine re-weighting of purchased steps
            W_p = tf.get_variable("Wp", shape=(), dtype=tf.float32)
            b_p = tf.get_variable("bp", shape=(), dtype=tf.float32)
            purchase_weight = tf.cast(self.purchase, tf.float32) * W_p + b_p
        # [batch]: masked, weighted mean over steps of the per-step scores
        self.logits = tf.reduce_sum(
            tf.reduce_sum(self.max_embed * self.aim_embed, 2) * purchase_weight
            * encoder_mask, 1) / tf.reduce_sum(encoder_mask, 1)
        self.prob = tf.nn.sigmoid(self.logits)
        self.decoder_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                    labels=tf.cast(
                                                        self.label,
                                                        tf.float32)))
        # accuracy at a 0.5 decision threshold
        self.acc = tf.reduce_mean(
            tf.cast(
                tf.equal(tf.cast(tf.greater(self.prob, 0.5), tf.int32),
                         self.label), tf.float32))

        self.params = tf.trainable_variables()

        # decayable learning rate and global step
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        self.global_step = tf.Variable(0, trainable=False)

        # clipped-gradient Adam update
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(zip(clipped_gradients,
                                                    self.params),
                                                global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=10,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #18
0
    def __init__(self, input_producer, embed_mat, config, is_train):
        """Build a sentence VAE: RNN encoder -> (mu, logvar) -> sampled z ->
        word-dropout RNN decoder, trained with reconstruction + KL loss.

        Args:
            input_producer: provides x_enc/x_dec/y_dec id tensors, the
                corresponding lengths, seq_max_length, and vocab_num.
            embed_mat: optional pre-trained embedding matrix (used as a
                constant initializer) or None for default initialization.
            config: hyper-parameters (batch_size, hidden_size, embed_dim,
                is_GRU, is_argmax_sampling, word_dropout_keep_prob,
                max_grad_norm, learning_rate).
            is_train: accepted for interface compatibility; not read here.
        """

        with tf.variable_scope("VAE") as var_scope:
            x_enc = input_producer.x_enc
            x_dec = input_producer.x_dec
            y_dec = input_producer.y_dec
            len_enc = input_producer.len_enc
            len_dec = input_producer.len_dec

            max_len = input_producer.seq_max_length
            vocab_num = input_producer.vocab_num
            batch_size = config.batch_size
            hidden_size = config.hidden_size
            embed_dim = config.embed_dim

            is_GRU = config.is_GRU
            is_argmax_sampling = config.is_argmax_sampling
            word_keep_prob = config.word_dropout_keep_prob
            max_grad_norm = config.max_grad_norm
            learning_rate = config.learning_rate

            # KL annealing weight, driven only by KL_weight_update below.
            # FIX: the original `tf.Variable(0.0, "KL_weight")` bound the
            # string to the positional `trainable` parameter, leaving the
            # variable unnamed and trainable — so the optimizer (which
            # differentiates through `loss = AE + KL_weight * KL_mean`)
            # would update the annealing weight itself.
            self.KL_weight = tf.Variable(0.0, trainable=False,
                                         name="KL_weight")
            self.input_ids = y_dec

            def _lstm_cell():
                return BasicLSTMCell(num_units=hidden_size,
                                     forget_bias=1.0,
                                     state_is_tuple=True,
                                     reuse=tf.get_variable_scope().reuse)
            def _gru_cell():
                return GRUCell(num_units=hidden_size,
                               reuse=tf.get_variable_scope().reuse)

            # cell is a factory: each cell() call creates a fresh cell object
            cell = _gru_cell if is_GRU else _lstm_cell
            self.initial_state = cell().zero_state(batch_size, tf.float32)


            # encoder embedding (pinned to CPU)
            with tf.device("/cpu:0"):
                embed_init = tf.constant_initializer(embed_mat)\
                                if (embed_mat is not None) else None
                embedding = tf.get_variable("embedding", [vocab_num, embed_dim],
                                             initializer=embed_init,
                                             trainable=True)
                in_enc = embedding_lookup(embedding, x_enc)



            with tf.variable_scope("encoder"):
                out_tuple = dynamic_rnn(cell=cell(),
                                        inputs=in_enc,
                                        sequence_length=len_enc,
                                        initial_state=self.initial_state)
                (_, encoder_hidden) = out_tuple

                # linear layers for mu and log(var)
                latent_dim = hidden_size # may have to change this later
                W_mu = tf.get_variable("W_mu", [hidden_size,latent_dim])
                b_mu = tf.get_variable("b_mu", [latent_dim])
                W_logvar = tf.get_variable("W_logvar", [hidden_size,latent_dim])
                b_logvar = tf.get_variable("b_logvar", [latent_dim])
                #l2_loss = tf.nn.l2_loss(W_mu) + tf.nn.l2_loss(W_logvar)

                # NOTE(review): tf.matmul on encoder_hidden assumes a plain
                # 2-D state (GRU); an LSTM tuple state would not work here.
                mu = tf.matmul(encoder_hidden, W_mu) + b_mu
                logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

                # sample epsilon
                epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

                # reparameterization trick: z = mu + sigma * epsilon
                stddev = tf.exp(0.5 * logvar) # standard deviation
                self.z = mu + tf.multiply(stddev, epsilon)

            # decoder embedding lookup (pinned to CPU, shared table)
            with tf.device("/cpu:0"):
                in_dec = embedding_lookup(embedding, x_dec)

            with tf.variable_scope("decoder"):

                # word dropout: randomly replace decoder inputs with UNK
                helper = WordDropoutTrainingHelper(
                                      inputs=in_dec,
                                      sequence_length=len_dec,
                                      embedding=embedding,
                                      dropout_keep_prob=word_keep_prob,
                                      drop_token_id=UNK_ID,
                                      is_argmax_sampling=is_argmax_sampling)

                # projection layer
                output_layer = Dense(units=vocab_num,
                                     activation=None,
                                     use_bias=True,
                                     trainable=True)

                # decoder seeded with the sampled latent z as initial state
                decoder = BasicDecoder(cell=cell(),
                                       helper=helper,
                                       initial_state=self.z,
                                       output_layer=output_layer)

                # dynamic_decode
                out_tuple = dynamic_decode(decoder=decoder,
                                           output_time_major=False, #  speed
                                           impute_finished=True)

            # get all the variables in this scope
            self.vars = tf.contrib.framework.get_variables(var_scope)

        # (ouputs, state, sequence_length)
        (self.outputs, _, self.cell_outputs_len) = out_tuple # final

        # (cell_outputs, sample_ids)
        (self.cell_outputs, self.sampled_ids) = self.outputs

        # compute softmax loss (reconstruction), truncated to decoded length
        len_out = tf.reduce_max(len_dec)
        targets = y_dec[:,:len_out]
        weights = tf.sequence_mask(self.cell_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.cell_outputs,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        self.AE_loss = self.AE_loss_mean = softmax_loss

        # compute KL loss (regularization): KL(q(z|x) || N(0, I))
        KL_term = 1 + logvar - tf.pow(mu, 2) - tf.exp(logvar)
        self.KL_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.KL_loss_mean = tf.reduce_mean(self.KL_loss)

        # total loss with annealed KL weight
        self.loss = self.AE_loss + self.KL_weight * self.KL_loss_mean

        # optimization
        self.lr = tf.Variable(learning_rate, trainable=False, name="lr")

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.vars),
                                          max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self.global_step = get_or_create_global_step()
        self.train_op = optimizer.apply_gradients(zip(grads, self.vars),
                                                  global_step=self.global_step)

        # learning_rate update via feed of new_lr
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
        self.lr_update = tf.assign(self.lr, self.new_lr)

        # KL weight update via feed of new_KL_weight (annealing schedule)
        self.new_KL_weight = tf.placeholder(tf.float32, shape=[], name="new_kl")
        self.KL_weight_update = tf.assign(self.KL_weight, self.new_KL_weight)

        # summaries
        tf.summary.scalar("Loss/AE_mean", self.AE_loss_mean)
        tf.summary.scalar("Loss/KL_mean", self.KL_loss_mean)
        tf.summary.scalar("Loss/Total", self.AE_loss_mean + self.KL_loss_mean)
        tf.summary.scalar("Misc/KL_weight", self.KL_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(mu))
        tf.summary.scalar("Misc/sigma_mean", tf.reduce_mean(stddev))
        tf.summary.scalar("Misc/learning_rate", self.lr)
        self.summary_op = tf.summary.merge_all()
コード例 #19
0
    def __init__(self,
                 num_items,
                 num_embed_units,
                 num_units,
                 num_layers,
                 embed=None,
                 learning_rate=1e-4,
                 action_num=10,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 use_lstm=True):
        """Build the recommendation agent graph (variables under "agn").

        Training branch: encode the session with a stacked RNN, compute a
        sampled-softmax sequence loss over next-item targets, restrict the
        predicted distribution to each step's candidate list, and weight the
        loss by an external reward signal. Inference branch: reuse the same
        weights with a fed-in RNN state to produce top-k (and Gumbel-noised
        top-k) recommendations.

        Args:
            num_items: number of item ids (rows of the embedding table).
            num_embed_units: item embedding dimension.
            num_units: RNN hidden size.
            num_layers: number of stacked RNN layers.
            embed: optional pre-trained item embedding matrix.
            learning_rate: initial learning rate; decayable via
                `learning_rate_decay_op`.
            action_num: k for the top-k recommendation outputs.
            learning_rate_decay_factor: multiplicative decay factor.
            max_gradient_norm: global-norm clipping threshold.
            use_lstm: if True use LSTMCell layers, else GRUCell layers.
        """

        # epoch counter, incremented externally via epoch_add_op
        self.epoch = tf.Variable(0, trainable=False, name='agn/epoch')
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        # [batch, session_len] clicked item ids
        self.sessions_input = tf.placeholder(tf.int32, shape=(None, None))
        # [batch, session_len, rec_len] candidate item ids per step
        self.rec_lists = tf.placeholder(tf.int32, shape=(None, None, None))
        # [batch, session_len, rec_len] 1 for valid candidates, 0 for padding
        self.rec_mask = tf.placeholder(tf.float32, shape=(None, None, None))
        # [batch, session_len] index of the clicked item inside rec_lists
        self.aims_idx = tf.placeholder(tf.int32, shape=(None, None))
        # NOTE(review): shape=(None) is just None (unconstrained shape);
        # (None,) was probably intended for these 1-D placeholders.
        self.sessions_length = tf.placeholder(tf.int32, shape=(None))
        self.reward = tf.placeholder(tf.float32, shape=(None))

        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable(
                'agn/embed', [num_items, num_embed_units],
                tf.float32,
                initializer=tf.truncated_normal_initializer(0, 1))
        else:
            # initialize the embedding by pre-trained vectors
            self.embed = tf.get_variable('agn/embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        batch_size, encoder_length, rec_length = tf.shape(
            self.sessions_input)[0], tf.shape(
                self.sessions_input)[1], tf.shape(self.rec_lists)[2]

        # [batch, session_len]: reverse cumsum of a one-hot at position
        # (length - 2) marks the steps that contribute to the loss
        encoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.sessions_length - 2, encoder_length),
                      reverse=True,
                      axis=1), [-1, encoder_length])
        # [batch_size, length]: next-item targets (input shifted left,
        # padded with PAD_ID at the end)
        self.sessions_target = tf.concat([
            self.sessions_input[:, 1:],
            tf.ones([batch_size, 1], dtype=tf.int32) * PAD_ID
        ], 1)
        # [batch_size, length, embed_units]
        self.encoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.sessions_input)
        # [batch_size, length, rec_length]
        self.aims = tf.one_hot(self.aims_idx, rec_length)

        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # Training
        with tf.variable_scope("agn"):
            output_fn, sampled_sequence_loss = output_projection_layer(
                num_units, num_items)
            self.encoder_output, self.encoder_state = dynamic_rnn(
                cell,
                self.encoder_input,
                self.sessions_length,
                dtype=tf.float32,
                scope="encoder")

            # build [batch, length, rec_len, 3] gather indices
            # (batch idx, step idx, candidate item id) for tf.gather_nd
            tmp_dim_1 = tf.tile(
                tf.reshape(tf.range(batch_size), [batch_size, 1, 1, 1]),
                [1, encoder_length, rec_length, 1])
            tmp_dim_2 = tf.tile(
                tf.reshape(tf.range(encoder_length),
                           [1, encoder_length, 1, 1]),
                [batch_size, 1, rec_length, 1])
            # [batch_size, length, rec_length, 3]
            gather_idx = tf.concat(
                [tmp_dim_1, tmp_dim_2,
                 tf.expand_dims(self.rec_lists, 3)], 3)

            # [batch_size, length, num_items], [batch_size*length]
            y_prob, local_loss, total_size = sampled_sequence_loss(
                self.encoder_output, self.sessions_target, encoder_mask)

            # Compute recommendation rank given rec_list
            # [batch_size, length, num_items]; zero out ids 0 and 1
            # (special tokens) before ranking
            y_prob = tf.reshape(y_prob, [batch_size, encoder_length, num_items]) * \
                tf.concat([tf.zeros([batch_size, encoder_length, 2], dtype=tf.float32),
                            tf.ones([batch_size, encoder_length, num_items-2], dtype=tf.float32)], 2)
            # [batch_size, length, rec_len]: probability of each candidate
            ini_prob = tf.reshape(tf.gather_nd(y_prob, gather_idx),
                                  [batch_size, encoder_length, rec_length])
            # [batch_size, length, rec_len]: mask out padded candidates
            mul_prob = ini_prob * self.rec_mask

            # [batch_size, length, action_num]
            _, self.index = tf.nn.top_k(mul_prob, k=action_num)
            # [batch_size, length, metric_num]
            _, self.metric_index = tf.nn.top_k(mul_prob,
                                               k=(FLAGS['metric'].value + 1))

            # reward-weighted sequence loss, normalized by token count
            self.loss = tf.reduce_sum(
                tf.reshape(self.reward, [-1]) * local_loss) / total_size

        # Inference (reuses the training weights)
        with tf.variable_scope("agn", reuse=True):
            # tf.get_variable_scope().reuse_variables()
            # NOTE(review): this placeholder hard-codes a 2-layer LSTM state
            # ([layers=2, (c,h)=2, batch, units]) — it assumes num_layers == 2
            # and use_lstm=True; confirm against callers.
            self.lstm_state = tf.placeholder(tf.float32,
                                             shape=(2, 2, None, num_units))
            self.ini_state = (tf.contrib.rnn.LSTMStateTuple(
                self.lstm_state[0, 0, :, :], self.lstm_state[0, 1, :, :]),
                              tf.contrib.rnn.LSTMStateTuple(
                                  self.lstm_state[1, 0, :, :],
                                  self.lstm_state[1, 1, :, :]))
            # [batch_size, length, num_units]
            self.encoder_output_predict, self.encoder_state_predict = dynamic_rnn(
                cell,
                self.encoder_input,
                self.sessions_length,
                initial_state=self.ini_state,
                dtype=tf.float32,
                scope="encoder")

            # [batch_size, num_units]: last-step output only
            self.final_output_predict = tf.reshape(
                self.encoder_output_predict[:, -1, :], [-1, num_units])
            # [batch_size, num_items]
            self.rec_logits = output_fn(self.final_output_predict)
            # [batch_size, action_num]: top-k over non-special ids, then
            # shift indices back to absolute item ids
            _, self.rec_index = tf.nn.top_k(
                self.rec_logits[:, len(_START_VOCAB):], action_num)
            self.rec_index += len(_START_VOCAB)

            def gumbel_max(inp, alpha, beta):
                # Gumbel-noise perturbation of the log-softmax for
                # stochastic exploration
                # assert len(tf.shape(inp)) == 2
                g = tf.random_uniform(tf.shape(inp), 0.0001, 0.9999)
                g = -tf.log(-tf.log(g))
                inp_g = tf.nn.softmax(
                    (tf.nn.log_softmax(inp / 1.0) + g * alpha) * beta)
                return inp_g

            # [batch_size, action_num]: stochastic top-k recommendations
            _, self.random_rec_index = tf.nn.top_k(
                gumbel_max(self.rec_logits[:, len(_START_VOCAB):], 1, 1),
                action_num)
            self.random_rec_index += len(_START_VOCAB)

        # initialize the training process: decayable learning rate
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        # clipped-gradient Adam update over all trainable variables
        self.global_step = tf.Variable(0, trainable=False)
        self.params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(zip(clipped_gradients,
                                                    self.params),
                                                global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=100,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #20
0
ファイル: model.py プロジェクト: juvu/seq2seq_cn
    def __init__(
            self,
            num_symbols,  # 词汇表size
            num_embed_units,  # 词嵌入size
            num_units,  # RNN 每层单元数
            num_layers,  # RNN 层数
            embed,  # 词嵌入
            entity_embed=None,  #
            num_entities=0,  #
            num_trans_units=100,  #
            learning_rate=0.0001,
            learning_rate_decay_factor=0.95,  #
            max_gradient_norm=5.0,  #
            num_samples=500,  # 样本个数,sampled softmax
            max_length=60,
            mem_use=True,
            output_alignments=True,
            use_lstm=False):

        self.posts = tf.placeholder(tf.string, (None, None),
                                    'enc_inps')  # batch_size * encoder_len
        self.posts_length = tf.placeholder(tf.int32, (None),
                                           'enc_lens')  # batch_size
        self.responses = tf.placeholder(tf.string, (None, None),
                                        'dec_inps')  # batch_size * decoder_len
        self.responses_length = tf.placeholder(tf.int32, (None),
                                               'dec_lens')  # batch_size
        self.entities = tf.placeholder(
            tf.string, (None, None, None),
            'entities')  # batch_size * triple_num * triple_len
        self.entity_masks = tf.placeholder(tf.string, (None, None),
                                           'entity_masks')  # 没用到
        self.triples = tf.placeholder(
            tf.string, (None, None, None, 3),
            'triples')  # batch_size * triple_num * triple_len * 3
        self.posts_triple = tf.placeholder(
            tf.int32, (None, None, 1),
            'enc_triples')  # batch_size * encoder_len
        self.responses_triple = tf.placeholder(
            tf.string, (None, None, 3),
            'dec_triples')  # batch_size * decoder_len * 3
        self.match_triples = tf.placeholder(
            tf.int32, (None, None, None),
            'match_triples')  # batch_size * decoder_len * triple_num

        # 获得 encoder_batch_size ,编码器的 encoder_len
        encoder_batch_size, encoder_len = tf.unstack(tf.shape(self.posts))
        # 获得 triple_num
        # 每个 post 包含的知识图个数(补齐过的)
        triple_num = tf.shape(self.triples)[1]
        # 获得 triple_len
        # 每个知识图包含的关联实体个数(补齐过的)
        triple_len = tf.shape(self.triples)[2]

        # 使用的知识三元组
        one_hot_triples = tf.one_hot(
            self.match_triples,
            triple_len)  # batch_size * decoder_len * triple_num * triple_len
        # 用 1 标注了哪个时间步产生的回复用了知识三元组
        use_triples = tf.reduce_sum(one_hot_triples,
                                    axis=[2, 3])  # batch_size * decoder_len

        # 词汇映射到 index 的 hash table
        self.symbol2index = MutableHashTable(
            key_dtype=tf.string,  # key张量的类型
            value_dtype=tf.int64,  # value张量的类型
            default_value=UNK_ID,  # 缺少key的默认值
            shared_name=
            "in_table",  # If non-empty, this table will be shared under the given name across multiple sessions
            name="in_table",  # 操作名
            checkpoint=True
        )  # if True, the contents of the table are saved to and restored from checkpoints. If shared_name is empty for a checkpointed table, it is shared using the table node name.

        # index 映射到词汇的 hash table
        self.index2symbol = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_UNK',
                                             shared_name="out_table",
                                             name="out_table",
                                             checkpoint=True)

        # 实体映射到 index 的 hash table
        self.entity2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=NONE_ID,
                                             shared_name="entity_in_table",
                                             name="entity_in_table",
                                             checkpoint=True)

        # index 映射到实体的 hash table
        self.index2entity = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_NONE',
                                             shared_name="entity_out_table",
                                             name="entity_out_table",
                                             checkpoint=True)

        # 将 post 的 string 映射成词汇 id
        self.posts_word_id = self.symbol2index.lookup(
            self.posts)  # batch_size * encoder_len
        # 将 post 的 string 映射成实体 id
        self.posts_entity_id = self.entity2index.lookup(
            self.posts)  # batch_size * encoder_len

        # 将 response 的 string 映射成词汇 id
        self.responses_target = self.symbol2index.lookup(
            self.responses)  # batch_size * decoder_len
        # 获得解码器的 batch_size,decoder_len
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
            self.responses)[1]
        #  去掉 responses_target 的最后一列,给第一列加上 GO_ID
        self.responses_word_id = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int64) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)  # batch_size * decoder_len

        # 得到 response 的 mask
        # 首先将回复的长度 one_hot 编码
        # 然后横着从右向左累计求和,形成一个如果该位置在长度范围内,则为1,否则则为0的矩阵,最后一步 reshape 应该没有必要
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])  # batch_size * decoder_len

        # 初始化 词嵌入 和 实体嵌入,传入了参数就直接赋值,没有的话就随机初始化
        if embed is None:
            self.embed = tf.get_variable('word_embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            self.embed = tf.get_variable('word_embed',
                                         dtype=tf.float32,
                                         initializer=embed)
        if entity_embed is None:
            self.entity_trans = tf.get_variable(
                'entity_embed', [num_entities, num_trans_units],
                tf.float32,
                trainable=False)
        else:
            self.entity_trans = tf.get_variable('entity_embed',
                                                dtype=tf.float32,
                                                initializer=entity_embed,
                                                trainable=False)

        # 添加一个全连接层,输入是实体的嵌入,该层的 size=num_trans_units,激活函数是tanh
        # 为什么还要用全连接层连一下??????
        self.entity_trans_transformed = tf.layers.dense(
            self.entity_trans,
            num_trans_units,
            activation=tf.tanh,
            name='trans_transformation')
        # 7 * num_trans_units 的全零初始化的数组
        padding_entity = tf.get_variable('entity_padding_embed',
                                         [7, num_trans_units],
                                         dtype=tf.float32,
                                         initializer=tf.zeros_initializer())

        # 把 padding_entity 添加到 entity_trans_transformed 的最前,补了有什么用?????????????
        self.entity_embed = tf.concat(
            [padding_entity, self.entity_trans_transformed], axis=0)

        # tf.nn.embedding_lookup 以后维度会+1,所以通过reshape来取消这个多出来的维度
        triples_embedding = tf.reshape(
            tf.nn.embedding_lookup(self.entity_embed,
                                   self.entity2index.lookup(self.triples)),
            [encoder_batch_size, triple_num, -1, 3 * num_trans_units])
        entities_word_embedding = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entities)),
            [encoder_batch_size, -1, num_embed_units
             ])  # [batch_size,triple_num*triple_len,num_embed_units]

        # 把 head,relation,tail分割开来
        head, relation, tail = tf.split(triples_embedding,
                                        [num_trans_units] * 3,
                                        axis=3)

        # 静态图注意力机制
        with tf.variable_scope('graph_attention'):
            # 将头和尾连接起来
            head_tail = tf.concat(
                [head, tail],
                axis=3)  # batch_size * triple_num * triple_len * 200

            # tanh(dot(W, head_tail))
            head_tail_transformed = tf.layers.dense(
                head_tail,
                num_trans_units,
                activation=tf.tanh,
                name='head_tail_transform'
            )  # batch_size * triple_num * triple_len * 100

            # dot(W, relation)
            relation_transformed = tf.layers.dense(
                relation, num_trans_units, name='relation_transform'
            )  # batch_size * triple_num * triple_len * 100

            # 两个向量先元素乘,再求和,等于两个向量的内积
            # dot(traspose(dot(W, relation)), tanh(dot(W, head_tail)))
            e_weight = tf.reduce_sum(
                relation_transformed * head_tail_transformed,
                axis=3)  # batch_size * triple_num * triple_len

            # 图中每个三元组的 alpha 权值
            alpha_weight = tf.nn.softmax(
                e_weight)  # batch_size * triple_num * triple_len

            # tf.expand_dims 使 alpha_weight 维度+1 batch_size * triple_num * triple_len * 1
            # 对第2个维度求和,由此产生每个图 100 维的图向量表示
            graph_embed = tf.reduce_sum(
                tf.expand_dims(alpha_weight, 3) * head_tail,
                axis=2)  # batch_size * triple_num * 100
        """
        [0, 1, 2... encoder_batch_size] 转化成 encoder_batch_size * 1 * 1 的矩阵 [[[0]], [[1]], [[2]],...]
        tf.tile 将矩阵的第 1 维进行扩展 encoder_batch_size * encoder_len * 1 [[[0],[0]...]],...]
        与 posts_triple 在第 2 维度上进行拼接,形成 indices 矩阵
        indices 矩阵:
        [
         [[0 0], [0 0], [0 0], [0 0], [0 1], [0 0], [0 2], [0 0],...encoder_len],
         [[1 0], [1 0], [1 0], [1 0], [1 1], [1 0], [1 2], [1 0],...encoder_len],
         [[2 0], [2 0], [2 0], [2 0], [2 1], [2 0], [2 2], [2 0],...encoder_len]
         ,...batch_size
        ]
        tf.gather_nd 将 graph_embed 中根据上面矩阵提供的索引检索图向量,再回填至 indices 矩阵
        encoder_batch_size * encoder_len * 100
        """
        graph_embed_input = tf.gather_nd(
            graph_embed,
            tf.concat([
                tf.tile(
                    tf.reshape(tf.range(encoder_batch_size, dtype=tf.int32),
                               [-1, 1, 1]), [1, encoder_len, 1]),
                self.posts_triple
            ],
                      axis=2))

        # 将 responses_triple 转化成实体嵌入 batch_size * decoder_len * 300
        triple_embed_input = tf.reshape(
            tf.nn.embedding_lookup(
                self.entity_embed,
                self.entity2index.lookup(self.responses_triple)),
            [batch_size, decoder_len, 3 * num_trans_units])

        # 将 posts_word_id 转化成词嵌入
        post_word_input = tf.nn.embedding_lookup(
            self.embed, self.posts_word_id)  # batch_size * encoder_len * 300

        # 将 responses_word_id 转化成词嵌入
        response_word_input = tf.nn.embedding_lookup(
            self.embed,
            self.responses_word_id)  # batch_size * decoder_len * 300

        # post_word_input, graph_embed_input 在第二个维度上拼接
        self.encoder_input = tf.concat(
            [post_word_input, graph_embed_input],
            axis=2)  # batch_size * encoder_len * 400
        # response_word_input, triple_embed_input 在第二个维度上拼接
        self.decoder_input = tf.concat(
            [response_word_input, triple_embed_input],
            axis=2)  # batch_size * decoder_len * 600

        # 构造 deep RNN
        encoder_cell = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])
        decoder_cell = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])

        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(encoder_cell,
                                                    self.encoder_input,
                                                    self.posts_length,
                                                    dtype=tf.float32,
                                                    scope="encoder")

        # 由于词汇表维度过大,所以输出的维度不可能和词汇表一样。通过 projection 函数,可以实现从低维向高维的映射
        # 返回:输出函数,选择器函数,计算序列损失,采样序列损失,总体损失的函数
        output_fn, selector_fn, sequence_loss, sampled_sequence_loss, total_loss = output_projection_layer(
            num_units, num_symbols, num_samples)

        # 用于训练的 decoder
        with tf.variable_scope('decoder'):
            # 得到注意力函数
            # 准备注意力
            # attention_keys_init: 注意力的 keys
            # attention_values_init: 注意力的 values
            # attention_score_fn_init: 计算注意力上下文的函数
            # attention_construct_fn_init: 计算所有上下文拼接的函数
            attention_keys_init, attention_values_init, attention_score_fn_init, attention_construct_fn_init \
                    = prepare_attention(encoder_output, 'bahdanau', num_units, imem=(graph_embed, triples_embedding), output_alignments=output_alignments and mem_use)#'luong', num_units)

            # 返回训练时解码器每一个时间步对输入的处理函数
            decoder_fn_train = attention_decoder_fn_train(
                encoder_state,
                attention_keys_init,
                attention_values_init,
                attention_score_fn_init,
                attention_construct_fn_init,
                output_alignments=output_alignments and mem_use,
                max_length=tf.reduce_max(self.responses_length))

            # 输出,最终状态,alignments 的 TensorArray
            self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(
                decoder_cell,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope="decoder_rnn")

            if output_alignments:

                self.decoder_loss, self.ppx_loss, self.sentence_ppx = total_loss(
                    self.decoder_output, self.responses_target,
                    self.decoder_mask, self.alignments, triples_embedding,
                    use_triples, one_hot_triples)
                self.sentence_ppx = tf.identity(
                    self.sentence_ppx,
                    name='ppx_loss')  # 将 sentence_ppx 转化成一步操作
            else:
                self.decoder_loss = sequence_loss(self.decoder_output,
                                                  self.responses_target,
                                                  self.decoder_mask)

        # 用于推导的 decoder
        with tf.variable_scope('decoder', reuse=True):
            # 得到注意力函数
            attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                    = prepare_attention(encoder_output, 'bahdanau', num_units, reuse=True, imem=(graph_embed, triples_embedding), output_alignments=output_alignments and mem_use)#'luong', num_units)
            decoder_fn_inference = attention_decoder_fn_inference(
                output_fn,
                encoder_state,
                attention_keys,
                attention_values,
                attention_score_fn,
                attention_construct_fn,
                self.embed,
                GO_ID,
                EOS_ID,
                max_length,
                num_symbols,
                imem=(entities_word_embedding,
                      tf.reshape(
                          triples_embedding,
                          [encoder_batch_size, -1, 3 * num_trans_units])),
                selector_fn=selector_fn)
            # imem: ([batch_size,triple_num*triple_len,num_embed_units],[encoder_batch_size, triple_num*triple_len, 3*num_trans_units]) 实体次嵌入和三元组嵌入的元组

            self.decoder_distribution, _, output_ids_ta = dynamic_rnn_decoder(
                decoder_cell, decoder_fn_inference, scope="decoder_rnn")

            output_len = tf.shape(self.decoder_distribution)[1]  # decoder_len
            output_ids = tf.transpose(
                output_ids_ta.gather(
                    tf.range(output_len)))  # [batch_size, decoder_len]

            # 对 output 的值域行裁剪
            word_ids = tf.cast(tf.clip_by_value(output_ids, 0, num_symbols),
                               tf.int64)  # [batch_size, decoder_len]

            # 计算的是采用的实体词在 entities 的位置
            # 1、tf.shape(entities_word_embedding)[1] = triple_num*triple_len
            # 2、tf.range(encoder_batch_size): [batch_size]
            # 3、tf.reshape(tf.range(encoder_batch_size) * tf.shape(entities_word_embedding)[1], [-1, 1]): [batch_size, 1] 实体词在 entities 中的偏移量
            # 4、tf.clip_by_value(-output_ids, 0, num_symbols): [batch_size, decoder_len] 实体词的相对位置
            # 5、entity_ids: [batch_size * decoder_len] 加上偏移量之后在 entities 中的实际位置
            entity_ids = tf.reshape(
                tf.clip_by_value(-output_ids, 0, num_symbols) + tf.reshape(
                    tf.range(encoder_batch_size) *
                    tf.shape(entities_word_embedding)[1], [-1, 1]), [-1])

            # 计算的是所用的实体词
            # 1、entities: [batch_size, triple_num, triple_len]
            # 2、tf.reshape(self.entities, [-1]): [batch_size * triple_num * triple_len]
            # 3、tf.gather: [batch_size*decoder_len]
            # 4、entities: [batch_size, output_len]
            entities = tf.reshape(
                tf.gather(tf.reshape(self.entities, [-1]), entity_ids),
                [-1, output_len])

            words = self.index2symbol.lookup(word_ids)  # 将 id 转化为实际的词
            # output_ids > 0 为 bool 张量,True 的位置用 words 中该位置的词替换
            self.generation = tf.where(output_ids > 0, words, entities)
            self.generation = tf.identity(self.generation, name='generation')

        # 初始化训练过程
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)

        # ???
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        # 更新参数的次数
        self.global_step = tf.Variable(0, trainable=False)

        # 要训练的参数
        self.params = tf.global_variables()

        # 选择优化算法
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        self.lr = opt._lr

        # 根据 decoder_loss 计算 params 梯度
        gradients = tf.gradients(self.decoder_loss, self.params)
        # 梯度裁剪
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        tf.summary.scalar('decoder_loss', self.decoder_loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
        self.saver_epoch = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                          max_to_keep=1000,
                                          pad_step_number=True)
Code example #21
0
File: model.py  Project: LuChengTHU/BasicRNN
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.5,
                 max_gradient_norm=5.0,
                 model='LSTM'):
        """Build the graph of an RNN text classifier.

        Args:
            num_symbols: vocabulary size (rows of the embedding table).
            num_embed_units: dimensionality of each word embedding.
            num_units: hidden size of every RNN cell.
            num_layers: number of stacked RNN layers.
            num_labels: number of target classes.
            embed: pre-trained embedding matrix, or None for random init.
            learning_rate: initial learning rate for SGD.
            max_gradient_norm: threshold for global-norm gradient clipping.
            model: RNN cell type, one of 'LSTM', 'RNN' or 'GRU'.
        """
        self.texts = tf.placeholder(dtype=tf.string,
                                    shape=[None, None])  # shape: batch*len
        self.texts_length = tf.placeholder(dtype=tf.int32,
                                           shape=None)  # shape: batch
        self.labels = tf.placeholder(dtype=tf.int64,
                                     shape=None)  # shape: batch

        self.keep_prob = tf.placeholder(dtype=tf.float32)

        # vocab table (string -> index); unknown words map to UNK_ID
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed, self.index_input)  # batch*len*embed_unit

        def _make_cell():
            # One dropout-wrapped cell of the requested type, or None on an
            # unknown `model` value (mirrors the original early-return path).
            if model == 'LSTM':
                base = BasicLSTMCell(num_units)
            elif model == 'RNN':
                base = BasicRNNCell(num_units)
            elif model == 'GRU':
                base = GRUCell(num_units)
            else:
                return None
            return tf.nn.rnn_cell.DropoutWrapper(
                base, input_keep_prob=1.0, output_keep_prob=self.keep_prob)

        if num_layers == 1:
            cell_dr = _make_cell()
            if cell_dr is None:
                print("Wrong model!")
                return
            outputs, states = dynamic_rnn(cell_dr,
                                          self.embed_input,
                                          self.texts_length,
                                          dtype=tf.float32,
                                          scope="rnn")
            if model == 'LSTM':
                # Bug fix: `states` is LSTMStateTuple(c, h); the original
                # took states[0] (the cell state c).  The classifier should
                # read the hidden state h.
                h_state = states[1]
            else:
                h_state = states
        else:
            # Bug fix: the original built ONE cell and passed
            # [cell_dr] * num_layers — the same object for every layer,
            # which shares (or, in newer TF, rejects) the layer weights.
            # Build a fresh cell per layer instead.
            cells = [_make_cell() for _ in range(num_layers)]
            if any(c is None for c in cells):
                print("Wrong model!")
                return
            multi_cell = tf.contrib.rnn.MultiRNNCell(cells,
                                                     state_is_tuple=True)
            # Bug fix: the batch size was hard-coded to 16; derive it from
            # the input so any batch size works.
            batch_size = tf.shape(self.texts)[0]
            init_state = multi_cell.zero_state(batch_size, tf.float32)
            outputs, state = tf.nn.dynamic_rnn(multi_cell,
                                               self.embed_input,
                                               self.texts_length,
                                               dtype=tf.float32,
                                               scope="rnn",
                                               initial_state=init_state,
                                               time_major=False)
            # NOTE(review): outputs[:, -1, :] is the last PADDED step, not
            # the last valid step per sequence — kept as in the original.
            h_state = outputs[:, -1, :]

        logits = tf.layers.dense(h_state, num_labels)

        # summed cross-entropy over the batch; mean_loss drives the update
        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                        dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        # number of correct predictions in the batch (a count, not a ratio)
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # SGD with global-norm gradient clipping
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
Code example #22
0
File: qa_model.py  Project: abearman/cs224n-project
    def encode_v2(self, question_embeddings, document_embeddings,
                  question_mask, context_mask, encoderb_state_input,
                  dropout_keep_prob, max_question_len):
        """Coattention encoder for a question/document pair.

        Runs a shared (dropout-wrapped) LSTM over both inputs, forms the
        affinity matrix L = D^T Q, derives question/document attention
        contexts, and encodes the fused representation with a BiLSTM.

        Args:
            question_embeddings: question word embeddings — presumably
                (batch, question_len, embed_size); TODO confirm with caller.
            document_embeddings: paragraph word embeddings — presumably
                (batch, context_len, embed_size); TODO confirm with caller.
            question_mask: 0/1 (or bool) mask; its row-sum recovers each
                question's true length.
            context_mask: same convention for the paragraph.
            encoderb_state_input: NOTE(review): unused in this method.
            dropout_keep_prob: input keep probability for the LSTM cells.
            max_question_len: size argument forwarded to batch_linear.

        Returns:
            U: BiLSTM coattention encoding, forward/backward outputs
            concatenated on the feature axis.
        """
        # Shared LSTM cell
        lstm_cell = tf.nn.rnn_cell.LSTMCell(self.state_size)
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, input_keep_prob=dropout_keep_prob)

        # Question -> LSTM -> Q
        with tf.variable_scope('question_embedding'):
            # true lengths recovered from the mask
            question_length = tf.reduce_sum(tf.cast(question_mask, tf.int32),
                                            reduction_indices=1)
            Q_prime, _ = dynamic_rnn(lstm_cell,
                                     question_embeddings,
                                     sequence_length=question_length,
                                     dtype=tf.float32)
            print("Q_prime: ", Q_prime)

            # Non-linear projection layer on top of the question encoding
            Q = tf.tanh(batch_linear(Q_prime, max_question_len, True))
            Q = tf.transpose(Q, [0, 2, 1])
            print("Q: ", Q)

        with tf.variable_scope('context_embedding'):
            # Paragraph -> LSTM -> D.  NOTE(review): same cell object as the
            # question RNN but under a different variable scope — with
            # old-style TF1 cells this presumably creates separate weights;
            # confirm for the TF version in use.
            #tf.get_variable_scope().reuse_variables()
            context_length = tf.reduce_sum(tf.cast(context_mask, tf.int32),
                                           reduction_indices=1)
            D, _ = dynamic_rnn(lstm_cell,
                               document_embeddings,
                               sequence_length=context_length,
                               dtype=tf.float32)
            D = tf.transpose(D, [0, 2, 1])
            print("D: ", D)

        with tf.variable_scope('coattention'):
            # Affinity between every document position and question position
            L = tf.batch_matmul(tf.transpose(D, [0, 2, 1]), Q)
            print("L: ", L)
            # per-example softmax over the affinity matrix (and its transpose)
            A_Q = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)
            A_D = tf.map_fn(lambda x: tf.nn.softmax(x),
                            tf.transpose(L, [0, 2, 1]),
                            dtype=tf.float32)
            print("A_Q: ", A_Q)
            print("A_D: ", A_D)

            # NOTE(review): mixes deprecated tf.batch_matmul with a bare
            # batch_matmul helper — confirm both resolve in this TF version.
            C_Q = batch_matmul(D, A_Q)
            print("C_Q: ", C_Q)
            concat = tf.concat(1, [Q, C_Q])
            print("concat: ", concat)
            C_D = batch_matmul(tf.concat(1, [Q, C_Q]), A_D)
            print("C_D: ", C_D)

            # Final coattention context: (batch size, context length, 3*hidden size)
            co_att = tf.concat(1, [D, C_D])
            co_att = tf.transpose(co_att, [0, 2, 1])
            print("co_att: ", co_att)

        with tf.variable_scope('encoder'):
            # LSTM for coattention encoding
            cell_fw = tf.nn.rnn_cell.LSTMCell(self.state_size)
            cell_bw = tf.nn.rnn_cell.LSTMCell(self.state_size)
            cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                cell_fw, input_keep_prob=dropout_keep_prob)
            cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                cell_bw, input_keep_prob=dropout_keep_prob)

            # Compute coattention encoding
            (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                co_att,
                sequence_length=context_length,
                dtype=tf.float32)
            print("fw out: ", fw_out)
            print("bw out: ", bw_out)
            U = tf.concat(2, [fw_out, bw_out])
            print("U: ", U)
            return U
Code example #23
0
File: model.py  Project: zhouyonglong/dssm-lstm
    def __init__(self,
                 num_lstm_units,
                 embed,
                 neg_num=4,
                 gradient_clip_threshold=5.0):
        """Build an LSTM-DSSM ranking graph.

        Scores a query against (neg_num + 1) documents by cosine similarity
        of the final LSTM states and maximizes the softmax probability of
        the document in row 0 — assumes the positive document is at index 0
        of `docs`; confirm with the feeding code.

        Args:
            num_lstm_units: hidden size of the query/doc LSTM cells.
            embed: pre-trained word-embedding matrix (used as initializer).
            neg_num: number of negative documents per query.
            gradient_clip_threshold: global-norm gradient clipping bound.
        """
        self.queries = tf.placeholder(dtype=tf.string, shape=[None, None])  # shape: batch*len
        self.queries_length = tf.placeholder(dtype=tf.int32, shape=[None])  # shape: batch
        self.docs = tf.placeholder(dtype=tf.string, shape=[neg_num + 1, None, None])  # shape: (neg_num + 1)*batch*len
        self.docs_length = tf.placeholder(dtype=tf.int32, shape=[neg_num + 1, None])  # shape: (neg_num + 1)*batch

        # vocab table (string -> id); OOV words map to UNK_ID
        self.word2index = MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int64,
            default_value=UNK_ID,
            shared_name="in_table",
            name="in_table",
            checkpoint=True
        )

        # training bookkeeping (non-trainable scalars)
        self.learning_rate = tf.Variable(0.001, trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.momentum = tf.Variable(0.9, trainable=False, dtype=tf.float32)

        self.index_queries = self.word2index.lookup(self.queries)  # batch*len
        self.index_docs = [self.word2index.lookup(doc) for doc in tf.unstack(self.docs)]

        self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
        self.embed_queries = tf.nn.embedding_lookup(self.embed, self.index_queries)
        self.embed_docs = [tf.nn.embedding_lookup(self.embed, index_doc) for index_doc in self.index_docs]

        # separate variable scopes -> query and doc LSTMs do not share weights
        with tf.variable_scope('query_lstm'):
            self.cell_q = SimpleLSTMCell(num_lstm_units)
        with tf.variable_scope('doc_lstm'):
            self.cell_d = SimpleLSTMCell(num_lstm_units)

        # [1][1] presumably selects the hidden state h from the returned
        # (outputs, (c, h)) pair — confirm against SimpleLSTMCell.
        self.states_q = dynamic_rnn(self.cell_q, self.embed_queries, self.queries_length, dtype=tf.float32,
                                         scope="simple_lstm_cell_query")[1][1]  # shape: batch*num_units
        self.states_d = [dynamic_rnn(self.cell_d, self.embed_docs[i], self.docs_length[i], dtype=tf.float32,
                                            scope="simple_lstm_cell_doc")[1][1] for i in range(neg_num + 1)]  # shape: (neg_num + 1)*batch*num_units
        # cosine similarity between the query state and each doc state
        self.queries_norm = tf.sqrt(tf.reduce_sum(tf.square(self.states_q), axis=1))
        self.docs_norm = [tf.sqrt(tf.reduce_sum(tf.square(self.states_d[i]), axis=1)) for i in range(neg_num + 1)]
        self.prods = [tf.reduce_sum(tf.multiply(self.states_q, self.states_d[i]), axis=1) for i in range(neg_num + 1)]
        self.sims = [(self.prods[i] / (self.queries_norm * self.docs_norm[i])) for i in range(neg_num + 1)]  # shape: (neg_num + 1)*batch
        self.sims = tf.convert_to_tensor(self.sims)
        self.gamma = tf.Variable(initial_value=1.0, expected_shape=[], dtype=tf.float32)  # scaling factor according to the paper
        self.origin_sims = self.sims
        self.sims = self.sims * self.gamma
        # softmax over the document axis (dim 0)
        self.prob = tf.nn.softmax(self.sims, dim=0)  # shape: (neg_num + 1)*batch
        self.hit_prob = tf.transpose(self.prob[0])

        # negative log-likelihood of the row-0 document
        self.loss = -tf.reduce_mean(tf.log(self.hit_prob))

        self.params = tf.trainable_variables()
        opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=self.momentum, use_nesterov=True)  # use Nesterov's method, according to the paper
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, gradient_clip_threshold)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
Code example #24
0
    def __init__(self, data, args, embed):
        """Build a GRU seq2seq graph whose encoder and decoder share weights.

        Both dynamic_rnn calls below run under the same 'decoder'/'decoder_rnn'
        variable scope with AUTO_REUSE, so a single GRU serves as both
        encoder and decoder.

        Args:
            data: dataset object; provides vocab_size and eos_id.
            args: hyper-parameter namespace (lr, lr_decay, eh_size, dh_size,
                softmax_samples, max_sent_length, grad_clip, name,
                checkpoint_max_to_keep).
            embed: pre-trained embedding matrix, or None for random init.
        """

        self.posts = tf.placeholder(tf.int32, (None, None),
                                    'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None, ),
                                           'enc_lens')  # batch
        self.origin_responses = tf.placeholder(tf.int32, (None, None),
                                               'dec_inps')  # batch*len
        self.origin_responses_length = tf.placeholder(tf.int32, (None, ),
                                                      'dec_lens')  # batch
        self.is_train = tf.placeholder(tf.bool)

        # deal with original data to adapt encoder and decoder:
        # decoder input drops the last token, target drops the first (<go>)
        batch_size, decoder_len = tf.shape(self.origin_responses)[0], tf.shape(
            self.origin_responses)[1]
        self.responses_input = tf.split(self.origin_responses,
                                        [decoder_len - 1, 1], 1)[0]
        self.responses_target = tf.split(self.origin_responses,
                                         [1, decoder_len - 1], 1)[1]
        self.responses_length = self.origin_responses_length - 1
        decoder_len = decoder_len - 1

        # 0/1 mask over valid decoder positions: one-hot the last valid index
        # and cumulatively sum right-to-left
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        # initialize the training process
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build the embedding table and embedding input
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable(
                'embed', [data.vocab_size, args.embedding_size], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.posts)
        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)
        #self.decoder_input = tf.cond(self.is_train,
        #							 lambda: tf.nn.dropout(tf.nn.embedding_lookup(self.embed, self.responses_input), 0.8),
        #							 lambda: tf.nn.embedding_lookup(self.embed, self.responses_input))

        # build rnn_cell (one GRU, reused for encoding and decoding)
        cell = tf.nn.rnn_cell.GRUCell(args.eh_size)

        # get output projection function
        output_fn = MyDense(data.vocab_size, use_bias=True)
        sampled_sequence_loss = output_projection_layer(
            args.dh_size, data.vocab_size, args.softmax_samples)

        # build encoder (inside the 'decoder' scope so weights are shared
        # with the decoder via AUTO_REUSE)
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            _, self.encoder_state = dynamic_rnn(cell,
                                                self.encoder_input,
                                                self.posts_length,
                                                dtype=tf.float32,
                                                scope="decoder_rnn")

        # construct helper and attention; greedy decoding seeded/terminated
        # with eos_id
        infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            self.embed, tf.fill([batch_size], data.eos_id), data.eos_id)

        # NOTE(review): during training the inference decoder starts from
        # zeros, otherwise from the encoder state — confirm this is the
        # intended direction of the condition.  Also assumes
        # args.dh_size == args.eh_size for the tf.cond branches to match.
        dec_start = tf.cond(
            self.is_train,
            lambda: tf.zeros([batch_size, args.dh_size], dtype=tf.float32),
            lambda: self.encoder_state)

        # build decoder (train): teacher forcing from the gold response
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            self.decoder_output, _ = dynamic_rnn(
                cell,
                self.decoder_input,
                self.responses_length,
                dtype=tf.float32,
                initial_state=self.encoder_state,
                scope='decoder_rnn')
            #self.decoder_output = tf.nn.dropout(self.decoder_output, 0.8)
            self.decoder_distribution_teacher, self.decoder_loss, self.decoder_all_loss = \
             sampled_sequence_loss(self.decoder_output, self.responses_target, self.decoder_mask)

        # build decoder (test): greedy decoding up to max_sent_length
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            decoder_infer = tf.contrib.seq2seq.BasicDecoder(
                cell, infer_helper, dec_start, output_layer=output_fn)
            infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder_infer,
                impute_finished=True,
                maximum_iterations=args.max_sent_length,
                scope="decoder_rnn")
            self.decoder_distribution = infer_outputs.rnn_output
            # argmax over vocab minus the first two ids, then shift back by 2
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, data.vocab_size - 2],
                         2)[1], 2) + 2  # for removing UNK

        # calculate the gradient of parameters and update
        # (only variables whose name contains args.name are trained)
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)
コード例 #25
0
ファイル: model.py プロジェクト: streamride/seq2seq
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 beam_size,
                 embed,
                 learning_rate=0.5,
                 remove_unk=False,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 num_samples=512,
                 max_length=8,
                 use_lstm=False):
        """Build the attention seq2seq graph: a shared encoder plus three
        decoder paths (teacher-forced training, greedy inference, and
        beam-search inference), SGD training ops, a saver, and a serving
        exporter.

        Args:
            num_symbols: vocabulary size (rows of the embedding table).
            num_embed_units: word-embedding dimension.
            num_units: RNN hidden size of each layer.
            num_layers: number of stacked RNN layers.
            beam_size: beam width used by the beam-search decoder.
            embed: pre-trained embedding matrix used as initializer, or
                None to learn the embedding from scratch.
            learning_rate: initial learning rate; decayed multiplicatively
                by running `self.learning_rate_decay_op`.
            remove_unk: whether beam search suppresses the UNK symbol.
            learning_rate_decay_factor: factor applied by the decay op.
            max_gradient_norm: global-norm clipping threshold.
            num_samples: sample count for the sampled-softmax loss.
            max_length: maximum number of decoding steps at inference.
            use_lstm: if True use LSTM cells, otherwise GRU cells.
        """
        # String-valued feeds; token -> id lookup happens in-graph below.
        self.posts = tf.placeholder(tf.string, (None, None),
                                    'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None),
                                           'enc_lens')  # batch
        self.responses = tf.placeholder(tf.string, (None, None),
                                        'dec_inps')  # batch*len
        self.responses_length = tf.placeholder(tf.int32, (None),
                                               'dec_lens')  # batch

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # In-graph vocab tables: token string <-> integer id, checkpointed
        # so serving graphs restore the same mapping.
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        self.index2symbol = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_UNK',
                                             shared_name="out_table",
                                             name="out_table",
                                             checkpoint=True)
        # build the vocab table (string to index)

        self.posts_input = self.symbol2index.lookup(self.posts)  # batch*len
        self.responses_target = self.symbol2index.lookup(
            self.responses)  #batch*len

        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
            self.responses)[1]
        # Decoder input = GO followed by the target shifted right by one.
        self.responses_input = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int64) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)  # batch*len
        # Reverse-cumsum of a one-hot at the last valid position yields a
        # 1/0 mask over valid decoder steps.
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.posts_input)  #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        # NOTE: build a DISTINCT cell per layer. The previous
        # `[LSTMCell(num_units)] * num_layers` reused one cell object for
        # every layer, which makes all layers share weights and fails in
        # newer TF when layer 0's input size (num_embed_units) differs
        # from num_units. Matches the per-layer construction used by the
        # other models in this file.
        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(cell,
                                                    self.encoder_input,
                                                    self.posts_length,
                                                    dtype=tf.float32,
                                                    scope="encoder")

        # get output projection function (sampled softmax over the vocab)
        output_fn, sampled_sequence_loss = output_projection_layer(
            num_units, num_symbols, num_samples)

        # get attention function over the encoder outputs
        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

        # Training decoder: teacher forcing over responses_input.
        with tf.variable_scope('decoder'):
            decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn)
            self.decoder_output, _, _ = dynamic_rnn_decoder(
                cell,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope="decoder_rnn")
            self.decoder_loss = sampled_sequence_loss(self.decoder_output,
                                                      self.responses_target,
                                                      self.decoder_mask)

        # Greedy inference decoder, sharing the training weights.
        with tf.variable_scope('decoder', reuse=True):
            decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
                output_fn, encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn, self.embed, GO_ID,
                EOS_ID, max_length, num_symbols)

            self.decoder_distribution, _, _ = dynamic_rnn_decoder(
                cell, decoder_fn_inference, scope="decoder_rnn")
            # Argmax over ids >= 2 only, then shift back: excludes the two
            # reserved symbols (UNK among them) from generation.
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, num_symbols - 2],
                         2)[1], 2) + 2  # for removing UNK
            self.generation = self.index2symbol.lookup(self.generation_index,
                                                       name='generation')

        # Beam-search inference decoder; the beam bookkeeping arrays are
        # returned through the decoder's context_state.
        with tf.variable_scope('decoder', reuse=True):
            decoder_fn_beam_inference = attention_decoder_fn_beam_inference(
                output_fn, encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn, self.embed, GO_ID,
                EOS_ID, max_length, num_symbols, beam_size, remove_unk)
            _, _, self.context_state = dynamic_rnn_decoder(
                cell, decoder_fn_beam_inference, scope="decoder_rnn")
            (log_beam_probs, beam_parents, beam_symbols, result_probs,
             result_parents, result_symbols) = self.context_state

            # TensorArrays are stacked as [time, batch*beam] and reshaped /
            # transposed to [batch, time, beam(-or-2*beam)] for export.
            self.beam_parents = tf.transpose(tf.reshape(
                beam_parents.stack(), [max_length + 1, -1, beam_size]),
                                             [1, 0, 2],
                                             name='beam_parents')
            self.beam_symbols = tf.transpose(
                tf.reshape(beam_symbols.stack(),
                           [max_length + 1, -1, beam_size]), [1, 0, 2])
            self.beam_symbols = self.index2symbol.lookup(tf.cast(
                self.beam_symbols, tf.int64),
                                                         name="beam_symbols")

            self.result_probs = tf.transpose(tf.reshape(
                result_probs.stack(), [max_length + 1, -1, beam_size * 2]),
                                             [1, 0, 2],
                                             name='result_probs')
            self.result_symbols = tf.transpose(
                tf.reshape(result_symbols.stack(),
                           [max_length + 1, -1, beam_size * 2]), [1, 0, 2])
            self.result_parents = tf.transpose(tf.reshape(
                result_parents.stack(), [max_length + 1, -1, beam_size * 2]),
                                               [1, 0, 2],
                                               name='result_parents')
            self.result_symbols = self.index2symbol.lookup(
                tf.cast(self.result_symbols, tf.int64), name='result_symbols')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)

        # Exporter for serving: exposes the beam-search outputs keyed by the
        # placeholder tensor names fed at serving time.
        self.model_exporter = exporter.Exporter(self.saver)
        inputs = {"enc_inps:0": self.posts, "enc_lens:0": self.posts_length}
        outputs = {
            "beam_symbols": self.beam_symbols,
            "beam_parents": self.beam_parents,
            "result_probs": self.result_probs,
            "result_symbols": self.result_symbols,
            "result_parents": self.result_parents
        }
        self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                                 named_graph_signatures={
                                     "inputs":
                                     exporter.generic_signature(inputs),
                                     "outputs":
                                     exporter.generic_signature(outputs)
                                 })
コード例 #26
0
ファイル: model.py プロジェクト: zyjcs/ccm
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            num_layers,
            embed,
            entity_embed=None,
            num_entities=0,
            num_trans_units=100,
            learning_rate=0.0001,
            learning_rate_decay_factor=0.95,
            max_gradient_norm=5.0,
            num_samples=512,
            max_length=60,
            output_alignments=True,
            use_lstm=False):
        """Build a knowledge-triple-augmented seq2seq graph.

        Constructs a GRU encoder over the post, a training decoder with
        attention over both encoder states and triple embeddings, and an
        inference decoder that can copy entity words, plus Adam training
        ops, summaries, and savers.

        Args:
            num_symbols: word-vocabulary size.
            num_embed_units: word-embedding dimension.
            num_units: RNN hidden size per layer.
            num_layers: number of stacked GRU layers.
            embed: pre-trained word embeddings (initializer) or None.
            entity_embed: pre-trained entity/relation embeddings or None;
                stored non-trainable either way.
            num_entities: entity-vocabulary size (used when entity_embed
                is None).
            num_trans_units: dimension of entity (trans) embeddings.
            learning_rate: Adam learning rate (also kept as a decayable
                Variable, though the Adam optimizer below is built from the
                plain float).
            learning_rate_decay_factor: factor used by the decay op.
            max_gradient_norm: global-norm gradient clipping threshold.
            num_samples: sampled-softmax sample count.
            max_length: maximum decoding length at inference.
            output_alignments: if True, train with the alignment-aware
                total_loss and emit alignments; otherwise plain sequence
                loss.
            use_lstm: unused here; both encoder and decoder use GRU cells.
        """
        # String-valued feeds; id lookup happens in-graph via hash tables.
        self.posts = tf.placeholder(tf.string, (None, None), 'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None), 'enc_lens')  # batch
        self.responses = tf.placeholder(tf.string, (None, None), 'dec_inps')  # batch*len
        self.responses_length = tf.placeholder(tf.int32, (None), 'dec_lens')  # batch
        self.entities = tf.placeholder(tf.string, (None, None), 'entities')  # batch
        self.entity_masks = tf.placeholder(tf.string, (None, None), 'entity_masks')  # batch
        # (head, relation, tail) string triples per example.
        self.triples = tf.placeholder(tf.string, (None, None, 3), 'triples')  # batch
        self.posts_triple = tf.placeholder(tf.int32, (None, None, 1), 'enc_triples')  # batch
        self.responses_triple = tf.placeholder(tf.string, (None, None, 3), 'dec_triples')  # batch
        # Per decoder step, index of the matched triple (or negative).
        self.match_triples = tf.placeholder(tf.int32, (None, None), 'match_triples')  # batch
        encoder_batch_size, encoder_len = tf.unstack(tf.shape(self.posts))
        triple_num = tf.shape(self.triples)[1]
        
        #use_triples = tf.reduce_sum(tf.cast(tf.greater_equal(self.match_triples, 0), tf.float32), axis=-1)
        # One-hot over triples per decoder step; summing gives a 0/1
        # "a triple is used at this step" indicator.
        one_hot_triples = tf.one_hot(self.match_triples, triple_num)
        use_triples = tf.reduce_sum(one_hot_triples, axis=[2])

        # In-graph vocab tables: word string <-> id and entity string <-> id.
        self.symbol2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=UNK_ID,
                shared_name="in_table",
                name="in_table",
                checkpoint=True)
        self.index2symbol = MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value='_UNK',
                shared_name="out_table",
                name="out_table",
                checkpoint=True)
        self.entity2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=NONE_ID,
                shared_name="entity_in_table",
                name="entity_in_table",
                checkpoint=True)
        self.index2entity = MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value='_NONE',
                shared_name="entity_out_table",
                name="entity_out_table",
                checkpoint=True)
        # build the vocab table (string to index)


        self.posts_word_id = self.symbol2index.lookup(self.posts)   # batch*len
        self.posts_entity_id = self.entity2index.lookup(self.posts)   # batch*len
        #self.posts_word_id = tf.Print(self.posts_word_id, ['use_triples', use_triples, 'one_hot_triples', one_hot_triples], summarize=1e6)
        self.responses_target = self.symbol2index.lookup(self.responses)   #batch*len
        
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(self.responses)[1]
        # Decoder input = GO followed by the target shifted right by one.
        self.responses_word_id = tf.concat([tf.ones([batch_size, 1], dtype=tf.int64)*GO_ID,
            tf.split(self.responses_target, [decoder_len-1, 1], 1)[0]], 1)   # batch*len
        # Reverse-cumsum of a one-hot at the last valid step -> 1/0 mask
        # over valid decoder positions.
        self.decoder_mask = tf.reshape(tf.cumsum(tf.one_hot(self.responses_length-1, 
            decoder_len), reverse=True, axis=1), [-1, decoder_len])
        
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('word_embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('word_embed', dtype=tf.float32, initializer=embed)
        if entity_embed is None:
            # initialize the embedding randomly
            self.entity_trans = tf.get_variable('entity_embed', [num_entities, num_trans_units], tf.float32, trainable=False)
        else:
            # initialize the embedding by pre-trained word vectors
            self.entity_trans = tf.get_variable('entity_embed', dtype=tf.float32, initializer=entity_embed, trainable=False)

        # Trainable tanh projection on top of the frozen entity embeddings.
        self.entity_trans_transformed = tf.layers.dense(self.entity_trans, num_trans_units, activation=tf.tanh, name='trans_transformation')
        # Zero rows prepended for the 7 reserved/padding entity ids.
        padding_entity = tf.get_variable('entity_padding_embed', [7, num_trans_units], dtype=tf.float32, initializer=tf.zeros_initializer())

        self.entity_embed = tf.concat([padding_entity, self.entity_trans_transformed], axis=0)

        # Triple embedding: concat of head/relation/tail vectors per triple.
        triples_embedding = tf.reshape(tf.nn.embedding_lookup(self.entity_embed, self.entity2index.lookup(self.triples)), [encoder_batch_size, triple_num, 3 * num_trans_units])
        # Word embeddings of the entity surface forms (copy candidates).
        entities_word_embedding = tf.reshape(tf.nn.embedding_lookup(self.embed, self.symbol2index.lookup(self.entities)), [encoder_batch_size, -1, num_embed_units])


        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.posts_word_id) #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed, self.responses_word_id) #batch*len*unit

        encoder_cell = MultiRNNCell([GRUCell(num_units) for _ in range(num_layers)])
        decoder_cell = MultiRNNCell([GRUCell(num_units) for _ in range(num_layers)])
        
        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(encoder_cell, self.encoder_input, 
                self.posts_length, dtype=tf.float32, scope="encoder")

        # get output projection function
        output_fn, selector_fn, sequence_loss, sampled_sequence_loss, total_loss = output_projection_layer(num_units, 
                num_symbols, num_samples)

        

        # Training decoder: Bahdanau attention over encoder states plus an
        # external memory (imem) of triple embeddings.
        with tf.variable_scope('decoder'):
            # get attention function
            attention_keys_init, attention_values_init, attention_score_fn_init, attention_construct_fn_init \
                    = prepare_attention(encoder_output, 'bahdanau', num_units, imem=triples_embedding, output_alignments=output_alignments)#'luong', num_units)

            decoder_fn_train = attention_decoder_fn_train(
                    encoder_state, attention_keys_init, attention_values_init,
                    attention_score_fn_init, attention_construct_fn_init, output_alignments=output_alignments, max_length=tf.reduce_max(self.responses_length))
            self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(decoder_cell, decoder_fn_train, 
                    self.decoder_input, self.responses_length, scope="decoder_rnn")
            if output_alignments: 
                # [time, batch, triples] -> [batch, time, triples]
                self.alignments = tf.transpose(alignments_ta.stack(), perm=[1,0,2])
                #self.alignments = tf.Print(self.alignments, [self.alignments], summarize=1e8)
                self.decoder_loss, self.ppx_loss, self.sentence_ppx = total_loss(self.decoder_output, self.responses_target, self.decoder_mask, self.alignments, triples_embedding, use_triples, one_hot_triples)
                self.sentence_ppx = tf.identity(self.sentence_ppx, 'ppx_loss')
                #self.decoder_loss = tf.Print(self.decoder_loss, ['decoder_loss', self.decoder_loss], summarize=1e6)
            else:
                self.decoder_loss, self.sentence_ppx = sequence_loss(self.decoder_output, 
                        self.responses_target, self.decoder_mask)
                self.sentence_ppx = tf.identity(self.sentence_ppx, 'ppx_loss')
         
        # Inference decoder sharing training weights; can emit words or
        # copy entity surface forms from entities_word_embedding.
        with tf.variable_scope('decoder', reuse=True):
            # get attention function
            attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                    = prepare_attention(encoder_output, 'bahdanau', num_units, reuse=True, imem=triples_embedding, output_alignments=output_alignments)#'luong', num_units)
            decoder_fn_inference = attention_decoder_fn_inference(
                    output_fn, encoder_state, attention_keys, attention_values, 
                    attention_score_fn, attention_construct_fn, self.embed, GO_ID, 
                    EOS_ID, max_length, num_symbols, imem=entities_word_embedding, selector_fn=selector_fn)

                
            self.decoder_distribution, _, output_ids_ta = dynamic_rnn_decoder(decoder_cell,
                    decoder_fn_inference, scope="decoder_rnn")
            if output_alignments:
                output_len = tf.shape(self.decoder_distribution)[1]
                output_ids = tf.transpose(output_ids_ta.gather(tf.range(output_len)))
                # Positive ids are vocab words; negative ids index copied
                # entities (hence the clip on -output_ids below).
                word_ids = tf.cast(tf.clip_by_value(output_ids, 0, num_symbols), tf.int64)
                # Flattened per-batch offsets into self.entities.
                entity_ids = tf.reshape(tf.clip_by_value(-output_ids, 0, num_symbols) + tf.reshape(tf.range(encoder_batch_size) * tf.shape(entities_word_embedding)[1], [-1, 1]), [-1])
                entities = tf.reshape(tf.gather(tf.reshape(self.entities, [-1]), entity_ids), [-1, output_len])
                words = self.index2symbol.lookup(word_ids)
                # Choose the word string for generated ids, the entity
                # string for copied ids.
                self.generation = tf.where(output_ids > 0, words, entities, name='generation')
            else:
                self.generation_index = tf.argmax(self.decoder_distribution, 2)
                
                self.generation = self.index2symbol.lookup(self.generation_index, name='generation') 
        

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate), 
                trainable=False, dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # NOTE(review): gradients are taken wrt ALL global variables (not
        # just trainable ones) — presumably intentional here, but unusual;
        # confirm against the training script.
        self.params = tf.global_variables()
            
        # calculate the gradient of parameters
        #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.lr = opt._lr
       
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, 
                max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params), 
                global_step=self.global_step)

        # TensorBoard summaries: scalar loss plus a histogram per variable.
        tf.summary.scalar('decoder_loss', self.decoder_loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()
        
        # Rolling checkpoints plus a separate long-retention per-epoch saver.
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2, 
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
        
        self.saver_epoch = tf.train.Saver(write_version=tf.train.SaverDef.V2, max_to_keep=1000, pad_step_number=True)
コード例 #27
0
 def RNN(x):
   """Run a single-layer GRU over `x` and return the final-step output.

   Assumes `x` is batch-major, i.e. [batch, time, features] — the default
   for `dynamic_rnn` (time_major=False), so `outputs` has shape
   [batch, time, num_units].
   """
   # Define a GRU cell with tensorflow
   gru_cell = nn.rnn_cell.GRUCell(num_units, name="GRU")
   # Get gru cell output
   outputs, _ = nn.dynamic_rnn(gru_cell, x, dtype=dataType)
   # Fix: with batch-major outputs, the last TIME step is outputs[:, -1, :].
   # The previous `outputs[-1]` selected the last batch EXAMPLE instead
   # (shape [time, num_units]), not the final output per example.
   return outputs[:, -1, :]
コード例 #28
0
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 is_train,
                 vocab=None,
                 embed=None,
                 learning_rate=0.1,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 num_samples=512,
                 max_length=30,
                 use_lstm=True):

        self.posts_1 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_2 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_3 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_4 = tf.placeholder(tf.string, shape=(None, None))

        self.entity_1 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_2 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_3 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_4 = tf.placeholder(tf.string, shape=(None, None, None, 3))

        self.entity_mask_1 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_2 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_3 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_4 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))

        self.posts_length_1 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_2 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_3 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_4 = tf.placeholder(tf.int32, shape=(None))

        self.responses = tf.placeholder(tf.string, shape=(None, None))
        self.responses_length = tf.placeholder(tf.int32, shape=(None))

        self.epoch = tf.Variable(0, trainable=False, name='epoch')
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        if is_train:
            self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        else:
            self.symbols = tf.Variable(np.array(['.'] * num_symbols),
                                       name="symbols")

        self.symbol2index = HashTable(KeyValueTensorInitializer(
            self.symbols,
            tf.Variable(
                np.array([i for i in range(num_symbols)], dtype=np.int32),
                False)),
                                      default_value=UNK_ID,
                                      name="symbol2index")

        self.posts_input_1 = self.symbol2index.lookup(self.posts_1)

        self.posts_2_target = self.posts_2_embed = self.symbol2index.lookup(
            self.posts_2)
        self.posts_3_target = self.posts_3_embed = self.symbol2index.lookup(
            self.posts_3)
        self.posts_4_target = self.posts_4_embed = self.symbol2index.lookup(
            self.posts_4)

        self.responses_target = self.symbol2index.lookup(self.responses)

        batch_size, decoder_len = tf.shape(self.posts_1)[0], tf.shape(
            self.responses)[1]

        self.posts_input_2 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_2_embed, [tf.shape(self.posts_2)[1] - 1, 1],
                     1)[0]
        ], 1)
        self.posts_input_3 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_3_embed, [tf.shape(self.posts_3)[1] - 1, 1],
                     1)[0]
        ], 1)
        self.posts_input_4 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_4_embed, [tf.shape(self.posts_4)[1] - 1, 1],
                     1)[0]
        ], 1)

        self.responses_target = self.symbol2index.lookup(self.responses)

        batch_size, decoder_len = tf.shape(self.posts_1)[0], tf.shape(
            self.responses)[1]

        self.responses_input = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)

        self.encoder_2_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_2 - 1,
                                 tf.shape(self.posts_2)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_2)[1]])
        self.encoder_3_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_3 - 1,
                                 tf.shape(self.posts_3)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_3)[1]])
        self.encoder_4_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_4 - 1,
                                 tf.shape(self.posts_4)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_4)[1]])

        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        if embed is None:
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input_1 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_1)
        self.encoder_input_2 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_2)
        self.encoder_input_3 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_3)
        self.encoder_input_4 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_4)

        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        entity_embedding_1 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_1)),
            [
                batch_size,
                tf.shape(self.entity_1)[1],
                tf.shape(self.entity_1)[2], 3 * num_embed_units
            ])
        entity_embedding_2 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_2)),
            [
                batch_size,
                tf.shape(self.entity_2)[1],
                tf.shape(self.entity_2)[2], 3 * num_embed_units
            ])
        entity_embedding_3 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_3)),
            [
                batch_size,
                tf.shape(self.entity_3)[1],
                tf.shape(self.entity_3)[2], 3 * num_embed_units
            ])
        entity_embedding_4 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_4)),
            [
                batch_size,
                tf.shape(self.entity_4)[1],
                tf.shape(self.entity_4)[2], 3 * num_embed_units
            ])

        head_1, relation_1, tail_1 = tf.split(entity_embedding_1,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_2, relation_2, tail_2 = tf.split(entity_embedding_2,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_3, relation_3, tail_3 = tf.split(entity_embedding_3,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_4, relation_4, tail_4 = tf.split(entity_embedding_4,
                                              [num_embed_units] * 3,
                                              axis=3)

        with tf.variable_scope('graph_attention'):
            #[batch_size, max_reponse_length, max_triple_num, 2*embed_units]
            head_tail_1 = tf.concat([head_1, tail_1], axis=3)
            #[batch_size, max_reponse_length, max_triple_num, embed_units]
            head_tail_transformed_1 = tf.layers.dense(
                head_tail_1,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            #[batch_size, max_reponse_length, max_triple_num, embed_units]
            relation_transformed_1 = tf.layers.dense(relation_1,
                                                     num_embed_units,
                                                     name='relation_transform')
            #[batch_size, max_reponse_length, max_triple_num]
            e_weight_1 = tf.reduce_sum(relation_transformed_1 *
                                       head_tail_transformed_1,
                                       axis=3)
            #[batch_size, max_reponse_length, max_triple_num]
            alpha_weight_1 = tf.nn.softmax(e_weight_1)
            #[batch_size, max_reponse_length, embed_units]
            graph_embed_1 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_1, 3) *
                (tf.expand_dims(self.entity_mask_1, 3) * head_tail_1),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_2 = tf.concat([head_2, tail_2], axis=3)
            head_tail_transformed_2 = tf.layers.dense(
                head_tail_2,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_2 = tf.layers.dense(relation_2,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_2 = tf.reduce_sum(relation_transformed_2 *
                                       head_tail_transformed_2,
                                       axis=3)
            alpha_weight_2 = tf.nn.softmax(e_weight_2)
            graph_embed_2 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_2, 3) *
                (tf.expand_dims(self.entity_mask_2, 3) * head_tail_2),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_3 = tf.concat([head_3, tail_3], axis=3)
            head_tail_transformed_3 = tf.layers.dense(
                head_tail_3,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_3 = tf.layers.dense(relation_3,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_3 = tf.reduce_sum(relation_transformed_3 *
                                       head_tail_transformed_3,
                                       axis=3)
            alpha_weight_3 = tf.nn.softmax(e_weight_3)
            graph_embed_3 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_3, 3) *
                (tf.expand_dims(self.entity_mask_3, 3) * head_tail_3),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_4 = tf.concat([head_4, tail_4], axis=3)
            head_tail_transformed_4 = tf.layers.dense(
                head_tail_4,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_4 = tf.layers.dense(relation_4,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_4 = tf.reduce_sum(relation_transformed_4 *
                                       head_tail_transformed_4,
                                       axis=3)
            alpha_weight_4 = tf.nn.softmax(e_weight_4)
            graph_embed_4 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_4, 3) *
                (tf.expand_dims(self.entity_mask_4, 3) * head_tail_4),
                axis=2)

        if use_lstm:
            cell = MultiRNNCell([LSTMCell(num_units)] * num_layers)
        else:
            cell = MultiRNNCell([GRUCell(num_units)] * num_layers)

        output_fn, sampled_sequence_loss = output_projection_layer(
            num_units, num_symbols, num_samples)

        encoder_output_1, encoder_state_1 = dynamic_rnn(cell,
                                                        self.encoder_input_1,
                                                        self.posts_length_1,
                                                        dtype=tf.float32,
                                                        scope="encoder")

        attention_keys_1, attention_values_1, attention_score_fn_1, attention_construct_fn_1 \
                = attention_decoder_fn.prepare_attention(graph_embed_1, encoder_output_1, 'luong', num_units)
        decoder_fn_train_1 = attention_decoder_fn.attention_decoder_fn_train(
            encoder_state_1,
            attention_keys_1,
            attention_values_1,
            attention_score_fn_1,
            attention_construct_fn_1,
            max_length=tf.reduce_max(self.posts_length_2))
        encoder_output_2, encoder_state_2, alignments_ta_2 = dynamic_rnn_decoder(
            cell,
            decoder_fn_train_1,
            self.encoder_input_2,
            self.posts_length_2,
            scope="decoder")
        self.alignments_2 = tf.transpose(alignments_ta_2.stack(),
                                         perm=[1, 0, 2])

        self.decoder_loss_2 = sampled_sequence_loss(encoder_output_2,
                                                    self.posts_2_target,
                                                    self.encoder_2_mask)

        with variable_scope.variable_scope('', reuse=True):
            attention_keys_2, attention_values_2, attention_score_fn_2, attention_construct_fn_2 \
                    = attention_decoder_fn.prepare_attention(graph_embed_2, encoder_output_2, 'luong', num_units)
            decoder_fn_train_2 = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state_2,
                attention_keys_2,
                attention_values_2,
                attention_score_fn_2,
                attention_construct_fn_2,
                max_length=tf.reduce_max(self.posts_length_3))
            encoder_output_3, encoder_state_3, alignments_ta_3 = dynamic_rnn_decoder(
                cell,
                decoder_fn_train_2,
                self.encoder_input_3,
                self.posts_length_3,
                scope="decoder")
            self.alignments_3 = tf.transpose(alignments_ta_3.stack(),
                                             perm=[1, 0, 2])

            self.decoder_loss_3 = sampled_sequence_loss(
                encoder_output_3, self.posts_3_target, self.encoder_3_mask)

            attention_keys_3, attention_values_3, attention_score_fn_3, attention_construct_fn_3 \
                    = attention_decoder_fn.prepare_attention(graph_embed_3, encoder_output_3, 'luong', num_units)
            decoder_fn_train_3 = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state_3,
                attention_keys_3,
                attention_values_3,
                attention_score_fn_3,
                attention_construct_fn_3,
                max_length=tf.reduce_max(self.posts_length_4))
            encoder_output_4, encoder_state_4, alignments_ta_4 = dynamic_rnn_decoder(
                cell,
                decoder_fn_train_3,
                self.encoder_input_4,
                self.posts_length_4,
                scope="decoder")
            self.alignments_4 = tf.transpose(alignments_ta_4.stack(),
                                             perm=[1, 0, 2])

            self.decoder_loss_4 = sampled_sequence_loss(
                encoder_output_4, self.posts_4_target, self.encoder_4_mask)

            attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                    = attention_decoder_fn.prepare_attention(graph_embed_4, encoder_output_4, 'luong', num_units)

        if is_train:
            with variable_scope.variable_scope('', reuse=True):
                decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
                    encoder_state_4,
                    attention_keys,
                    attention_values,
                    attention_score_fn,
                    attention_construct_fn,
                    max_length=tf.reduce_max(self.responses_length))
                self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(
                    cell,
                    decoder_fn_train,
                    self.decoder_input,
                    self.responses_length,
                    scope="decoder")
                self.alignments = tf.transpose(alignments_ta.stack(),
                                               perm=[1, 0, 2])

                self.decoder_loss = sampled_sequence_loss(
                    self.decoder_output, self.responses_target,
                    self.decoder_mask)

            self.params = tf.trainable_variables()

            self.learning_rate = tf.Variable(float(learning_rate),
                                             trainable=False,
                                             dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)

            #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)

            gradients = tf.gradients(
                self.decoder_loss + self.decoder_loss_2 + self.decoder_loss_3 +
                self.decoder_loss_4, self.params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients,
                                                  self.params),
                                              global_step=self.global_step)

        else:
            with variable_scope.variable_scope('', reuse=True):
                decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
                    output_fn, encoder_state_4, attention_keys,
                    attention_values, attention_score_fn,
                    attention_construct_fn, self.embed, GO_ID, EOS_ID,
                    max_length, num_symbols)
                self.decoder_distribution, _, alignments_ta = dynamic_rnn_decoder(
                    cell, decoder_fn_inference, scope="decoder")
                output_len = tf.shape(self.decoder_distribution)[1]
                self.alignments = tf.transpose(
                    alignments_ta.gather(tf.range(output_len)), [1, 0, 2])

            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, num_symbols - 2],
                         2)[1], 2) + 2  # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols,
                                                     self.generation_index,
                                                     name="generation")

            self.params = tf.trainable_variables()

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=10,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #29
0
ファイル: model.py プロジェクト: ivanium/ann-hw4
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.5,
                 max_gradient_norm=5.0,
                 learning_rate_decay_factor=0.9):
        """Build an RNN sentence classifier graph.

        Looks up token indices for the input strings, embeds them, runs one
        (num_layers == 1) or two (forward + reversed-text) recurrent networks,
        and classifies the final state with a dense softmax layer.

        Args:
            num_symbols: vocabulary size.
            num_embed_units: embedding dimension.
            num_units: recurrent cell size.
            num_layers: 1 for a single RNN; any other value selects the
                two-direction variant that also reads `self.reverse_texts`.
            num_labels: number of output classes.
            embed: pre-trained embedding matrix, or None for random init.
            learning_rate: initial SGD learning rate.
            max_gradient_norm: global-norm clipping threshold.
            learning_rate_decay_factor: multiplier applied by
                `learning_rate_decay_op` (default 0.9, as before).
        """
        # --- placeholders ---
        self.texts = tf.placeholder(tf.string, [None, None],
                                    name="texts")  # shape: batch*len
        self.texts_length = tf.placeholder(tf.int64, [None],
                                           name="texts_length")  # shape: batch
        self.labels = tf.placeholder(tf.int64, [None],
                                     name="labels")  # shape: batch

        # vocab table: token string -> index, OOV tokens map to UNK_ID
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)

        # training bookkeeping: decayable learning rate, step/epoch counters
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

        # embedding table (index -> vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed, self.index_input)  # batch*len*embed_unit

        model = 'lstm'

        def _make_cell():
            # One fresh recurrent cell of the configured kind; raising on an
            # unknown kind replaces the previous silent NameError on `cell`.
            if model == 'rnn':
                return BasicRNNCell(num_units)
            if model == 'gru':
                return GRUCell(num_units)
            if model == 'lstm':
                return BasicLSTMCell(num_units)
            raise ValueError("unknown cell type: %s" % model)

        if num_layers == 1:
            cell_do = tf.nn.rnn_cell.DropoutWrapper(
                _make_cell(),
                input_keep_prob=1.0,
                output_keep_prob=FLAGS.keep_prob)
            # only the final state is used for classification
            _, states = dynamic_rnn(cell_do,
                                    self.embed_input,
                                    self.texts_length,
                                    dtype=tf.float32,
                                    scope="rnn")
            if model == 'lstm':
                # NOTE(review): LSTMStateTuple is (c, h); index 0 selects the
                # cell state c — confirm the hidden state h was not intended.
                states = states[0]
            # honor the num_labels argument (was hard-coded to 5)
            logits = tf.layers.dense(inputs=states,
                                     units=num_labels,
                                     activation=None)

        else:
            # two-network variant: a second RNN reads the reversed text
            self.reverse_texts = tf.placeholder(
                tf.string, [None, None],
                name="reverse_texts")  # shape: batch*len
            self.index_reverse_input = self.symbol2index.lookup(
                self.reverse_texts)
            self.embed_reverse_input = tf.nn.embedding_lookup(
                self.embed, self.index_reverse_input)  #batch*len*embed_unit

            cell1_do = tf.nn.rnn_cell.DropoutWrapper(
                _make_cell(),
                input_keep_prob=1.0,
                output_keep_prob=FLAGS.keep_prob)
            cell2_do = tf.nn.rnn_cell.DropoutWrapper(
                _make_cell(),
                input_keep_prob=1.0,
                output_keep_prob=FLAGS.keep_prob)

            # NOTE(review): both calls use scope="rnn", so the two networks
            # resolve to the same variable scope — confirm sharing is intended.
            _, states1 = dynamic_rnn(cell1_do,
                                     self.embed_input,
                                     self.texts_length,
                                     dtype=tf.float32,
                                     scope="rnn")
            _, states2 = dynamic_rnn(cell2_do,
                                     self.embed_reverse_input,
                                     self.texts_length,
                                     dtype=tf.float32,
                                     scope="rnn")

            # combine forward and reverse final states by summation
            if model == 'lstm':
                states = states1[0] + states2[0]
            else:
                states = states1 + states2

            logits = tf.layers.dense(inputs=states,
                                     units=num_labels,
                                     activation=None)

        # summed cross-entropy over the batch; mean used for the gradient
        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                        dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        # number of correct predictions in the batch (not a ratio)
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # SGD with globally clipped gradients
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)

        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # tensorboard summaries: scalar loss plus a histogram per variable
        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
コード例 #30
0
    def __init__(self, data, args, embed):
        """Build a GRU sentence VAE: encoder, recognition net, decoder.

        Encodes a sentence, maps the final encoder state to a Gaussian
        posterior, samples a latent z via the reparameterization trick, and
        decodes from z with teacher forcing (training path) or greedy search
        (inference path).  Total loss = reconstruction + annealed KL.

        Args:
            data: dataset object; provides vocab_size, go_id, eos_id.
            args: hyper-parameter namespace (lr, lr_decay, eh_size, dh_size,
                z_dim, embedding_size, full_kl_step, min_kl, momentum,
                grad_clip, checkpoint_max_to_keep, name).
            embed: pre-trained embedding matrix, or None for random init.
        """

        with tf.variable_scope("input"):
            with tf.variable_scope("embedding"):
                # build the embedding table and embedding input
                if embed is None:
                    # initialize the embedding randomly
                    self.embed = tf.get_variable(
                        'embed', [data.vocab_size, args.embedding_size],
                        tf.float32)
                else:
                    # initialize the embedding by pre-trained word vectors
                    self.embed = tf.get_variable('embed',
                                                 dtype=tf.float32,
                                                 initializer=embed)

            self.sentence = tf.placeholder(tf.int32, (None, None),
                                           'sen_inps')  # batch*len
            self.sentence_length = tf.placeholder(tf.int32, (None, ),
                                                  'sen_lens')  # batch
            # when True, decode from the prior instead of the posterior
            self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

            batch_size, batch_len = tf.shape(self.sentence)[0], tf.shape(
                self.sentence)[1]
            # decoder sequences are one token shorter than the input
            self.decoder_max_len = batch_len - 1

            self.encoder_input = tf.nn.embedding_lookup(
                self.embed, self.sentence)  # batch*len*unit
            self.encoder_len = self.sentence_length

            # decoder input drops the last token; target drops the first,
            # giving the usual one-step-shifted teacher-forcing pair
            decoder_input = tf.split(self.sentence, [self.decoder_max_len, 1],
                                     1)[0]  # no eos_id
            self.decoder_input = tf.nn.embedding_lookup(
                self.embed, decoder_input)  # batch*(len-1)*unit
            self.decoder_target = tf.split(self.sentence,
                                           [1, self.decoder_max_len],
                                           1)[1]  # no go_id, batch*(len-1)
            self.decoder_len = self.sentence_length - 1
            # 1.0 over real tokens, 0.0 over padding
            self.decoder_mask = tf.sequence_mask(
                self.decoder_len, self.decoder_max_len,
                dtype=tf.float32)  # batch*(len-1)

        # initialize the training process
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build rnn_cell
        cell_enc = tf.nn.rnn_cell.GRUCell(args.eh_size)
        cell_dec = tf.nn.rnn_cell.GRUCell(args.dh_size)

        # build encoder
        with tf.variable_scope('encoder'):
            encoder_output, encoder_state = dynamic_rnn(cell_enc,
                                                        self.encoder_input,
                                                        self.encoder_len,
                                                        dtype=tf.float32,
                                                        scope="encoder_rnn")

        with tf.variable_scope('recognition_net'):
            # posterior parameters q(z|x) from the final encoder state
            recog_input = encoder_state
            self.recog_mu = tf.layers.dense(inputs=recog_input,
                                            units=args.z_dim,
                                            activation=None,
                                            name='recog_mu')
            self.recog_logvar = tf.layers.dense(inputs=recog_input,
                                                units=args.z_dim,
                                                activation=None,
                                                name='recog_logvar')

            # reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I)
            epsilon = tf.random_normal(tf.shape(self.recog_logvar),
                                       name="epsilon")
            std = tf.exp(0.5 * self.recog_logvar)
            self.recog_z = tf.add(self.recog_mu,
                                  tf.multiply(std, epsilon),
                                  name='recog_z')

            # closed-form KL( N(mu, sigma^2) || N(0, I) ), batch mean
            self.kld = tf.reduce_mean(0.5 * tf.reduce_sum(
                tf.exp(self.recog_logvar) + self.recog_mu * self.recog_mu -
                self.recog_logvar - 1,
                axis=-1))
            self.prior_z = tf.random_normal(tf.shape(self.recog_logvar),
                                            name="prior_z")
            # sample from the prior or the posterior, per use_prior
            latent_sample = tf.cond(self.use_prior,
                                    lambda: self.prior_z,
                                    lambda: self.recog_z,
                                    name='latent_sample')
            # project z to the decoder's initial hidden state
            dec_init_state = tf.layers.dense(inputs=latent_sample,
                                             units=args.dh_size,
                                             activation=None)

        with tf.variable_scope("output_layer",
                               initializer=tf.orthogonal_initializer()):
            # shared vocabulary projection used by both decode paths
            self.output_layer = Dense(
                data.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                use_bias=True)

        with tf.variable_scope("decode",
                               initializer=tf.orthogonal_initializer()):
            # teacher-forcing decoder for training
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.decoder_input, sequence_length=self.decoder_len)
            train_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell_dec,
                helper=train_helper,
                initial_state=dec_init_state,
                output_layer=self.output_layer)
            train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=train_decoder,
                maximum_iterations=self.decoder_max_len,
                impute_finished=True)
            logits = train_output.rnn_output

            # masked token-level cross-entropy; sen_loss normalizes by
            # sentences, ppl_loss by real tokens (for perplexity)
            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.decoder_target, logits=logits)
            crossent = tf.reduce_sum(crossent * self.decoder_mask)
            self.sen_loss = crossent / tf.to_float(batch_size)
            self.ppl_loss = crossent / tf.reduce_sum(self.decoder_mask)

            self.decoder_distribution_teacher = tf.nn.log_softmax(logits)

        # same scope with reuse=True: inference decoder shares all weights
        with tf.variable_scope("decode", reuse=True):
            infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                self.embed, tf.fill([batch_size], data.go_id), data.eos_id)
            infer_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell_dec,
                helper=infer_helper,
                initial_state=dec_init_state,
                output_layer=self.output_layer)
            infer_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=infer_decoder,
                maximum_iterations=self.decoder_max_len,
                impute_finished=True)
            self.decoder_distribution = infer_output.rnn_output
            # argmax over symbols 2.., shifting indices back by 2
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, data.vocab_size - 2],
                         2)[1], 2) + 2  # for removing UNK

        # linear KL annealing from 0 to 1 over full_kl_step steps,
        # with the KL term floored at min_kl (free bits)
        self.kl_weights = tf.minimum(
            tf.to_float(self.global_step) / args.full_kl_step, 1.0)
        self.kl_loss = self.kl_weights * tf.maximum(self.kld, args.min_kl)
        self.loss = self.sen_loss + self.kl_loss

        # calculate the gradient of parameters and update
        # (only variables whose name contains args.name are trained)
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate,
                                         momentum=args.momentum)
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint: a rolling "latest" saver and a single "best" saver
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)
コード例 #31
0
    def __init__(self,
            num_symbols,
            num_qwords, #modify
            num_embed_units,
            num_units,
            num_layers,
            is_train,
            vocab=None,
            embed=None,
            question_data=True,
            learning_rate=0.5,
            learning_rate_decay_factor=0.95,
            max_gradient_norm=5.0,
            num_samples=512,
            max_length=30,
            use_lstm=False):

        self.posts = tf.placeholder(tf.string, shape=(None, None))  # batch*len
        self.posts_length = tf.placeholder(tf.int32, shape=(None))  # batch
        self.responses = tf.placeholder(tf.string, shape=(None, None))  # batch*len
        self.responses_length = tf.placeholder(tf.int32, shape=(None))  # batch
        self.keyword_tensor = tf.placeholder(tf.float32, shape=(None, 3, None)) #(batch * len) * 3 * numsymbol
        self.word_type = tf.placeholder(tf.int32, shape=(None))   #(batch * len)

        # build the vocab table (string to index)
        if is_train:
            self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        else:
            self.symbols = tf.Variable(np.array(['.']*num_symbols), name="symbols")
        self.symbol2index = HashTable(KeyValueTensorInitializer(self.symbols,
            tf.Variable(np.array([i for i in range(num_symbols)], dtype=np.int32), False)),
            default_value=UNK_ID, name="symbol2index")
        self.posts_input = self.symbol2index.lookup(self.posts)   # batch*len
        self.responses_target = self.symbol2index.lookup(self.responses)   #batch*len
        
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(self.responses)[1]
        self.responses_input = tf.concat([tf.ones([batch_size, 1], dtype=tf.int32)*GO_ID,
            tf.split(self.responses_target, [decoder_len-1, 1], 1)[0]], 1)   # batch*len
        #delete the last column of responses_target) and add 'GO at the front of it.
        self.decoder_mask = tf.reshape(tf.cumsum(tf.one_hot(self.responses_length-1,
            decoder_len), reverse=True, axis=1), [-1, decoder_len]) # bacth * len

        print "embedding..."
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            print len(vocab), len(embed), len(embed[0])
            print embed
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.posts_input) #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed, self.responses_input)

        print "embedding finished"

        if use_lstm:
            cell = MultiRNNCell([LSTMCell(num_units)] * num_layers)
        else:
            cell = MultiRNNCell([GRUCell(num_units)] * num_layers)

        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(cell, self.encoder_input,
                self.posts_length, dtype=tf.float32, scope="encoder")
        # get output projection function
        output_fn, sampled_sequence_loss = output_projection_layer(num_units,
                num_symbols, num_qwords, num_samples, question_data)

        print "encoder_output.shape:", encoder_output.get_shape()

        # get attention function
        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
              = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

        # get decoding loop function
        decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(encoder_state,
                attention_keys, attention_values, attention_score_fn, attention_construct_fn)
        decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(output_fn,
                self.keyword_tensor,
                encoder_state, attention_keys, attention_values, attention_score_fn,
                attention_construct_fn, self.embed, GO_ID, EOS_ID, max_length, num_symbols)

        if is_train:
            # rnn decoder
            self.decoder_output, _, _ = dynamic_rnn_decoder(cell, decoder_fn_train,
                    self.decoder_input, self.responses_length, scope="decoder")
            # calculate the loss of decoder
            # self.decoder_output = tf.Print(self.decoder_output, [self.decoder_output])
            self.decoder_loss, self.log_perplexity = sampled_sequence_loss(self.decoder_output,
                    self.responses_target, self.decoder_mask, self.keyword_tensor, self.word_type)

            # building graph finished and get all parameters
            self.params = tf.trainable_variables()

            for item in tf.trainable_variables():
                print item.name, item.get_shape()

            # initialize the training process
            self.learning_rate = tf.Variable(float(learning_rate), trainable=False,
                    dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(
                    self.learning_rate * learning_rate_decay_factor)

            self.global_step = tf.Variable(0, trainable=False)

            # calculate the gradient of parameters

            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            gradients = tf.gradients(self.decoder_loss, self.params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients,
                    max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                    global_step=self.global_step)

        else:
            # rnn decoder
            self.decoder_distribution, _, _ = dynamic_rnn_decoder(cell, decoder_fn_inference,
                    scope="decoder")
            print("self.decoder_distribution.shape():",self.decoder_distribution.get_shape())
            self.decoder_distribution = tf.Print(self.decoder_distribution, ["distribution.shape()", tf.reduce_sum(self.decoder_distribution)])
            # generating the response
            self.generation_index = tf.argmax(tf.split(self.decoder_distribution,
                [2, num_symbols-2], 2)[1], 2) + 2 # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols, self.generation_index)

            self.params = tf.trainable_variables()

        self.saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V2,
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)