Example #1
def bi_gru_layer(
    layer_sizes: List[int],
    input: tf.Variable,
    input_length: tf.Variable,
    dropout_function: Callable[[tf.Variable], tf.Variable] = None,
    parallel_iterations: int = 64
) -> Tuple[tf.Variable, tf.Variable, tf.Variable]:
    curr_input, fw_final, bw_final = input, None, None

    for i, layer_size in enumerate(layer_sizes):
        with tf.variable_scope('bigru_{}'.format(i)) as scope:
            if dropout_function is not None and i != 0:
                curr_input = dropout_function(curr_input)

            fw_cell = GRUCell(layer_size)
            bw_cell = GRUCell(layer_size)
            (fw_out, bw_out), (fw_final, bw_final) = bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                inputs=curr_input,
                dtype=tf.float32,
                sequence_length=input_length,
                scope=scope,
                parallel_iterations=parallel_iterations,
                swap_memory=True)
            curr_input = tf.concat([fw_out, bw_out], axis=2)

    return curr_input, fw_final, bw_final
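A minimal usage sketch (not part of the original example), assuming TF 1.x with GRUCell and bidirectional_dynamic_rnn imported as in the snippet above; the shapes and the dropout lambda are illustrative:

import tensorflow as tf

tokens = tf.placeholder(tf.float32, [32, 40, 300])   # [batch, time, embedding]
lengths = tf.placeholder(tf.int32, [32])
dropout = lambda x: tf.nn.dropout(x, keep_prob=0.8)  # illustrative dropout_function

# Two stacked BiGRU layers; `encoded` has shape [32, 40, 2 * 128].
encoded, fw_state, bw_state = bi_gru_layer([128, 128], tokens, lengths, dropout)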
Example #2
def rnet_matching_layer(layer_size: int,
                        att_size: int,
                        par_vecs: tf.Variable,
                        qu_vecs: tf.Variable,
                        par_num_words: tf.Variable,
                        parallel_iterations: int = 64) -> tf.Variable:
    with tf.variable_scope('alignment_par_qu') as scope:
        with tf.variable_scope('fw/match_rnn_cell/attention'):
            fw_cell = MatchRNNCell(GRUCell(layer_size), qu_vecs, att_size)

        with tf.variable_scope('bw/match_rnn_cell/attention'):
            bw_cell = MatchRNNCell(GRUCell(layer_size), qu_vecs, att_size)

        (fw_out, bw_out), (_, _) = bidirectional_dynamic_rnn(
            fw_cell,
            bw_cell,
            inputs=par_vecs,
            dtype=tf.float32,
            sequence_length=par_num_words,
            scope=scope,
            parallel_iterations=parallel_iterations,
            swap_memory=True)
        match_par_qu_out = tf.concat([fw_out, bw_out], axis=2)

    return match_par_qu_out
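A hypothetical call site for the layer above (assuming TF 1.x and that the project's MatchRNNCell is importable); shapes are illustrative only:

import tensorflow as tf

par = tf.placeholder(tf.float32, [16, 120, 150])   # paragraph encodings
qu = tf.placeholder(tf.float32, [16, 30, 150])     # question encodings
par_len = tf.placeholder(tf.int32, [16])

# Forward and backward outputs are concatenated: [16, 120, 2 * 75].
matched = rnet_matching_layer(layer_size=75, att_size=75, par_vecs=par,
                              qu_vecs=qu, par_num_words=par_len)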
Example #3
def rnet_self_matching_layer_real(
        layer_size: int,
        att_size: int,
        par_vecs: tf.Variable,
        par_num_words: tf.Variable,
        parallel_iterations: int = 64) -> tf.Variable:
    with tf.variable_scope('alignment_self') as scope:
        WP = tf.get_variable('WP', [2 * layer_size, att_size])
        WPtilde = tf.get_variable('WPtilde', [2 * layer_size, att_size])
        v = tf.get_variable('v', [att_size])
        att_match_input = tf.einsum('ijk,kl->ijl', par_vecs, WPtilde)

        with tf.variable_scope('fw/match_rnn_cell/attention'):
            fw_cell = MatchRNNCellV2(GRUCell(layer_size), WP, v, par_vecs,
                                     att_match_input)

        with tf.variable_scope('bw/match_rnn_cell/attention'):
            bw_cell = MatchRNNCellV2(GRUCell(layer_size), WP, v, par_vecs,
                                     att_match_input)

        (fw_out, bw_out), (_, _) = bidirectional_dynamic_rnn(
            fw_cell,
            bw_cell,
            inputs=par_vecs,
            dtype=tf.float32,
            sequence_length=par_num_words,
            scope=scope,
            parallel_iterations=parallel_iterations,
            swap_memory=True)
        match_self_out = tf.concat([fw_out, bw_out], axis=2)

    return match_self_out
Example #4
    def build_model(self):
        temp = self.all_sequence[-1]

        with tf.variable_scope("lstm"):
            temp = dropout(temp, 0.1)
            seq_len = tf.reduce_sum(self.sent_mask, axis=1)
            gru_fw = GRUCell(num_units=768, activation=tf.tanh)
            gru_bw = GRUCell(num_units=768, activation=tf.tanh)
            outputs, output_states = bidirectional_dynamic_rnn(
                gru_fw, gru_bw, temp,
                sequence_length=seq_len, dtype=tf.float32)

            gru_output = tf.concat(outputs, axis=2)
            # gru_output = dropout(gru_output, 0.1)
            gru_output = tf.layers.dense(gru_output, units=768,
                                         kernel_initializer=create_initializer(0.02))
            gru_output = dropout(gru_output, 0.1)
            outputs = layer_norm(gru_output + temp)

            in_outputs = tf.layers.dense(outputs, units=768, activation=tf.tanh,
                                         kernel_initializer=create_initializer(0.02))

            layer_output = tf.layers.dense(in_outputs, 768,
                                           kernel_initializer=create_initializer(0.02))
            layer_output = dropout(layer_output, 0.1)
            layer_output = layer_norm(layer_output + outputs)

        return layer_output
Example #5
def biGRU(input, input_length, params, dropout=None, layers=None):
    dropout = dropout or params.dropout
    cell_fw = MultiRNNCell([
        DropoutWrapper(
            GRUCell(params.units),
            # output_keep_prob=1.0 - dropout,
            input_keep_prob=1.0 - dropout,
            state_keep_prob=1.0 - dropout,
            variational_recurrent=True,
            dtype=tf.float32,
            input_size=input.get_shape()[-1]
            if layer == 0 else tf.TensorShape(params.units))
        for layer in range(layers or params.layers)
    ])
    cell_bw = MultiRNNCell([
        DropoutWrapper(
            GRUCell(params.units),
            # output_keep_prob=1.0 - dropout,
            input_keep_prob=1.0 - dropout,
            state_keep_prob=1.0 - dropout,
            variational_recurrent=True,
            dtype=tf.float32,
            input_size=input.get_shape()[-1]
            if layer == 0 else tf.TensorShape(params.units))
        for layer in range(layers or params.layers)
    ])

    output, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        input,
        sequence_length=input_length,
        dtype=tf.float32)
    output = tf.concat(output, -1)
    return output, states
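A sketch of how biGRU might be driven (not from the original source); `params` is a stand-in namespace exposing the attributes the function reads, assuming TF 1.x:

from types import SimpleNamespace
import tensorflow as tf

params = SimpleNamespace(units=128, layers=2, dropout=0.2)
tokens = tf.placeholder(tf.float32, [None, 40, 300])
lengths = tf.placeholder(tf.int32, [None])

# `encoded` has shape [batch, 40, 2 * 128]; `states` holds the final fw/bw states.
encoded, states = biGRU(tokens, lengths, params)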
Example #6
def _char_embedding_layer(
        embedder: EmbeddingService, chars: tf.Variable, num_words: tf.Variable,
        num_chars: tf.Variable, char_rnn_size: int,
        dropout_function: Callable[[tf.Variable], tf.Variable]) -> tf.Variable:
    batch_size = int(chars.get_shape()[0])
    embedding_size = embedder.embedding_dim

    with tf.variable_scope('char_embedding_layer'):
        # [batch_size, dim_num_words, dim_num_chars]
        char_embeddings = tf.get_variable(name='char_embeddings',
                                          trainable=True,
                                          dtype=tf.float32,
                                          initializer=tf.constant(
                                              embedder.embedding_matrix,
                                              dtype=tf.float32))
        char_raw_embed = dropout_function(
            tf.nn.embedding_lookup(char_embeddings, chars))

        # we need to unstack instead of reshape, as two dimensions are unknown
        # batch_size * [dim_num_words, dim_num_chars, embedding_size]
        char_raw_embed_list = tf.unstack(char_raw_embed, batch_size, axis=0)
        char_raw_embed_length_list = tf.unstack(num_chars, batch_size, axis=0)
        # batch_size * [dim_num_words, layer_size]
        char_embed_list = []

        with tf.variable_scope('encoding') as scope:
            fw_cell = GRUCell(char_rnn_size)
            bw_cell = GRUCell(char_rnn_size)

            for i in range(len(char_raw_embed_list)):
                batch_embed = char_raw_embed_list[i]
                batch_char_length = char_raw_embed_length_list[i]

                (_, _), (fw_final, bw_final) = bidirectional_dynamic_rnn(
                    fw_cell,
                    bw_cell,
                    inputs=batch_embed,
                    dtype=tf.float32,
                    sequence_length=batch_char_length,
                    scope=scope,
                    parallel_iterations=64,
                    swap_memory=True)
                out = tf.concat([fw_final, bw_final], axis=1)
                char_embed_list.append(out)

    return tf.stack(char_embed_list, axis=0)
Example #7
  def __init__(self, feature_size, max_video_length, num_classes, cell_size, use_lstm, learning_rate,
      learning_rate_decay_factor, min_learning_rate, training_steps_per_epoch, max_gradient_norm,
      keep_prob=0.5, is_training=False):

    self.frame_feature_ph = tf.placeholder(tf.float32, [None, max_video_length, feature_size])
    self.video_length_ph = tf.placeholder(tf.int32, [None])
    self.video_label_ph = tf.placeholder(tf.int32, [None])

    if is_training:
      self.global_step = tf.Variable(0, trainable=False)
      self.learning_rate = tf.maximum(
          tf.train.exponential_decay(
            learning_rate,
            self.global_step,
            training_steps_per_epoch,
            learning_rate_decay_factor,
            staircase=True),
          min_learning_rate)

    # Make RNN cells
    cell = GRUCell(cell_size)
    if use_lstm:
      cell = BasicLSTMCell(cell_size, state_is_tuple=False)

    # RNN
    with tf.variable_scope('DynamicRNN'):
      outputs, state = dynamic_rnn(cell=cell, inputs=self.frame_feature_ph, sequence_length=self.video_length_ph, dtype=tf.float32)

    state = tf.nn.relu(state)

    if is_training:
      state = tf.nn.dropout(state, keep_prob=keep_prob)

    if num_classes == 2:
      with tf.variable_scope('Classification'):
        logit = tf.contrib.layers.fully_connected(inputs=state, num_outputs=1, activation_fn=None) # [batch_size, 1]
      self.logit = tf.squeeze(logit)                                                               # [batch_size]
      if is_training:
        video_label = tf.cast(x=self.video_label_ph, dtype=tf.float32)
        self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=video_label, logits=self.logit))
      else:
        self.prediction = tf.cast(tf.greater(x=logit, y=0.5), tf.int32)
    else:
      with tf.variable_scope('Classification'):
        self.logits = tf.contrib.layers.fully_connected(inputs=state, num_outputs=num_classes, activation_fn=None) # [batch_size, num_classes]
      if is_training:
        self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.video_label_ph, logits=self.logits))
      else:
        self.prediction = tf.argmax(self.logits, 1)
        
    if is_training:
      params = tf.trainable_variables()
      gradients = tf.gradients(self.loss, params)
      clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
      self.train_op = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
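A hypothetical training step for this constructor, assuming it belongs to a class (named VideoClassifier below purely for illustration; the snippet does not show the class name) and TF 1.x:

import numpy as np
import tensorflow as tf

# Hyperparameters below are illustrative, not taken from the original code.
model = VideoClassifier(feature_size=1024, max_video_length=300, num_classes=10,
                        cell_size=256, use_lstm=False, learning_rate=1e-3,
                        learning_rate_decay_factor=0.9, min_learning_rate=1e-5,
                        training_steps_per_epoch=1000, max_gradient_norm=5.0,
                        is_training=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.frame_feature_ph: np.zeros([4, 300, 1024], np.float32),
            model.video_length_ph: [300] * 4,
            model.video_label_ph: [0, 1, 2, 3]}
    _, loss = sess.run([model.train_op, model.loss], feed)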
Example #8
    def match_par_qu_layer(self):
        with tf.variable_scope('alignment_par_qu') as scope:
            rnn_cell = MatchRNNCell(GRUCell(self.conf_layer_size), self.qu_encoded, self.conf_att_size)

            outputs, final_state = dynamic_rnn(rnn_cell, self.par_encoded, self.par_num_words,
                                               parallel_iterations=self.conf_rnn_parallelity,
                                               scope=scope, swap_memory=True, dtype=tf.float32)

            with tf.variable_scope('encoding'):
                outputs, _, _ = bi_gru_layer([self.conf_layer_size], self.apply_dropout(outputs), self.par_num_words,
                                             self.apply_dropout)

        return outputs
Example #9
  def __init__(self, frame_feature_ph, num_classes, cell_size, use_lstm=False):

    self.frame_feature_ph = frame_feature_ph

    cell = GRUCell(cell_size)
    if use_lstm:
      cell = BasicLSTMCell(cell_size, state_is_tuple=False)
    with tf.variable_scope('DynamicRNN'):
      outputs, state = dynamic_rnn(cell=cell, inputs=self.frame_feature_ph,  dtype=tf.float32)

    outputs = tf.nn.relu(outputs)
    with tf.variable_scope('Classification'):
      node_logit = tf.contrib.layers.fully_connected(inputs=outputs, num_outputs=num_classes, activation_fn=None)
    logit = tf.nn.softmax(node_logit)
    self.logit = tf.nn.softmax(tf.reduce_mean(node_logit, 1))
    self.node = tf.argmax(logit, 2)
    self.prediction = tf.argmax(self.logit, 1)
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
Example #10
def pointer_net(passage, passage_length, question_pool, params, attention_fun,
                dropout):
    # question_pool = tf.nn.dropout(question_pool, 1 - dropout)

    attention_cell = attention_fun(memory=passage,
                                   memory_sequence_length=passage_length,
                                   name="pointer_attention",
                                   probability_fn=tf.identity,
                                   score_mask_value=0)
    p1, _ = attention_cell(question_pool, None)

    context = tf.reduce_sum(tf.expand_dims(tf.nn.softmax(p1), -1) * passage, 1)
    rnn = GRUCell(params.units * 2, name="pointer_gru")
    _, state = rnn(context, question_pool)

    # state = tf.nn.dropout(state, 1 - dropout)

    p2, _ = attention_cell(state, None)
    return p1, p2
Example #11
def rnet_matching_layer_unidirectional(
        layer_size: int,
        att_size: int,
        par_vecs: tf.Variable,
        qu_vecs: tf.Variable,
        par_num_words: tf.Variable,
        parallel_iterations: int = 64) -> tf.Variable:
    with tf.variable_scope('alignment_par_qu') as scope:
        with tf.variable_scope('fw/match_rnn_cell/attention'):
            rnn_cell = MatchRNNCell(GRUCell(layer_size), qu_vecs, att_size)

        output, _ = dynamic_rnn(rnn_cell,
                                inputs=par_vecs,
                                dtype=tf.float32,
                                sequence_length=par_num_words,
                                scope=scope,
                                parallel_iterations=parallel_iterations,
                                swap_memory=True)

    return output
Example #12
def train(epochs, batch_size):
    session_conf = tf.ConfigProto(
        # device_count={'GPU': gpu_count},
        allow_soft_placement=allow_soft_placement,
        log_device_placement=log_device_placement,
        gpu_options=tf.GPUOptions(allow_growth=True))
    # Training
    # ==================================================
    best_acc = 0
    best_epoch = 0
    best_report = ''
    gpu_device = 0
    with tf.device('/device:GPU:%d' % gpu_device):
        print('Using GPU - ', '/device:GPU:%d' % gpu_device)
        with tf.Graph().as_default():
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                seed = 1227

                kernel_init = tf.glorot_uniform_initializer(seed=seed,
                                                            dtype=tf.float32)
                bias_init = tf.zeros_initializer()
                word_cell = GRUCell(50,
                                    name='gru',
                                    activation=tf.nn.tanh,
                                    kernel_initializer=kernel_init,
                                    bias_initializer=bias_init)
                sent_cell = GRUCell(50,
                                    name='gru',
                                    activation=tf.nn.tanh,
                                    kernel_initializer=kernel_init,
                                    bias_initializer=bias_init)

                model = HAN_Model(vocab_size=vocab_size,
                                  embedding_size=200,
                                  classes=classes,
                                  word_cell=word_cell,
                                  sentence_cell=sent_cell,
                                  word_output_size=100,
                                  sentence_output_size=100,
                                  device=args.device,
                                  learning_rate=args.lr,
                                  dropout_keep_proba=0.5,
                                  scope='HANModel')
                sess.run(
                    tf.group(tf.global_variables_initializer(),
                             tf.local_variables_initializer()))
                # tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

                print("\nEvaluation before training:")
                # Evaluation after epoch
                validate(-1, model, sess, X_val, sent_length_val,
                         word_length_val, y_val, batch_size)

                for epoch in range(epochs):
                    epoch += 1

                    batches = batch_iter(
                        list(
                            zip(X_train, sent_length_train, word_length_train,
                                y_train)), batch_size)

                    # Training loop. For each batch...
                    print('\nTraining epoch {}'.format(epoch))
                    l = []
                    a = []
                    for i, batch in tqdm(enumerate(list(batches))):
                        X_batch, sent_len_batch, word_length_batch, y_batch = zip(
                            *batch)
                        # print('batch_hist_v', len(batch_utt_v))
                        feed_dict = {
                            model.inputs: X_batch,
                            model.sentence_lengths: sent_len_batch,
                            model.word_lengths: word_length_batch,
                            model.labels: y_batch,
                            model.is_training: True,
                        }

                        _, step, loss, accuracy = sess.run([
                            model.train_op, model.global_step, model.loss,
                            model.accuracy
                        ], feed_dict)
                        l.append(loss)
                        a.append(accuracy)

                    print("\t \tEpoch {}:, loss {:g}, Accuracy {:g}".format(
                        epoch, np.average(l), np.average(a)))
                    # Evaluation after epoch
                    accuracy, report = validate(epoch, model, sess, X_val,
                                                sent_length_val,
                                                word_length_val, y_val,
                                                batch_size)

                    if accuracy > best_acc:
                        best_epoch = epoch
                        best_acc = accuracy
                        best_report = report

                print("\n\nBest epoch: {}\nBest test accuracy: {}".format(
                    best_epoch, best_acc))
                print("\n\nBest epoch: {}\nBest test report: \n{}".format(
                    best_epoch, best_report))
Example #13
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            name_scope,
            sequence_length,
            start_token,
            end_token,
            learning_rate=0.001,
            learning_rate_decay_factor=0.95,
            max_gradient_norm=5,
            num_samples=512,
            max_length=30):

        # Input: text_id and text_length
        self.sequence_length = sequence_length
        self.responses = tf.placeholder(tf.int32, shape=[None, None])  # (batch, len)
        self.responses_length = tf.placeholder(tf.int32, shape=[None, ])  # batch
        self.end_token = end_token

        # Build the embedding table (index to vector)
        self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)

        # Construct the input and output of GRU
        self.responses_target = self.responses
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(self.responses)[1]
        self.responses_input = tf.concat([tf.ones([batch_size, 1], dtype=tf.int32)*start_token,
            tf.split(self.responses_target, [decoder_len-1, 1], 1)[0]], 1)   # batch*len
        self.decoder_mask = tf.reshape(tf.cumsum(tf.one_hot(self.responses_length-1,
            decoder_len), reverse=True, axis=1), [-1, decoder_len]) # batch * len

        self.decoder_input = tf.nn.embedding_lookup(self.embed, self.responses_input)
        cell_dec = GRUCell(num_units)
        encoder_state = tf.zeros([batch_size, num_units])
        output_fn, sampled_sequence_loss = output_projection_layer(num_units, num_symbols, num_samples)

        # RNN language model
        with variable_scope.variable_scope('decoder'):
            decoder_fn_train = my_simple_decoder_fn.simple_decoder_fn_train(encoder_state)
            self.decoder_output, _, _ = my_seq2seq.dynamic_rnn_decoder(
                cell_dec, decoder_fn_train, self.decoder_input,
                self.responses_length, scope="decoder_rnn")
            self.decoder_loss, self.all_decoder_output = my_loss.sequence_loss(
                self.decoder_output, self.responses_target, self.decoder_mask,
                softmax_loss_function=sampled_sequence_loss)

        with variable_scope.variable_scope('decoder', reuse=True):
            decoder_fn_inference = my_simple_decoder_fn.simple_decoder_fn_inference(output_fn,
                                                                                    encoder_state,
                                                                                    self.embed, start_token, end_token,
                                                                                    max_length, num_symbols)
            self.decoder_distribution, _, _ = my_seq2seq.dynamic_rnn_decoder(
                cell_dec, decoder_fn_inference, scope="decoder_rnn")
            self.generation_index = tf.argmax(tf.split(self.decoder_distribution,
                [2, num_symbols-2], 2)[1], 2) + 2 # for removing UNK
            self.generation = self.generation_index

        self.params = [k for k in tf.trainable_variables() if name_scope in k.name]

        # Initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # Calculate the gradient of parameters
        self.cost = tf.reduce_mean(self.decoder_loss)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.cost, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

        all_variables = [k for k in tf.global_variables() if name_scope in k.name]
        self.saver = tf.train.Saver(all_variables, write_version=tf.train.SaverDef.V2,
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
Example #14
def Model(_abnormal_data, _abnormal_label, _hidden_num, _elem_num, _file_name,
          _partition):
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        # placeholder list
        p_input = tf.placeholder(tf.float32,
                                 shape=(batch_num, _abnormal_data.shape[1],
                                        _abnormal_data.shape[2]))
        p_inputs = [
            tf.squeeze(t, [1])
            for t in tf.split(p_input, _abnormal_data.shape[1], 1)
        ]

        # projection_layer = tf.layers.Dense(units=_elem_num, use_bias=True)

        # with tf.device('/device:GPU:0'):
        d_enc = {}
        with tf.variable_scope('encoder'):
            for j in range(ensemble_space):
                if cell_type == 0:
                    enc_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                if cell_type == 1:
                    pure_enc_cell = LSTMCell(_hidden_num)
                    residual_enc_cell = RLSTMCell(_hidden_num,
                                                  reuse=tf.AUTO_REUSE)
                    enc_cell = RSLSTMCell(_hidden_num,
                                          file_name=_file_name,
                                          component=j,
                                          partition=_partition,
                                          type='enc',
                                          reuse=tf.AUTO_REUSE)
                if cell_type == 2:
                    pure_enc_cell = GRUCell(_hidden_num)
                    enc_cell = RSGRUCell(_hidden_num)

                if j == 0:
                    enc_state = pure_enc_cell.zero_state(batch_size=batch_num,
                                                         dtype=tf.float32)
                    enc_outputs = []
                    for step in range(len(p_inputs)):
                        enc_input = p_inputs[step]
                        enc_output_, enc_state = pure_enc_cell(
                            enc_input, enc_state)
                        enc_outputs.append(enc_output_)

                    d_enc['enc_output_{0}'.format(j)] = enc_outputs
                    d_enc['enc_state_{0}'.format(j)] = enc_state

                elif j == 1:
                    enc_state = residual_enc_cell.zero_state(
                        batch_size=batch_num, dtype=tf.float32)
                    enc_outputs = []
                    for step in range(len(p_inputs)):
                        enc_input = p_inputs[step]
                        enc_output_, enc_state = residual_enc_cell(
                            enc_input, enc_state)
                        enc_outputs.append(enc_output_)

                    d_enc['enc_output_{0}'.format(j)] = enc_outputs
                    d_enc['enc_state_{0}'.format(j)] = enc_state

                else:
                    enc_state = enc_cell.zero_state(batch_size=batch_num,
                                                    dtype=tf.float32)
                    enc_outputs = []
                    for step in range(len(p_inputs)):
                        enc_input = p_inputs[step]
                        enc_output_, enc_state = enc_cell(enc_input, enc_state)
                        enc_outputs.append(enc_output_)

                    d_enc['enc_output_{0}'.format(j)] = enc_outputs
                    d_enc['enc_state_{0}'.format(j)] = enc_state

            shared_state_c = tf.concat([
                d_enc['enc_state_{0}'.format(j)].c
                for j in range(ensemble_space)
            ],
                                       axis=1)
            shared_state_h = tf.concat([
                d_enc['enc_state_{0}'.format(j)].h
                for j in range(ensemble_space)
            ],
                                       axis=1)

            if compress:
                compress_state = tf.layers.Dense(units=_hidden_num,
                                                 activation=tf.tanh,
                                                 use_bias=True)
                shared_state_c = compress_state(shared_state_c)
                shared_state_h = compress_state(shared_state_h)

            shared_state = LSTMStateTuple(shared_state_c, shared_state_h)

        # with tf.device('/device:GPU:1'):
        d_dec = {}
        with tf.variable_scope('decoder') as vs:
            dec_weight_ = tf.Variable(tf.truncated_normal(
                [_hidden_num * ensemble_space, _elem_num], dtype=tf.float32),
                                      name="dec_weight")
            dec_bias_ = tf.Variable(tf.constant(0.1,
                                                shape=[_elem_num],
                                                dtype=tf.float32),
                                    name="dec_bias")
            if decode_without_input:
                for k in range(ensemble_space):
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        dec_inputs = [
                            tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                            for _ in range(len(p_inputs))
                        ]
                        dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                            pure_dec_cell,
                            dec_inputs,
                            initial_state=shared_state,
                            dtype=tf.float32)
                    elif k == 1:
                        dec_inputs = [
                            tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                            for _ in range(len(p_inputs))
                        ]
                        dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                            residual_dec_cell,
                            dec_inputs,
                            initial_state=shared_state,
                            dtype=tf.float32)
                    else:
                        dec_inputs = [
                            tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                            for _ in range(len(p_inputs))
                        ]
                        dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                            dec_cell,
                            dec_inputs,
                            initial_state=shared_state,
                            dtype=tf.float32)

                    if reverse:
                        dec_outputs = dec_outputs[::-1]

                    dec_output_ = tf.transpose(tf.stack(dec_outputs),
                                               [1, 0, 2])
                    dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0),
                                          [batch_num, 1, 1])
                    d_dec['dec_output_{0}'.format(k)] = tf.matmul(
                        dec_output_, dec_weight_) + dec_bias_

                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

            else:
                for k in range(ensemble_space):
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num,
                                                          reuse=tf.AUTO_REUSE)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space,
                                                          reuse=tf.AUTO_REUSE)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        dec_state = shared_state
                        dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                              dtype=tf.float32)
                        dec_outputs = []
                        for step in range(len(p_inputs)):
                            if step > 0:
                                vs.reuse_variables()
                            dec_input_, dec_state = pure_dec_cell(
                                dec_input_, dec_state)
                            dec_input_ = tf.matmul(dec_input_,
                                                   dec_weight_) + dec_bias_
                            dec_outputs.append(dec_input_)

                    elif k == 1:
                        dec_state = shared_state
                        dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                              dtype=tf.float32)
                        dec_outputs = []
                        for step in range(len(p_inputs)):
                            if step > 0:
                                vs.reuse_variables()
                            dec_input_, dec_state = residual_dec_cell(
                                dec_input_, dec_state)
                            dec_input_ = tf.matmul(dec_input_,
                                                   dec_weight_) + dec_bias_
                            dec_outputs.append(dec_input_)

                    else:
                        dec_state = shared_state
                        dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                              dtype=tf.float32)
                        dec_outputs = []
                        for step in range(len(p_inputs)):
                            if step > 0:
                                vs.reuse_variables()
                            dec_input_, dec_state = dec_cell(
                                dec_input_, dec_state)
                            dec_input_ = tf.matmul(dec_input_,
                                                   dec_weight_) + dec_bias_
                            dec_outputs.append(dec_input_)

                    d_dec['dec_output_{0}'.format(k)] = dec_outputs

                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

        sum_of_difference = 0
        for i in range(ensemble_space):
            sum_of_difference += d_dec['dec_output_{0}'.format(i)][0] - p_input

        loss = tf.reduce_mean(tf.square(sum_of_difference))
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
    return g, p_input, d_dec, loss, optimizer, saver
Example #15
    def _build_decoder(self):
        with tf.variable_scope("dialog_decoder"):
            with tf.variable_scope("decoder_output_projection"):  # 全连接层
                output_layer = layers_core.Dense(
                    self.config.vocab_size,
                    use_bias=False,
                    name="output_projection")  # units单元个数  词表大小

            with tf.variable_scope("decoder_rnn"):
                attn_mech = tc_seq2seq.BahdanauAttention(
                    self.config.dec_hidden_size, self.word_outputs, None)
                attn_mech1 = tc_seq2seq.BahdanauAttention(
                    self.config.dec_hidden_size, self.uttn_outputs, None)
                attn_mech2 = tc_seq2seq.BahdanauAttention(
                    self.config.dec_hidden_size, self.encoder_outputs, None)

                self.att1 = attn_mech.batch_size
                self.att2 = attn_mech1.batch_size
                self.att3 = attn_mech2.batch_size

                dec_cell = GRUCell(self.config.dec_hidden_size)

                #dec_cell = grucell_cond.GRUCellCond(self.config.dec_hidden_size)
                #self.encoder_outputs = tf.reshape(self.encoder_outputs,[-1,self.config.dec_hidden_size*2])
                #dec_cell = grucell_cond.CondWrapper(dec_cell, self.encoder_outputs)
                #word_outputs = tf.reshape(self.word_outputs,[self.batch_size,-1])

                dec_cell = EAttentionWrapper(
                    dec_cell, [attn_mech, attn_mech1, attn_mech2],
                    attention_layer_size=[
                        self.config.dec_hidden_size,
                        self.config.dec_hidden_size,
                        self.config.dec_hidden_size
                    ])
                #print('self.batch_size',self.batch_size)
                dec_init_state = dec_cell.zero_state(
                    batch_size=self.batch_size, dtype=tf.float32)

                # Training or Eval
                if self.mode != ModelMode.infer:  # not infer, do decode turn by turn

                    resp_emb_inp = tf.nn.embedding_lookup(
                        self.decoder_embeddings, self.target_input)
                    helper = tc_seq2seq.TrainingHelper(resp_emb_inp,
                                                       self.target_length)
                    decoder = tc_seq2seq.BasicDecoder(
                        cell=dec_cell,
                        helper=helper,
                        initial_state=dec_init_state,  # final state of the encoder
                        output_layer=output_layer  # fully-connected projection layer
                    )

                    dec_outputs, dec_state, _ = tc_seq2seq.dynamic_decode(
                        decoder)
                    sample_id = dec_outputs.sample_id
                    logits = dec_outputs.rnn_output

                else:
                    start_tokens = tf.fill([self.batch_size],
                                           self.config.sos_idx)
                    end_token = self.config.eos_idx
                    maximum_iterations = tf.to_int32(self.config.infer_max_len)

                    helper = tc_seq2seq.GreedyEmbeddingHelper(
                        self.decoder_embeddings,
                        start_tokens=start_tokens,
                        end_token=tf.constant(end_token, dtype=tf.int32))

                    decoder = tc_seq2seq.BasicDecoder(
                        cell=dec_cell,
                        helper=helper,
                        initial_state=dec_init_state,
                        output_layer=output_layer  # fully-connected projection layer
                    )

                    dec_outputs, dec_state, _ = tc_seq2seq.dynamic_decode(
                        decoder, maximum_iterations=maximum_iterations)
                    logits = tf.no_op()
                    sample_id = dec_outputs.sample_id

                self.logits = logits
                self.sample_id = sample_id
Example #16
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 vocab=None,
                 embed=None,
                 name_scope=None,
                 learning_rate=0.0001,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5,
                 l2_lambda=0.2):

        self.posts = tf.placeholder(tf.string, shape=[None,
                                                      None])  # batch * len
        self.posts_length = tf.placeholder(tf.int32, shape=[None])  # batch
        self.responses = tf.placeholder(tf.string, shape=[None,
                                                          None])  # batch*len
        self.responses_length = tf.placeholder(tf.int32, shape=[None])  # batch
        self.generation = tf.placeholder(tf.string, shape=[None,
                                                           None])  # batch*len
        self.generation_length = tf.placeholder(tf.int32,
                                                shape=[None])  # batch

        # build the vocab table (string to index)
        self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        self.symbol2index = HashTable(KeyValueTensorInitializer(
            self.symbols,
            tf.Variable(
                np.array([i for i in range(num_symbols)], dtype=np.int32),
                False)),
                                      default_value=UNK_ID,
                                      name="symbol2index")

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.posts_input = self.symbol2index.lookup(
            self.posts)  # batch * utter_len
        self.posts_input_embed = tf.nn.embedding_lookup(
            self.embed, self.posts_input)  #batch * utter_len * embed_unit
        self.responses_input = self.symbol2index.lookup(self.responses)
        self.responses_input_embed = tf.nn.embedding_lookup(
            self.embed, self.responses_input)  # batch * utter_len * embed_unit
        self.generation_input = self.symbol2index.lookup(self.generation)
        self.generation_input_embed = tf.nn.embedding_lookup(
            self.embed,
            self.generation_input)  # batch * utter_len * embed_unit

        # Construct bidirectional GRU cells for encoder / decoder
        cell_fw_post = GRUCell(num_units)
        cell_bw_post = GRUCell(num_units)
        cell_fw_resp = GRUCell(num_units)
        cell_bw_resp = GRUCell(num_units)

        # Encode the post sequence
        with variable_scope.variable_scope("post_encoder"):
            posts_state, posts_final_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw_post,
                cell_bw_post,
                self.posts_input_embed,
                self.posts_length,
                dtype=tf.float32)
            posts_final_state_bid = tf.concat(
                posts_final_state, 1)  # batch_size * (2 * num_units)

        # Encode the real response sequence
        with variable_scope.variable_scope("resp_encoder"):
            responses_state, responses_final_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw_resp,
                cell_bw_resp,
                self.responses_input_embed,
                self.responses_length,
                dtype=tf.float32)
            responses_final_state_bid = tf.concat(responses_final_state, 1)

        # Encode the generated response sequence
        with variable_scope.variable_scope("resp_encoder", reuse=True):
            generation_state, generation_final_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw_resp,
                cell_bw_resp,
                self.generation_input_embed,
                self.generation_length,
                dtype=tf.float32)
            generation_final_state_bid = tf.concat(generation_final_state, 1)

        # Calculate the relevance score between post and real response
        with variable_scope.variable_scope("calibration"):
            self.W = tf.get_variable('W', [2 * num_units, 2 * num_units],
                                     tf.float32)
            vec_post = tf.reshape(posts_final_state_bid,
                                  [-1, 1, 2 * num_units])
            vec_resp = tf.reshape(responses_final_state_bid,
                                  [-1, 2 * num_units, 1])
            attn_score_true = tf.einsum(
                'aij,ajk->aik', tf.einsum('aij,jk->aik', vec_post, self.W),
                vec_resp)
            attn_score_true = tf.reshape(attn_score_true, [-1, 1])
            fc_true_input = tf.concat([
                posts_final_state_bid, responses_final_state_bid,
                attn_score_true
            ], 1)

            self.output_fc_W = tf.get_variable("output_fc_W",
                                               [4 * num_units + 1, num_units],
                                               tf.float32)
            self.output_fc_b = tf.get_variable("output_fc_b", [num_units],
                                               tf.float32)
            fc_true = tf.nn.tanh(
                tf.nn.xw_plus_b(fc_true_input, self.output_fc_W,
                                self.output_fc_b))  # batch_size

            self.output_W = tf.get_variable("output_W", [num_units, 1],
                                            tf.float32)
            self.output_b = tf.get_variable("output_b", [1], tf.float32)
            self.cost_true = tf.nn.sigmoid(
                tf.nn.xw_plus_b(fc_true, self.output_W,
                                self.output_b))  # batch_size

        # Calculate the relevance score between post and generated response
        with variable_scope.variable_scope("calibration", reuse=True):
            vec_gen = tf.reshape(generation_final_state_bid,
                                 [-1, 2 * num_units, 1])
            attn_score_false = tf.einsum(
                'aij,ajk->aik', tf.einsum('aij,jk->aik', vec_post, self.W),
                vec_gen)
            attn_score_false = tf.reshape(attn_score_false, [-1, 1])
            fc_false_input = tf.concat([
                posts_final_state_bid, generation_final_state_bid,
                attn_score_false
            ], 1)
            fc_false = tf.nn.tanh(
                tf.nn.xw_plus_b(fc_false_input, self.output_fc_W,
                                self.output_fc_b))  # batch_size
            self.cost_false = tf.nn.sigmoid(
                tf.nn.xw_plus_b(fc_false, self.output_W,
                                self.output_b))  # batch_size

        self.PR_cost = tf.reduce_mean(
            tf.reduce_sum(tf.square(self.cost_true - 1.0), axis=1))
        self.PG_cost = tf.reduce_mean(
            tf.reduce_sum(tf.square(self.cost_false), axis=1))

        # Use the loss similar to least square GAN
        self.cost = self.PR_cost / 2.0 + self.PG_cost / 2.0 + l2_lambda * (
            tf.nn.l2_loss(self.output_fc_W) + tf.nn.l2_loss(self.output_fc_b) +
            tf.nn.l2_loss(self.output_W) + tf.nn.l2_loss(self.output_b) +
            tf.nn.l2_loss(self.W))

        # building graph finished and get all parameters
        self.params = [
            k for k in tf.trainable_variables() if name_scope in k.name
        ]

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.adv_global_step = tf.Variable(0, trainable=False)

        # calculate the gradient of parameters
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.cost, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)
        self.reward = tf.reduce_sum(self.cost_false, axis=1)  # batch

        all_variables = [
            k for k in tf.global_variables() if name_scope in k.name
        ]
        self.saver = tf.train.Saver(all_variables,
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=5,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
        self.adv_saver = tf.train.Saver(all_variables,
                                        write_version=tf.train.SaverDef.V2,
                                        max_to_keep=5,
                                        pad_step_number=True,
                                        keep_checkpoint_every_n_hours=1.0)
Example #17
def attention_decoder(inputs,
                      memory,
                      num_units=None,
                      batch_size=1,
                      inputs_length=None,
                      n_mels=80,
                      reduction=1,
                      default_max_iters=200,
                      is_training=True,
                      scope='attention_decoder',
                      reuse=None):
    """
    Applies a GRU to 'inputs' while attending to 'memory'.
    :param inputs: A 3d tensor with shape of [N, T', C']. Decoder inputs.
    :param memory: A 3d tensor with shape of [N, T, C]. Outputs of encoder network.
    :param num_units: An int. Attention size.
    :param batch_size: An int. Batch size.
    :param inputs_length: An int32 tensor. Lengths of the decoder input sequences.
    :param n_mels: An int. Number of Mel banks to generate.
    :param reduction: An int. Reduction factor. Paper => 2, 3, 5.
    :param default_max_iters: Default max iteration of decoding.
    :param is_training: running mode.
    :param scope: Optional scope for `variable_scope`.
    :param reuse: Boolean, whether to reuse the weights of a previous layer by the same name.
    :return: A tuple of (decoder outputs with shape [N, T_out/r, n_mels*r], final decoder state).
    """
    with tf.variable_scope(scope, reuse=reuse):
        # params setting
        if is_training:
            max_iters = None
        else:
            max_iters = default_max_iters
        # max_iters = default_max_iters
        if num_units is None:
            num_units = inputs.get_shape().as_list()[-1]

        # Decoder cell
        decoder_cell = tf.nn.rnn_cell.GRUCell(num_units)

        # Attention
        # [N, T_in, attention_depth]
        attention_cell = AttentionWrapper(decoder_cell,
                                          BahdanauAttention(num_units, memory),
                                          alignment_history=True)

        # Concatenate attention context vector and RNN cell output into a 2*attention_depth=512D vector.
        # [N, T_in, 2*attention_depth]
        concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

        # Decoder (layers specified bottom to top):
        # [N, T_in, decoder_depth]
        decoder_cell = MultiRNNCell([
            OutputProjectionWrapper(concat_cell, num_units),
            ResidualWrapper(GRUCell(num_units)),
            ResidualWrapper(GRUCell(num_units))
        ],
                                    state_is_tuple=True)

        # Project onto r mel spectrogram (predict r outputs at each RNN step):
        output_cell = OutputProjectionWrapper(decoder_cell, n_mels * reduction)

        decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)

        if is_training:
            # helper = TacotronTrainingHelper(batch_size, n_mels, reduction, inputs)
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=inputs, sequence_length=inputs_length, time_major=False)
        else:
            helper = TacotronInferenceHelper(batch_size, n_mels, reduction)

        decoder = BasicDecoder(output_cell, helper, decoder_init_state)
        # [N, T_out/r, M*r]
        (decoder_outputs, _), final_decoder_state, _ = dynamic_decode(
            decoder, maximum_iterations=max_iters)

    return decoder_outputs, final_decoder_state
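A training-mode usage sketch (assuming the seq2seq/RNN wrappers imported by the original module are in scope); batch size, step counts, and feature sizes are illustrative:

import tensorflow as tf

memory = tf.placeholder(tf.float32, [8, 100, 256])     # encoder outputs [N, T, C]
dec_inputs = tf.placeholder(tf.float32, [8, 50, 80])   # teacher-forced frames [N, T', n_mels]
dec_lengths = tf.placeholder(tf.int32, [8])

mel_outputs, final_state = attention_decoder(dec_inputs, memory,
                                              num_units=256, batch_size=8,
                                              inputs_length=dec_lengths,
                                              n_mels=80, reduction=1,
                                              is_training=True)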
Example #18
def Model(_abnormal_data, _abnormal_label, _hidden_num, _elem_num, _file_name,
          _partition):
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        # placeholder list
        p_input = tf.placeholder(tf.float32,
                                 shape=(batch_num, _abnormal_data.shape[1],
                                        _abnormal_data.shape[2]))
        # p_inputs = [tf.squeeze(t, [1]) for t in tf.split(p_input, _abnormal_data.shape[1], 1)]

        # Regularizer signature
        l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.005,
                                                          scope=None)

        # Projection layer
        projection_layer = tf.layers.Dense(units=_elem_num, use_bias=True)

        # with tf.device('/device:GPU:0'):
        d_enc = {}
        with tf.variable_scope('encoder'):
            for j in range(ensemble_space):
                # create RNN cell
                if cell_type == 0:
                    enc_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                if cell_type == 1:
                    pure_enc_cell = LSTMCell(_hidden_num)
                    residual_enc_cell = RLSTMCell(_hidden_num)
                    # enc_cell = RSLSTMCell(_hidden_num, file_name=_file_name, type='enc', partition=_partition,
                    #                       component=j, reuse=tf.AUTO_REUSE)
                    enc_cell = RKLSTMCell(_hidden_num,
                                          file_name=_file_name,
                                          type='enc',
                                          partition=_partition,
                                          component=j,
                                          reuse=tf.AUTO_REUSE)
                if cell_type == 2:
                    pure_enc_cell = GRUCell(_hidden_num)
                    enc_cell = RSGRUCell(_hidden_num)
                if j == 0:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            pure_enc_cell, p_input, dtype=tf.float32)

                elif j == 1:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            residual_enc_cell, p_input, dtype=tf.float32)

                else:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            enc_cell, p_input, dtype=tf.float32)

            # shared_state_c = tf.concat([d_enc['enc_state_{0}'.format(j)].c for j in range(ensemble_space)], axis=1)
            # shared_state_h = tf.concat([d_enc['enc_state_{0}'.format(j)].h for j in range(ensemble_space)], axis=1)
            w_c = tf.Variable(tf.zeros([_hidden_num, _hidden_num]))
            b_c = tf.Variable(tf.zeros([_hidden_num]))
            w_h = tf.Variable(tf.zeros([_hidden_num, _hidden_num]))
            b_h = tf.Variable(tf.zeros([_hidden_num]))
            shared_state_c = tf.concat([
                tf.matmul(d_enc['enc_state_{0}'.format(j)].c, w_c) + b_c
                for j in range(ensemble_space)
            ],
                                       axis=1)
            shared_state_h = tf.concat([
                tf.matmul(d_enc['enc_state_{0}'.format(j)].h, w_h) + b_h
                for j in range(ensemble_space)
            ],
                                       axis=1)

            if compress:
                compress_state = tf.layers.Dense(units=_hidden_num,
                                                 activation=tf.tanh,
                                                 use_bias=True)
                shared_state_c = compress_state(shared_state_c)
                shared_state_h = compress_state(shared_state_h)

            shared_state = LSTMStateTuple(shared_state_c, shared_state_h)

        # with tf.device('/device:GPU:1'):
        d_dec = {}
        with tf.variable_scope('decoder') as vs:
            if decode_without_input:
                dec_input = tf.zeros(
                    [p_input.shape[0], p_input.shape[1], p_input.shape[2]],
                    dtype=tf.float32)
                for k in range(ensemble_space):
                    # create RNN cell
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                pure_dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)
                    elif k == 1:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                residual_dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)
                    else:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)

                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

            else:
                dec_input = tf.zeros([p_input.shape[0], p_input.shape[2]],
                                     dtype=tf.float32)
                for k in range(ensemble_space):
                    # create RNN cell
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            # dec_cell = RSLSTMCell(_hidden_num, file_name=_file_name, type='dec', partition=_partition,
                            #                       component=k, reuse=tf.AUTO_REUSE)
                            dec_cell = RKLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            # dec_cell = RSLSTMCell(_hidden_num * ensemble_space, file_name=_file_name, type='dec',
                            #                       partition=_partition, component=k, reuse=tf.AUTO_REUSE)
                            dec_cell = RKLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    inference_helper = tf.contrib.seq2seq.InferenceHelper(
                        sample_fn=lambda outputs: outputs,
                        sample_shape=[_elem_num],
                        sample_dtype=tf.float32,
                        start_inputs=dec_input,
                        end_fn=lambda sample_ids: False)
                    if k == 0:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            pure_dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)
                    elif k == 1:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            residual_dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)
                    else:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)

                    d_dec['dec_output_{0}'.format(
                        k)], _, _ = tf.contrib.seq2seq.dynamic_decode(
                            inference_decoder,
                            impute_finished=True,
                            maximum_iterations=p_input.shape[1])

                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

        sum_of_difference = 0
        for i in range(ensemble_space):
            sum_of_difference += d_dec['dec_output_{0}'.format(i)][0] - p_input

        loss = tf.reduce_mean(tf.square(sum_of_difference))
        regularization_penalty = tf.contrib.layers.apply_regularization(
            l1_regularizer, [shared_state])
        loss = loss + regularization_penalty
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
    return g, p_input, d_dec, loss, optimizer, saver
Beispiel #19
0
 def gru_cell():
     """GRU cell for a single recurrent layer."""
     return GRUCell(self.config.hidden_dim)
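A zero-argument cell factory like gru_cell above is typically passed to MultiRNNCell so that every layer gets its own fresh cell object. A minimal usage sketch under that assumption (make_gru_cell, stacked_gru_encoder, hidden_dim, num_layers and keep_prob are illustrative names, not from this snippet):

import tensorflow as tf

def make_gru_cell(hidden_dim, keep_prob):
    """One GRU layer wrapped with output dropout."""
    cell = tf.nn.rnn_cell.GRUCell(hidden_dim)
    return tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

def stacked_gru_encoder(inputs, lengths, hidden_dim=128, num_layers=2,
                        keep_prob=0.8):
    """Run a multi-layer GRU over [batch, time, features] inputs."""
    cells = tf.nn.rnn_cell.MultiRNNCell(
        [make_gru_cell(hidden_dim, keep_prob) for _ in range(num_layers)])
    outputs, final_state = tf.nn.dynamic_rnn(
        cells, inputs, sequence_length=lengths, dtype=tf.float32)
    return outputs, final_state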
Beispiel #20
0
def Model(_j, _abnormal_data, _abnormal_label, _hidden_num, _elem_num,
          _file_name, _partition):
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        # placeholder list
        p_input = tf.placeholder(tf.float32,
                                 shape=(batch_num, _abnormal_data.shape[1],
                                        _abnormal_data.shape[2]))
        p_inputs = [
            tf.squeeze(t, [1])
            for t in tf.split(p_input, _abnormal_data.shape[1], 1)
        ]

        # create RNN cell
        if cell_type == 0:
            enc_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
            dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
        if cell_type == 1:
            pure_enc_cell = LSTMCell(_hidden_num)
            pure_dec_cell = LSTMCell(_hidden_num)
            residual_enc_cell = RLSTMCell(_hidden_num)
            residual_dec_cell = RLSTMCell(_hidden_num)
            enc_cell = RSLSTMCell(_hidden_num,
                                  file_name=_file_name,
                                  component=_j,
                                  partition=_partition,
                                  type='enc')
            dec_cell = RSLSTMCell(_hidden_num,
                                  file_name=_file_name,
                                  component=_j,
                                  partition=_partition,
                                  type='dec')
        if cell_type == 2:
            pure_enc_cell = GRUCell(_hidden_num)
            pure_dec_cell = GRUCell(_hidden_num)
            enc_cell = RGRUCell(_hidden_num)
            dec_cell = RGRUCell(_hidden_num)

        # projection_layer = tf.layers.Dense(units=_elem_num, use_bias=True)

        # with tf.device('/device:GPU:0'):
        with tf.variable_scope("encoder"):
            if _j == 0:
                enc_state = pure_enc_cell.zero_state(batch_size=batch_num,
                                                     dtype=tf.float32)
                enc_outputs = []
                for step in range(len(p_inputs)):
                    enc_input = p_inputs[step]
                    enc_output_, enc_state = pure_enc_cell(
                        enc_input, enc_state)
                    enc_outputs.append(enc_output_)
            elif _j == 1:
                enc_state = residual_enc_cell.zero_state(batch_size=batch_num,
                                                         dtype=tf.float32)
                enc_outputs = []
                for step in range(len(p_inputs)):
                    enc_output_, enc_state = residual_enc_cell(
                        p_inputs[step], enc_state)
                    enc_outputs.append(enc_output_)
            else:
                enc_state = enc_cell.zero_state(batch_size=batch_num,
                                                dtype=tf.float32)
                enc_outputs = []
                for step in range(len(p_inputs)):
                    enc_output_, enc_state = enc_cell(p_inputs[step],
                                                      enc_state)
                    enc_outputs.append(enc_output_)

        # with tf.device('/device:GPU:1'):
        with tf.variable_scope('decoder') as vs:
            dec_weight_ = tf.Variable(tf.truncated_normal(
                [_hidden_num, _elem_num], dtype=tf.float32),
                                      name="dec_weight")
            dec_bias_ = tf.Variable(tf.constant(0.1,
                                                shape=[_elem_num],
                                                dtype=tf.float32),
                                    name="dec_bias")
            if decode_without_input:
                if _j == 0:
                    dec_inputs = [
                        tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                        for _ in range(len(p_inputs))
                    ]
                    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                        pure_dec_cell,
                        dec_inputs,
                        initial_state=enc_state,
                        dtype=tf.float32)
                elif _j == 1:
                    dec_inputs = [
                        tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                        for _ in range(len(p_inputs))
                    ]
                    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                        residual_dec_cell,
                        dec_inputs,
                        initial_state=enc_state,
                        dtype=tf.float32)
                else:
                    dec_inputs = [
                        tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                        for _ in range(len(p_inputs))
                    ]
                    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                        dec_cell,
                        dec_inputs,
                        initial_state=enc_state,
                        dtype=tf.float32)

                if reverse:
                    dec_outputs = dec_outputs[::-1]

                dec_output_ = tf.transpose(tf.stack(dec_outputs), [1, 0, 2])
                dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0),
                                      [batch_num, 1, 1])
                dec_outputs = tf.matmul(dec_output_, dec_weight_) + dec_bias_

            else:
                if _j == 0:
                    dec_state = enc_state
                    dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                          dtype=tf.float32)
                    dec_outputs = []
                    for step in range(len(p_inputs)):
                        if step > 0:
                            vs.reuse_variables()
                        dec_input_, dec_state = pure_dec_cell(
                            dec_input_, dec_state)
                        dec_input_ = tf.matmul(dec_input_,
                                               dec_weight_) + dec_bias_
                        dec_outputs.append(dec_input_)

                elif _j == 1:
                    dec_state = enc_state
                    dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                          dtype=tf.float32)
                    dec_outputs = []
                    for step in range(len(p_inputs)):
                        if step > 0:
                            vs.reuse_variables()
                        dec_input_, dec_state = residual_dec_cell(
                            dec_input_, dec_state)
                        dec_input_ = tf.matmul(dec_input_,
                                               dec_weight_) + dec_bias_
                        dec_outputs.append(dec_input_)
                else:
                    dec_state = enc_state
                    dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                          dtype=tf.float32)
                    dec_outputs = []
                    for step in range(len(p_inputs)):
                        if step > 0:
                            vs.reuse_variables()
                        dec_input_, dec_state = dec_cell(dec_input_, dec_state)
                        dec_input_ = tf.matmul(dec_input_,
                                               dec_weight_) + dec_bias_
                        dec_outputs.append(dec_input_)

                if reverse:
                    dec_outputs = dec_outputs[::-1]

                # stack the per-step outputs into [batch_num, seq_len, elem_num]
                # so the reconstruction loss below broadcasts against p_input
                dec_outputs = tf.transpose(tf.stack(dec_outputs), [1, 0, 2])

        loss = tf.reduce_mean(tf.square(p_input - dec_outputs))
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
    return g, p_input, dec_outputs, loss, optimizer, saver
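When decode_without_input is False, the decoder above feeds its own projected output back in as the next step's input. A condensed sketch of that feedback loop in isolation, assuming TF 1.x variable scoping (feedback_gru_decoder and its arguments are illustrative names):

import tensorflow as tf

def feedback_gru_decoder(enc_state, batch_size, seq_len, hidden_num, elem_num):
    """Decode seq_len steps, feeding each projected output back as the next input."""
    cell = tf.nn.rnn_cell.GRUCell(hidden_num)
    dec_weight = tf.get_variable('dec_weight', [hidden_num, elem_num])
    dec_bias = tf.get_variable('dec_bias', [elem_num],
                               initializer=tf.constant_initializer(0.1))
    state = enc_state
    step_input = tf.zeros([batch_size, elem_num], dtype=tf.float32)
    outputs = []
    with tf.variable_scope('feedback_decoder') as vs:
        for step in range(seq_len):
            if step > 0:
                vs.reuse_variables()  # share the GRU weights across time steps
            cell_output, state = cell(step_input, state)
            step_input = tf.matmul(cell_output, dec_weight) + dec_bias
            outputs.append(step_input)
    return tf.transpose(tf.stack(outputs), [1, 0, 2])  # [batch, seq_len, elem_num]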
Beispiel #21
0
def model_fn(features,
             labels,
             mode,
             params,
             word_embeddings_np=None,
             char_embeddings_np=None):
    attention_fun = partial(BahdanauAttention, num_units=params.units) if params.attention == 'bahdanau' \
        else partial(LuongAttention, num_units=2 * params.units)

    dropout = params.dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0
    passage_count = params.passage_count if mode != tf.estimator.ModeKeys.TRAIN \
        else params.train_passage_count

    question_words_length = features['question_length']
    passage_words_length = features['passage_length']

    devices = get_devices()

    with tf.device('/cpu:0'):
        word_embeddings_placeholder = tf.placeholder(
            shape=[params.vocab_size, params.emb_size], dtype=tf.float32)
        char_embeddings_placeholder = tf.placeholder(
            shape=[params.char_vocab_size, params.char_emb_size],
            dtype=tf.float32)

        # word_embeddings = tf.create_partitioned_variables(shape=[params.vocab_size, params.emb_size],
        #                                                   slicing=[10, 1],
        #                                                   initializer=word_embeddings_placeholder,
        #                                                   trainable=False, name="word_embeddings")
        word_embeddings = tf.Variable(word_embeddings_placeholder,
                                      trainable=False,
                                      name="word_embeddings")
        char_embeddings = tf.Variable(char_embeddings_placeholder,
                                      trainable=False,
                                      name="char_embeddings")

        word_embeddings = tf.nn.dropout(word_embeddings,
                                        1.0 - dropout,
                                        noise_shape=[params.vocab_size, 1])
        char_embeddings = tf.nn.dropout(
            char_embeddings,
            1.0 - dropout,
            noise_shape=[params.char_vocab_size, 1])

    question_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                                features['question_words'])
    question_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                                features['question_chars'])

    passage_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                               features['passage_words'])
    passage_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                               features['passage_chars'])

    with tf.device(next(devices)):
        with tf.variable_scope('question_encoding'):
            question_enc = encoder(question_words_emb,
                                   question_words_length,
                                   question_chars_emb,
                                   features['question_char_length'],
                                   params,
                                   dropout=dropout)

    with tf.device(next(devices)):
        with tf.variable_scope('passage_encoding'):
            passage_enc = encoder(passage_words_emb,
                                  passage_words_length,
                                  passage_chars_emb,
                                  features['passage_char_length'],
                                  params,
                                  dropout=dropout)
        # question_enc = tf.Print(question_enc, [question_enc], summarize=1000)

        with tf.variable_scope('attention'):
            attention = attention_fun(
                memory=question_enc,
                memory_sequence_length=question_words_length)
            cell_fw = GatedAttentionWrapper(
                attention,
                DropoutWrapper(
                    GRUCell(params.units, name="attention_gru"),
                    # output_keep_prob=1.0 - dropout,
                    input_keep_prob=1.0 - dropout,
                    # state_keep_prob=1.0 - dropout,
                    variational_recurrent=True,
                    input_size=4 * params.units,
                    dtype=tf.float32),
                dropout=0)

            cell_bw = GatedAttentionWrapper(
                attention,
                DropoutWrapper(
                    GRUCell(params.units, name="attention_gru"),
                    # output_keep_prob=1.0 - dropout,
                    input_keep_prob=1.0 - dropout,
                    # state_keep_prob=1.0 - dropout
                    variational_recurrent=True,
                    input_size=4 * params.units,
                    dtype=tf.float32),
                dropout=0)

            passage_repr, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                passage_enc,
                passage_words_length,
                dtype=tf.float32)
            passage_repr = tf.concat(passage_repr, -1)

        with tf.variable_scope('pointer'):
            question_att = attention_fun(
                memory=question_enc,
                memory_sequence_length=question_words_length,
                name="question_align")

            pool_param = tf.get_variable('pool_param',
                                         shape=(question_att._num_units, ),
                                         initializer=tf.initializers.ones)
            pool_param = tf.reshape(
                tf.tile(pool_param, [tf.shape(question_enc)[0]]),
                (-1, question_att._num_units))

            question_alignments, _ = question_att(pool_param, None)
            question_pool = tf.reduce_sum(
                tf.expand_dims(question_alignments, -1) * question_enc, 1)

            logits1, logits2 = pointer_net(passage_repr,
                                           passage_words_length,
                                           question_pool,
                                           params,
                                           attention_fun=attention_fun,
                                           dropout=dropout)

        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        p1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        p2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'start': p1, 'end': p2}
            export_outputs = {
                'prediction': tf.estimator.export.PredictOutput(predictions)
            }

            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        with tf.variable_scope('passage_ranking'):
            W_g = Dense(params.units, activation=tf.tanh, use_bias=False)
            v_g = Dense(1, use_bias=False)

            memory_layer = Dense(params.units,
                                 name="memory_layer",
                                 use_bias=False,
                                 dtype=tf.float32)
            query_layer = Dense(params.units,
                                name="query_layer",
                                use_bias=False,
                                dtype=tf.float32)
            g = []

            for i in range(passage_count):
                passage_mask = tf.boolean_mask(
                    passage_repr, tf.equal(features['partitions'], i))
                passage_i = tf.split(passage_mask,
                                     features['partitions_len'][:, i])
                passage_i = [
                    pad_to_shape_2d(
                        p, (tf.Dimension(params.passage_max_len), p.shape[1]))
                    for p in passage_i
                ]
                passage_i = tf.stack(passage_i)

                passage_alignment, _ = ReusableBahdanauAttention(
                    params.units,
                    passage_i,
                    features['partitions_len'][:, i],
                    memory_layer=memory_layer,
                    query_layer=query_layer,
                    name="passage_align")(question_pool, None)

                passage_pool = tf.reduce_sum(
                    tf.expand_dims(passage_alignment, -1) * passage_i, 1)
                g_i = v_g(W_g(tf.concat([question_pool, passage_pool], -1)))

                # g_i = tf.Print(g_i, [passage_mask, passage_i], message='is_nan_{}'.format(i), summarize=1000)
                g.append(g_i)

            g = tf.concat(g, -1)

    answer_start, answer_end, passage_rank = labels

    loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits1, labels=tf.stop_gradient(answer_start))
    loss2 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits2, labels=tf.stop_gradient(answer_end))

    loss3 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=g, labels=tf.stop_gradient(passage_rank))

    # loss1 = tf.Print(loss1, [tf.argmax(answer_start, -1), tf.argmax(answer_end, -1),
    #                          tf.reduce_mean(loss1), tf.reduce_mean(loss2), tf.reduce_mean(loss3)], message="loss")

    loss = (params.r * tf.reduce_mean(loss1 + loss2) + (1 - params.r) * tf.reduce_mean(loss3)) \
        if params.r < 1 else tf.reduce_mean(loss1 + loss2)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=params.learning_rate, epsilon=1e-6)
        global_step = tf.train.get_or_create_global_step()

        grads = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.grad_clip)
        train_op = optimizer.apply_gradients(zip(capped_grads, variables),
                                             global_step=global_step)

        return EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            scaffold=tf.train.Scaffold(
                init_feed_dict={
                    word_embeddings_placeholder: word_embeddings_np,
                    char_embeddings_placeholder: char_embeddings_np
                }),
        )

    if mode == tf.estimator.ModeKeys.EVAL:
        table = lookup_ops.index_to_string_table_from_file(
            params.word_vocab_file, value_column_index=0, delimiter=" ")
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={
                                 'rouge-l':
                                 extraction_metric(p1, p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params, table),
                                 'f1':
                                 extraction_metric(p1,
                                                   p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params,
                                                   table,
                                                   metric='f1')
                             })
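The span prediction near the end of this model_fn takes the outer product of the start and end distributions and masks it with tf.matrix_band_part(outer, 0, 15), which keeps only entries with start <= end <= start + 15 before the two argmax calls. The same step in isolation, assuming TF 1.x (select_span and max_span are illustrative names):

import tensorflow as tf

def select_span(logits_start, logits_end, max_span=15):
    """Pick the most likely (start, end) span from per-token start/end logits."""
    p_start = tf.nn.softmax(logits_start)             # [batch, passage_len]
    p_end = tf.nn.softmax(logits_end)                 # [batch, passage_len]
    outer = tf.matmul(tf.expand_dims(p_start, axis=2),
                      tf.expand_dims(p_end, axis=1))  # [batch, len, len]
    outer = tf.matrix_band_part(outer, 0, max_span)   # start <= end <= start + max_span
    start = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
    end = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
    return start, end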
Beispiel #22
0
 def single_cell():
     return GRUCell(rnnHiddenSize)
Beispiel #23
0
    def __init__(self, num_emb, batch_size, emb_dim, hidden_dim,
                 sequence_length, l2_reg_lambda=0):
        self.filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
        self.num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160]

        self.vocab_size = num_emb
        self.batch_size = batch_size
        self.embedding_size = emb_dim
        self.hidden_dim = hidden_dim
        self.sequence_length = sequence_length
        self.r_params = []
        self.grad_clip = 5.0
        self.input_x = tf.placeholder(tf.int32, [None, self.sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, ], name="input_y")
        self.dis_learning_rate = tf.placeholder(tf.float32, name="lr")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="drop_rate")
        self.l2_loss = tf.constant(0.0)

        with tf.variable_scope('rewarder'):
            # Embedding layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.W = tf.Variable(
                    tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
                    name="W")
                self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x) # (batch_size, sequence_length, embedding_size)

            # Encode the text with GRU
            cell_enc = GRUCell(self.hidden_dim)
            encoder_output, _ = tf.nn.dynamic_rnn(cell_enc, self.embedded_chars, dtype=tf.float32) # batch_size, sequence_length, hidden_dim
            self.embedded_chars_expanded = tf.expand_dims(encoder_output, -1)

            # Construct convolution and maxpool layer
            pooled_outputs = []
            for filter_size, num_filter in zip(self.filter_sizes, self.num_filters):
                with tf.name_scope("conv-maxpool-%s" % filter_size):
                    # Convolution Layer
                    filter_shape = [filter_size, self.hidden_dim, 1, num_filter]
                    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                    b = tf.Variable(tf.constant(0.1, shape=[num_filter]), name="b")
                    conv = tf.nn.conv2d(self.embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    # Maxpooling over the outputs
                    pooled = tf.nn.max_pool(h, ksize=[1, self.sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name="pool")
                    pooled_outputs.append(pooled)

            # Combine all the pooled features
            num_filters_total = sum(self.num_filters)
            self.h_pool = tf.concat(pooled_outputs, 3)
            self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

            # Add highway
            with tf.name_scope("highway"):
                self.h_highway = highway(self.h_pool_flat, self.h_pool_flat.get_shape()[1], 1, 0)

            # Add dropout
            with tf.name_scope("dropout"):
                self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob)

            # Final scores
            with tf.name_scope("output"):
                W = tf.Variable(tf.truncated_normal([num_filters_total, 1], stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[1]), name="b")
                self.l2_loss += tf.nn.l2_loss(W)
                self.l2_loss += tf.nn.l2_loss(b)
                self.scores = tf.nn.sigmoid(tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")) # batch_size

            # Calculate least-square loss
            with tf.name_scope("loss"):
                self.labels = tf.reshape(self.input_y, [-1, 1])
                losses = tf.reduce_sum((self.scores - self.labels) * (self.scores - self.labels), 1)
                self.loss = tf.reduce_mean(losses) + l2_reg_lambda * self.l2_loss

        self.params = [param for param in tf.trainable_variables() if 'rewarder' in param.name]
        d_optimizer = tf.train.AdamOptimizer(self.dis_learning_rate)
        grads_and_vars = d_optimizer.compute_gradients(self.loss, self.params, aggregation_method=2)
        self.train_op = d_optimizer.apply_gradients(grads_and_vars)
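highway() is not defined in this snippet. A common single-layer highway-network formulation with the same call signature (input, size, num_layers, bias) is sketched below as an assumption about its role, not the original implementation:

import tensorflow as tf

def highway(x, size, num_layers=1, bias=0.0):
    """Highway layer: y = t * relu(W1 x) + (1 - t) * x, gate t = sigmoid(W2 x + bias)."""
    for layer in range(num_layers):
        with tf.variable_scope('highway_{}'.format(layer)):
            g = tf.nn.relu(tf.layers.dense(x, size, name='activation'))
            t = tf.sigmoid(tf.layers.dense(x, size, name='gate') + bias)
            x = t * g + (1.0 - t) * x
    return x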
Beispiel #24
0
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            is_train,
            vocab=None,
            content_pos=None,
            rhetoric_pos = None,
            embed=None,
            learning_rate=0.1,
            learning_rate_decay_factor=0.9995,
            max_gradient_norm=5.0,
            max_length=30,
            latent_size=128,
            use_lstm=False,
            num_classes=3,
            full_kl_step=80000,
            mem_slot_num=4,
            mem_size=128):
        
        self.ori_sents = tf.placeholder(tf.string, shape=(None, None))
        self.ori_sents_length = tf.placeholder(tf.int32, shape=(None))
        self.rep_sents = tf.placeholder(tf.string, shape=(None, None))
        self.rep_sents_length = tf.placeholder(tf.int32, shape=(None))
        self.labels = tf.placeholder(tf.float32, shape=(None, num_classes))
        self.use_prior = tf.placeholder(tf.bool)
        self.global_t = tf.placeholder(tf.int32)
        self.content_mask = tf.reduce_sum(tf.one_hot(content_pos, num_symbols, 1.0, 0.0), axis = 0)
        self.rhetoric_mask = tf.reduce_sum(tf.one_hot(rhetoric_pos, num_symbols, 1.0, 0.0), axis = 0)

        # topic memory, initialised to zeros with a dynamic batch dimension
        topic_memory = tf.zeros(
            name="topic_memory", dtype=tf.float32,
            shape=[tf.shape(self.ori_sents)[0], mem_slot_num, mem_size])

        w_topic_memory = tf.get_variable(name="w_topic_memory", dtype=tf.float32,
                                    initializer=tf.random_uniform([mem_size, mem_size], -0.1, 0.1))

        # build the vocab table (string to index)
        if is_train:
            self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        else:
            self.symbols = tf.Variable(np.array(['.']*num_symbols), name="symbols")
        self.symbol2index = HashTable(KeyValueTensorInitializer(self.symbols, 
            tf.Variable(np.array([i for i in range(num_symbols)], dtype=np.int32), False)), 
            default_value=UNK_ID, name="symbol2index")

        self.ori_sents_input = self.symbol2index.lookup(self.ori_sents)
        self.rep_sents_target = self.symbol2index.lookup(self.rep_sents)
        batch_size, decoder_len = tf.shape(self.rep_sents)[0], tf.shape(self.rep_sents)[1]
        self.rep_sents_input = tf.concat([tf.ones([batch_size, 1], dtype=tf.int32)*GO_ID,
            tf.split(self.rep_sents_target, [decoder_len-1, 1], 1)[0]], 1)
        self.decoder_mask = tf.reshape(tf.cumsum(tf.one_hot(self.rep_sents_length-1,
            decoder_len), reverse=True, axis=1), [-1, decoder_len])        
        
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

        self.pattern_embed = tf.get_variable('pattern_embed', [num_classes, num_embed_units], tf.float32)
        
        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.ori_sents_input)
        self.decoder_input = tf.nn.embedding_lookup(self.embed, self.rep_sents_input)

        if use_lstm:
            cell_fw = LSTMCell(num_units)
            cell_bw = LSTMCell(num_units)
            cell_dec = LSTMCell(2*num_units)
        else:
            cell_fw = GRUCell(num_units)
            cell_bw = GRUCell(num_units)
            cell_dec = GRUCell(2*num_units)

        # origin sentence encoder
        with variable_scope.variable_scope("encoder"):
            encoder_output, encoder_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, self.encoder_input, 
                self.ori_sents_length, dtype=tf.float32)
            post_sum_state = tf.concat(encoder_state, 1)
            encoder_output = tf.concat(encoder_output, 2)

        # response sentence encoder
        with variable_scope.variable_scope("encoder", reuse = True):
            decoder_state, decoder_last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, self.decoder_input, 
                self.rep_sents_length, dtype=tf.float32)
            response_sum_state = tf.concat(decoder_last_state, 1)

        # recognition network
        with variable_scope.variable_scope("recog_net"):
            recog_input = tf.concat([post_sum_state, response_sum_state], 1)
            recog_mulogvar = tf.contrib.layers.fully_connected(recog_input, latent_size * 2, activation_fn=None, scope="muvar")
            recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=1)

        # prior network
        with variable_scope.variable_scope("prior_net"):
            prior_fc1 = tf.contrib.layers.fully_connected(post_sum_state, latent_size * 2, activation_fn=tf.tanh, scope="fc1")
            prior_mulogvar = tf.contrib.layers.fully_connected(prior_fc1, latent_size * 2, activation_fn=None, scope="muvar")
            prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=1)

        latent_sample = tf.cond(self.use_prior,
                                lambda: sample_gaussian(prior_mu, prior_logvar),
                                lambda: sample_gaussian(recog_mu, recog_logvar))


        # classifier
        with variable_scope.variable_scope("classifier"):
            classifier_input = latent_sample
            pattern_fc1 = tf.contrib.layers.fully_connected(classifier_input, latent_size, activation_fn=tf.tanh, scope="pattern_fc1")
            self.pattern_logits = tf.contrib.layers.fully_connected(pattern_fc1, num_classes, activation_fn=None, scope="pattern_logits")

        self.label_embedding = tf.matmul(self.labels, self.pattern_embed)

        output_fn, my_sequence_loss = output_projection_layer(2*num_units, num_symbols, latent_size, num_embed_units, self.content_mask, self.rhetoric_mask)

        attention_keys, attention_values, attention_score_fn, attention_construct_fn = my_attention_decoder_fn.prepare_attention(encoder_output, 'luong', 2*num_units)

        with variable_scope.variable_scope("dec_start"):
            temp_start = tf.concat([post_sum_state, self.label_embedding, latent_sample], 1)
            dec_fc1 = tf.contrib.layers.fully_connected(temp_start, 2*num_units, activation_fn=tf.tanh, scope="dec_start_fc1")
            dec_fc2 = tf.contrib.layers.fully_connected(dec_fc1, 2*num_units, activation_fn=None, scope="dec_start_fc2")

        if is_train:
            # rnn decoder
            topic_memory = self.update_memory(topic_memory, encoder_output)
            extra_info = tf.concat([self.label_embedding, latent_sample, topic_memory], 1)

            decoder_fn_train = my_attention_decoder_fn.attention_decoder_fn_train(dec_fc2, 
                attention_keys, attention_values, attention_score_fn, attention_construct_fn, extra_info)
            self.decoder_output, _, _ = my_seq2seq.dynamic_rnn_decoder(cell_dec, decoder_fn_train, 
                self.decoder_input, self.rep_sents_length, scope = "decoder")

            # calculate the loss
            self.decoder_loss = my_loss.sequence_loss(logits = self.decoder_output, 
                targets = self.rep_sents_target, weights = self.decoder_mask,
                extra_information = latent_sample, label_embedding = self.label_embedding, softmax_loss_function = my_sequence_loss)
            temp_klloss = tf.reduce_mean(gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar))
            self.kl_weight = tf.minimum(tf.to_float(self.global_t)/full_kl_step, 1.0)
            self.klloss = self.kl_weight * temp_klloss
            temp_labels = tf.argmax(self.labels, 1)
            self.classifierloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.pattern_logits, labels=temp_labels))
            self.loss = self.decoder_loss + self.klloss + self.classifierloss  # need to anneal the kl_weight
            
            # building graph finished and get all parameters
            self.params = tf.trainable_variables()
        
            # initialize the training process
            self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)
            
            # calculate the gradient of parameters
            opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)
            gradients = tf.gradients(self.loss, self.params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, 
                    max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients, self.params), 
                    global_step=self.global_step)

        else:
            # rnn decoder
            topic_memory = self.update_memory(topic_memory, encoder_output)
            extra_info = tf.concat([self.label_embedding, latent_sample, topic_memory], 1)
            decoder_fn_inference = my_attention_decoder_fn.attention_decoder_fn_inference(output_fn, 
                dec_fc2, attention_keys, attention_values, attention_score_fn, 
                attention_construct_fn, self.embed, GO_ID, EOS_ID, max_length, num_symbols, extra_info)
            self.decoder_distribution, _, _ = my_seq2seq.dynamic_rnn_decoder(cell_dec, decoder_fn_inference, scope="decoder")
            self.generation_index = tf.argmax(tf.split(self.decoder_distribution,
                [2, num_symbols-2], 2)[1], 2) + 2 # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols, self.generation_index)
            
            self.params = tf.trainable_variables()

        self.saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V2, 
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
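sample_gaussian and gaussian_kld are referenced above but defined elsewhere in the project. A common pair of implementations, shown here as an assumption rather than the original code: the reparameterization trick for drawing the latent sample, and the KL divergence between two diagonal Gaussians for the annealed KL term.

import tensorflow as tf

def sample_gaussian(mu, logvar):
    """Reparameterization trick: mu + sigma * eps with eps ~ N(0, I)."""
    eps = tf.random_normal(tf.shape(logvar))
    return mu + tf.exp(0.5 * logvar) * eps

def gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar):
    """Per-example KL( N(recog_mu, recog_var) || N(prior_mu, prior_var) )."""
    return -0.5 * tf.reduce_sum(
        1.0 + (recog_logvar - prior_logvar)
        - tf.square(prior_mu - recog_mu) / tf.exp(prior_logvar)
        - tf.exp(recog_logvar) / tf.exp(prior_logvar),
        axis=1)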
Beispiel #25
0
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 vocab=None,
                 embed=None,
                 name_scope=None,
                 learning_rate=0.001,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5,
                 num_samples=512,
                 max_length=30):

        self.posts = tf.placeholder(tf.string, shape=[None,
                                                      None])  # batch * len
        self.posts_length = tf.placeholder(tf.int32, shape=[None])  # batch
        self.responses = tf.placeholder(tf.string, shape=[None,
                                                          None])  # batch*len
        self.responses_length = tf.placeholder(tf.int32, shape=[None])  # batch
        self.weight = tf.placeholder(tf.float32, shape=[None])  # batch

        # build the vocab table (string to index)
        self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        self.symbol2index = HashTable(KeyValueTensorInitializer(
            self.symbols,
            tf.Variable(
                np.array([i for i in range(num_symbols)], dtype=np.int32),
                False)),
                                      default_value=UNK_ID,
                                      name="symbol2index")

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.posts_input = self.symbol2index.lookup(
            self.posts)  # batch * utter_len
        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.posts_input)  # batch * utter_len * embed_unit

        self.responses_target = self.symbol2index.lookup(
            self.responses)  # batch, len
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
            self.responses)[1]
        self.responses_input = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)  # batch, len
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])  # batch, len

        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        # Construct multi-layer GRU cells for encoder and decoder
        cell_enc = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])
        cell_dec = MultiRNNCell(
            [GRUCell(num_units) for _ in range(num_layers)])

        # Encode the post sequence
        encoder_output, encoder_state = tf.nn.dynamic_rnn(cell_enc,
                                                          self.encoder_input,
                                                          self.posts_length,
                                                          dtype=tf.float32,
                                                          scope="encoder")

        output_fn, sampled_sequence_loss = output_projection_layer(
            num_units, num_symbols, num_samples)
        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
            = my_attention_decoder_fn.prepare_attention(encoder_output, 'bahdanau', num_units)

        # Decode the response sequence (Training)
        with variable_scope.variable_scope('decoder'):
            decoder_fn_train = my_attention_decoder_fn.attention_decoder_fn_train(
                encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn)
            self.decoder_output, _, _ = my_seq2seq.dynamic_rnn_decoder(
                cell_dec,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope='decoder_rnn')
            self.decoder_loss = my_loss.sequence_loss(
                self.decoder_output,
                self.responses_target,
                self.decoder_mask,
                softmax_loss_function=sampled_sequence_loss)
            self.weighted_decoder_loss = self.decoder_loss * self.weight

        attention_keys_infer, attention_values_infer, attention_score_fn_infer, attention_construct_fn_infer \
            = my_attention_decoder_fn.prepare_attention(encoder_output, 'bahdanau', num_units, reuse = True)

        # Decode the response sequence (Inference)
        with variable_scope.variable_scope('decoder', reuse=True):
            decoder_fn_inference = my_attention_decoder_fn.attention_decoder_fn_inference(
                output_fn, encoder_state, attention_keys_infer,
                attention_values_infer, attention_score_fn_infer,
                attention_construct_fn_infer, self.embed, GO_ID, EOS_ID,
                max_length, num_symbols)
            self.decoder_distribution, _, _ = my_seq2seq.dynamic_rnn_decoder(
                cell_dec, decoder_fn_inference, scope='decoder_rnn')
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, num_symbols - 2],
                         2)[1], 2) + 2  # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols,
                                                     self.generation_index)

        self.params = [
            k for k in tf.trainable_variables() if name_scope in k.name
        ]

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.adv_global_step = tf.Variable(0, trainable=False)

        # calculate the gradient of parameters
        self.cost = tf.reduce_mean(self.weighted_decoder_loss)
        self.unweighted_cost = tf.reduce_mean(self.decoder_loss)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.cost, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        all_variables = [
            k for k in tf.global_variables() if name_scope in k.name
        ]
        self.saver = tf.train.Saver(all_variables,
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=5,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
        self.adv_saver = tf.train.Saver(all_variables,
                                        write_version=tf.train.SaverDef.V2,
                                        max_to_keep=5,
                                        pad_step_number=True,
                                        keep_checkpoint_every_n_hours=1.0)
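output_projection_layer(num_units, num_symbols, num_samples) is likewise external to this snippet. A minimal sketch of what such a factory might return, assuming a sampled-softmax loss for training and a full softmax at inference (the variable names and the exact loss signature expected by my_loss.sequence_loss are assumptions):

import tensorflow as tf

def output_projection_layer(num_units, num_symbols, num_samples):
    w = tf.get_variable('proj_w', [num_symbols, num_units])
    b = tf.get_variable('proj_b', [num_symbols])

    def output_fn(cell_outputs):
        """Full vocabulary distribution, used by the inference decoder."""
        return tf.nn.softmax(tf.matmul(cell_outputs, w, transpose_b=True) + b)

    def sampled_sequence_loss(cell_outputs, targets):
        """Sampled softmax over num_samples negatives, used during training."""
        return tf.nn.sampled_softmax_loss(
            weights=w,
            biases=b,
            labels=tf.reshape(tf.cast(targets, tf.int64), [-1, 1]),
            inputs=tf.reshape(cell_outputs, [-1, num_units]),
            num_sampled=num_samples,
            num_classes=num_symbols)

    return output_fn, sampled_sequence_loss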
Beispiel #26
0
 def cell_fn():
     return GRUCell(RNN_nodes)