Beispiel #1
0
 def encode(self, seq, reuse=None):
     """Embed ``seq`` and encode it with a bidirectional LSTM.

     Args:
         seq: Token ids handed to ``layers.embed_sequence``.
         reuse: Forwarded to the ``'embed'`` scope and to both LSTM cells.

     Returns:
         A tuple ``(encoder_states, encoder_final_state_vec)``.
         ``encoder_states`` is an ``LSTMStateTuple`` whose ``c`` and ``h``
         are the forward and backward final states concatenated on axis 1;
         ``encoder_final_state_vec`` is that ``c`` and ``h`` concatenated on
         axis 1 and L2-normalised along the same axis.
     """
     embed_kwargs = {
         'vocab_size': self.vocab_size,
         'embed_dim': self.embed_dim,
         'scope': 'embed',
         'reuse': reuse,
     }
     if self.embeddings_mat is not None:
         # A supplied embedding matrix initialises the table and is frozen.
         embed_kwargs['initializer'] = tf.constant_initializer(
             self.embeddings_mat, dtype=tf.float32)
         embed_kwargs['trainable'] = False
     input_embed = layers.embed_sequence(seq, **embed_kwargs)

     # Each direction gets half of num_units. Floor division keeps the value
     # an int under Python 3, where ``/`` would hand LSTMCell a float.
     units_per_direction = self.num_units // 2
     forward_cell = tf.contrib.rnn.LSTMCell(num_units=units_per_direction,
                                            reuse=reuse)
     backward_cell = tf.contrib.rnn.LSTMCell(num_units=units_per_direction,
                                             reuse=reuse)

     _, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
         forward_cell, backward_cell, input_embed, dtype=tf.float32)

     # Index 0 of each per-direction state is c, index 1 is h.
     encoder_states = tf.nn.rnn_cell.LSTMStateTuple(
         c=tf.concat((fw_state[0], bw_state[0]), 1),
         h=tf.concat((fw_state[1], bw_state[1]), 1))
     encoder_final_state_vec = tf.nn.l2_normalize(
         tf.concat(encoder_states, 1), 1)
     return encoder_states, encoder_final_state_vec
Beispiel #2
0
    def make_graph(self,mode, features, labels, params):
        """Build an attention seq2seq graph and wrap it in an EstimatorSpec.

        Args:
            mode: Passed through to ``tf.estimator.EstimatorSpec``.
            features: Mapping providing the ``'input'`` and ``'output'``
                token-id tensors.
            labels: Not used by this graph.
            params: Object exposing ``embed_dim``, ``num_units``,
                ``optimizer`` and ``learning_rate``.

        Returns:
            A ``tf.estimator.EstimatorSpec`` whose predictions are the greedy
            decoder's sample ids and whose loss/train_op come from the
            teacher-forced decoder.
        """
        embed_dim = params.embed_dim
        num_units = params.num_units

        source_ids, output = features['input'], features['output']
        batch_size = tf.shape(source_ids)[0]
        # Id 0 is fed as the first decoder input of every sequence.
        start_tokens = tf.zeros([batch_size], dtype=tf.int64)
        train_output = tf.concat([tf.expand_dims(start_tokens, 1), output], 1)
        # Sequence lengths count the positions whose id differs from 1.
        input_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(source_ids, 1)), 1)
        output_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(train_output, 1)), 1)

        # Encoder and decoder inputs share one embedding table ('embed').
        input_embed = layers.embed_sequence(
            source_ids, vocab_size=self.vocab_size, embed_dim=embed_dim,
            scope='embed')
        output_embed = layers.embed_sequence(
            train_output, vocab_size=self.vocab_size, embed_dim=embed_dim,
            scope='embed', reuse=True)
        with tf.variable_scope('embed', reuse=True):
            embeddings = tf.get_variable('embeddings')

        cell = tf.contrib.rnn.LSTMCell(num_units=num_units)
        if self.FLAGS.use_residual_lstm:
            cell = tf.contrib.rnn.ResidualWrapper(cell)
        encoder_outputs, _ = tf.nn.dynamic_rnn(
            cell, input_embed, dtype=tf.float32)

        def decode(helper, scope, reuse=None):
            """Run an attention decoder driven by ``helper`` inside ``scope``."""
            # Decoder is partially based on @ilblackdragon//tf_example/seq2seq.py
            with tf.variable_scope(scope, reuse=reuse):
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    num_units=num_units, memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
                cell = tf.contrib.rnn.LSTMCell(num_units=num_units)
                # Floor division: the layer size must stay an int under
                # Python 3, where ``/`` would produce a float.
                attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell, attention_mechanism,
                    attention_layer_size=num_units // 2)
                out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                    attn_cell, self.vocab_size, reuse=reuse)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=out_cell, helper=helper,
                    initial_state=out_cell.zero_state(
                        dtype=tf.float32, batch_size=batch_size))
                outputs = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder, output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.FLAGS.output_max_length)
                return outputs[0]

        train_helper = tf.contrib.seq2seq.TrainingHelper(
            output_embed, output_lengths)
        pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings, start_tokens=tf.to_int32(start_tokens), end_token=1)
        # Both decoders live in the same scope so they share variables.
        train_outputs = decode(train_helper, 'decode')
        pred_outputs = decode(pred_helper, 'decode', reuse=True)

        tf.identity(train_outputs.sample_id[0], name='train_pred')
        # A target position is weighted 1.0 when the matching decoder input
        # (train_output without its last column) differs from id 1.
        weights = tf.to_float(tf.not_equal(train_output[:, :-1], 1))
        loss = tf.contrib.seq2seq.sequence_loss(
            train_outputs.rnn_output, output, weights=weights)
        train_op = layers.optimize_loss(
            loss, tf.train.get_global_step(),
            optimizer=params.optimizer,
            learning_rate=params.learning_rate,
            summaries=['loss', 'learning_rate'])

        tf.identity(pred_outputs.sample_id[0], name='predict')
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=pred_outputs.sample_id, loss=loss,
            train_op=train_op)
def seq2seq_model(inputs, targets, keep_prob, batch_size, seq_length,
                  answers_num_words, questions_num_words,
                  encoder_embedding_size, decoder_embedding_size, rnn_size,
                  num_layers, questionswords2int_dict):
    """Wire the encoder and decoder together into one seq2seq graph.

    Args:
        inputs: Token ids fed to the encoder embedding.
        targets: Target ids, passed through ``preprocess_targets``.
        keep_prob: Forwarded to ``encoder_rnn`` and ``decoder_rnn``.
        batch_size: Forwarded to ``preprocess_targets`` and ``decoder_rnn``.
        seq_length: Forwarded to ``encoder_rnn`` and ``decoder_rnn``.
        answers_num_words: The encoder embedding table has
            ``answers_num_words + 1`` rows.
        questions_num_words: The decoder embedding table has
            ``questions_num_words + 1`` rows; also handed to ``decoder_rnn``.
        encoder_embedding_size: Width of the encoder embeddings.
        decoder_embedding_size: Width of the decoder embeddings.
        rnn_size: Forwarded to ``encoder_rnn`` and ``decoder_rnn``.
        num_layers: Forwarded to ``encoder_rnn`` and ``decoder_rnn``.
        questionswords2int_dict: Forwarded to ``preprocess_targets`` and
            ``decoder_rnn``.

    Returns:
        The ``(train_pred, test_pred)`` pair produced by ``decoder_rnn``.
    """
    encoder_embedded_input = embed_sequence(
        ids=inputs,
        vocab_size=answers_num_words + 1,
        embed_dim=encoder_embedding_size,
        initializer=tf.random_uniform_initializer(minval=0, maxval=1))
    encoder_state = encoder_rnn(encoder_embedded_input, rnn_size, num_layers,
                                keep_prob, seq_length)

    preprocessed_targets = preprocess_targets(targets, questionswords2int_dict,
                                              batch_size)
    decoder_embeddings_matrix = tf.Variable(
        tf.random_uniform(
            shape=[questions_num_words + 1, decoder_embedding_size],
            minval=0,
            maxval=1))
    decoder_embedded_input = tf.nn.embedding_lookup(
        params=decoder_embeddings_matrix, ids=preprocessed_targets)

    # The decoder is sized with questions_num_words, the same vocabulary
    # that sizes its embedding table above. The previous ``num_words`` was
    # not defined anywhere in this function.
    train_pred, test_pred = decoder_rnn(decoder_embedded_input,
                                        decoder_embeddings_matrix,
                                        encoder_state, questions_num_words,
                                        seq_length, rnn_size, num_layers,
                                        questionswords2int_dict, keep_prob,
                                        batch_size)
    return train_pred, test_pred
Beispiel #4
0
def get_encoder_layer(input_data, rnn_size, num_layers, source_sequence_length,
                      source_vocab_size, encoding_embedding_size):
    """Build the encoder layer: embedding lookup plus stacked LSTM cells.

    Args:
        input_data: Input tensor.
        rnn_size: Number of hidden units in each RNN cell.
        num_layers: Number of stacked RNN cells.
        source_sequence_length: Sequence lengths of the source data.
        source_vocab_size: Vocabulary size of the source data.
        encoding_embedding_size: Size of the embedding.

    Returns:
        The ``(encoder_output, encoder_state)`` pair from
        ``tf.nn.dynamic_rnn``.
    """
    # Encoder embedding
    embedded_source = layers.embed_sequence(
        input_data,
        vocab_size=source_vocab_size,
        embed_dim=encoding_embedding_size)

    # One LSTM cell per layer, each given the truncated-normal initializer.
    layer_cells = []
    for _ in range(num_layers):
        layer_cells.append(
            rnn.LSTMCell(rnn_size,
                         initializer=tf.truncated_normal_initializer))
    stacked_cell = rnn.MultiRNNCell(layer_cells)

    rnn_output, rnn_state = tf.nn.dynamic_rnn(
        stacked_cell,
        embedded_source,
        sequence_length=source_sequence_length,
        dtype=tf.float32)
    return rnn_output, rnn_state
Beispiel #5
0
def seq2seq(inputs, targets, batch_size, questionword2int,
            encoder_embedded_size, decoder_embedding_size, questions_num_words,
            answer_num_word, rnn_size, keep_prob, num_of_layers,
            sequence_length):
    """Assemble the encoder/decoder graph and return both prediction heads.

    Args:
        inputs: Token ids fed to the encoder embedding.
        targets: Target ids, passed through ``rnn_training_data``.
        batch_size: Forwarded to ``rnn_training_data`` and ``decoder_rnn``.
        questionword2int: Forwarded to ``rnn_training_data`` and
            ``decoder_rnn``.
        encoder_embedded_size: Width of the encoder embeddings.
        decoder_embedding_size: Width of the decoder embeddings.
        questions_num_words: The decoder embedding table has
            ``questions_num_words + 1`` rows; also handed to ``decoder_rnn``.
        answer_num_word: Vocabulary size of the encoder embedding table.
        rnn_size: Forwarded to ``rnn_encoder`` and ``decoder_rnn``.
        keep_prob: Forwarded to ``rnn_encoder`` and ``decoder_rnn``.
        num_of_layers: Forwarded to ``rnn_encoder`` and ``decoder_rnn``.
        sequence_length: Forwarded to ``rnn_encoder`` and ``decoder_rnn``.

    Returns:
        The ``(training_prediction, test_predictions)`` pair from
        ``decoder_rnn``.
    """
    # Keyword arguments pin each value to the right slot: embed_sequence
    # takes (ids, vocab_size, embed_dim), and the earlier positional call
    # passed the embedding size as the vocabulary size and vice versa.
    # NOTE(review): the decoder table below adds 1 to its vocabulary size;
    # confirm whether the encoder vocabulary needs the same offset.
    encoder_embedded_input = layers.embed_sequence(
        inputs,
        vocab_size=answer_num_word,
        embed_dim=encoder_embedded_size,
        initializer=tf.random_uniform_initializer(0, 1))
    encoder_state = rnn_encoder(encoder_embedded_input, rnn_size, keep_prob,
                                num_of_layers, sequence_length)
    preprocessing_targets = rnn_training_data(batch_size, targets,
                                              questionword2int)
    decoder_embeddings_matrix = tf.Variable(
        tf.random_uniform([questions_num_words + 1, decoder_embedding_size], 0,
                          1))
    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix,
                                                    preprocessing_targets)

    training_prediction, test_predictions = decoder_rnn(
        rnn_size, keep_prob, num_of_layers, questions_num_words, encoder_state,
        questionword2int, batch_size, decoder_embedded_input,
        decoder_embeddings_matrix, sequence_length)

    return training_prediction, test_predictions
    def __attention_loss_branch(self, rnn_features):
        """Build the attention decoders and return the training sequence loss.

        Args:
            rnn_features: Features handed unchanged to ``self.__att_decode``.

        Returns:
            The ``sequence_loss`` of the teacher-forced decoder's
            ``rnn_output`` against ``self.att_target_output``.
        """
        # Embedded decoder inputs for teacher forcing.
        teacher_inputs = layers.embed_sequence(
            self.att_train_output,
            vocab_size=self.vocab_att_size,
            embed_dim=self.att_embed_dim,
            scope='embed')

        # Separate embedding variable used by the greedy helper.
        initial_table = tf.truncated_normal(
            shape=[self.vocab_att_size, self.att_embed_dim], stddev=0.1)
        greedy_table = tf.Variable(initial_table, name='decoder_embedding')
        go_ids = tf.zeros([self.batch_size], dtype=tf.int64)

        teacher_helper = tf.contrib.seq2seq.TrainingHelper(
            teacher_inputs, self.att_train_length)
        greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            greedy_table, start_tokens=tf.to_int32(go_ids), end_token=1)

        # Both decoders are built in the 'decode' scope; the second call
        # reuses the variables created by the first.
        teacher_result = self.__att_decode(teacher_helper, rnn_features,
                                           'decode')
        greedy_result = self.__att_decode(greedy_helper, rnn_features,
                                          'decode', reuse=True)

        # Every row uses the first sequence's length: all steps except the
        # last one get weight 1.0.
        row_lengths = self.batch_size * [self.att_train_length[0] - 1]
        step_weights = tf.cast(
            tf.sequence_mask(row_lengths, self.att_train_length[0]),
            tf.float32)

        return tf.contrib.seq2seq.sequence_loss(
            teacher_result[0].rnn_output,
            self.att_target_output,
            weights=step_weights)
Beispiel #7
0
def get_encoder_layer(input_data, rnn_size, num_layers, source_seq_len,
                      source_vocab_size, embedding_size):
    """Build the encoder layer.

    :param input_data: ids passed to ``layers.embed_sequence``
    :param rnn_size: number of units in each LSTM cell
    :param num_layers: number of LSTM cells stacked in the MultiRNNCell
    :param source_seq_len: sequence lengths handed to ``tf.nn.dynamic_rnn``
    :param source_vocab_size: vocabulary size of the embedding table
    :param embedding_size: width of the embedding vectors
    :return: the ``(encoder_out, encoder_state)`` pair from
        ``tf.nn.dynamic_rnn``
    """
    embedded_source = layers.embed_sequence(input_data,
                                            vocab_size=source_vocab_size,
                                            embed_dim=embedding_size)

    # Each layer gets its own LSTM cell and its own uniform initializer
    # instance, all configured with the same range and seed.
    layer_cells = []
    for _ in range(num_layers):
        weight_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
        layer_cells.append(rnn.LSTMCell(rnn_size, initializer=weight_init))
    stacked_cell = rnn.MultiRNNCell(layer_cells)

    rnn_out, rnn_state = tf.nn.dynamic_rnn(stacked_cell,
                                           embedded_source,
                                           sequence_length=source_seq_len,
                                           dtype=tf.float32)
    return rnn_out, rnn_state
Beispiel #8
0
def build_network(is_training):
    """Build the encoder, both decoders and the training loss.

    Reads ``image``, ``train_output``, ``train_length``, ``target_output``
    and ``cfg`` from the enclosing module.

    Args:
        is_training: Forwarded to ``encoder_net``.

    Returns:
        ``(loss, train_decode_result, pred_decode_result)``: the mean
        sequence loss, the teacher-forced logits without their last time
        step, and the greedy decoder's logits.
    """
    encoded_features = encoder_net(image, 'encode_features', is_training)

    # vocab_size is the number of distinct token classes (not the number of
    # tokens); embed_dim is the width of the embedding matrix. Using
    # VOCAB_SIZE for both makes the embedding resemble a one-hot encoding.
    teacher_inputs = layers.embed_sequence(train_output,
                                           vocab_size=cfg.VOCAB_SIZE,
                                           embed_dim=cfg.VOCAB_SIZE,
                                           scope='embed')
    # Embedding table for the greedy helper, one column per class.
    greedy_table = tf.Variable(
        tf.truncated_normal(shape=[cfg.VOCAB_SIZE, cfg.VOCAB_SIZE],
                            stddev=0.1),
        name='decoder_embedding')

    go_ids = tf.zeros([cfg.BATCH_SIZE], dtype=tf.int64)

    teacher_helper = tf.contrib.seq2seq.TrainingHelper(teacher_inputs,
                                                       train_length)

    # Inference-time helper: takes the argmax of the output logits to get an
    # id, then looks that id up in the embedding table to form the next
    # step's input.
    # start_tokens: int32 vector shaped [batch_size], the start tokens.
    # end_token: int32 scalar, the token that marks end of decoding.
    # Here 0 and 1 are the GO and EOS ids.
    greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        greedy_table, start_tokens=tf.to_int32(go_ids), end_token=1)

    teacher_result = decode(teacher_helper, encoded_features, 'decode')
    greedy_result = decode(greedy_helper, encoded_features, 'decode',
                           reuse=True)

    train_decode_result = teacher_result[0].rnn_output[:, :-1, :]
    pred_decode_result = greedy_result[0].rnn_output

    # Every row uses the first sequence's length: all steps except the last
    # one get weight 1.0.
    row_lengths = cfg.BATCH_SIZE * [train_length[0] - 1]
    step_weights = tf.cast(tf.sequence_mask(row_lengths, train_length[0]),
                           tf.float32)
    att_loss = tf.contrib.seq2seq.sequence_loss(teacher_result[0].rnn_output,
                                                target_output,
                                                weights=step_weights)
    loss = tf.reduce_mean(att_loss)

    return loss, train_decode_result, pred_decode_result
Beispiel #9
0
def encoding_layer(inputs, encode_token, em_size, num_layers, num_units,
                   drop_val):
    """Embed ``inputs`` and encode them with a stacked bidirectional RNN.

    Args:
        inputs: Token ids passed to ``layers.embed_sequence``.
        encode_token: Vocabulary size of the embedding table.
        em_size: Width of the embedding vectors.
        num_layers: Number of cells stacked in each direction.
        num_units: Passed to ``get_a_lstm`` for every cell.
        drop_val: Not used by this function.

    Returns:
        ``(encode_output, encode_state)``: the forward and backward outputs,
        and the forward and backward final states, each concatenated on
        axis 2.
    """
    # Maps the sequence of symbols to a sequence of embeddings through a
    # learned table.
    embedded_inputs = layers.embed_sequence(inputs,
                                            vocab_size=encode_token,
                                            embed_dim=em_size)

    def build_stack():
        """Return a fresh MultiRNNCell of ``num_layers`` cells."""
        return rnn.MultiRNNCell(
            [get_a_lstm(num_units) for _ in range(num_layers)])

    forward_stack = build_stack()
    backward_stack = build_stack()

    rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
        forward_stack, backward_stack, embedded_inputs, dtype=tf.float32)
    (output_fw, output_bw), (state_fw, state_bw) = rnn_outputs, rnn_states

    encode_output = tf.concat([output_fw, output_bw], 2)
    # NOTE(review): the states are concatenated on axis 2 just like the
    # outputs; whether that is the feature axis depends on the state
    # structure produced by get_a_lstm -- confirm.
    encode_state = tf.concat([state_fw, state_bw], 2)
    return encode_output, encode_state
def TestModel(input1):
    """Build the greedy-decoding (inference) graph for ``input1``.

    Reads ``vocab_size``, ``embed_dim``, ``num_units``, ``dropout``,
    ``uni_directional``, ``decode_method`` and ``output_max_length`` from the
    enclosing module.

    Args:
        input1: Token ids; positions equal to 1 are excluded from the
            computed input lengths.

    Returns:
        The first element of ``dynamic_decode``'s result for the greedy
        helper. The decoder is built with ``reuse=True`` in the ``'decode'``
        scope.
    """
    batch_size = tf.shape(input1)[0]
    start_tokens = tf.zeros([batch_size], dtype=tf.int64)
    input_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(input1, 1)), 1)
    input_embed = layers.embed_sequence(
        input1, vocab_size=vocab_size, embed_dim=embed_dim, scope='embed')
    with tf.variable_scope('embed', reuse=True):
        embeddings = tf.get_variable('embeddings')

    if dropout == 1:
        # Second positional argument is the input keep probability; 1 means
        # nothing is dropped.
        cell = tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.BasicLSTMCell(num_units), 1)
    else:
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_units)

    # This if/else now sits at function level; the previous indentation
    # matched no enclosing block and prevented the module from compiling.
    if uni_directional == 1:
        encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
            cell, input_embed, dtype=tf.float32)
        num_units1 = 512
    else:
        # NOTE(review): time_major=True is passed although input_lengths is
        # reduced over axis 1 of input1 -- confirm the intended layout.
        ((encoder_fw_outputs, encoder_bw_outputs),
         (encoder_fw_final_state, encoder_bw_final_state)) = (
             tf.nn.bidirectional_dynamic_rnn(cell_fw=cell,
                                             cell_bw=cell,
                                             inputs=input_embed,
                                             dtype=tf.float32,
                                             time_major=True))
        encoder_outputs = tf.concat(
            (encoder_fw_outputs, encoder_bw_outputs), 2)
        encoder_final_state_c = tf.concat(
            (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
        encoder_final_state_h = tf.concat(
            (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
        encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                             h=encoder_final_state_h)
        # Twice the unidirectional decoder width (presumably because the
        # two directions are concatenated -- confirm against num_units).
        num_units1 = 1024

    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embeddings, start_tokens=tf.to_int32(start_tokens), end_token=1)
    decoder_cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_units1)
    projection_layer = Dense(units=vocab_size, use_bias=True)

    def decode(helper, scope, reuse=None):
        """Decode with ``helper`` inside ``scope``, with or without attention."""
        with tf.variable_scope(scope, reuse=reuse):
            if decode_method == 1:
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    num_units=num_units1, memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
                # Floor division keeps the layer size an int under Python 3.
                attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell, attention_mechanism,
                    attention_layer_size=num_units1 // 2)
                out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                    attn_cell, vocab_size, reuse=reuse)
            else:
                out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                    decoder_cell, vocab_size, reuse=reuse)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=out_cell, helper=helper,
                initial_state=out_cell.zero_state(
                    dtype=tf.float32, batch_size=batch_size),
                output_layer=projection_layer)
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, output_time_major=False,
                impute_finished=True, maximum_iterations=output_max_length)
        return outputs[0]

    train_outputs = decode(pred_helper, 'decode', reuse=True)
    return train_outputs
def preprocess_pandas(features_df):
    """Turn the continuous and categorical columns into one feature tensor.

    Reads ``continuous_vars``, ``categorical_vars``,
    ``categorical_var_encoders`` and ``CATEGORICAL_EMBED_SIZE`` from the
    enclosing module.

    Args:
        features_df: Mapping from column name to values; categorical columns
            are looked up under ``<name>_ids``.

    Returns:
        All per-column tensors concatenated along axis 1.
    """
    columns = []

    # Continuous features: cast to float32 and add a trailing axis.
    for name in continuous_vars:
        as_float = tf.cast(features_df[name], tf.float32)
        columns.append(tf.expand_dims(as_float, 1))

    # Categorical features: embed the ids into a distributed
    # representation, one embedding scope per variable.
    for name in categorical_vars:
        class_count = len(categorical_var_encoders[name].classes_)
        embedded = layers.embed_sequence(features_df[name + '_ids'],
                                         vocab_size=class_count,
                                         embed_dim=CATEGORICAL_EMBED_SIZE,
                                         scope=name)
        columns.append(embedded)

    return tf.concat(columns, 1)
Beispiel #12
0
    def decode(self, encoder_out, scope, output, reuse=None):
        """Run a teacher-forced LSTM decoder seeded with the encoder state.

        Args:
            encoder_out: Encoder result; its element 0 becomes the decoder's
                initial state.
            scope: Variable scope in which the decoder is built.
            output: Target token ids; ``self.start_tokens`` is prepended as
                the first column.
            reuse: Forwarded to the variable scope and to the output
                projection wrapper.

        Returns:
            The first element of ``dynamic_decode``'s result.
        """
        initial_state = encoder_out[0]

        # Decoder inputs: the targets with the start token in front. Their
        # lengths count the positions whose id differs from 1.
        start_column = tf.expand_dims(self.start_tokens, 1)
        train_output = tf.concat([start_column, output], 1)
        is_real_token = tf.not_equal(train_output, 1)
        output_lengths = tf.reduce_sum(tf.to_int32(is_real_token), 1)

        # Reuses the embedding table that lives under 'encode/embed'.
        output_embed = layers.embed_sequence(train_output,
                                             vocab_size=self.vocab_size,
                                             embed_dim=self.embed_dim,
                                             scope='encode/embed',
                                             reuse=True)

        # Only the training helper is wired in; no greedy or attention path
        # is built here.
        teacher_helper = tf.contrib.seq2seq.TrainingHelper(output_embed,
                                                           output_lengths)

        # Decoder is partially based on @ilblackdragon//tf_example/seq2seq.py
        with tf.variable_scope(scope, reuse=reuse):
            lstm = tf.contrib.rnn.LSTMCell(num_units=self.num_units)
            projected = tf.contrib.rnn.OutputProjectionWrapper(
                lstm, self.vocab_size, reuse=reuse)
            basic_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=projected,
                helper=teacher_helper,
                initial_state=initial_state)
            # One extra step on top of the configured maximum length.
            step_limit = self.FLAGS.output_max_length + 1
            decoded = tf.contrib.seq2seq.dynamic_decode(
                decoder=basic_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=step_limit)

            return decoded[0]
Beispiel #13
0
    def build_embedding(self):
        """Create the encoder and decoder embeddings under ``"Embedding"``.

        Sets ``self.encoder_input_embedding``, ``self.decoder_embedding`` and
        ``self.decoder_input_embedding``.
        """
        with tf.variable_scope("Embedding"):
            # Encoder side: embed_sequence owns its embedding table.
            # Original author's shape note: [None, None, 15]
            self.encoder_input_embedding = tcl.embed_sequence(
                self.encoder_inputs,
                self.data.encoder_vocab_size,
                self.encoder_embedding_size,
                scope="encoder_input_embedding")

            # Decoder side: an explicit table followed by a lookup.
            # Original author's shape note for the table: [31,15]
            table_shape = [
                self.data.decoder_vocab_size, self.decoder_embedding_size
            ]
            self.decoder_embedding = tf.Variable(
                tf.random_uniform(table_shape))

            # Original author's shape note: [None, None, 15]
            self.decoder_input_embedding = tf.nn.embedding_lookup(
                self.decoder_embedding,
                self.decoder_input,
                name="decoder_target_embedding")
Beispiel #14
0
def embed_features(feature,
                   vocab_size,
                   embed_dim,
                   scope='Embed',
                   reuse=False,
                   pretrained=None,
                   trainable=True):
    """Embed ``feature`` inside ``scope``, optionally warm-started.

    Args:
        feature: Ids passed to ``layers.embed_sequence``.
        vocab_size: Vocabulary size of the embedding table.
        embed_dim: Width of the embedding vectors.
        scope: Variable scope that wraps the embedding.
        reuse: Forwarded to ``tf.variable_scope``.
        pretrained: When not ``None``, handed to
            ``tf.contrib.framework.init_from_checkpoint`` as the checkpoint
            to initialise the scope from.
        trainable: Forwarded to ``layers.embed_sequence``.

    Returns:
        The embedded feature tensor.
    """
    with tf.variable_scope(scope, reuse=reuse):
        embedded = layers.embed_sequence(feature,
                                         vocab_size=vocab_size,
                                         embed_dim=embed_dim,
                                         trainable=trainable)
        if pretrained is None:
            return embedded

        # Map the scope onto the scope of the same name in the checkpoint.
        scope_prefix = scope + '/'
        tf.contrib.framework.init_from_checkpoint(
            pretrained, {scope_prefix: scope_prefix})
        return embedded
Beispiel #15
0
    def __build_model(self):
        """Assemble the GRU classifier graph and store its tensors on self.

        Sets ``input_x``, ``input_y``, ``embed``, ``rnn_unit``, ``logits``,
        ``pred``, ``acc``, ``loss``, ``train_op``, ``global_step`` and
        ``summary``.
        """
        # Inputs
        self.input_x = tf.placeholder(
            tf.int32, [None, self.seqlen], name="input_x")
        self.input_y = tf.placeholder(
            tf.float32, [None, self.total_class], name="input_y")

        # "w_e" is not consumed below, but creating it still adds a variable
        # to the graph, so the call is kept.
        w = tf.get_variable("w_e", [self.seqlen, self.embed_dim])
        self.embed = layers.embed_sequence(
            self.input_x,
            vocab_size=self.vocab_size,
            embed_dim=self.embed_dim)

        # Single GRU cell unrolled over axis 1 of the embedded input; only
        # the final state is used.
        self.rnn_unit = tf.nn.rnn_cell.GRUCell(self.embed_dim)
        step_inputs = tf.unstack(self.embed, axis=1)
        _, final_state = tf.nn.static_rnn(
            cell=self.rnn_unit, inputs=step_inputs, dtype=tf.float32)

        # Logits and predictions
        self.logits = tf.layers.dense(
            final_state, self.total_class, activation=None)
        self.pred = tf.nn.softmax(self.logits)

        # Accuracy: fraction of rows where the arg-max label matches the
        # arg-max logit.
        label_ids = tf.argmax(self.input_y, 1)
        predicted_ids = tf.argmax(self.logits, 1)
        hits = tf.cast(tf.equal(label_ids, predicted_ids), "float")
        self.acc = tf.reduce_mean(hits, name="acc")

        per_class_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.logits, labels=self.input_y)
        self.loss = tf.reduce_mean(per_class_loss)

        # Adam on gradients clipped by global norm.
        variables = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, variables), self.clip_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(clipped, variables))

        self.global_step = tf.Variable(self.init_step, trainable=False)

        self.summary = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.scalar("acc", self.acc),
        ])
Beispiel #16
0
def cnn_model_fn(features, labels, mode, params):
    """Model function: embedding, 1-D convolution, max pool, dense head.

    Reads ``vocab_size``, ``embedding_size``, ``estimator`` and ``head`` from
    the enclosing module.

    Args:
        features: Mapping whose ``"x"`` entry holds the token ids.
        labels: Labels, or ``None`` when predicting; reshaped to one column
            when present.
        mode: Estimator mode key; dropout is active only in TRAIN mode.
        params: Mapping providing ``"embedding_initializer"``.

    Returns:
        The estimator spec produced by ``head.create_estimator_spec``.
    """
    print(features)

    # Map the ids into the embedding layer: [batch, sentence_len, embed_size]
    embedded = embed_sequence(
        ids=features["x"],
        vocab_size=vocab_size,
        embed_dim=embedding_size,
        initializer=params["embedding_initializer"])
    print(embedded.shape)

    is_training = (mode == estimator.estimator.ModeKeys.TRAIN)

    embedded_dropped = tf.layers.dropout(
        inputs=embedded, rate=0.2, training=is_training)

    # [batch, sentence_len, filters]
    conv = tf.layers.conv1d(inputs=embedded_dropped,
                            filters=32,
                            kernel_size=3,
                            padding="same",
                            activation=tf.nn.relu)
    print(conv.shape)

    # Max over axis 1: [batch, filters]
    pooled = tf.reduce_max(input_tensor=conv, axis=1)

    hidden = tf.layers.dense(inputs=pooled, units=250)
    hidden_dropped = tf.layers.dropout(
        inputs=hidden, rate=0.2, training=is_training)
    logits = tf.layers.dense(inputs=hidden_dropped, units=1)

    # Labels are None when predicting.
    if labels is not None:
        labels = tf.reshape(labels, [-1, 1])

    adam = tf.train.AdamOptimizer()

    def _train_op_fn(loss):
        """Minimise ``loss`` with Adam, advancing the global step."""
        return adam.minimize(loss=loss,
                             global_step=tf.train.get_global_step())

    return head.create_estimator_spec(features=features,
                                      labels=labels,
                                      mode=mode,
                                      logits=logits,
                                      train_op_fn=_train_op_fn)
        'dev_bleu': [],
        'ig': []
    }

    #################### model ####################
    tf.reset_default_graph()

    X = tf.placeholder(tf.int32, [None, None])
    X_len = tf.placeholder(tf.int32, [None])
    Y = tf.placeholder(tf.int32, [None, None])
    Y_len = tf.placeholder(tf.int32, [None])
    Y_mask = tf.placeholder(tf.float32, [None, None])
    Star = tf.placeholder(tf.float32, [None, 5])

    inputs_enc = layers.embed_sequence(X,
                                       vocab_size=vocab_dim,
                                       embed_dim=embedding_dim)
    outputs_enc = layers.embed_sequence(Y,
                                        vocab_size=vocab_dim,
                                        embed_dim=embedding_dim)
    cell_enc = tf.contrib.rnn.BasicLSTMCell(num_units=latent_dim)
    outputs_enc, state_enc = tf.nn.dynamic_rnn(cell=cell_enc,
                                               inputs=inputs_enc,
                                               sequence_length=X_len,
                                               dtype=tf.float32,
                                               scope='g1')
    cell_dec = tf.contrib.rnn.BasicLSTMCell(num_units=latent_dim // 2,
                                            state_is_tuple=False)
    g1 = tf.concat([state_enc.h, Star], axis=-1)
    latent = tf.layers.dense(g1, latent_dim)
    init = latent  # tf.layers.dense(latent, latent_dim)
    def _model(self, features, labels, mode, params):
        """
            main model.
        """
        question_sequence = features['question_seq']
        answer_sequence = features['answer_seq']

        batch_size = tf.shape(question_sequence)[0]
        start_token = tf.ones([1], tf.int32)

        model_size = params["model_size"]
        num_layers = params["num_layers"]
        keep_prob = params["keep_prob"]
        vocab_size = params["vocab_size"]
        embedding_size = params["embedding_size"]

        question_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(question_sequence, self.vocabs["<PAD>"])),
            1)
        answer_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(answer_sequence, self.vocabs["<PAD>"])),
            1)

        question_embed = layers.embed_sequence(question_sequence,
                                               vocab_size=vocab_size,
                                               embed_dim=embedding_size,
                                               scope='embed')
        answer_embed = layers.embed_sequence(answer_sequence,
                                             vocab_size=vocab_size,
                                             embed_dim=embedding_size,
                                             scope='embed',
                                             reuse=True)
        with tf.variable_scope('embed', reuse=True):
            embeddings = tf.get_variable('embeddings')
        fcells = []
        for i in range(num_layers):
            c = tf.nn.rnn_cell.GRUCell(model_size)
            c = tf.nn.rnn_cell.DropoutWrapper(c,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob)
            fcells.append(c)
        # I cant figure out how to use tuple version.
        fcell = tf.nn.rnn_cell.MultiRNNCell(fcells)

        #bcells = []
        #for i in range(num_layers):
        #    c = tf.nn.rnn_cell.GRUCell(model_size)
        #    c = tf.nn.rnn_cell.DropoutWrapper(c, input_keep_prob=keep_prob,
        #                                    output_keep_prob=keep_prob)
        #    bcells.append(c)
        # I cant figure out how to use tuple version.
        #bcell = tf.nn.rnn_cell.MultiRNNCell(bcells)

        bcell = tf.contrib.rnn.GRUCell(num_units=model_size)

        #icell = tf.contrib.rnn.GRUCell(num_units=model_size)
        encoder_outputs, encoder_final_state = tf.nn.bidirectional_dynamic_rnn(
            fcell,
            bcell,
            question_embed,
            sequence_length=question_lengths,
            dtype=tf.float32)

        # helpers
        train_helper = tf.contrib.seq2seq.TrainingHelper(answer_embed,
                                                         answer_lengths,
                                                         time_major=False)
        start_tokens = tf.tile(tf.constant([self.vocabs['<START>']],
                                           dtype=tf.int32), [batch_size],
                               name='start_tokens')
        pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings,
            start_tokens=start_tokens,
            end_token=self.vocabs["<EOS>"])

        # rnn cell and dense layer
        cell = tf.contrib.rnn.GRUCell(num_units=model_size)
        cells = []
        for i in range(num_layers):
            c = tf.nn.rnn_cell.GRUCell(model_size)
            c = tf.nn.rnn_cell.DropoutWrapper(c,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob)
            cells.append(c)
        # I cant figure out how to use tuple version.
        cell = tf.nn.rnn_cell.MultiRNNCell(cells)
        projection_layer = Dense(
            units=vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

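        # Decoder of the seq2seq model.
        # NOTE(review): an earlier remark here said "for this case we don't
        # have an encoder"; that looks stale, since decode() attends over
        # encoder_outputs.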
        def decode(helper, scope, output_max_length, reuse=None):
            """Build an attention decoder driven by `helper` and run it.

            Args:
                helper: seq2seq helper that feeds the decoder its inputs
                    (the callers below pass the training helper and the
                    greedy-embedding helper).
                scope: variable scope name; calling again with the same
                    scope and `reuse=True` shares the decoder variables.
                output_max_length: upper bound on decoding steps, forwarded
                    as `maximum_iterations` to `dynamic_decode`.
                reuse: forwarded to `tf.variable_scope`.

            Returns:
                The first element of the `dynamic_decode` result (the final
                decoder outputs).
            """
            with tf.variable_scope(scope, reuse=reuse):
                # Attention memory is element 0 of the pair returned by
                # bidirectional_dynamic_rnn, i.e. only the forward-direction
                # encoder outputs are attended over.
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    num_units=model_size,
                    memory=encoder_outputs[0],
                    memory_sequence_length=question_lengths)
                #cell = tf.contrib.rnn.GRUCell(num_units=model_size)
                # `cell` is the multi-layer decoder cell from the enclosing
                # function; it is wrapped here so that both decode() calls
                # use the same cell object.
                attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell, attention_mechanism, attention_layer_size=model_size)
                #out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                #    attn_cell, vocab_size, reuse=reuse
                #)
                # The decoder starts from an all-zero state; the encoder's
                # final state is deliberately not used (see the disabled
                # alternative below). Vocabulary logits come from the shared
                # `projection_layer`.
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=attn_cell,
                    helper=helper,
                    initial_state=attn_cell.zero_state(dtype=tf.float32,
                                                       batch_size=batch_size),
                    #initial_state=encoder_final_state,
                    output_layer=projection_layer)
                outputs = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=output_max_length)
            return outputs[0]

        train_outputs = decode(train_helper, 'decode', 3000)
        pred_outputs = decode(pred_helper, 'decode', 300, reuse=True)

        targets = answer_sequence[:, 1:]

        probs = tf.nn.softmax(pred_outputs.rnn_output, name="probs")
        # in case in prediction mode return
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions={
                                                  "probs": probs,
                                                  "syms":
                                                  pred_outputs.sample_id
                                              })

        # mask the PADs
        mask = tf.to_float(
            tf.not_equal(answer_sequence[:, :-1], self.vocabs["<PAD>"]))

        #tf.identity(mask[0], name='mask')
        #tf.identity(targets[0], name='targets')
        #tf.identity(train_outputs.rnn_output[0,output_lengths[0]-2:output_lengths[0],:], name='rnn_out')
        # Loss function
        loss = tf.contrib.seq2seq.sequence_loss(
            train_outputs.rnn_output[:, :-1, :], targets, mask)
        tf.summary.scalar("loss", loss)

        # Optimizer
        learning_rate = tf.Variable(0.0, trainable=False)
        initial_learning_rate = tf.constant(0.001)
        learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                                   tf.train.get_global_step(),
                                                   100, 0.99)
        tf.summary.scalar("learning_rate", learning_rate)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 5.0)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        # Visualise gradients
        vis_grads = [0 if i is None else i for i in grads]
        for g in vis_grads:
            tf.summary.histogram("gradients_" + str(g), g)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=tf.train.get_global_step())
        tf.identity(question_sequence[0], name="train_input")
        tf.identity(train_outputs.sample_id[0], name='train_pred')
        tf.identity(pred_outputs.sample_id[0], name='predictions')
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=None,
                                          loss=loss,
                                          train_op=train_op)
Beispiel #19
0
    def seq2seq_model_fn(self, mode, features, labels, params):
        """Estimator ``model_fn`` for a GRU encoder/decoder sequence model.

        Builds a single-layer GRU encoder, a training decoder (teacher
        forcing) and an inference decoder that shares the training decoder's
        variables. The inference decoder is greedy, or a beam search when
        ``self.use_beam_search`` is set. Attention over the encoder outputs
        is added when ``self.use_attn`` is set.

        Reads ``self.STOP_ID``, ``self.keep_prob``, ``self.use_attn``,
        ``self.use_beam_search`` and ``self.beam_width``; stores the training
        loss on ``self.loss``.

        Args:
            mode: estimator mode key; only forwarded to the returned spec
                (the loss and train op are built for every mode).
            features: dict with integer id tensors under ``'input'`` and
                ``'output'`` (assumed to be ``[batch, time]`` - the code
                reduces over axis 1 and takes the batch size from axis 0).
            labels: unused; targets are taken from ``features['output']``.
            params: dict with ``'src_vocab_size'``, ``'tar_vocab_size'``,
                ``'embed_dim'`` and ``'rnn_size'``; ``'optimizer'``
                (default ``'Adam'``) and ``'learning_rate'`` (default
                ``0.001``) are optional.

        Returns:
            A ``tf.estimator.EstimatorSpec`` whose predictions are the greedy
            sample ids, or the best beam's ids when beam search is enabled.
        """
        src_vocab_size = params['src_vocab_size']
        tar_vocab_size = params['tar_vocab_size']
        embed_dim = params['embed_dim']
        rnn_size = params['rnn_size']
        # Floor division keeps the attention layer size an integer on
        # Python 3, where ``rnn_size / 2`` would be a float and is not a
        # valid layer width.
        attention_layer_size = rnn_size // 2

        inp = features['input']
        output = features['output']
        batch_size = tf.shape(inp)[0]
        # Token id 0 is used as the first decoder input for every example.
        start_tokens = tf.zeros([batch_size], dtype=tf.int64)
        train_output = tf.concat([tf.expand_dims(start_tokens, 1), output], 1)
        # Lengths are the number of tokens that differ from STOP_ID.
        input_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(inp, self.STOP_ID)), 1)
        output_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(train_output, self.STOP_ID)), 1)
        input_embed = layers.embed_sequence(inp,
                                            vocab_size=src_vocab_size,
                                            embed_dim=embed_dim,
                                            scope='src_embed')
        output_embed = layers.embed_sequence(train_output,
                                             vocab_size=tar_vocab_size,
                                             embed_dim=embed_dim,
                                             scope='tar_embed')
        # Fetch the target embedding matrix so the inference decoder can
        # embed its own predictions.
        with tf.variable_scope('tar_embed', reuse=True):
            embeddings = tf.get_variable('embeddings')

        encoder_cell = tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.GRUCell(num_units=rnn_size),
            input_keep_prob=self.keep_prob,
            output_keep_prob=self.keep_prob)
        encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell, input_embed, dtype=tf.float32)

        train_helper = tf.contrib.seq2seq.TrainingHelper(
            output_embed, output_lengths)

        pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings,
            start_tokens=tf.to_int32(start_tokens),
            end_token=self.STOP_ID)

        def decode(helper, scope, train=True, reuse=None):
            """Build and run one decoder inside variable scope `scope`.

            With ``train=True`` a ``BasicDecoder`` driven by `helper` is
            used; otherwise a ``BeamSearchDecoder`` is built (and `helper`
            is ignored) over encoder tensors tiled ``self.beam_width`` times.
            Returns the first element of the ``dynamic_decode`` result.
            """
            with tf.variable_scope(scope, reuse=reuse):
                cell = tf.contrib.rnn.DropoutWrapper(
                    tf.contrib.rnn.GRUCell(num_units=rnn_size),
                    input_keep_prob=self.keep_prob,
                    output_keep_prob=self.keep_prob)

                # Beam search needs every encoder-side tensor repeated once
                # per beam.
                if train:
                    initial_state = encoder_final_state
                else:
                    initial_state = tf.contrib.seq2seq.tile_batch(
                        encoder_final_state, multiplier=self.beam_width)

                if self.use_attn:
                    if train:
                        memory = encoder_outputs
                        memory_lengths = input_lengths
                    else:
                        memory = tf.contrib.seq2seq.tile_batch(
                            encoder_outputs, multiplier=self.beam_width)
                        memory_lengths = tf.contrib.seq2seq.tile_batch(
                            input_lengths, multiplier=self.beam_width)
                    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                        num_units=rnn_size,
                        memory=memory,
                        memory_sequence_length=memory_lengths)
                    cell = tf.contrib.seq2seq.AttentionWrapper(
                        cell,
                        attention_mechanism,
                        attention_layer_size=attention_layer_size)

                out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                    cell, tar_vocab_size, reuse=reuse)

                if self.use_attn:
                    # The attention wrapper has its own state structure:
                    # start from zeros and splice the encoder state in as the
                    # wrapped cell's state.
                    if train:
                        state_batch_size = batch_size
                    else:
                        state_batch_size = batch_size * self.beam_width
                    initial_state = out_cell.zero_state(
                        dtype=tf.float32,
                        batch_size=state_batch_size).clone(
                            cell_state=initial_state)

                if train:
                    decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell=out_cell,
                        helper=helper,
                        initial_state=initial_state)
                else:
                    decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=out_cell,
                        embedding=embeddings,
                        start_tokens=tf.to_int32(start_tokens),
                        end_token=self.STOP_ID,
                        initial_state=initial_state,
                        beam_width=self.beam_width)

                outputs = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    impute_finished=train or not self.use_beam_search,
                    maximum_iterations=2 * tf.reduce_max(output_lengths))
            return outputs[0]

        train_outputs = decode(train_helper, 'decode')
        # The inference decoder reuses the training decoder's variables; it
        # is the greedy BasicDecoder unless beam search is enabled.
        pred_outputs = decode(pred_helper,
                              'decode',
                              train=not self.use_beam_search,
                              reuse=True)

        tf.identity(train_outputs.sample_id[0], name='train_pred')
        # Mask based on the decoder *inputs* (targets shifted right by one),
        # so the first STOP_ID target still contributes to the loss.
        weights = tf.to_float(
            tf.not_equal(train_output[:, :-1], self.STOP_ID))
        self.loss = tf.contrib.seq2seq.sequence_loss(
            train_outputs.rnn_output, output, weights=weights)
        train_op = layers.optimize_loss(
            self.loss,
            tf.train.get_global_step(),
            optimizer=params.get('optimizer', 'Adam'),
            learning_rate=params.get('learning_rate', 0.001),
            summaries=['loss', 'learning_rate'])

        if self.use_beam_search:
            tf.identity(pred_outputs.predicted_ids[0], name='predictions')
            # Keep only the first (best-scoring) beam.
            predictions = pred_outputs.predicted_ids[:, :, 0]
        else:
            tf.identity(pred_outputs.sample_id[0], name='predictions')
            predictions = pred_outputs.sample_id

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=self.loss,
            train_op=train_op)
    def __init__(self, is_testing):
        """Build the multi-tower TF graph, start the data feeders and wait.

        Steps, in order:
          1. Encode the bAbI 'en-valid-10k' data via `self.encode_data`.
          2. Build the graph: input placeholders, a train and a validation
             FIFO queue, and one model tower per entry of `self.devices`
             (variables shared across towers).
          3. Average tower losses/gradients and create the Adam train step,
             session, saver and summary writers.
          4. Start loader processes and queue-putter threads, then block
             until both TF queues report at least `self.qsize` elements.

        Args:
            is_testing: when truthy, towers read the placeholders directly
                instead of dequeuing from the train/validation queues.

        Note: `self.qsize`, `self.devices`, `self.emb_size`,
        `self.num_facts`, `self.n_hidden`, `self.n_steps`, `self.revision`
        and `self.name` are read here but not assigned; presumably they are
        class-level attributes - verify against the class definition.
        """
        super().__init__()
        self.is_testing = is_testing

        print("Preparing data...")
        self.train, self.valid, self.test, self.vocab = self.encode_data(
            bAbI('en-valid-10k'))

        print("Creating graph...")
        # Everything defaults to the CPU; per-tower ops are placed on their
        # own device further down.
        with tf.Graph().as_default(), tf.device('/cpu:0'):
            regularizer = layers.l2_regularizer(1e-4)

            self.session = tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True))
            self.global_step = tf.Variable(initial_value=0, trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=2e-4)

            # Input placeholders; they are fed either straight into the
            # towers (testing) or into the queues via the enqueue ops.
            self.facts_ph = tf.placeholder(tf.int32,
                                           shape=(None,
                                                  None))  # (bs*#facts, seq)
            self.facts_pos_ph = tf.placeholder(tf.int32,
                                               shape=(None, ))  # (bs*#facts, )
            self.question_ph = tf.placeholder(tf.int32,
                                              shape=(None, None))  # (bs, seq)
            self.answers_ph = tf.placeholder(tf.int32,
                                             shape=(None, ))  # (bs, )
            self.edge_indices_ph = tf.placeholder(tf.int32, shape=(None, 2))
            self.fact_segments_ph = tf.placeholder(tf.int32, shape=(None, ))
            self.edge_segments_ph = tf.placeholder(tf.int32, shape=(None, ))
            self.q_seq_length_ph = tf.placeholder(tf.int32, shape=(None, ))
            self.f_seq_length_ph = tf.placeholder(tf.int32, shape=(None, ))
            self.task_indices_ph = tf.placeholder(tf.int32, shape=(None, ))
            self.edge_keep_prob_ph = tf.placeholder(tf.float32, shape=())
            self.is_training_ph = tf.placeholder(tf.bool)

            # Order matters: the same order is used to unpack the dequeued
            # tensors in the tower loop. `is_training_ph` is intentionally
            # not part of a queue element.
            placeholders = [
                self.facts_ph, self.facts_pos_ph, self.question_ph,
                self.answers_ph, self.edge_indices_ph, self.fact_segments_ph,
                self.edge_segments_ph, self.q_seq_length_ph,
                self.f_seq_length_ph, self.task_indices_ph,
                self.edge_keep_prob_ph
            ]

            # Both queues are declared with dtypes only (no static shapes).
            self.train_queue = tf.FIFOQueue(self.qsize,
                                            [ph.dtype for ph in placeholders],
                                            name='train-queue')
            self.val_queue = tf.FIFOQueue(self.qsize,
                                          [ph.dtype for ph in placeholders],
                                          name='val-queue')

            self.train_enqueue_op = self.train_queue.enqueue(placeholders)
            self.train_qsize_op = self.train_queue.size()
            tf.summary.scalar('queues/train', self.train_qsize_op)

            self.val_enqueue_op = self.val_queue.enqueue(placeholders)
            self.val_qsize_op = self.val_queue.size()
            tf.summary.scalar('queues/val', self.val_qsize_op)

            def avg_n(x):
                # Element-wise mean over a list of same-shaped tensors.
                return tf.reduce_mean(tf.stack(x, axis=0), axis=0)

            towers = []
            with tf.variable_scope(tf.get_variable_scope()):
                for device_nr, device in enumerate(self.devices):
                    # Input selection and word embeddings stay on the CPU.
                    with tf.device('/cpu:0'):
                        if self.is_testing:
                            facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = placeholders
                        else:
                            # Pull one element from the train or validation
                            # queue depending on `is_training_ph`.
                            facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = tf.cond(
                                self.is_training_ph,
                                true_fn=lambda: self.train_queue.dequeue(),
                                false_fn=lambda: self.val_queue.dequeue(),
                            )

                            # The queues carry no static shapes, so copy them
                            # over from the matching placeholders.
                            vars = (facts_ph, facts_pos_ph, question_ph,
                                    answers_ph, edge_indices_ph,
                                    fact_segments_ph, edge_segments_ph,
                                    q_seq_length_ph, f_seq_length_ph,
                                    task_indices_ph, edge_keep_prob)
                            for v, ph in zip(vars, placeholders):
                                v.set_shape(ph.get_shape())

                        # Facts and questions share one embedding table
                        # (same scope, second call reuses the variables).
                        facts_emb = layers.embed_sequence(
                            facts_ph,
                            self.vocab.size(),
                            self.emb_size,
                            scope='word-embeddings')
                        questions_emb = layers.embed_sequence(
                            question_ph,
                            self.vocab.size(),
                            self.emb_size,
                            scope='word-embeddings',
                            reuse=True)

                    with tf.device(device), tf.name_scope("device-%s" %
                                                          device_nr):

                        def mlp(x, scope, n_hidden):
                            # Three fully connected layers with the default
                            # activation followed by one linear layer.
                            with tf.variable_scope(scope):
                                for i in range(3):
                                    x = layers.fully_connected(
                                        x,
                                        n_hidden,
                                        weights_regularizer=regularizer)
                                return layers.fully_connected(
                                    x,
                                    n_hidden,
                                    weights_regularizer=regularizer,
                                    activation_fn=None)

                        # Encode each fact with an LSTM and keep only the
                        # final hidden state (the `h` of the state tuple).
                        _, (_, f_encoding) = tf.nn.dynamic_rnn(
                            tf.nn.rnn_cell.LSTMCell(32),
                            facts_emb,
                            dtype=tf.float32,
                            sequence_length=f_seq_length_ph,
                            scope='fact-encoder')

                        # One random offset per answer, looked up per fact
                        # through `fact_segments_ph` and added to the fact
                        # position before one-hot encoding (depth
                        # 2 * num_facts).
                        # NOTE(review): presumably fact_segments_ph maps each
                        # fact to its example index - confirm with the loader.
                        random_pos_offsets = tf.random_uniform(
                            tf.shape(answers_ph),
                            minval=0,
                            maxval=self.num_facts,
                            dtype=tf.int32)
                        fact_pos = facts_pos_ph + tf.gather(
                            random_pos_offsets, fact_segments_ph)
                        facts_pos_encoding = tf.one_hot(
                            fact_pos, 2 * self.num_facts)
                        f_encoding = tf.concat(
                            [f_encoding, facts_pos_encoding], axis=1)

                        # Same scheme for the questions.
                        _, (_, q_encoding) = tf.nn.dynamic_rnn(
                            tf.nn.rnn_cell.LSTMCell(32),
                            questions_emb,
                            dtype=tf.float32,
                            sequence_length=q_seq_length_ph,
                            scope='question-encoder')

                        def graph_fn(x):
                            # Output head: two hidden layers, then linear
                            # logits over the vocabulary.
                            with tf.variable_scope('graph-fn'):
                                x = layers.fully_connected(
                                    x,
                                    self.n_hidden,
                                    weights_regularizer=regularizer)
                                x = layers.fully_connected(
                                    x,
                                    self.n_hidden,
                                    weights_regularizer=regularizer)
                                return layers.fully_connected(
                                    x,
                                    self.vocab.size(),
                                    activation_fn=None,
                                    weights_regularizer=regularizer)

                        # Initial node features: fact encoding concatenated
                        # with the question encoding gathered per fact.
                        x = tf.concat([
                            f_encoding,
                            tf.gather(q_encoding, fact_segments_ph)
                        ], 1)
                        x0 = mlp(x, 'pre', self.n_hidden)
                        edge_features = tf.gather(q_encoding, edge_segments_ph)
                        x = x0
                        outputs = []
                        log_losses = []
                        with tf.variable_scope('steps'):
                            lstm_cell = LSTMCell(self.n_hidden)
                            state = lstm_cell.zero_state(
                                tf.shape(x)[0], tf.float32)

                            # Each step: message passing, an MLP over the
                            # result concatenated with the initial features,
                            # an LSTM update, then a per-step prediction and
                            # loss.
                            for step in range(self.n_steps):
                                x = message_passing(
                                    x, edge_indices_ph, edge_features,
                                    lambda x: mlp(x, 'message-fn', self.
                                                  n_hidden), edge_keep_prob)
                                x = mlp(tf.concat([x, x0], axis=1), 'post-fn',
                                        self.n_hidden)
                                x, state = lstm_cell(x, state)
                                with tf.variable_scope('graph-sum'):
                                    # Sum node states per segment id, then
                                    # predict the answer from the sum.
                                    graph_sum = tf.segment_sum(
                                        x, fact_segments_ph)
                                    out = graph_fn(graph_sum)
                                    outputs.append(out)
                                    log_losses.append(
                                        tf.reduce_mean(
                                            tf.nn.
                                            sparse_softmax_cross_entropy_with_logits(
                                                labels=answers_ph,
                                                logits=out)))

                                # Steps after the first reuse the variables
                                # created in the first step.
                                tf.get_variable_scope().reuse_variables()

                        reg_loss = sum(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        loss = avg_n(log_losses) + reg_loss

                        towers.append({
                            'loss':
                            loss,
                            'grads':
                            self.optimizer.compute_gradients(loss),
                            'log_losses':
                            tf.stack(log_losses),  # (n_steps, ) - scalar mean per step
                            'answers':
                            answers_ph,  # (batch_size, )
                            'outputs':
                            tf.stack(
                                outputs),  # (n_steps, batch_size, vocab size)
                            'task_indices':
                            task_indices_ph  # (batch_size, )
                        })

                        # Towers after the first reuse the same variables.
                        tf.get_variable_scope().reuse_variables()

            # Combine the towers: mean loss, outputs/answers/task indices
            # concatenated along the batch axis.
            self.loss = avg_n([t['loss'] for t in towers])
            self.out = tf.concat([t['outputs'] for t in towers], axis=1)
            self.answers = tf.concat([t['answers'] for t in towers], axis=0)
            self.task_indices = tf.concat([t['task_indices'] for t in towers],
                                          axis=0)

            tf.summary.scalar('losses/total', self.loss)
            # NOTE: `reg_loss` here is the value left over from the last
            # tower built in the loop above.
            tf.summary.scalar('losses/reg', reg_loss)
            log_losses = avg_n([t['log_losses'] for t in towers])
            for i in range(self.n_steps):
                tf.summary.scalar('steps/%d/losses/log' % i, log_losses[i])

            avg_gradients = util.average_gradients(
                [t['grads'] for t in towers])
            self.train_step = self.optimizer.apply_gradients(
                avg_gradients, global_step=self.global_step)

            self.session.run(tf.global_variables_initializer())
            self.saver = tf.train.Saver()
            util.print_vars(tf.trainable_variables())

            self.train_writer = tf.summary.FileWriter(
                '/tmp/tensorboard/bAbI/%s/train/%s' %
                (self.revision, self.name), self.session.graph)
            self.test_writer = tf.summary.FileWriter(
                '/tmp/tensorboard/bAbI/%s/test/%s' %
                (self.revision, self.name), self.session.graph)

            self.summaries = tf.summary.merge_all()

        print("Starting data loaders...")
        # Loader processes put items on multiprocessing queues; the putter
        # threads started below are handed those queues together with the
        # matching TF enqueue op.
        train_mp_queue = mp.Manager().Queue(maxsize=self.qsize)
        val_mp_queue = mp.Manager().Queue(maxsize=self.qsize)

        # Four loader processes for training data, one for validation data.
        data_loader_processes = [
            mp.Process(target=self.data_loader, args=(train_mp_queue, True))
            for i in range(4)
        ]
        val_data_loader_processes = [
            mp.Process(target=self.data_loader, args=(val_mp_queue, False))
            for i in range(1)
        ]

        # Daemon processes/threads do not keep the interpreter alive on exit.
        for p in data_loader_processes + val_data_loader_processes:
            p.daemon = True
            p.start()

        queue_putter_threads = [
            threading.Thread(target=self.queue_putter,
                             args=(train_mp_queue, self.train_enqueue_op,
                                   'train', 1000)),
            threading.Thread(target=self.queue_putter,
                             args=(val_mp_queue, self.val_enqueue_op, 'val',
                                   1)),
        ]
        for t in queue_putter_threads:
            t.daemon = True
            t.start()

        # Poll once per second until both TF queues hold at least
        # `self.qsize` elements.
        train_qsize, val_qsize = 0, 0
        print("Waiting for queue to fill...")
        while train_qsize < self.qsize or val_qsize < self.qsize:
            train_qsize = self.session.run(self.train_qsize_op)
            val_qsize = self.session.run(self.val_qsize_op)
            print('train_qsize: %d, val_qsize: %d' % (train_qsize, val_qsize),
                  flush=True)
            time.sleep(1)
# Random initial weights, one (n, embedding_dim) matrix per categorical
# feature. The same matrices initialise every approach below.
initial_emb_weights = [
    np.random.rand(n, embedding_dim) for n in n_cat_by_feature
]

# The actual features: one integer [H, W] placeholder per categorical feature.
features = [
    tf.placeholder(shape=[H, W], dtype="int32", name="feat%d" % i)
    for i, _ in enumerate(n_cat_by_feature)
]

# 1.1) embed on channel -> concat on channel
embedded1 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    # Embedding lookup whose table is initialised with this feature's matrix.
    e = layers.embed_sequence(f,
                              vocab_size=n,
                              embed_dim=embedding_dim,
                              initializer=tf.constant_initializer(w))
    embedded1.append(e)

# Concatenate the per-feature embeddings along the last (embedding) axis.
out11 = tf.concat(embedded1, axis=2)

# 1.2) onehot on channel -> 1x1 conv separately -> concat on channel
embedded2 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    one_hot = layers.one_hot_encoding(f, num_classes=n)

    # 1x1 convolution over the one-hot channels, initialised with the same
    # matrix as the embedding table in 1.1.
    # NOTE(review): the visible snippet ends right after this call, so
    # `conv_out` is never appended to `embedded2` here.
    conv_out = layers.conv2d(inputs=one_hot,
                             num_outputs=embedding_dim,
                             weights_initializer=tf.constant_initializer(w),
                             kernel_size=1,
                             stride=1)
Beispiel #22
0
    def build_model(self):
        self.placeholders = _get_placeholders(self.spatial_dim)
        with tf.variable_scope("theta"):
            units_embedded = layers.embed_sequence(
                self.placeholders.screen_unit_type,
                vocab_size=SCREEN_FEATURES.unit_type.scale,
                embed_dim=self.unit_type_emb_dim,
                scope="unit_type_emb",
                trainable=self.trainable
            )

            # Let's not one-hot zero which is background
            player_relative_screen_one_hot = layers.one_hot_encoding(
                self.placeholders.player_relative_screen,
                num_classes=SCREEN_FEATURES.player_relative.scale
            )[:, :, :, 1:]
            player_relative_minimap_one_hot = layers.one_hot_encoding(
                self.placeholders.player_relative_minimap,
                num_classes=MINIMAP_FEATURES.player_relative.scale
            )[:, :, :, 1:]

            channel_axis = 3
            screen_numeric_all = tf.concat(
                [self.placeholders.screen_numeric, units_embedded, player_relative_screen_one_hot],
                axis=channel_axis
            )
            minimap_numeric_all = tf.concat(
                [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
                axis=channel_axis
            )

            # BUILD CONVNNs
            screen_output = self._build_convs(screen_numeric_all, "screen_network")
            minimap_output = self._build_convs(minimap_numeric_all, "minimap_network")


            # State representation (last layer before separation as described in the paper)
            self.map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

            # BUILD CONVLSTM
            self.rnn_in = tf.reshape(self.map_output, [1, -1, 32, 32, 64])
            self.cell = tf.contrib.rnn.Conv2DLSTMCell(input_shape=[32, 32, 1], # input dims
                                                 kernel_shape=[3, 3],  # for a 3 by 3 conv
                                                 output_channels=64)  # number of feature maps
            c_init = np.zeros((1, 32, 32, 64), np.float32)
            h_init = np.zeros((1, 32, 32, 64), np.float32)
            self.state_init = [c_init, h_init]
            step_size = tf.shape(self.map_output)[:1] # Get step_size from input dimensions
            c_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
            h_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
            self.state_in = (c_in, h_in)
            state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
            self.step_size = tf.placeholder(tf.float32, [1])
            (self.outputs, self.state) = tf.nn.dynamic_rnn(self.cell, self.rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False,
                                                          dtype=tf.float32)
            lstm_c, lstm_h = self.state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(self.outputs, [-1, 32, 32, 64])
            
            # 1x1 conv layer to generate our spatial policy
            self.spatial_action_logits = layers.conv2d(
                rnn_out,
                data_format="NHWC",
                num_outputs=1,
                kernel_size=1,
                stride=1,
                activation_fn=None,
                scope='spatial_action',
                trainable=self.trainable
            )

            spatial_action_probs = tf.nn.softmax(layers.flatten(self.spatial_action_logits))


            map_output_flat = tf.reshape(self.outputs, [-1, 65536])  # (32*32*64)
            # fully connected layer for Value predictions and action_id
            self.fc1 = layers.fully_connected(
                map_output_flat,
                num_outputs=256,
                activation_fn=tf.nn.relu,
                scope="fc1",
                trainable=self.trainable
            )
            # fc/action_id
            action_id_probs = layers.fully_connected(
                self.fc1,
                num_outputs=len(actions.FUNCTIONS),
                activation_fn=tf.nn.softmax,
                scope="action_id",
                trainable=self.trainable
            )
            # fc/value
            self.value_estimate = tf.squeeze(layers.fully_connected(
                self.fc1,
                num_outputs=1,
                activation_fn=None,
                scope='value',
                trainable=self.trainable
            ), axis=1)

            # disregard non-allowed actions by setting zero prob and re-normalizing to 1 ((MINE) THE MASK)
            action_id_probs *= self.placeholders.available_action_ids
            action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

            def logclip(x):
                return tf.log(tf.clip_by_value(x, 1e-12, 1.0))

            spatial_action_log_probs = (
                    logclip(spatial_action_probs)
                    * tf.expand_dims(self.placeholders.is_spatial_action_available, axis=1)
            )
            # non-available actions get log(1e-10) value but that's ok because it's never used
            action_id_log_probs = logclip(action_id_probs)

            self.value_estimate = self.value_estimate
            self.action_id_probs = action_id_probs
            self.spatial_action_probs = spatial_action_probs
            self.action_id_log_probs = action_id_log_probs
            self.spatial_action_log_probs = spatial_action_log_probs

        selected_spatial_action_flat = ravel_index_pairs(
            self.placeholders.selected_spatial_action, self.spatial_dim
        )

        selected_log_probs = self._get_select_action_probs(selected_spatial_action_flat)

        # maximum is to avoid 0 / 0 because this is used to calculate some means
        sum_spatial_action_available = tf.maximum(
            1e-10, tf.reduce_sum(self.placeholders.is_spatial_action_available)
        )

        neg_entropy_spatial = tf.reduce_sum(
            self.spatial_action_probs * self.spatial_action_log_probs
        ) / sum_spatial_action_available
        neg_entropy_action_id = tf.reduce_mean(tf.reduce_sum(
            self.action_id_probs * self.action_id_log_probs, axis=1
        ))
        
        # Sample now actions from the corresponding dstrs defined by the policy network theta
        self.sampled_action_id = weighted_random_sample(self.action_id_probs)
        self.sampled_spatial_action = weighted_random_sample(self.spatial_action_probs)
        
        self.value_estimate = self.value_estimate
        policy_loss = -tf.reduce_mean(selected_log_probs.total * self.placeholders.advantage)

        value_loss = tf.losses.mean_squared_error(
            self.placeholders.value_target, self.value_estimate)

        loss = (
            policy_loss
            + value_loss * self.loss_value_weight
            + neg_entropy_spatial * self.entropy_weight_spatial
            + neg_entropy_action_id * self.entropy_weight_action_id
        )

        self.train_op = layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer=self.optimiser,
            clip_gradients=self.max_gradient_norm,
            summaries=OPTIMIZER_SUMMARIES,
            learning_rate=None,
            name="train_op"
        )

        self._scalar_summary("value/estimate", tf.reduce_mean(self.value_estimate))
        self._scalar_summary("value/target", tf.reduce_mean(self.placeholders.value_target))
        self._scalar_summary("action/is_spatial_action_available",
                             tf.reduce_mean(self.placeholders.is_spatial_action_available))
        self._scalar_summary("action/selected_id_log_prob",
                             tf.reduce_mean(selected_log_probs.action_id))
        self._scalar_summary("loss/policy", policy_loss)
        self._scalar_summary("loss/value", value_loss)
        self._scalar_summary("loss/neg_entropy_spatial", neg_entropy_spatial)
        self._scalar_summary("loss/neg_entropy_action_id", neg_entropy_action_id)
        self._scalar_summary("loss/total", loss)
        self._scalar_summary("value/advantage", tf.reduce_mean(self.placeholders.advantage))
        self._scalar_summary("action/selected_total_log_prob",
                             tf.reduce_mean(selected_log_probs.total))
        self._scalar_summary("action/selected_spatial_log_prob",
                             tf.reduce_sum(selected_log_probs.spatial) / sum_spatial_action_available)

        self.init_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver(max_to_keep=2)
        self.all_summary_op = tf.summary.merge_all(tf.GraphKeys.SUMMARIES)
        self.scalar_summary_op = tf.summary.merge(tf.get_collection(self._scalar_summary_key))
Beispiel #23
0
def seq2seq(mode, features, labels, params):
    """Build a GRU encoder / attention-GRU decoder graph and return its EstimatorSpec.

    The decoder is instantiated twice over the same ``'decode'`` variable
    scope: once driven by a ``TrainingHelper`` (its logits feed the loss) and
    once by a ``GreedyEmbeddingHelper`` (its sampled ids are the predictions).
    The same graph is built regardless of ``mode``.

    Args:
        mode: Forwarded unchanged to ``tf.estimator.EstimatorSpec``.
        features: Mapping holding the ``'input'`` and ``'output'`` 2-D
            token-id tensors.  ``'output'`` is read unconditionally.
        labels: Unused; the targets come from ``features['output']``.
        params: Dict with ``'vocab_size'``, ``'embed_dim'``, ``'num_units'``,
            ``'input_max_length'`` and ``'output_max_length'``; optionally
            ``'optimizer'`` (default ``'Adam'``) and ``'learning_rate'``
            (default ``0.001``).

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the greedy decoder's
        ``sample_id`` as predictions, the sequence loss and the train op.
    """
    vocab_size = params['vocab_size']
    embed_dim = params['embed_dim']
    num_units = params['num_units']
    # Not used below; still read so a params dict without this key fails here.
    input_max_length = params['input_max_length']
    output_max_length = params['output_max_length']

    inp = features['input']
    output_tensor = features['output']
    batch_size = tf.shape(inp)[0]
    # One GO_TOKEN per batch row, prepended to the targets to form the
    # decoder's training inputs.
    start_tokens = tf.zeros([batch_size], dtype=tf.int64) + GO_TOKEN
    train_output = tf.concat([tf.expand_dims(start_tokens, 1), output_tensor], 1)
    # Lengths are the per-row counts of ids different from 1
    # (id 1 is presumably the padding/end id -- TODO confirm against the data pipeline).
    input_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(inp, 1)), 1)
    output_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(train_output, 1)), 1)

    # Encoder and decoder inputs share a single embedding matrix.
    input_embed = layers.embed_sequence(
        inp, vocab_size=vocab_size, embed_dim=embed_dim, scope='embed')
    output_embed = layers.embed_sequence(
        train_output, vocab_size=vocab_size, embed_dim=embed_dim,
        scope='embed', reuse=True)
    # Fetch that matrix so the greedy helper can embed the ids it samples.
    with tf.variable_scope('embed', reuse=True):
        embeddings = tf.get_variable('embeddings')

    encoder_cell = tf.contrib.rnn.GRUCell(num_units=num_units)
    # The encoder's final state is discarded: the decoder starts from a zero state.
    encoder_outputs, _ = tf.nn.dynamic_rnn(
        encoder_cell, input_embed, dtype=tf.float32)

    train_helper = tf.contrib.seq2seq.TrainingHelper(output_embed, output_lengths)
    # NOTE(review): end_token is the literal 8 here -- confirm it matches the vocabulary.
    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embeddings, start_tokens=tf.to_int32(start_tokens), end_token=8)

    def decode(helper, scope, reuse=None):
        """Run one attention decoder under ``scope`` and return its output struct."""
        with tf.variable_scope(scope, reuse=reuse):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=num_units, memory=encoder_outputs,
                memory_sequence_length=input_lengths)
            decoder_cell = tf.contrib.rnn.GRUCell(num_units=num_units)
            # Floor division keeps the layer size an int; ``num_units / 2``
            # is a float under Python 3.
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                decoder_cell, attention_mechanism,
                attention_layer_size=num_units // 2)
            out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                attn_cell, vocab_size, reuse=reuse
            )
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=out_cell, helper=helper,
                initial_state=out_cell.zero_state(
                    dtype=tf.float32, batch_size=batch_size))
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, output_time_major=False,
                impute_finished=True, maximum_iterations=output_max_length
            )
            # dynamic_decode returns a tuple; element 0 is the decoder output.
            return outputs[0]

    train_outputs = decode(train_helper, 'decode')
    pred_outputs = decode(pred_helper, 'decode', reuse=True)

    # Named identity ops expose the first batch row under fixed tensor names.
    tf.identity(train_outputs.sample_id[0], name='train_pred')
    # Weight of target step t is 1 when decoder input t is not id 1.
    weights = tf.to_float(tf.not_equal(train_output[:, :-1], 1))
    loss = tf.contrib.seq2seq.sequence_loss(
        train_outputs.rnn_output, output_tensor, weights=weights)
    train_op = layers.optimize_loss(
        loss, tf.train.get_global_step(),
        optimizer=params.get('optimizer', 'Adam'),
        learning_rate=params.get('learning_rate', 0.001),
        summaries=['loss', 'learning_rate'])

    tf.identity(pred_outputs.sample_id[0], name='predictions')

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_outputs.sample_id,
        loss=loss,
        train_op=train_op
    )
Beispiel #24
0
def seq2seq(features, labels, mode, params):
    """Estimator ``model_fn``: bidirectional RNN encoder plus a project-defined decoder.

    The encoder is a bidirectional GRU or LSTM (chosen by
    ``params['cell_type']``, case-insensitive) with optional input dropout.
    Decoders are built by ``set_decoder.setting_decoder`` (defined outside
    this function) under the ``'decode'`` scope.

    Args:
        features: Mapping with an ``'input'`` token-id tensor and, outside
            PREDICT mode, an ``'output'`` token-id tensor.
        labels: Unused; targets are read from ``features['output']``.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict with ``'vocab_size'``, ``'embed_dim'``, ``'num_units'``,
            ``'input_max_length'``, ``'output_max_length'``, ``'dropout'``,
            ``'attention_mechanism_name'``, ``'cell_type'`` and
            ``'beam_width'``; optionally ``'optimizer'`` and
            ``'learning_rate'``.

    Returns:
        A ``tf.estimator.EstimatorSpec``.  In PREDICT mode it carries only
        predictions (``predicted_ids`` when ``beam_width > 0``, otherwise
        ``sample_id``); in every other mode it carries ``sample_id``
        predictions, the loss and the train op.

    Raises:
        ValueError: If ``params['cell_type']`` is neither GRU nor LSTM.
    """
    vocab_size = params['vocab_size']
    embed_dim = params['embed_dim']
    num_units = params['num_units']
    input_max_length = params['input_max_length']  # read but not used below
    output_max_length = params['output_max_length']
    dropout = params['dropout']
    attention_mechanism_name = params['attention_mechanism_name']
    cell_type = params['cell_type']
    beam_width = params['beam_width']

    source_ids = features['input']
    batch_size = tf.shape(source_ids)[0]
    # Decoding starts from token id 0 for every batch row.
    start_tokens = tf.zeros([batch_size], dtype=tf.int64)
    # Per-row count of ids different from 1.
    is_real_input = tf.to_int32(tf.not_equal(source_ids, 1))
    input_lengths = tf.reduce_sum(is_real_input, 1)

    input_embed = layers.embed_sequence(
        source_ids, vocab_size=vocab_size, embed_dim=embed_dim, scope='embed')

    # Grab the embedding matrix created above for the greedy helper.
    with tf.variable_scope('embed', reuse=True):
        embeddings = tf.get_variable('embeddings')

    cell_kind = cell_type.upper()
    if cell_kind == 'GRU':
        cell_cls = tf.contrib.rnn.GRUCell
    elif cell_kind == 'LSTM':
        cell_cls = tf.contrib.rnn.BasicLSTMCell
    else:
        raise ValueError("The Memory Cell unit %s provided is not valid " %
                         cell_type)
    fw_cell = cell_cls(num_units=num_units)
    bw_cell = cell_cls(num_units=num_units)

    if dropout > 0.0:
        print("  %s, dropout=%g " % (type(fw_cell).__name__, dropout))
        keep_prob = 1.0 - dropout
        fw_cell = tf.contrib.rnn.DropoutWrapper(
            cell=fw_cell, input_keep_prob=keep_prob)
        bw_cell = tf.contrib.rnn.DropoutWrapper(
            cell=bw_cell, input_keep_prob=keep_prob)

    bd_encoder_outputs, bd_encoder_final_state = (
        tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_cell, cell_bw=bw_cell,
            inputs=input_embed, dtype=tf.float32))

    # Join the forward and backward halves along the last axis.
    encoder_outputs = tf.concat(bd_encoder_outputs, -1)
    encoder_final_state = tf.concat(bd_encoder_final_state, -1)

    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embeddings,
        start_tokens=tf.to_int32(start_tokens),
        end_token=END_TOKEN)

    def build_decoder(helper, reuse):
        """Call the project decoder factory with the shared arguments."""
        return set_decoder.setting_decoder(
            helper, 'decode', num_units, encoder_outputs,
            encoder_final_state, input_lengths, vocab_size, batch_size,
            output_max_length, attention_mechanism_name, cell_type,
            embeddings, start_tokens, END_TOKEN, beam_width, reuse=reuse)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Inference only needs the prediction decoder.
        pred_outputs = build_decoder(pred_helper, reuse=False)
        if beam_width > 0:
            predictions = pred_outputs.predicted_ids
            tf.identity(predictions, name='predictions')
        else:
            predictions = pred_outputs.sample_id
            tf.identity(predictions[0], name='predictions')
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Every other mode builds the training decoder first, then the
    # prediction decoder, which reuses its variables.
    target_ids = features['output']
    decoder_inputs = tf.concat(
        [tf.expand_dims(start_tokens, 1), target_ids], 1)
    is_real_output = tf.to_int32(tf.not_equal(decoder_inputs, 1))
    output_lengths = tf.reduce_sum(is_real_output, 1)

    output_embed = layers.embed_sequence(
        decoder_inputs, vocab_size=vocab_size, embed_dim=embed_dim,
        scope='embed', reuse=True)

    train_helper = tf.contrib.seq2seq.TrainingHelper(
        output_embed, output_lengths)

    train_outputs = build_decoder(train_helper, reuse=None)
    pred_outputs = build_decoder(pred_helper, reuse=True)

    tf.identity(train_outputs.sample_id[0], name='train_pred')
    # Target step t is weighted 1 when decoder input t is not id 1.
    weights = tf.to_float(tf.not_equal(decoder_inputs[:, :-1], 1))

    loss = tf.contrib.seq2seq.sequence_loss(
        train_outputs.rnn_output, target_ids, weights=weights)
    train_op = layers.optimize_loss(
        loss,
        tf.train.get_global_step(),
        optimizer=params.get('optimizer', 'Adam'),
        learning_rate=params.get('learning_rate', 0.001),
        summaries=['loss', 'learning_rate'])

    # This branch always reports sample_id, whatever beam_width is.
    tf.identity(pred_outputs.sample_id[0], name='predictions')
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=pred_outputs.sample_id,
                                      loss=loss,
                                      train_op=train_op)
    def seq2seq(self, features, labels, params):
        """Build an IndRNN/IndyLSTM encoder with an attention IndRNN decoder.

        The decoder is built twice over the ``'decode'`` variable scope: a
        training pass (output dropout keep probability 0.8) and a greedy
        prediction pass that reuses the variables without dropout.

        Reads ``self.START_TOKEN``, ``self.END_TOKEN`` and
        ``self.useScheduled``.

        Args:
            features: Source token-id tensor (2-D; its ``.name`` is printed).
            labels: Target token-id tensor (its ``.name`` is printed).
            params: Dict with ``'vocab_size'``, ``'embed_dim'``,
                ``'num_units'`` and ``'output_max_length'``; optionally
                ``'optimizer'`` (default ``'Adam'``) and ``'learning_rate'``
                (default ``0.001``).

        Returns:
            Tuple ``(train_op, predicted_ids, loss)`` where ``predicted_ids``
            is the greedy decoder's ``sample_id``.
        """
        vocab_size = params['vocab_size']
        embed_dim = params['embed_dim']
        num_units = params['num_units']
        output_max_length = params['output_max_length']

        # Print the names of the incoming tensors.
        print("获得输入张量的名字", features.name, labels.name)

        batch_size = tf.shape(features)[0]
        # Original author's note: tf.zeros([batch_size], dtype=tf.int32) could
        # be used instead (equivalent only if START_TOKEN is 0).
        start_tokens = tf.tile([self.START_TOKEN], [batch_size])
        # Prepend the start marker to the targets.
        train_output = tf.concat([tf.expand_dims(start_tokens, 1), labels], 1)

        # Lengths are the per-row counts of ids different from END_TOKEN.
        input_lengths = tf.reduce_sum(
            tf.cast(tf.not_equal(features, self.END_TOKEN), tf.int32),
            1,
            name="len")
        output_lengths = tf.reduce_sum(
            tf.cast(tf.not_equal(train_output, self.END_TOKEN), tf.int32),
            1,
            name="outlen")

        # Encoder and decoder inputs share one embedding matrix.
        input_embed = layers.embed_sequence(features,
                                            vocab_size=vocab_size,
                                            embed_dim=embed_dim,
                                            scope='embed')
        output_embed = layers.embed_sequence(train_output,
                                             vocab_size=vocab_size,
                                             embed_dim=embed_dim,
                                             scope='embed',
                                             reuse=True)

        with tf.variable_scope('embed', reuse=True):
            embeddings = tf.get_variable('embeddings')

        # Two-layer encoder; both cells are pinned to the first GPU.
        ind_cell = tf.nn.rnn_cell.DeviceWrapper(
            tf.contrib.rnn.IndRNNCell(num_units=num_units), "/device:GPU:0")
        indy_lstm_cell = tf.nn.rnn_cell.DeviceWrapper(
            tf.contrib.rnn.IndyLSTMCell(num_units=num_units), "/device:GPU:0")
        multi_cell = tf.nn.rnn_cell.MultiRNNCell([ind_cell, indy_lstm_cell])
        # The encoder's final state is discarded: the decoder starts from a
        # zero state.
        encoder_outputs, _ = tf.nn.dynamic_rnn(
            multi_cell,
            input_embed,
            sequence_length=input_lengths,
            dtype=tf.float32)

        # Both training helpers are told every sequence has length
        # output_max_length; output_lengths is only used for the loss mask.
        full_lengths = tf.tile([output_max_length], [batch_size])
        if self.useScheduled:
            train_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                output_embed, full_lengths, embeddings, 0.3)
        else:
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                output_embed, full_lengths)

        pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings,
            start_tokens=tf.tile([self.START_TOKEN], [batch_size]),
            end_token=self.END_TOKEN)

        def decode(helper, scope, reuse=None):
            """Run one attention decoder under ``scope`` and return its output."""
            with tf.variable_scope(scope, reuse=reuse):
                # Attention model over the encoder outputs.
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    num_units=num_units,
                    memory=encoder_outputs,
                    memory_sequence_length=input_lengths)

                cell = tf.contrib.rnn.IndRNNCell(num_units=num_units)
                # reuse=None marks the training pass, which gets dropout;
                # the reusing (prediction) pass keeps every activation.
                keep_prob = 0.8 if reuse is None else 1
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=keep_prob)

                # Floor division keeps the layer size an int; ``num_units / 2``
                # is a float under Python 3.
                attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell,
                    attention_mechanism,
                    attention_layer_size=num_units // 2)

                out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell,
                                                                  vocab_size,
                                                                  reuse=reuse)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=out_cell,
                    helper=helper,
                    initial_state=out_cell.zero_state(dtype=tf.float32,
                                                      batch_size=batch_size))

                outputs = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=output_max_length)
                # dynamic_decode returns a tuple; element 0 is the decoder output.
                return outputs[0]

        train_outputs = decode(train_helper, 'decode')
        pred_outputs = decode(pred_helper, 'decode', reuse=True)

        # Mask: 1.0 for the first output_lengths steps of each row, 0.0 after.
        masks = tf.sequence_mask(output_lengths,
                                 output_max_length,
                                 dtype=tf.float32,
                                 name="masks")

        loss = tf.contrib.seq2seq.sequence_loss(train_outputs.rnn_output,
                                                labels,
                                                weights=masks)

        train_op = layers.optimize_loss(
            loss,
            tf.train.get_global_step(),
            optimizer=params.get('optimizer', 'Adam'),
            learning_rate=params.get('learning_rate', 0.001),
            summaries=['loss', 'learning_rate'])

        return train_op, pred_outputs.sample_id, loss
    def fast_text_model_fn(self, features, labels, mode, params):
        """Estimator ``model_fn`` for a bag-of-embeddings text classifier.

        Text is whitespace-split, mapped to ids through the vocabulary file,
        padded/cropped to ``self.MAX_LEN``, embedded, reduced over the
        sequence axis and passed through two dropout-regularised dense layers
        to produce ``self.TARGET_SIZE`` logits.

        Reads ``self.VOCAB_FILE``, ``self.FEATURE_COL``, ``self.PAD_WORD``,
        ``self.MAX_LEN``, ``self.VOCAB_LEN``, ``self.EMBED_DIM``,
        ``self.TARGET_SIZE`` and ``self.streaming_f1``.

        Args:
            features: Mapping whose ``self.FEATURE_COL`` entry is the string
                tensor of input texts.
            labels: Class-index tensor; not used in PREDICT mode.
            mode: A ``tf.estimator.ModeKeys`` value.
            params: Unused.

        Returns:
            A ``tf.estimator.EstimatorSpec`` with predictions and export
            outputs (PREDICT), loss and train op (TRAIN), or loss and eval
            metrics (EVAL).

        Raises:
            ValueError: If ``mode`` is not PREDICT, TRAIN or EVAL.
        """
        vocab_table = lookup.index_table_from_file(
            vocabulary_file=self.VOCAB_FILE,
            num_oov_buckets=1,
            default_value=-1)
        text = features[self.FEATURE_COL]
        words = tf.string_split(text)
        dense_words = tf.sparse_tensor_to_dense(words,
                                                default_value=self.PAD_WORD)
        word_ids = vocab_table.lookup(dense_words)
        # Append MAX_LEN zeros to every row, then crop to exactly MAX_LEN
        # columns, so every document ends up with the same length.
        padding = tf.constant([[0, 0], [0, self.MAX_LEN]])
        word_ids_padded = tf.pad(word_ids, padding)
        word_id_vector = tf.slice(word_ids_padded, [0, 0], [-1, self.MAX_LEN])

        # The Keras Dropout layers below are only active in the training phase.
        tf.keras.backend.set_learning_phase(
            mode == tf.estimator.ModeKeys.TRAIN)

        with tf.name_scope('embedding'):
            embedding_vectors = layers.embed_sequence(
                word_id_vector,
                vocab_size=self.VOCAB_LEN,
                embed_dim=self.EMBED_DIM,
                initializer=layers.xavier_initializer(seed=42))
            tf.logging.info('Word Vectors = {}'.format(embedding_vectors))

        with tf.name_scope('fast_text'):
            # NOTE(review): despite the name this is a sum over axis 1, not a
            # mean -- confirm that is intended.
            average_vectors = tf.reduce_sum(embedding_vectors, axis=1)
            tf.logging.info(
                'Average Word Vectors = {}'.format(average_vectors))

        with tf.name_scope('hidden_layer'):
            fc1 = tf.keras.layers.Dense(1024,
                                        activation='relu')(average_vectors)
            d1 = tf.keras.layers.Dropout(0.5)(fc1)
            # Floor division keeps the layer width an int under Python 3.
            fc2 = tf.keras.layers.Dense(self.EMBED_DIM // 2,
                                        activation='relu')(d1)
            d2 = tf.keras.layers.Dropout(0.5)(fc2)
            tf.logging.info('Hidden Layer = {}'.format(d2))

        with tf.name_scope('output'):
            logits = tf.keras.layers.Dense(self.TARGET_SIZE,
                                           activation=None)(d2)
            tf.logging.info('Logits Layer = {}'.format(logits))

        probabilities = tf.nn.softmax(logits)
        predicted_indices = tf.argmax(probabilities, axis=1)

        tf.summary.histogram('fasttext', average_vectors)
        tf.summary.histogram('softmax', probabilities)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'class': predicted_indices,
                'probabilities': probabilities
            }

            exported_outputs = {
                'prediction': tf.estimator.export.PredictOutput(predictions)
            }
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions,
                                              export_outputs=exported_outputs)

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)
        tf.summary.scalar('loss', loss)
        # Batch accuracy, recorded as a summary only.
        acc = tf.equal(predicted_indices, labels)
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

        tf.summary.scalar('acc', acc)

        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer()
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Previously an unexpected mode reached the final return with the
        # metrics dict unbound; fail with an explicit error instead.
        if mode != tf.estimator.ModeKeys.EVAL:
            raise ValueError('Unsupported mode: {}'.format(mode))

        eval_metrics_ops = {
            'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=predicted_indices),
            'precision':
            tf.metrics.precision(labels=labels,
                                 predictions=predicted_indices),
            'recall':
            tf.metrics.recall(labels=labels,
                              predictions=predicted_indices),
            'f1_score':
            self.streaming_f1(labels=labels,
                              predictions=predicted_indices,
                              n_classes=self.TARGET_SIZE)
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metrics_ops)
    def __init__(self, is_testing):
        super().__init__()
        self.is_testing = is_testing
        with tf.Graph().as_default(), tf.device('/cpu:0'):
            regularizer = layers.l2_regularizer(1e-4)
            self.name = "%s %s" % (self.revision, self.message)
            self.train, self.valid, self.test = self.encode_data(sudoku())

            print("Building graph...")
            self.session = tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True))
            self.global_step = tf.Variable(initial_value=0, trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=2e-4)
            self.mode = tf.placeholder(tf.string)

            edges = self.sudoku_edges()
            edges = [(i + (b * 81), j + (b * 81))
                     for b in range(self.batch_size) for i, j in edges]
            ridx = [edges.index((j, i)) for i, j in edges]
            edge_indices = tf.constant(edges, tf.int32)
            n_edges = tf.shape(edge_indices)[0]

            positions = tf.constant([[(i, j) for i in range(9)
                                      for j in range(9)]
                                     for b in range(self.batch_size)],
                                    tf.int32)  # (bs, 81, 2)
            rows = layers.embed_sequence(positions[:, :, 0],
                                         9,
                                         self.emb_size,
                                         scope='row-embeddings',
                                         unique=True)  # bs, 81, emb_size
            cols = layers.embed_sequence(positions[:, :, 1],
                                         9,
                                         self.emb_size,
                                         scope='cols-embeddings',
                                         unique=True)  # bs, 81, emb_size

            def avg_n(x):
                return tf.reduce_mean(tf.stack(x, axis=0), axis=0)

            towers = []
            with tf.variable_scope(tf.get_variable_scope()):
                for device_nr, device in enumerate(self.devices):
                    with tf.device('/cpu:0'):

                        if self.is_testing:
                            (quizzes, answers
                             ), edge_keep_prob = self.test.get_next(), 1.0
                        else:
                            (quizzes, answers), edge_keep_prob = tf.cond(
                                tf.equal(self.mode, "train"),
                                true_fn=lambda:
                                (self.train.get_next(), self.edge_keep_prob),
                                false_fn=lambda: (self.valid.get_next(), 1.0))

                        x = layers.embed_sequence(
                            quizzes,
                            10,
                            self.emb_size,
                            scope='nr-embeddings',
                            unique=True)  # bs, 81, emb_size
                        x = tf.concat([x, rows, cols], axis=2)
                        x = tf.reshape(x, (-1, 3 * self.emb_size))

                    with tf.device(device), tf.name_scope("device-%s" %
                                                          device_nr):

                        def mlp(x, scope, n_out):
                            with tf.variable_scope(scope):
                                for i in range(3):
                                    x = layers.fully_connected(
                                        x,
                                        n_out,
                                        weights_regularizer=regularizer)
                                return layers.fully_connected(
                                    x,
                                    n_out,
                                    weights_regularizer=regularizer,
                                    activation_fn=None)

                        x = mlp(x, 'C1', self.n_hidden)
                        dependents = tf.zeros((n_edges, 10))
                        outputs = []
                        log_losses = []
                        with tf.variable_scope('steps'):
                            for step in range(self.n_steps):
                                # M_F = c2(c1(x, p), c1(x, N_F\p), d_pF)
                                # d_pF = sum_{q \in N_F\p} (M_F)
                                # p(y_p|x) = softmax(sum(M_F))

                                logits, messages = message_passing(
                                    x, edge_indices, dependents,
                                    lambda x: mlp(x, 'C2', 10))
                                dependents = tf.gather(
                                    logits, edge_indices[:, 0]) - tf.gather(
                                        messages, ridx)
                                out = tf.reshape(logits, (-1, 81, 10))
                                outputs.append(out)
                                log_losses.append(
                                    tf.reduce_mean(
                                        tf.nn.
                                        sparse_softmax_cross_entropy_with_logits(
                                            labels=answers, logits=out)))
                                tf.get_variable_scope().reuse_variables()

                        reg_loss = sum(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        loss = log_losses[-1] + reg_loss

                        towers.append({
                            'loss':
                            loss,
                            'grads':
                            [(tf.clip_by_value(g, -10.0, 10.0), v)
                             for g, v in self.optimizer.compute_gradients(loss)
                             ],
                            'log_losses':
                            tf.stack(log_losses),  # (n_steps, 1)
                            'quizzes':
                            quizzes,  # (bs, 81, 10)
                            'answers':
                            answers,  # (bs, 81, 10)
                            'outputs':
                            tf.stack(outputs)  # n_steps, bs, 81, 10
                        })

                        tf.get_variable_scope().reuse_variables()

            self.loss = avg_n([t['loss'] for t in towers])
            self.out = tf.concat([t['outputs'] for t in towers],
                                 axis=1)  # n_steps, bs, 81, 10
            self.predicted = tf.cast(tf.argmax(self.out, axis=3), tf.int32)
            self.answers = tf.concat([t['answers'] for t in towers], axis=0)
            self.quizzes = tf.concat([t['quizzes'] for t in towers], axis=0)

            tf.summary.scalar('losses/total', self.loss)
            tf.summary.scalar('losses/reg', reg_loss)
            log_losses = avg_n([t['log_losses'] for t in towers])

            for step in range(self.n_steps):
                equal = tf.equal(self.answers, self.predicted[step])

                digit_acc = tf.reduce_mean(tf.to_float(equal))
                tf.summary.scalar('steps/%d/digit-acc' % step, digit_acc)

                puzzle_acc = tf.reduce_mean(
                    tf.to_float(tf.reduce_all(equal, axis=1)))
                tf.summary.scalar('steps/%d/puzzle-acc' % step, puzzle_acc)

                tf.summary.scalar('steps/%d/losses/log' % step,
                                  log_losses[step])

            avg_gradients = util.average_gradients(
                [t['grads'] for t in towers])
            self.train_step = self.optimizer.apply_gradients(
                avg_gradients, global_step=self.global_step)

            self.session.run(tf.global_variables_initializer())
            self.saver = tf.train.Saver()
            util.print_vars(tf.trainable_variables())

            self.train_writer = tf.summary.FileWriter(
                self.tensorboard_dir + '/sudoku/%s/train/%s' %
                (self.revision, self.name), self.session.graph)
            self.test_writer = tf.summary.FileWriter(
                self.tensorboard_dir + '/sudoku/%s/test/%s' %
                (self.revision, self.name), self.session.graph)
            self.summaries = tf.summary.merge_all()
Beispiel #28
0
    def __init__(self, config, num_words, num_answers, reuse=False, device=''):
        """Assemble the CLEVR question-answering graph.

        Creates the input placeholders, encodes the question with an RNN,
        extracts image features, passes them through a FiLM stack driven by
        the question state, pools them with attention and classifies the
        pooled features. The loss, softmax, prediction and accuracy tensors
        are stored as attributes.

        :param config: nested mapping of hyper-parameters; the keys read here
            are "dropout_keep_prob", "question", "image", "film_block",
            "pooling" and "classifier"
        :param num_words: vocabulary size given to the word embedding
        :param num_answers: number of outputs of the final layer
        :param reuse: forwarded to the variable scopes and layers built here
        :param device: forwarded to ``ResnetModel.__init__``
        """
        ResnetModel.__init__(self, "clevr", device=device)

        with tf.variable_scope(self.scope_name, reuse=reuse):
            # The leading dimension of every placeholder is left unspecified.
            batch_size = None
            self._is_training = tf.placeholder(tf.bool, name="is_training")

            # Configured keep probability while training, 1.0 otherwise.
            train_keep_prob = float(config["dropout_keep_prob"])
            dropout_keep = tf.cond(
                self._is_training,
                lambda: tf.constant(train_keep_prob),
                lambda: tf.constant(1.0))

            # ---------------------------------------------------------------
            # Question encoder
            # ---------------------------------------------------------------
            self._question = tf.placeholder(
                tf.int32, [batch_size, None], name='question')
            self._seq_length = tf.placeholder(
                tf.int32, [batch_size], name='seq_length')
            self._answer = tf.placeholder(
                tf.int64, [batch_size], name='answer')

            question_cfg = config["question"]
            word_emb = tfc_layers.embed_sequence(
                ids=self._question,
                vocab_size=num_words,
                embed_dim=question_cfg["word_embedding_dim"],
                scope="word_embedding",
                reuse=reuse)

            if question_cfg['glove']:
                # Externally supplied 300-d vectors are appended to the
                # learned embedding along the feature axis.
                self._glove = tf.placeholder(
                    tf.float32, [None, None, 300], name="glove")
                word_emb = tf.concat([word_emb, self._glove], axis=2)

            word_emb = tf.nn.dropout(word_emb, dropout_keep)

            _, question_state = rnn.rnn_factory(
                inputs=word_emb,
                seq_length=self._seq_length,
                cell=question_cfg["cell"],
                num_hidden=question_cfg["rnn_state_size"],
                bidirectional=question_cfg["bidirectional"],
                max_pool=question_cfg["max_pool"],
                layer_norm=question_cfg["layer_norm"],
                reuse=reuse)

            question_state = tf.nn.dropout(question_state, dropout_keep)

            # ---------------------------------------------------------------
            # Image pipeline
            # ---------------------------------------------------------------
            image_shape = [batch_size] + config['image']["dim"]
            self._image = tf.placeholder(tf.float32, image_shape, name='image')

            image_features = get_image_features(
                image=self._image,
                is_training=self._is_training,
                config=config['image'])

            with tf.variable_scope("image_film_stack", reuse=reuse):
                film_stack = FiLM_Stack(
                    image=image_features,
                    film_input=question_state,
                    is_training=self._is_training,
                    config=config["film_block"],
                    reuse=reuse)

                image_features = film_stack.get()

            # Pool the image features, conditioned on the question state.
            with tf.variable_scope("image_pooling"):
                pooled_features = get_attention(
                    image_features, question_state,
                    is_training=self._is_training,
                    config=config["pooling"],
                    dropout_keep=dropout_keep,
                    reuse=reuse)

            with tf.variable_scope("classifier"):
                batch_norm_params = {
                    "center": True,
                    "scale": True,
                    "decay": 0.9,
                    "is_training": self._is_training,
                    "reuse": reuse,
                }
                self.hidden_state = tfc_layers.fully_connected(
                    pooled_features,
                    num_outputs=config["classifier"]["no_mlp_units"],
                    normalizer_fn=tfc_layers.batch_norm,
                    normalizer_params=batch_norm_params,
                    activation_fn=tf.nn.relu,
                    reuse=reuse,
                    scope="classifier_hidden_layer")

                # Linear layer producing the answer logits.
                self.out = tfc_layers.fully_connected(
                    self.hidden_state,
                    num_outputs=num_answers,
                    activation_fn=None,
                    reuse=reuse,
                    scope="classifier_softmax_layer")

            # ---------------------------------------------------------------
            # Loss and metrics
            # ---------------------------------------------------------------
            self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.out,
                labels=self._answer,
                name='cross_entropy')
            self.loss = tf.reduce_mean(self.cross_entropy)

            self.softmax = tf.nn.softmax(self.out, name='answer_prob')
            # argmax is taken on the logits directly; the softmax is not needed.
            self.prediction = tf.argmax(
                self.out, axis=1, name='predicted_answer')

            with tf.variable_scope('accuracy'):
                is_correct = tf.equal(self.prediction, self._answer)
                self.accuracy = tf.reduce_mean(
                    tf.cast(is_correct, tf.float32))

            tf.summary.scalar('accuracy', self.accuracy)

            print('Model... build!')
Beispiel #29
0
def seq2seq(mode, features, labels, params):
    """Build a GRU encoder / attention decoder and return its EstimatorSpec.

    Token id 0 is used as the decoder start token and token id 1 is used both
    as the end token for greedy decoding and as the value excluded when
    counting sequence lengths and computing loss weights.

    Args:
        mode: a ``tf.estimator.ModeKeys`` value, forwarded to the spec.
        features: mapping with integer id tensors under 'input' and 'output'.
        labels: not used; the targets are read from ``features['output']``.
        params: mapping with 'vocab_size', 'embed_dim', 'num_units' and
            'output_max_length'; 'optimizer' (default 'Adam') and
            'learning_rate' (default 0.001) are optional.

    Returns:
        A ``tf.estimator.EstimatorSpec`` whose predictions are the greedy
        decoder's sample ids. The loss and train op are built for every mode.
    """
    vocab_size = params['vocab_size']
    embed_dim = params['embed_dim']
    num_units = params['num_units']
    output_max_length = params['output_max_length']

    inp = features['input']
    output = features['output']
    batch_size = tf.shape(inp)[0]

    # Decoder input is the target sequence with a start token (id 0) prepended.
    start_tokens = tf.zeros([batch_size], dtype=tf.int64)
    train_output = tf.concat([tf.expand_dims(start_tokens, 1), output], 1)

    # Lengths count every position whose id differs from 1.
    input_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(inp, 1)), 1)
    output_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(train_output, 1)),
                                   1)

    # Encoder and decoder inputs share one embedding matrix (scope 'embed').
    input_embed = layers.embed_sequence(inp,
                                        vocab_size=vocab_size,
                                        embed_dim=embed_dim,
                                        scope='embed')
    output_embed = layers.embed_sequence(train_output,
                                         vocab_size=vocab_size,
                                         embed_dim=embed_dim,
                                         scope='embed',
                                         reuse=True)
    # Fetch the shared matrix so the greedy helper can embed its own samples.
    with tf.variable_scope('embed', reuse=True):
        embeddings = tf.get_variable('embeddings')

    cell = tf.contrib.rnn.GRUCell(num_units=num_units)
    encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(cell,
                                                             input_embed,
                                                             dtype=tf.float32)

    train_helper = tf.contrib.seq2seq.TrainingHelper(output_embed,
                                                     output_lengths)
    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embeddings, start_tokens=tf.to_int32(start_tokens), end_token=1)

    def decode(helper, scope, reuse=None):
        """Run the attention decoder with ``helper`` and return its outputs."""
        with tf.variable_scope(scope, reuse=reuse):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=encoder_outputs,
                memory_sequence_length=input_lengths)
            cell = tf.contrib.rnn.GRUCell(num_units=num_units)
            # Floor division keeps the layer size an integer under Python 3,
            # where `/` would yield a float.
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell, attention_mechanism, attention_layer_size=num_units // 2)
            out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell,
                                                              vocab_size,
                                                              reuse=reuse)
            # The decoder starts from a zero state; the encoder's final state
            # is not fed in.
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=out_cell,
                helper=helper,
                initial_state=out_cell.zero_state(dtype=tf.float32,
                                                  batch_size=batch_size))
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=output_max_length)
            return outputs[0]

    # Both decoders live in the same scope so they share variables.
    train_outputs = decode(train_helper, 'decode')
    pred_outputs = decode(pred_helper, 'decode', reuse=True)

    # Named handle on the first example's teacher-forced samples.
    tf.identity(train_outputs.sample_id[0], name='train_pred')

    # Loss weights are 0 wherever the decoder input token equals 1.
    weights = tf.to_float(tf.not_equal(train_output[:, :-1], 1))
    loss = tf.contrib.seq2seq.sequence_loss(train_outputs.rnn_output,
                                            output,
                                            weights=weights)
    train_op = layers.optimize_loss(loss,
                                    tf.train.get_global_step(),
                                    optimizer=params.get('optimizer', 'Adam'),
                                    learning_rate=params.get(
                                        'learning_rate', 0.001),
                                    summaries=['loss', 'learning_rate'])

    # Named handle on the first example's greedy predictions.
    tf.identity(pred_outputs.sample_id[0], name='predictions')
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=pred_outputs.sample_id,
                                      loss=loss,
                                      train_op=train_op)
    def fast_text_model_fn(self, features, labels, mode, params):
        """Estimator model function for a bag-of-embeddings text classifier.

        The text feature is split into words, mapped to ids through the
        vocabulary file, padded/cropped to ``self.MAX_LEN``, embedded, summed
        over the sequence axis and fed through two dense layers with dropout
        before the output layer.

        :param features: mapping holding the raw text under ``self.FEATURE_COL``
        :param labels: class ids used for the loss and the metrics
        :param mode: a ``tf.estimator.ModeKeys`` value
        :param params: not read by this function
        :return: the ``tf.estimator.EstimatorSpec`` for ``mode``
        """
        mode_keys = tf.estimator.ModeKeys

        # ----- text -> fixed-length id matrix ---------------------------------
        vocab_table = lookup.index_table_from_file(
            vocabulary_file=self.VOCAB_FILE,
            num_oov_buckets=1,
            default_value=-1)
        tokens = tf.string_split(features[self.FEATURE_COL])
        dense_tokens = tf.sparse_tensor_to_dense(
            tokens, default_value=self.PAD_WORD)
        token_ids = vocab_table.lookup(dense_tokens)

        # Right-pad every row by MAX_LEN, then keep the first MAX_LEN columns.
        right_pad = tf.constant([[0, 0], [0, self.MAX_LEN]])
        padded_ids = tf.pad(token_ids, right_pad)
        word_id_vector = tf.slice(padded_ids, [0, 0], [-1, self.MAX_LEN])

        # The Keras learning phase is switched on only for training.
        tf.keras.backend.set_learning_phase(mode == mode_keys.TRAIN)

        # ----- network -----------------------------------------------------------
        with tf.name_scope('embedding'):
            embedding_vectors = layers.embed_sequence(
                word_id_vector,
                vocab_size=self.VOCAB_LEN,
                embed_dim=self.EMBED_DIM,
                initializer=layers.xavier_initializer(seed=42))
            tf.logging.info('Word Vectors = {}'.format(embedding_vectors))

        with tf.name_scope('fast_text'):
            # Despite the name, this is the sum over the sequence axis.
            average_vectors = tf.reduce_sum(embedding_vectors, axis=1)
            tf.logging.info(
                'Average Word Vectors = {}'.format(average_vectors))

        with tf.name_scope('hidden_layer'):
            hidden = tf.keras.layers.Dense(
                1024, activation='relu')(average_vectors)
            hidden = tf.keras.layers.Dropout(0.5)(hidden)
            hidden = tf.keras.layers.Dense(
                self.EMBED_DIM / 2, activation='relu')(hidden)
            hidden = tf.keras.layers.Dropout(0.5)(hidden)
            tf.logging.info('Hidden Layer = {}'.format(hidden))

        with tf.name_scope('output'):
            logits = tf.keras.layers.Dense(
                self.TARGET_SIZE, activation=None)(hidden)
            tf.logging.info('Logits Layer = {}'.format(logits))

        probabilities = tf.nn.softmax(logits)
        predicted_indices = tf.argmax(probabilities, axis=1)

        tf.summary.histogram('fasttext', average_vectors)
        tf.summary.histogram('softmax', probabilities)

        # ----- PREDICT -----------------------------------------------------------
        if mode == mode_keys.PREDICT:
            predictions = {
                'class': predicted_indices,
                'probabilities': probabilities,
            }
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs={
                    'prediction':
                    tf.estimator.export.PredictOutput(predictions),
                })

        # ----- loss and summaries shared by TRAIN and EVAL -----------------------
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits)
        tf.summary.scalar('loss', loss)

        hits = tf.cast(tf.equal(predicted_indices, labels), tf.float32)
        acc = tf.reduce_mean(hits)
        tf.summary.scalar('acc', acc)

        # ----- TRAIN -------------------------------------------------------------
        if mode == mode_keys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=loss, train_op=train_op)

        # ----- EVAL --------------------------------------------------------------
        if mode == mode_keys.EVAL:
            eval_metrics_ops = {
                'accuracy':
                tf.metrics.accuracy(
                    labels=labels, predictions=predicted_indices),
                'precision':
                tf.metrics.precision(
                    labels=labels, predictions=predicted_indices),
                'recall':
                tf.metrics.recall(
                    labels=labels, predictions=predicted_indices),
                'f1_score':
                self.streaming_f1(
                    labels=labels,
                    predictions=predicted_indices,
                    n_classes=self.TARGET_SIZE),
            }
        # eval_metrics_ops is bound only in EVAL mode, the one mode expected
        # to reach this return.
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)
Beispiel #31
0
def model_fn(features, labels, mode, params):
    """Estimator model function: masked mean pooling followed by a dense layer.

    Token ids are embedded, optionally passed through a bidirectional LSTM
    (when ``params["lstm"] == 1``), averaged over the non-padding positions
    and projected to ``params["category_size"]`` logits. Token id 0 is
    treated as padding.

    Args:
        features: mapping holding the integer id tensor under "sequences".
        labels: integer class ids (unused in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.
        params: mapping with "vocab_size", "embed_dim", "lstm",
            "category_size" and "learning_rate".

    Returns:
        The ``tf.estimator.EstimatorSpec`` for ``mode``.
    """
    sequences = features["sequences"]

    # Look up the embedding of every token id.
    emb_sequences = embed_sequence(sequences,
                                   params["vocab_size"],
                                   params["embed_dim"],
                                   initializer=tf.random_uniform_initializer(
                                       -1, 1))

    # Sequence length = number of non-zero ids in each row.
    mask = tf.to_int32(tf.not_equal(sequences, tf.zeros_like(sequences)))
    length = tf.reduce_sum(mask, axis=-1)

    print(params)

    if params["lstm"] == 1:
        # Run a bidirectional LSTM over the embedded sequence.
        # NOTE(review): the same cell object is passed for both directions;
        # whether that shares weights or is rejected depends on the TF
        # version -- confirm this is intended.
        cell = tf.nn.rnn_cell.LSTMCell(num_units=params["embed_dim"])
        (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell,
            cell_bw=cell,
            inputs=emb_sequences,
            dtype=tf.float32,
            sequence_length=length)

        # Concatenate the forward and backward outputs on the feature axis.
        output = tf.concat([output_fw, output_bw], axis=-1)
    else:
        output = emb_sequences

    # Average pooling over the non-padding positions. The pooled tensor is
    # `output`, so the LSTM branch actually reaches the logits; the length is
    # clamped to 1 so an all-padding row yields zeros instead of NaN.
    mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
    length = tf.expand_dims(tf.cast(length, tf.float32), -1)
    pooled = tf.reduce_sum(output * mask, 1) / tf.maximum(length, 1.0)
    logits = layers.dense(pooled, params["category_size"])

    # Outputs exposed in PREDICT mode.
    predictions = {
        "classes": tf.argmax(logits, axis=1),  # top-1 category
        "probabilities": tf.nn.softmax(logits, name="probabilities")
    }

    # PREDICT mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Loss (TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.to_int32(labels),
                               depth=params["category_size"])
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)

    # TRAIN mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=params["learning_rate"])
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Evaluation metrics (EVAL mode)
    eval_metric_ops = {
        "accuracy":
        tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    }

    # EVAL mode
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
# One random (n, embedding_dim) matrix per categorical feature. The same
# matrices initialise the weights in each embedding approach below.
initial_emb_weights = [np.random.rand(n, embedding_dim) for n in n_cat_by_feature]

# The actual inputs: one int32 placeholder of shape [H, W] per feature,
# named "feat0", "feat1", ...
features = [
    tf.placeholder(shape=[H, W], dtype="int32", name="feat%d" % i)
    for i, _ in enumerate(n_cat_by_feature)
]

# 1.1) Embed each feature with embed_sequence, initialised from its weight
#      matrix, then concatenate the results on the last (channel) axis.
embedded1 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    e = layers.embed_sequence(
        f,
        vocab_size=n,
        embed_dim=embedding_dim,
        initializer=tf.constant_initializer(w)
    )
    embedded1.append(e)

out11 = tf.concat(embedded1, axis=2)

# 1.2) One-hot encode each feature, apply a separate convolution initialised
#      from the same weight matrix, then concatenate on the channel axis.
embedded2 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    one_hot = layers.one_hot_encoding(f, num_classes=n)

    # NOTE(review): this call is cut off in the visible source; its remaining
    # arguments are not shown here.
    conv_out = layers.conv2d(
        inputs=one_hot,
        num_outputs=embedding_dim,
        weights_initializer=tf.constant_initializer(w),
Beispiel #33
0
        def attend(pixels, word_indices, pattern_indices, char_indices,
                   memory_mask, parses):
            """
            Compute a softmax attention over all (cell, memory) pairs and a
            context vector pooled from the pre-attention features.

            :param pixels: reshaped here to (bs, h, w, 3)
            :param word_indices: ids for the word embedding, flattened per example
            :param pattern_indices: ids for the pattern embedding, flattened per example
            :param char_indices: ids for the char embedding, flattened per example
            :param memory_mask: reshaped here to (bs, h, w, 1); multiplies the
                attention logits, and positions where it is 0 get -1000
            :param parses: reshaped here to (bs, h, w, 8)
            :return: (spatial_attention, attention_entropy,
                cross_entropy_uniform, context)
            """
            bs = tf.shape(pixels)[0]

            # Normalised coordinate grids, tiled across the batch.
            side = RealData.im_size[0]
            grid_x, grid_y = tf.meshgrid(tf.linspace(0.0, 1.0, side),
                                         tf.linspace(0.0, 1.0, side))
            X = tf.tile(grid_x[None, ..., None], (bs, 1, 1, 1))
            Y = tf.tile(grid_y[None, ..., None], (bs, 1, 1, 1))

            def embed_grid(indices, vocab_size, scope):
                # Flatten per example, embed, and restore the (h, w) layout.
                flat_ids = tf.reshape(indices, (bs, -1))
                embedded = layers.embed_sequence(flat_ids,
                                                 vocab_size=vocab_size,
                                                 embed_dim=self.n_hid,
                                                 unique=False,
                                                 scope=scope)
                return tf.reshape(embedded, (bs, h, w, self.n_hid))

            word_embeddings = embed_grid(word_indices, train.word_hash_size,
                                         "word-embeddings")
            pattern_embeddings = embed_grid(pattern_indices,
                                            train.pattern_hash_size,
                                            "pattern-embeddings")
            char_embeddings = embed_grid(char_indices, train.n_output,
                                         "char-embeddings")

            pixels = tf.reshape(pixels, (bs, h, w, 3))
            parses = tf.reshape(parses, (bs, h, w, 8))
            memory_mask = tf.reshape(memory_mask, (bs, h, w, 1))

            # Stack every per-cell feature along the channel axis.
            channels = [
                pixels, word_embeddings, pattern_embeddings, char_embeddings,
                parses, X, Y, memory_mask
            ]
            x = tf.concat(channels, axis=3)

            with tf.variable_scope('attend'):
                for _ in range(4):
                    x = tf.nn.relu(dilated_block(x))

                x = layers.dropout(x,
                                   self.keep_prob,
                                   is_training=self.is_training_ph)
                pre_att_logits = x

                # (bs, h, w, n_memories)
                att_logits = layers.conv2d(
                    x,
                    train.n_memories,
                    3,
                    activation_fn=None,
                    weights_regularizer=self.regularizer)
                # TODO only sum the memory_mask idx, in the softmax
                att_logits = (memory_mask * att_logits -
                              (1.0 - memory_mask) * 1000)

                # One softmax per example over every (cell, memory) pair.
                logits = tf.reshape(att_logits, (bs, -1))
                logits = logits - tf.reduce_max(logits, axis=1, keepdims=True)
                lp = tf.nn.log_softmax(logits, axis=1)
                p = tf.nn.softmax(logits, axis=1)

                # (bs, h * w * n_memories, 1, 1)
                spatial_attention = tf.reshape(
                    p, (bs, h * w * train.n_memories, 1, 1))

                # Cross entropy against the mask normalised to sum to one;
                # one value per example.
                mask_total = tf.reduce_sum(memory_mask,
                                           axis=(1, 2, 3),
                                           keepdims=True)
                p_uniform = memory_mask / mask_total
                lp_grid = tf.reshape(lp, (bs, h, w, train.n_memories))
                cross_entropy_uniform = -tf.reduce_sum(p_uniform * lp_grid,
                                                       axis=(1, 2, 3))

                # Entropy of the attention distribution, in bits.
                attention_entropy = -tf.reduce_sum(p * lp,
                                                   axis=1) / tf.log(2.)

                # Sum the attention over the memory axis, then use it to
                # weight the pre-attention features at each cell.
                p_grid = tf.reshape(p, (bs, h, w, train.n_memories))
                cp = tf.reduce_sum(p_grid, axis=3, keepdims=True)
                context = tf.reduce_sum(cp * pre_att_logits, axis=(1, 2))

            return spatial_attention, attention_entropy, cross_entropy_uniform, context
def model_fn(features, labels, mode, params):
    """Estimator model function for a min/max-pooled embedding classifier.

    The text feature is split into words, mapped to ids through
    'data/vocab.csv', padded/cropped to ``commons.MAX_DOCUMENT_LENGTH`` and
    embedded in 50 dimensions. The element-wise minimum and maximum over the
    sequence axis are concatenated and passed through a 25-unit ReLU layer
    and an output layer of ``commons.TARGET_SIZE`` units.

    Args:
        features: mapping with the text under ``commons.FEATURE_COL`` and,
            outside PREDICT mode, example weights under
            ``commons.WEIGHT_COLUNM_NAME``.
        labels: class ids used for the loss and the metrics.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: object exposing ``N_WORDS`` (the vocabulary size).

    Returns:
        The ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL; any
        other mode falls through and returns None.
    """
    mode_keys = tf.estimator.ModeKeys

    # The Keras learning phase is switched on only for training.
    tf.keras.backend.set_learning_phase(mode == mode_keys.TRAIN)

    # ----- text -> fixed-length id matrix -------------------------------------
    vocab_table = lookup.index_table_from_file(
        vocabulary_file='data/vocab.csv',
        num_oov_buckets=1,
        default_value=-1)
    tokens = tf.string_split(features[commons.FEATURE_COL])
    dense_tokens = tf.sparse_tensor_to_dense(
        tokens, default_value=commons.PAD_WORD)
    token_ids = vocab_table.lookup(dense_tokens)

    # Right-pad every row by the maximum document length, then keep the
    # first MAX_DOCUMENT_LENGTH columns.
    max_len = commons.MAX_DOCUMENT_LENGTH
    padded_ids = tf.pad(token_ids, tf.constant([[0, 0], [0, max_len]]))
    word_id_vector = tf.slice(
        padded_ids, [0, 0], [-1, commons.MAX_DOCUMENT_LENGTH])

    # ----- network ------------------------------------------------------------
    word_embeddings = layers.embed_sequence(
        word_id_vector, vocab_size=params.N_WORDS, embed_dim=50)

    pooled = tf.concat(
        [
            tf.reduce_min(word_embeddings, axis=1),
            tf.reduce_max(word_embeddings, axis=1),
        ],
        axis=1)

    hidden = tf.keras.layers.Dense(25, activation='relu')(pooled)
    logits = tf.keras.layers.Dense(commons.TARGET_SIZE)(hidden)

    probabilities = tf.nn.softmax(logits)
    predicted_indices = tf.argmax(probabilities, axis=1)

    # ----- PREDICT ------------------------------------------------------------
    if mode == mode_keys.PREDICT:
        predictions = {
            'class': tf.gather(commons.TARGET_LABELS, predicted_indices),
            'probabilities': probabilities,
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'prediction': tf.estimator.export.PredictOutput(predictions),
            })

    # ----- weighted loss and summaries shared by TRAIN and EVAL ----------------
    weights = features[commons.WEIGHT_COLUNM_NAME]

    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=labels, logits=logits, weights=weights)
    tf.summary.scalar('loss', loss)

    hits = tf.cast(tf.equal(predicted_indices, labels), tf.float32)
    acc = tf.reduce_mean(hits)
    tf.summary.scalar('acc', acc)

    # ----- TRAIN --------------------------------------------------------------
    if mode == mode_keys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    # ----- EVAL ---------------------------------------------------------------
    if mode == mode_keys.EVAL:
        # The F1 metric is the only one computed without example weights.
        eval_metrics_ops = {
            'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=predicted_indices,
                                weights=weights),
            'precision':
            tf.metrics.precision(labels=labels,
                                 predictions=predicted_indices,
                                 weights=weights),
            'recall':
            tf.metrics.recall(labels=labels,
                              predictions=predicted_indices,
                              weights=weights),
            'f1_score':
            streaming_f1(labels=labels, predictions=predicted_indices),
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)