Example #1
def apply_func_on_depth(obj, func, depth, permeable_types=(list, tuple, dict)):
    if depth != 0 and isinstance(obj, permeable_types):
        if isinstance(obj, (list, tuple)):
            processed = list()
            for elem in obj:
                processed.append(
                    apply_func_on_depth(elem,
                                        func,
                                        depth - 1,
                                        permeable_types=permeable_types))
            if isinstance(obj, LSTMStateTuple):
                processed = LSTMStateTuple(
                    c=processed[0],
                    h=processed[1],
                )
            elif isinstance(obj, tuple):
                processed = tuple(processed)
            return processed
        elif isinstance(obj, dict):
            processed = dict()
            for key, value in obj.items():
                processed[key] = apply_func_on_depth(
                    value, func, depth - 1, permeable_types=permeable_types)
            return processed
    return func(obj)
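
A minimal usage sketch for the function above (hypothetical values; assumes the function is pasted into the same session, with a plain namedtuple standing in for TensorFlow's LSTMStateTuple). depth limits how far the recursion descends before func is applied; a negative depth recurses all the way to the leaves.

# hypothetical demo; a plain namedtuple stands in for tf's LSTMStateTuple
from collections import namedtuple
LSTMStateTuple = namedtuple('LSTMStateTuple', ('c', 'h'))

nested = {'a': [1, 2, 3], 'b': (4, 5)}
print(apply_func_on_depth(nested, lambda x: x * 10, depth=2))
# -> {'a': [10, 20, 30], 'b': (40, 50)}
print(apply_func_on_depth(nested, len, depth=1))   # stop one level earlier
# -> {'a': 3, 'b': 2}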
Example #2
def deep_zip(objects, depth, permeable_types=(list, tuple, dict)):
    # print("(deep_zip)objects:", objects)
    if depth != 0 and isinstance(objects[0], permeable_types):
        if isinstance(objects[0], (list, tuple)):
            zipped = list()
            for comb in zip(*objects):
                zipped.append(
                    deep_zip(comb, depth - 1, permeable_types=permeable_types))
            if isinstance(objects[0], LSTMStateTuple):
                zipped = LSTMStateTuple(
                    c=zipped[0],
                    h=zipped[1],
                )
            elif isinstance(objects[0], tuple):
                zipped = tuple(zipped)

            return zipped
        elif isinstance(objects[0], dict):
            zipped = dict()
            for key in objects[0].keys():
                values = [obj[key] for obj in objects]
                zipped[key] = deep_zip(values,
                                       depth - 1,
                                       permeable_types=permeable_types)
            return zipped
    return tuple(objects)
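
A usage sketch for deep_zip (hypothetical inputs, reusing the namedtuple stand-in from the previous sketch): parallel nested structures are zipped leaf by leaf, preserving the container types of the first object.

a = {'x': [1, 2], 'y': (3, 4)}
b = {'x': [10, 20], 'y': (30, 40)}
print(deep_zip([a, b], depth=-1))
# -> {'x': [(1, 10), (2, 20)], 'y': ((3, 30), (4, 40))}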
Example #3
    def _add_encoder(self, encoder_inputs, seq_len):
        with tf.variable_scope('encoder'):
            cell_fw = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                              initializer=self.rand_uni_init,
                                              state_is_tuple=True)

            cell_bw = tf.contrib.rnn.LSTMCell(self.hidden_size,
                                              initializer=self.rand_uni_init,
                                              state_is_tuple=True)
            outputs, states = bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                encoder_inputs,
                sequence_length=seq_len,
                swap_memory=True,
                dtype=tf.float32)
            # encoder_outputs, encoder_states = tf.nn.dynamic_rnn(cell_fw, encoder_inputs,
            #                                                     dtype=tf.float32, sequence_length=seq_len,
            #                                                     swap_memory=True)

            fw_outputs, bw_outputs = outputs
            c_fw, h_fw = states[0]
            c_bw, h_bw = states[1]

            encoder_c = c_fw + c_bw
            encoder_h = h_fw + h_bw
            # assemble to be a state tuple object
            encoder_states = LSTMStateTuple(c=encoder_c, h=encoder_h)
            encoder_outputs = fw_outputs + bw_outputs  # add outputs
            # print(outputs)
            # print(states)
            return encoder_outputs, encoder_states
Example #4
    def call(self, inputs):
        demo, times, values, measurements, dt, lengths = inputs
        demo_encoded = self.demo_encoder(demo)
        initial_state = LSTMStateTuple(*tf.split(demo_encoded, 2, axis=-1))

        values = tf.concat((values, tf.cast(measurements, tf.float32), dt),
                           axis=-1)
        mask = tf.sequence_mask(tf.squeeze(lengths, axis=-1), name='mask')
        out = self.rnn(PhasedLSTMInput(times=times, x=values),
                       mask=mask,
                       initial_state=initial_state)
        return self.output_layer(out)
Example #5
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency
        # (see the NumPy sketch after this example).
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        concat = _linear([inputs, h], 4 * self._num_units + 2 * self.n_chunk, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        f_master_t = concat[:, :self.n_chunk]
        f_master_t = self.cumsum(tf.nn.softmax(f_master_t, axis=-1))
        f_master_t = tf.expand_dims(f_master_t, 2)

        i_master_t = concat[:, self.n_chunk:2 * self.n_chunk]
        i_master_t = self.cumsum(tf.nn.softmax(i_master_t, axis=-1), 'left')
        i_master_t = tf.expand_dims(i_master_t, 2)

        # remaining gate activations, reshaped to (batch, 4 * n_chunk, chunk_size)
        concat = concat[:, 2 * self.n_chunk:]
        concat = tf.reshape(concat, [-1, self.n_chunk * 4, self.chunk_size])

        f_t = tf.nn.sigmoid(concat[:, :self.n_chunk])
        i_t = tf.nn.sigmoid(concat[:, self.n_chunk:2 * self.n_chunk])
        o_t = tf.nn.sigmoid(concat[:, 2 * self.n_chunk:3 * self.n_chunk])
        c_t_hat = tf.tanh(concat[:, 3 * self.n_chunk:])

        w_t = f_master_t * i_master_t

        new_c = (w_t * (f_t * tf.reshape(c, [-1, self.n_chunk, self.chunk_size]) + i_t * c_t_hat)
                 + (i_master_t - w_t) * c_t_hat
                 + (f_master_t - w_t) * tf.reshape(c, [-1, self.n_chunk, self.chunk_size]))
        new_h = tf.tanh(new_c) * o_t
        new_c = tf.reshape(new_c, [-1, self._num_units])
        new_h = tf.reshape(new_h, [-1, self._num_units])

#         i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

#         new_c = (
#             c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
#         new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
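
The comment near the top of this example notes that the gate parameters are concatenated into a single multiply. A minimal NumPy sketch of that fused-matmul-then-split pattern (hypothetical shapes, independent of the cell above):

import numpy as np

batch, num_units = 4, 8
x_h = np.random.randn(batch, 2 * num_units)             # concat of inputs and h
kernel = np.random.randn(2 * num_units, 4 * num_units)  # one kernel for all gates
gates = x_h @ kernel                                     # single matmul
i, j, f, o = np.split(gates, 4, axis=1)                  # then split into the four gates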
Example #6
 def tuplify_pass(obj, outer_struct=None, outer_key=None):
     if isinstance(obj, dict):
         if 'name__' in obj:
             tuplified = copy.copy(obj)
             del tuplified['name__']
             if obj['name__'] == "LSTMStateTuple":
                 tuplified = LSTMStateTuple(**tuplified)
             else:
                 tuplified = namedtuple(obj['name__'],
                                        sorted(tuplified))(**tuplified)
             outer_struct[outer_key] = tuplified
         else:
             for key in obj:
                 tuplify_pass(obj[key], obj, key)
     elif isinstance(obj, list):
         for i in range(len(obj)):
             tuplify_pass(obj[i], obj, i)
     else:
         outer_struct[outer_key] = obj
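
A small round-trip sketch for tuplify_pass (hypothetical data; assumes copy, collections.namedtuple and an LSTMStateTuple class, or the stand-in from the first sketch, are in scope): dicts tagged with a 'name__' key are rebuilt in place as named tuples inside their enclosing container.

state = {'saved': [{'name__': 'LSTMStateTuple', 'c': 1.0, 'h': 2.0}]}
tuplify_pass(state)
print(state)
# -> {'saved': [LSTMStateTuple(c=1.0, h=2.0)]}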
Example #7
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)

        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1), self._kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        i, j, f, o = array_ops.split(value=gate_inputs, num_or_size_splits=4, axis=one)

        forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)

        add = math_ops.add
        multiply = math_ops.multiply

        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))), multiply(sigmoid(i), self._activation(j)))
        new_h = multiply(self._activation(new_c), sigmoid(o))

        # VIB (variational information bottleneck) pruning of the hidden state
        if self.pruning:
            std = tf.exp(self._logD * 0.5)
            dim = tf.shape(self._logD)[0]
            # eps = tf.random.normal(shape=[self.batch_size, dim])
            z_scale = tf.cond(
                self.is_training,
                lambda: tf.reshape(self._mu, shape=[1, -1]) + tf.random.normal(
                    shape=[self.batch_size, dim]) * tf.reshape(std, shape=[1, -1]),
                lambda: (tf.reshape(self._mu, shape=[1, -1]) + tf.zeros(
                    shape=[self.batch_size, dim])) * self.get_mask())
            new_h = new_h * z_scale

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)

        return new_h, new_state
Example #8
    def add_model(self):
        with tf.variable_scope("embed_lookup"):
            #modify initializer here to add glove/word2vec
            embedding = getGlove([wrd for wrd in self.vocab if wrd != '<unk>'],
                                 'wiki_300')
            _wrd_embed = tf.get_variable(
                'embed_matrix', [len(self.vocab) - 1, self.p.embed_dim],
                initializer=tf.constant_initializer(embedding),
                regularizer=self.regularizer)

            wrd_pad = tf.Variable(tf.zeros([1, self.p.embed_dim]),
                                  trainable=False)
            self.embed_matrix = tf.concat([_wrd_embed, wrd_pad], axis=0)

        #Embed the source and target sentences. Elmo can be added here
        self.enc_inp_embed = tf.nn.embedding_lookup(self.embed_matrix,
                                                    self.enc_inp)
        self.dec_inp_embed = tf.nn.embedding_lookup(self.embed_matrix,
                                                    self.dec_inp)

        self.logger.info("Building encoder")
        with tf.variable_scope('encoder'):
            self.enc_cell = self.build_enc_cell()
            self.enc_outputs, self.enc_last_state = tf.nn.dynamic_rnn(
                cell=self.enc_cell,
                inputs=self.enc_inp_embed,
                sequence_length=self.enc_inp_len,
                dtype=self.p.dtype,
                time_major=False,
                scope='enc_rnn')

            #DED part. Also used for get_hidden
            self.enc_outputs_para, self.enc_last_state_para = tf.nn.dynamic_rnn(
                cell=self.enc_cell,
                inputs=self.dec_inp_embed,
                sequence_length=self.dec_inp_len,
                dtype=self.p.dtype,
                time_major=False,
                scope='enc_rnn')
            if self.p.use_bidir:
                self.fw_cell, self.bw_cell = self.build_bi_enc_cell()
                self.enc_outputs, self.enc_last_state_fw, self.enc_last_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=self.fw_cell,
                    cells_bw=self.bw_cell,
                    inputs=self.enc_inp_embed,
                    sequence_length=self.enc_inp_len,
                    dtype=self.p.dtype,
                    time_major=False,
                    scope='bi_enc_rnn')
                enc_last_state_fw = [state for state in self.enc_last_state_fw]
                enc_last_state_bw = [state for state in self.enc_last_state_bw]
                enc_last_state = []
                for st, _ in enumerate(enc_last_state_fw):
                    enc_last_state.append(
                        LSTMStateTuple(
                            tf.concat([
                                enc_last_state_fw[st].c,
                                enc_last_state_bw[st].c
                            ],
                                      axis=-1),
                            tf.concat([
                                enc_last_state_fw[st].h,
                                enc_last_state_bw[st].h
                            ],
                                      axis=-1)))
                self.enc_last_state = tuple(enc_last_state)

                self.enc_outputs_para, self.enc_last_state_fw_para, self.enc_last_state_bw_para = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=self.fw_cell,
                    cells_bw=self.bw_cell,
                    inputs=self.dec_inp_embed,
                    sequence_length=self.dec_inp_len,
                    dtype=self.p.dtype,
                    time_major=False,
                    scope='bi_enc_rnn')
                enc_last_state_fw_para = [
                    state for state in self.enc_last_state_fw_para
                ]
                enc_last_state_bw_para = [
                    state for state in self.enc_last_state_bw_para
                ]
                enc_last_state = []
                for st, _ in enumerate(enc_last_state_fw_para):
                    enc_last_state.append(
                        LSTMStateTuple(
                            tf.concat([
                                enc_last_state_fw_para[st].c,
                                enc_last_state_bw_para[st].c
                            ],
                                      axis=-1),
                            tf.concat([
                                enc_last_state_fw_para[st].h,
                                enc_last_state_bw_para[st].h
                            ],
                                      axis=-1)))

                self.enc_last_state_para = tuple(enc_last_state)

            if self.p.use_gan:
                self.transformation = self.build_generator(
                    self.p.hidden_size *
                    2 if self.p.use_bidir else self.p.hidden_size)
                enc_last_state = [state for state in self.enc_last_state]
                enc_last_state[-1] = LSTMStateTuple(
                    self.enc_last_state[-1].c,
                    self.enc_last_state[-1].h + 10 * self.transformation)
                self.enc_last_state = tuple(enc_last_state)

        self.dec_cell, self.dec_initial_state = self.build_dec_cell(
            self.p.hidden_size * 2 if self.p.use_bidir else self.p.hidden_size)

        self.input_layer = Dense(self.p.hidden_size *
                                 2 if self.p.use_bidir else self.p.hidden_size,
                                 name="input_projection")
        self.output_layer = Dense(len(self.vocab), name="output_projection")

        if self.p.mode == 'train':
            self.logger.info("Building training decoder")

            self.dec_inp_embed = self.input_layer(
                self.dec_inp_embed
            )  #decoder inputs dim should match encoder outputs dim

            training_helper = seq2seq.TrainingHelper(
                inputs=self.dec_inp_embed,
                sequence_length=self.dec_inp_len,
                time_major=False,
                name='training_helper')
            training_decoder = seq2seq.BasicDecoder(
                cell=self.dec_cell,
                helper=training_helper,
                initial_state=self.dec_initial_state,
                output_layer=self.output_layer)
            self.max_decoder_length = tf.reduce_max(self.dec_inp_len)
            # res = self.debug([self.dec_inp_embed]); pdb.set_trace()

            (self.dec_outputs_train, self.dec_last_state_train,
             self.dec_outputs_length_train) = (seq2seq.dynamic_decode(
                 decoder=training_decoder,
                 output_time_major=False,
                 impute_finished=True,
                 maximum_iterations=self.max_decoder_length))

            #since output layer is passed to decoder, logits = output
            self.dec_logits_train = self.dec_outputs_train.rnn_output
            self.dec_pred_train = tf.argmax(self.dec_logits_train,
                                            axis=-1,
                                            name='decoder_pred_train')
            masks = tf.sequence_mask(lengths=self.dec_inp_len,
                                     maxlen=tf.shape(self.dec_inp)[1],
                                     dtype=self.p.dtype,
                                     name='masks')

            self.loss = seq2seq.sequence_loss(logits=self.dec_logits_train,
                                              targets=self.dec_out,
                                              weights=masks,
                                              average_across_timesteps=True,
                                              average_across_batch=True)

            tf.summary.scalar('loss', self.loss)

        elif self.p.mode == 'decode':

            self.logger.info("building decoder for inference")
            start_tokens = tf.ones([self.p.batch_size], tf.int32) * tf.cast(
                self.vocab_table.lookup(tf.constant('<sos>')), tf.int32)
            self.start_tokens = start_tokens
            # pdb.set_trace()
            end_token = tf.cast(self.vocab_table.lookup(tf.constant('<eos>')),
                                tf.int32)

            def embed_and_input_proj(inputs):
                return self.input_layer(
                    tf.nn.embedding_lookup(self.embed_matrix, inputs))

            if not self.p.use_beam_search:
                self.logger.info("Building greedy decoder")

                decoding_helper = seq2seq.GreedyEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=embed_and_input_proj)

                inference_decoder = seq2seq.BasicDecoder(
                    cell=self.dec_cell,
                    helper=decoding_helper,
                    initial_state=self.dec_initial_state,
                    output_layer=self.output_layer)

            else:
                self.logger.info("Building beam search decoder")

                inference_decoder = beam_search_decoder.BeamSearchDecoder(
                    cell=self.dec_cell,
                    embedding=embed_and_input_proj,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=self.dec_initial_state,
                    beam_width=self.p.beam_width,
                    output_layer=self.output_layer)

            (self.dec_out_decode, self.dec_last_state_decode,
             self.dec_out_length_decode) = (seq2seq.dynamic_decode(
                 inference_decoder,
                 output_time_major=False,
                 maximum_iterations=self.p.max_decode_step))

            if not self.p.use_beam_search:
                #batchsize X seq_len X 1
                self.dec_pred_decode = tf.expand_dims(
                    self.dec_out_decode.sample_id, -1)
            else:
                #batch_size X seq_len X beam_width
                self.dec_pred_decode = self.dec_out_decode.predicted_ids
Example #9
((encoder_fw_outputs, encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=encoder_cell,
        cell_bw=encoder_cell,
        inputs=encoder_inputs_embedded,
        sequence_length=encoder_inputs_length,
        dtype=tf.float32,
        time_major=True)

# merge the forward and backward LSTM states
encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), axis=2)
encoder_final_state_c = tf.concat(
    (encoder_fw_final_state.c, encoder_bw_final_state.c), axis=1)
encoder_final_state_h = tf.concat(
    (encoder_fw_final_state.h, encoder_bw_final_state.h), axis=1)
encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                     h=encoder_final_state_h)

# decoder
decoder_cell = LSTMCell(decoder_hidden_units)
encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))

decoder_lengths = encoder_inputs_length + 3
"""
The decoder runs a manually specified transition step (sketched just after this example):
    output(t) -> output projection(t) -> prediction(t) (argmax) -> input embedding(t+1) -> input(t+1)
"""

assert EOS == 1 and PAD == 0
eos_time_slice = tf.ones((batch_size, ), dtype=tf.int32, name='EOS')
pad_time_slice = tf.zeros((batch_size, ), dtype=tf.int32, name='PAD')
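
A hedged sketch of one such transition step (W, b, and embeddings are hypothetical variables, not defined in this snippet; this mirrors the projection -> argmax -> embedding-lookup chain described in the docstring, not the full decoding loop):

def transition_step(previous_output):
    # output(t) -> output projection(t)
    output_logits = tf.add(tf.matmul(previous_output, W), b)
    # prediction(t) via argmax over the vocabulary
    prediction = tf.argmax(output_logits, axis=1)
    # input embedding(t+1) -> input(t+1)
    return tf.nn.embedding_lookup(embeddings, prediction)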
Example #10
 def state_size(self):
     return (LSTMStateTuple(self._num_units, self._num_units)
             if self._state_is_tuple else 2 * self._num_units)
Example #11
def speller(encoder_outputs,
            encoder_state,
            decoder_inputs,
            source_sequence_length,
            target_sequence_length,
            mode,
            hparams):

    batch_size = tf.shape(encoder_outputs)[0]
    beam_width = hparams.beam_width

    if mode == tf.estimator.ModeKeys.PREDICT and beam_width > 0:
        source_sequence_length = tf.contrib.seq2seq.tile_batch(
            source_sequence_length, multiplier=beam_width)
        encoder_state = tf.contrib.seq2seq.tile_batch(
            encoder_state, multiplier=beam_width)
        batch_size = batch_size * beam_width

    def embedding_fn(ids):
        # pass callable object to avoid OOM when using one-hot encoding
        if hparams.embedding_size != 0:
            target_embedding = tf.get_variable(
                'target_embedding', [
                    hparams.target_vocab_size, hparams.embedding_size],
                dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

            return tf.nn.embedding_lookup(target_embedding, ids)
        else:
            return tf.one_hot(ids, hparams.target_vocab_size)

    cell_list = []
    for layer in range(hparams.num_layers):
        with tf.variable_scope('decoder_cell_{}'.format(layer)):
            cell = lstm_cell(hparams.num_units * 2, hparams.dropout, mode)
        cell_list.append(cell)
    decoder_cell = tf.nn.rnn_cell.MultiRNNCell(cell_list)

    projection_layer = tf.layers.Dense(
        hparams.target_vocab_size, use_bias=True, name='projection_layer')

    initial_state = tuple([LSTMStateTuple(c=tf.concat([es[0].c, es[1].c], axis=-1),
                                          h=tf.concat([es[0].h, es[1].h], axis=-1))
                           for es in encoder_state[-hparams.num_layers:]])

    maximum_iterations = None
    if mode != tf.estimator.ModeKeys.TRAIN:
        max_source_length = tf.reduce_max(source_sequence_length)
        maximum_iterations = tf.to_int32(tf.round(tf.to_float(
            max_source_length) * hparams.decoding_length_factor))

    if mode == tf.estimator.ModeKeys.TRAIN:
        decoder_inputs = embedding_fn(decoder_inputs)

        if hparams.sampling_probability > 0.0:
            helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                decoder_inputs, target_sequence_length,
                embedding_fn, hparams.sampling_probability)
        else:
            helper = tf.contrib.seq2seq.TrainingHelper(
                decoder_inputs, target_sequence_length)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell, helper, initial_state, output_layer=projection_layer)

    elif mode == tf.estimator.ModeKeys.PREDICT and beam_width > 0:
        start_tokens = tf.fill(
            [tf.div(batch_size, beam_width)], hparams.sos_id)

        decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell=decoder_cell,
            embedding=embedding_fn,
            start_tokens=start_tokens,
            end_token=hparams.eos_id,
            initial_state=initial_state,
            beam_width=beam_width,
            output_layer=projection_layer)
    else:
        start_tokens = tf.fill([batch_size], hparams.sos_id)

        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding_fn, start_tokens, hparams.eos_id)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell, helper, initial_state, output_layer=projection_layer)

    decoder_outputs, final_context_state, final_sequence_length = tf.contrib.seq2seq.dynamic_decode(
        decoder, maximum_iterations=maximum_iterations)

    return decoder_outputs, final_context_state, final_sequence_length
Example #12
    def build_encoding_model(self):
        self.fw_inputs = tf.placeholder(dtype=tf.int32,
                                        shape=(None, None, None),
                                        name='fw_inputs')
        self.bw_inputs = tf.reverse_sequence(input=self.fw_inputs,
                                             seq_lengths=self.seq_lens,
                                             seq_axis=0,
                                             batch_axis=1,
                                             name='bw_inputs')
        self.fw_char_lens = tf.placeholder(dtype=tf.int32,
                                           shape=[None, None],
                                           name='fw_char_lens')
        self.bw_char_lens = tf.reverse_sequence(input=self.fw_char_lens,
                                                seq_lengths=self.seq_lens,
                                                seq_axis=0,
                                                batch_axis=1,
                                                name='bw_char_lens')
        self.inputs = self.fw_inputs
        self.char_lens = self.fw_char_lens
        self.bptt = tf.placeholder(dtype=tf.int32, name='bptt', shape=())
        # seq_masks = tf.expand_dims(self.seq_masks, axis=-1)
        input_shape = tf.shape(self.inputs)
        B = input_shape[1]
        fw_model = UniModel(self.rnn_layers,
                            self.projection_dims,
                            self.skip_connection,
                            self.is_training,
                            self.fine_tune_lr[1:] if isinstance(
                                self.fine_tune_lr, list) else None,
                            self.reuse,
                            'LMFW',
                            is_cpu=self.is_cpu)
        bw_model = UniModel(self.rnn_layers,
                            self.projection_dims,
                            self.skip_connection,
                            self.is_training,
                            self.fine_tune_lr[1:] if isinstance(
                                self.fine_tune_lr, list) else None,
                            self.reuse,
                            'LMBW',
                            is_cpu=self.is_cpu)
        embed_model = Embedding(self.char_vocab_size, self.char_vec_size,
                                self.reuse, self.char_cnn_options['layers'],
                                self.char_cnn_options['n_highways'],
                                self.projection_dims, self.is_training,
                                self.drop_e)
        embed_model.build()
        if isinstance(self.fine_tune_lr, list):
            embed_custom_lr = apply_custom_lr(self.fine_tune_lr[0])
        else:

            def embed_custom_lr(x):
                return x

        fw_model.build(embed_model.output_shape)
        bw_model.build(embed_model.output_shape)
        initial_states = []
        start_max_vals = []
        start_mean_vals = []
        start_outputs = []
        start_last_outputs = []
        start_output_shapes = []
        projection_dims = self.projection_dims if isinstance(
            self.projection_dims, int) and self.projection_dims > 0 else None
        for layer in self.rnn_layers:
            if self.is_cpu:
                zeros = tf.fill(value=0.0, dims=(B, layer['units']))
                initial_states.append(LSTMStateTuple(zeros, zeros))
            else:
                zeros = tf.fill(value=0.0, dims=(1, B, layer['units']))
                initial_states.append((zeros, zeros))
            dims = projection_dims if self.projection_dims else layer['units']
            max_val = tf.fill(value=-1e6, dims=(B, dims))
            mean_val = tf.fill(value=0.0, dims=(B, dims))
            start_output = tf.fill(value=0.0, dims=(0, B, dims))
            start_max_vals.append(max_val)
            start_mean_vals.append(mean_val)
            start_last_outputs.append(mean_val)
            start_outputs.append(start_output)
            start_output_shapes.append(tf.TensorShape((None, None, dims)))
        max_len = tf.reduce_max(self.seq_lens)

        def cond(i, state, max_vals, mean_vals, all_outputs, last_outputs):
            return i < max_len

        def body(embed, model, inputs, char_lens, sl, bptt, max_len):
            def child(i, state, max_vals, mean_vals, all_outputs,
                      last_outputs):
                i_to = tf.minimum(i + bptt, max_len)
                slice_inputs = inputs[i:i_to]
                slice_char_lens = char_lens[i:i_to]
                slice_inputs = embed.call(slice_inputs, slice_char_lens)
                slice_inputs = embed_custom_lr(slice_inputs)
                output_dict = model.call(slice_inputs, state)
                slice_seq_lens = tf.minimum(sl - i, bptt)
                mask = tf.expand_dims(tf.transpose(
                    tf.sequence_mask(slice_seq_lens, dtype=tf.float32),
                    (1, 0)),
                                      axis=-1)
                next_max_vals = []
                next_mean_vals = []
                new_all_outputs = []
                new_last_outputs = []
                for max_val, mean_val, outputs, past_outputs, last_output in zip(
                        max_vals, mean_vals, output_dict['layer_outputs'][1:],
                        all_outputs, last_outputs):
                    max_outputs = outputs * mask + (1 - mask) * -1e6
                    max_val = tf.maximum(max_val,
                                         tf.reduce_max(max_outputs, axis=0))
                    mean_outputs = outputs * mask
                    mean_val = (
                        mean_val * tf.expand_dims(
                            tf.to_float(tf.minimum(i, sl)), axis=-1) +
                        tf.reduce_sum(mean_outputs, axis=0)) / tf.expand_dims(
                            tf.to_float(tf.minimum(i_to, sl)), axis=-1)
                    next_max_vals.append(max_val)
                    next_mean_vals.append(mean_val)
                    new_all_outputs.append(
                        tf.concat((past_outputs, mean_outputs), axis=0))
                    last_val = get_last_output(mean_outputs, slice_seq_lens)
                    last_val = tf.where(slice_seq_lens > 0, last_val,
                                        last_output)
                    new_last_outputs.append(last_val)
                return i_to, output_dict[
                    'states'], next_max_vals, next_mean_vals, new_all_outputs, new_last_outputs

            return child

        start_i = tf.constant(0, dtype=tf.int32, shape=(), name='start_i')
        _, _, fw_layerwise_max, fw_layerwise_avg, fw_outputs, fw_last_output = tf.while_loop(
            cond,
            body(embed_model, fw_model, self.fw_inputs, self.fw_char_lens,
                 self.seq_lens, self.bptt, max_len), [
                     start_i, initial_states, start_max_vals, start_mean_vals,
                     start_outputs, start_last_outputs
                 ],
            [
                start_i.get_shape(),
                [
                    LSTMStateTuple(x.get_shape(), y.get_shape())
                    if self.is_cpu else (x.get_shape(), y.get_shape())
                    for x, y in initial_states
                ], [x.get_shape() for x in start_max_vals],
                [x.get_shape() for x in start_mean_vals], start_output_shapes,
                [x.get_shape() for x in start_last_outputs]
            ],
            swap_memory=True)
        _, _, bw_layerwise_max, bw_layerwise_avg, bw_outputs, bw_last_output = tf.while_loop(
            cond,
            body(embed_model, bw_model, self.bw_inputs, self.bw_char_lens,
                 self.seq_lens, self.bptt, max_len), [
                     start_i, initial_states, start_max_vals, start_mean_vals,
                     start_outputs, start_last_outputs
                 ],
            [
                start_i.get_shape(),
                [
                    LSTMStateTuple(x.get_shape(), y.get_shape())
                    if self.is_cpu else (x.get_shape(), y.get_shape())
                    for x, y in initial_states
                ], [x.get_shape() for x in start_max_vals],
                [x.get_shape() for x in start_mean_vals], start_output_shapes,
                [x.get_shape() for x in start_last_outputs]
            ],
            swap_memory=True)
        self.layerwise_max = [
            tf.concat((fw, bw), axis=-1)
            for fw, bw in zip(fw_layerwise_max, bw_layerwise_max)
        ]
        self.layerwise_avg = [
            tf.concat((fw, bw), axis=-1)
            for fw, bw in zip(fw_layerwise_avg, bw_layerwise_avg)
        ]
        self.layerwise_last = [
            tf.concat((fw, bw), axis=-1)
            for fw, bw in zip(fw_last_output, bw_last_output)
        ]
        self.timewise_outputs = [
            tf.concat((fw,
                       tf.reverse_sequence(input=bw,
                                           seq_lengths=self.seq_lens,
                                           seq_axis=0,
                                           batch_axis=1)),
                      axis=-1) for fw, bw in zip(fw_outputs, bw_outputs)
        ]
        self.layerwise_encode = [
            tf.concat(out, axis=-1)
            for out in zip(fw_layerwise_avg, fw_layerwise_max,
                           bw_layerwise_avg, bw_layerwise_max)
        ]
Example #13
    def build_encoder(self, encoder_inputs, encoder_input_lengths):
        """ Builds an RNN encoder. Can be configured to be uni- / bi- directional.
        Can also enable dropout. Returns outputs of the RNN at each timestep and 
        also the final state """

        with tf.variable_scope("encoder"):

            # Embeddings for orthographic characters
            char_embeddings = tf.Variable(tf.random_uniform(
                (self.n_chars, self.embed_dims), -1.0, 1.0),
                                          name="char_embeddings")
            encoder_input_embeddings = tf.nn.embedding_lookup(
                char_embeddings, encoder_inputs)

            # Unidirectional Run
            if not self.bidir:
                encoder_cell = self.cell_class_fn(self.hidden_dims)
                if self.mode == "training":
                    encoder_cell = DropoutWrapper(
                        encoder_cell,
                        input_keep_prob=1.0 - self.dropout,
                        output_keep_prob=1.0 - self.dropout,
                        state_keep_prob=1.0 - self.dropout)
                encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
                    encoder_cell,
                    encoder_input_embeddings,
                    dtype=tf.float32,
                    time_major=True)

            # Bidirectional Run
            else:
                with tf.variable_scope("fw"):
                    fw_encoder_cell = self.cell_class_fn(self.hidden_dims)
                    if self.mode == "training":
                        fw_encoder_cell = DropoutWrapper(
                            fw_encoder_cell,
                            input_keep_prob=1.0 - self.dropout,
                            output_keep_prob=1.0 - self.dropout,
                            state_keep_prob=1.0 - self.dropout)
                with tf.variable_scope("bw"):
                    bw_encoder_cell = self.cell_class_fn(self.hidden_dims)
                    if self.mode == "training":
                        bw_encoder_cell = DropoutWrapper(
                            bw_encoder_cell,
                            input_keep_prob=1.0 - self.dropout,
                            output_keep_prob=1.0 - self.dropout,
                            state_keep_prob=1.0 - self.dropout)

                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_final_state,
                  encoder_bw_final_state)) = (tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=fw_encoder_cell,
                      cell_bw=bw_encoder_cell,
                      inputs=encoder_input_embeddings,
                      sequence_length=encoder_input_lengths,
                      dtype=tf.float32,
                      time_major=True))

                # Concat final states of forward and backward run
                encoder_final_state_c = tf.concat(
                    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
                encoder_final_state_h = tf.concat(
                    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
                encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                                     h=encoder_final_state_h)
                encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), -1)

            return encoder_outputs, encoder_final_state
Example #14
    def build(self):
        """Build the model"""
        print("Building the sequence to sequence model ... ")

        vocab_size = self.vocab_size
        state_size = self.state_size
        enc_layers = self.enc_layers

        # Placeholders
        with tf.name_scope("placeholders"):
            enc_inputs = tf.placeholder(tf.int32, [None, None], "enc_inputs")
            inp_lens = tf.placeholder(tf.int32, [None], "inp_lens")
            self.drop_out = tf.placeholder(tf.float32, (), "drop_out")

            self.enc_inputs = enc_inputs
            self.inp_lens = inp_lens

            if (self.mode == "train"):
                dec_inputs = tf.placeholder(tf.int32, [None, None],
                                            "dec_inputs")
                targets = tf.placeholder(tf.int32, [None, None], "targets")
                out_lens = tf.placeholder(tf.int32, [None], "out_lens")
                self.learning_rate = tf.placeholder(tf.float32, (),
                                                    "learning_rate")
                self.lambda_kl = tf.placeholder(tf.float32, (), "lambda_kl")

                self.dec_inputs = dec_inputs
                self.targets = targets
                self.out_lens = out_lens

        batch_size = tf.shape(enc_inputs)[0]
        max_len = tf.shape(enc_inputs)[1]

        # Embedding
        with tf.variable_scope("embeddings"):
            embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[vocab_size, state_size],
                dtype=tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.05))
            enc_inputs = tf.nn.embedding_lookup(embedding_matrix, enc_inputs)

            if (self.mode == "train"):
                dec_inputs = tf.nn.embedding_lookup(embedding_matrix,
                                                    dec_inputs)

        # Encoder
        with tf.variable_scope("encoder"):
            # TODO: residual LSTM, layer normalization
            # if(self.bidirectional)
            #   enc_cell_fw = [create_cell(
            #     "enc-fw-%d" % i, state_size, self.drop_out, self.no_residual)
            #     for i in range(enc_layers)]
            #   enc_cell_bw = [create_cell(
            #     "enc-bw-%d" % i, state_size, self.drop_out, self.no_residual)
            #     for i in range(enc_layers)]
            # else:
            enc_cell = [
                create_cell("enc-%d" % i, state_size, self.drop_out,
                            self.no_residual) for i in range(enc_layers)
            ]
            enc_cell = tf.nn.rnn_cell.MultiRNNCell(enc_cell)
            enc_outputs, enc_state = tf.nn.dynamic_rnn(
                enc_cell,
                enc_inputs,
                sequence_length=inp_lens,
                dtype=tf.float32)

        # Decoder
        with tf.variable_scope("decoder"):
            dec_cell = [
                create_cell("dec-%d" % i, state_size, self.drop_out,
                            self.no_residual) for i in range(enc_layers)
            ]
            dec_cell = tf.nn.rnn_cell.MultiRNNCell(dec_cell)

            dec_proj = tf.layers.Dense(
                vocab_size,
                name="dec_proj",
                kernel_initializer=tf.random_normal_initializer(stddev=0.05),
                bias_initializer=tf.constant_initializer(0.))

        # latent code
        if (self.vae):
            print("Using vae model")
            with tf.variable_scope("latent_code"):
                enc_mean = tf.reduce_sum(enc_outputs, 1)
                enc_mean /= tf.expand_dims(tf.cast(inp_lens, tf.float32), 1)
                z_code = enc_mean

                if (self.prior == "gaussian"):
                    print("Gaussian prior")
                    latent_proj = tf.layers.Dense(
                        2 * state_size,
                        name="latent_proj",
                        kernel_initializer=tf.random_normal_initializer(
                            stddev=0.05),
                        bias_initializer=tf.constant_initializer(0.))
                    z_loc, z_scale = tf.split(latent_proj(z_code),
                                              [state_size, state_size], 1)
                    z_mvn = tfd.MultivariateNormalDiag(z_loc, z_scale)
                    z_sample = z_mvn.sample()

                elif (self.prior == "vmf"):
                    # print("vmf prior")
                    # latent_proj = tf.layers.Dense(state_size + 1, name="latent_proj",
                    #   kernel_initializer=tf.random_normal_initializer(stddev=0.05),
                    #   bias_initializer=tf.constant_initializer(0.))
                    # z_mu, z_conc = tf.split(
                    #   latent_proj(z_code), [state_size, 1], 1)
                    # z_mu /= tf.expand_dims(tf.norm(z_mu, axis=1), axis=1)
                    # z_conc = tf.reshape(z_conc, [batch_size])
                    # z_vmf = tfd.VonMisesFisher(z_mu, z_conc)
                    # z_sample = z_vmf.sample()
                    pass

                dec_init_state = (LSTMStateTuple(c=z_sample, h=z_sample),
                                  LSTMStateTuple(c=z_sample, h=z_sample))

        else:
            print("Using normal seq2seq, no latent variable")
            dec_init_state = enc_state

        with tf.variable_scope("decoding"):
            # greedy decoding
            _, dec_outputs_predict = decoding_infer(self.dec_start_id,
                                                    dec_cell,
                                                    dec_proj,
                                                    embedding_matrix,
                                                    dec_init_state,
                                                    enc_outputs,
                                                    batch_size,
                                                    max_len,
                                                    inp_lens,
                                                    max_len,
                                                    self.is_attn,
                                                    self.sampling_method,
                                                    self.topk_sampling_size,
                                                    state_size=self.state_size)
            # decoding with forward sampling
            # dec_outputs_sampling = decodeing_infer() #  TBC

            if (self.mode == "train"):
                # training decoding
                dec_logits_train, _, _, _, _ = decoding_train(
                    dec_inputs, dec_cell, dec_proj, dec_init_state,
                    enc_outputs, max_len, inp_lens, max_len, self.is_attn,
                    self.state_size)

                all_variables = slim.get_variables_to_restore()
                model_variables = [
                    var for var in all_variables
                    if var.name.split("/")[0] == self.model_name
                ]
                print("%s model, variable list:" % self.model_name)
                for v in model_variables:
                    print("  %s" % v.name)
                self.model_saver = tf.train.Saver(all_variables, max_to_keep=3)

                # loss and optimizer
                dec_mask = tf.sequence_mask(out_lens,
                                            max_len,
                                            dtype=tf.float32)
                dec_loss = tf.contrib.seq2seq.sequence_loss(
                    dec_logits_train, targets, dec_mask)

                if (self.vae):
                    if (self.prior == "gaussian"):
                        standard_normal = tfd.MultivariateNormalDiag(
                            tf.zeros(state_size), tf.ones(state_size))

                        prior_prob = standard_normal.log_prob(z_sample)  # [B]
                        posterior_prob = z_mvn.log_prob(z_sample)  # [B]
                        kl_loss = tf.reduce_mean(posterior_prob - prior_prob)
                        loss = dec_loss + self.lambda_kl * kl_loss

                    elif (self.prior == "vmf"):
                        # vmf_mu_0 = tf.ones(state_size) / tf.cast(state_size, tf.float32)
                        # standard_vmf = tfd.VonMisesFisher(vmf_mu_0, 0)
                        # prior_prob = standard_vmf.log_prob(z_sample) # [B]
                        # posterior_prob = z_vmf.log_prob(z_sample) # [B]
                        # kl_loss = tf.reduce_mean(posterior_prob - prior_prob)
                        # loss = dec_loss + self.lambda_kl * kl_loss
                        pass
                else:
                    loss = dec_loss

                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                train_op = optimizer.minimize(loss)

                self.train_output = {"train_op": train_op, "loss": loss}
                self.train_output.update(self.inspect)
                if (self.vae):
                    self.train_output["dec_loss"] = dec_loss
                    self.train_output["kl_loss"] = kl_loss

                self.valid_output = {"nll": tf.exp(loss)}
                self.infer_output = {"dec_predict": dec_outputs_predict}

            else:
                self.infer_output = {"dec_predict": dec_outputs_predict}
        return
Example #15
    def build_graph(self):
        print("building generator graph...")
        with tf.variable_scope("seq2seq"):
            with tf.variable_scope("embedding"):
                # shared by encoder and decoder
                # self.embedding = tf.get_variable('embedding', [self.vocab_size, self.embedding_size], dtype=tf.float32,
                #                                  trainable=True, initializer=self.rand_uni_init)
                # using pretrain word vector
                self.embedding = tf.get_variable('embedding', [self.vocab_size, self.embedding_size], dtype=tf.float32,
                                                 trainable=True, initializer=tf.constant_initializer(self.pretrain_wv))
            with tf.variable_scope("encoder"):
                topic_embedded = tf.nn.embedding_lookup(self.embedding, self.topic_input)
                # encode topic to representation
                topic_average = tf.reduce_mean(topic_embedded, axis=1)
                topic_state = tf.layers.dense(topic_average, self.hidden_size)

            with tf.variable_scope("decoder"):
                def _get_cell(_num_units):
                    cell = tf.contrib.rnn.BasicLSTMCell(_num_units)

                    if self.training_flag:
                        cell = tf.contrib.rnn.DropoutWrapper(cell)
                    return cell

                # single layer

                self.initial_state = LSTMStateTuple(c=topic_state, h=topic_state)
                self.decoder_cell = _get_cell(self.hidden_size)
                self.decoder_input_embedded = tf.nn.embedding_lookup(self.embedding, self.target_input)
                self.output_layer = layers_core.Dense(self.vocab_size, use_bias=False)

                # pre-train with targets #
                helper_pt = tf.contrib.seq2seq.TrainingHelper(
                    inputs=self.decoder_input_embedded,
                    sequence_length=self.sequence_lengths,
                    time_major=False,
                )

                decoder_pt = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=helper_pt,
                    initial_state=self.initial_state,
                    output_layer=self.output_layer
                )

                outputs_pt, _final_state, sequence_lengths_pt = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder_pt,
                    output_time_major=False,
                    maximum_iterations=self.max_len,
                    swap_memory=True,
                    impute_finished=True
                )

                self.logits_pt = outputs_pt.rnn_output
                self.g_predictions = tf.nn.softmax(self.logits_pt)

                masks = tf.sequence_mask(lengths=self.target_len,
                                         maxlen=self.max_len, dtype=tf.float32, name='masks')

                # print("target input:", self.target_input.shape)
                # print("logits:", self.logits_pt)

                self.target_output = tf.placeholder(tf.int32, [None, None])

                self.pretrain_loss = tf.contrib.seq2seq.sequence_loss(
                    self.logits_pt,
                    self.target_output,
                    masks,
                    average_across_timesteps=True,
                    average_across_batch=True)

                self.global_step = tf.Variable(0, trainable=False)

                # gradient clipping
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                gradients, v = zip(*optimizer.compute_gradients(self.pretrain_loss))
                gradients, _ = tf.clip_by_global_norm(gradients, self.grad_norm)
                self.pretrain_updates = optimizer.apply_gradients(zip(gradients, v), global_step=self.global_step)

            # infer
            helper_i = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                self.embedding,
                tf.fill([self.batch_size], self.vocab_dict['<GO>']),
                end_token=self.vocab_dict['<EOS>']
            )

            decoder_i = tf.contrib.seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                helper=helper_i,
                initial_state=self.initial_state,
                output_layer=self.output_layer
            )

            outputs_i, _final_state_i, sequence_lengths_i = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder_i,
                output_time_major=False,
                maximum_iterations=self.max_len,
                swap_memory=True,
                impute_finished=True
            )

            sample_id = outputs_i.sample_id
            self.infer_tokens = tf.unstack(sample_id, axis=0)

        print("generator graph built successfully")
Example #16
  def build(self):
    """Build the model"""
    print("Building the Latent BOW - sequence to sequence model ... ")

    vocab_size = self.vocab_size
    key_size = self.key_size
    state_size = self.state_size
    enc_layers = self.enc_layers
    max_enc_bow = self.max_enc_bow
    lambda_enc_loss = self.lambda_enc_loss

    # Placeholders
    with tf.name_scope("placeholders"):
      enc_keys = tf.placeholder(tf.int32, [None, None], "enc_keys")
      enc_locs = tf.placeholder(tf.int32, [None, None], "enc_locs")
      enc_vals = tf.placeholder(tf.int32, [None, None], "enc_vals")
      enc_lens = tf.placeholder(tf.int32, [None], "enc_lens")
      self.drop_out = tf.placeholder(tf.float32, (), "drop_out")
      self.gumbel_tau = tf.placeholder(tf.float32, (), "gumbel_tau")

      self.enc_keys = enc_keys
      self.enc_locs = enc_locs
      self.enc_vals = enc_vals
      self.enc_lens = enc_lens

      enc_targets = tf.placeholder(tf.int32, [None, None], "enc_targets")
      dec_inputs = tf.placeholder(tf.int32, [None, None], "dec_inputs")
      dec_targets = tf.placeholder(tf.int32, [None, None], "dec_targets")
      dec_lens = tf.placeholder(tf.int32, [None], "dec_lens")

      self.enc_targets = enc_targets
      self.dec_inputs = dec_inputs
      self.dec_targets = dec_targets
      self.dec_lens = dec_lens

    batch_size = tf.shape(enc_keys)[0]
    max_enc_len = tf.shape(enc_keys)[1]
    max_dec_len = tf.shape(dec_targets)[1]

    # Embedding 
    with tf.variable_scope("embeddings"):
      embedding_matrix_vals = tf.get_variable(
        name="embedding_matrix_vals", 
        shape=[vocab_size, state_size],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.05))
      embedding_matrix_keys = tf.get_variable(
        name="embedding_matrix_keys", 
        shape=[key_size, state_size],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.05))
      embedding_matrix_locs = tf.get_variable(
        name="embedding_matrix_locs", 
        shape=[100, state_size],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.05))

      enc_keys = tf.nn.embedding_lookup(embedding_matrix_keys, enc_keys)
      enc_vals = tf.nn.embedding_lookup(embedding_matrix_vals, enc_vals)
      enc_locs = tf.nn.embedding_lookup(embedding_matrix_locs, enc_locs)
      enc_inputs = (enc_keys + enc_vals + enc_locs) / 3.
      dec_inputs = tf.nn.embedding_lookup(embedding_matrix_vals, dec_inputs)

    # Encoder
    with tf.variable_scope("encoder"):
      # TODO: residual LSTM, layer normalization
      enc_cell = [create_cell(
        "enc-%d" % i, state_size, self.drop_out, self.no_residual) 
        for i in range(enc_layers)]
      enc_cell = tf.nn.rnn_cell.MultiRNNCell(enc_cell)
      enc_outputs, enc_state = tf.nn.dynamic_rnn(enc_cell, enc_inputs,
        sequence_length=enc_lens, dtype=tf.float32)

    # Encoder bow prediction
    with tf.variable_scope("bow_output"):
      bow_topk_prob, gumbel_topk_prob, seq_neighbor_ind, seq_neighbor_prob = \
        bow_predict_seq_tag(vocab_size, batch_size, enc_outputs, enc_lens, 
        max_enc_len, self.is_gumbel, self.gumbel_tau)
      seq_neighbor_output = {"seq_neighbor_ind": seq_neighbor_ind, 
        "seq_neighbor_prob": seq_neighbor_prob}
  
    # Encoder output, loss and metrics 
    with tf.name_scope("enc_output"):
      # top k prediction 
      bow_pred_prob, pred_ind = tf.nn.top_k(bow_topk_prob, max_enc_bow)

      # loss function 
      enc_targets = _enc_target_list_to_khot(
        enc_targets, vocab_size, self.pad_id)
      enc_loss = enc_loss_fn(
        self.bow_loss_fn, enc_targets, bow_topk_prob, max_enc_bow)
      self.train_output = {"enc_loss": enc_loss}

      # performance monitor 
      bow_metrics_dict = bow_train_monitor(
        bow_topk_prob, pred_ind, vocab_size, batch_size, enc_targets)
      self.train_output.update(bow_metrics_dict)

    # Encoder soft sampling 
    with tf.name_scope("gumbel_topk_sampling"):
      sample_ind, sample_prob, sample_memory = bow_gumbel_topk_sampling(
        gumbel_topk_prob, embedding_matrix_vals, self.sample_size, vocab_size)
      sample_memory_lens = tf.ones(batch_size, tf.int32) * self.sample_size
      sample_memory_avg = tf.reduce_mean(sample_memory, 1) # [B, S]

      sample_memory_output = {"bow_pred_ind": pred_ind, 
                              "bow_pred_prob": bow_pred_prob, 
                              "sample_memory_ind": sample_ind, 
                              "sample_memory_prob": sample_prob }

    # Decoder 
    # The initial state of the decoder = 
    #   encoder meaning vector z + encoder bow vector b 
    with tf.variable_scope("decoder"):
      dec_cell = [create_cell(
        "dec-%d" % i, state_size, self.drop_out, self.no_residual) 
        for i in range(enc_layers)]
      dec_cell = tf.nn.rnn_cell.MultiRNNCell(dec_cell)
      dec_proj = tf.layers.Dense(vocab_size, name="dec_proj",
        kernel_initializer=tf.random_normal_initializer(stddev=0.05),
        bias_initializer=tf.constant_initializer(0.))
      dec_ptr_k_proj = [
        tf.layers.Dense(state_size, name="dec_ptr_k_proj_%d" % pi,
        kernel_initializer=tf.random_normal_initializer(stddev=0.05),
        bias_initializer=tf.constant_initializer(0.)) 
        for pi in range(self.num_pointers)]
      dec_ptr_g_proj = tf.layers.Dense(1, name="dec_ptr_g_proj",
        kernel_initializer=tf.random_normal_initializer(stddev=0.05),
        bias_initializer=tf.constant_initializer(0.),
        activation=tf.nn.sigmoid)
      bow_cond_gate_proj = tf.layers.Dense(1, name="bow_cond_gate_proj",
        kernel_initializer=tf.random_normal_initializer(stddev=0.05),
        bias_initializer=tf.constant_initializer(0.),
        activation=tf.nn.sigmoid)

      dec_init_state = []
      for l in range(enc_layers):
        dec_init_state.append(LSTMStateTuple(c=enc_state[l].c,
                                h=enc_state[l].h + sample_memory_avg))
      dec_init_state = tuple(dec_init_state)

      if(self.source_attn):
        # [B, M + T, S]
        dec_memory = [sample_memory, enc_outputs]
        dec_mem_len = [sample_memory_lens, enc_lens]
        dec_max_mem_len = [self.sample_size, max_enc_len]
      else:
        dec_memory = sample_memory
        dec_mem_len = sample_memory_lens
        dec_max_mem_len = tf.shape(dec_memory)[1] 

      bow_cond = sample_memory_avg if self.bow_cond else None

      if not self.bow_cond_gate: bow_cond_gate_proj = None

      (dec_outputs_predict, dec_logits_train, dec_prob_train, pointer_ent, 
        avg_max_ptr, avg_num_copy) = decode( 
        self.dec_start_id, dec_inputs, 
        dec_cell, dec_proj, embedding_matrix_vals, 
        dec_init_state, dec_memory, dec_mem_len, dec_max_mem_len, 
        batch_size, max_dec_len, self.sampling_method, self.topk_sampling_size,
        state_size, multi_source=True, copy=self.copy, copy_ind=sample_ind,
        dec_ptr_g_proj=dec_ptr_g_proj, dec_ptr_k_proj=dec_ptr_k_proj,
        bow_cond=bow_cond, bow_cond_gate_proj=bow_cond_gate_proj)

    # model saver, before the optimizer 
    all_variables = slim.get_variables_to_restore()
    model_variables = [var for var in all_variables 
      if var.name.split("/")[0] == self.model_name]
    print("%s model, variable list:" % self.model_name)
    for v in model_variables: print("  %s" % v.name)
    self.model_saver = tf.train.Saver(model_variables, max_to_keep=3)

    with tf.variable_scope("optimizer"):
      optimizer = tf.train.AdamOptimizer(self.learning_rate)

    # decoder output, training and inference, combined with encoder loss 
    with tf.name_scope("dec_output"):
      dec_mask = tf.sequence_mask(dec_lens, max_dec_len, dtype=tf.float32)
      if not self.copy:
        dec_loss = tf.contrib.seq2seq.sequence_loss(
          dec_logits_train, dec_targets, dec_mask)
      else: 
        dec_loss = _copy_loss(dec_prob_train, dec_targets, dec_mask)

      loss = dec_loss + lambda_enc_loss * enc_loss
      train_op = optimizer.minimize(loss)

      dec_output = {"train_op": train_op, "dec_loss": dec_loss, "loss": loss}
      self.train_output.update(dec_output)
      if(self.copy):
        pointer_ent =\
          tf.reduce_sum(pointer_ent * dec_mask) / tf.reduce_sum(dec_mask)
        self.train_output['pointer_ent'] = pointer_ent
        avg_max_ptr =\
          tf.reduce_sum(avg_max_ptr * dec_mask) / tf.reduce_sum(dec_mask)
        self.train_output['avg_max_ptr'] = avg_max_ptr
        avg_num_copy = tf.reduce_sum(avg_num_copy * dec_mask, 1)
        avg_num_copy = tf.reduce_mean(avg_num_copy)
        self.train_output['avg_num_copy'] = avg_num_copy

      self.infer_output = {"dec_predict": dec_outputs_predict}
      dec_out_mem_ratio = _calculate_dec_out_mem_ratio(dec_outputs_predict, 
        sample_ind, vocab_size, self.pad_id, self.dec_start_id, self.dec_end_id)
      self.infer_output.update(dec_out_mem_ratio)
      self.infer_output.update(sample_memory_output)
      self.infer_output.update(seq_neighbor_output)
    return 
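The decoder block above seeds every decoder layer with the corresponding encoder state, shifting only the hidden vector h by the averaged bag-of-words memory. A minimal standalone sketch of that pattern (assuming TF 1.x; the function name is illustrative, not from the snippet):

import tensorflow as tf

LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple

def bow_conditioned_init_state(enc_state, sample_memory_avg):
  # enc_state: tuple of LSTMStateTuple, one per encoder layer
  # sample_memory_avg: [batch, state_size] averaged bag-of-words memory
  return tuple(
      LSTMStateTuple(c=layer.c, h=layer.h + sample_memory_avg)
      for layer in enc_state)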
Exemple #17
0
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        concat = _linear([inputs, h], 4 * self._num_units + 2 * self.n_chunk,
                         True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        # Master forget gate: the cumulative-sum activation makes the gate
        # monotonically non-decreasing across the chunk dimension.
        f_master_t = concat[:, :self.n_chunk]
        f_master_t = self.cumsum(f_master_t)
        f_master_t = tf.expand_dims(f_master_t, 2)

        # Master input gate: accumulated from the opposite ('left') direction.
        i_master_t = concat[:, self.n_chunk:2 * self.n_chunk]
        i_master_t = self.cumsum(i_master_t, 'left')
        i_master_t = tf.expand_dims(i_master_t, 2)

        concat = concat[:, 2 * self.n_chunk:]
        concat = tf.reshape(concat, [-1, self.n_chunk * 4, self.chunk_size])

        f_t = tf.nn.sigmoid(concat[:, :self.n_chunk])
        i_t = tf.nn.sigmoid(concat[:, self.n_chunk:2 * self.n_chunk])
        o_t = tf.nn.sigmoid(concat[:, 2 * self.n_chunk:3 * self.n_chunk])
        c_t_hat = tf.tanh(concat[:, 3 * self.n_chunk:])

        # w_t: overlap of the two master gates; the standard LSTM update
        # applies only inside this overlap.
        w_t = f_master_t * i_master_t

        new_c = w_t * (f_t * tf.reshape(c, [-1, self.n_chunk, self.chunk_size]) + i_t * c_t_hat) + \
                (i_master_t - w_t) * c_t_hat + \
                (f_master_t - w_t) * tf.reshape(c, [-1, self.n_chunk, self.chunk_size])
        new_h = tf.tanh(new_c) * o_t
        new_c = tf.reshape(new_c, [-1, self._num_units])
        new_h = tf.reshape(new_h, [-1, self._num_units])

        #         i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

        #         new_c = (
        #             c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
        #         new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)

        return new_h, new_state
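The master gates in Exemple #17 rely on a cumulative-sum helper (self.cumsum) that is not shown. A hedged sketch of what such a helper typically looks like in ordered-neuron style LSTM cells, assuming it is the "cumax" activation (cumulative sum of a softmax), with 'left' accumulating from the opposite end:

import tensorflow as tf

def cumax(logits, direction='right'):
  # Cumulative softmax: values grow monotonically from 0 towards 1 along
  # the chunk axis, which is what the master gates above require.
  probs = tf.nn.softmax(logits, axis=-1)
  return tf.cumsum(probs, axis=-1, reverse=(direction == 'left'))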
Exemple #18
0
  def build(self):
    """Build the model """
    print("Building the bow - sequence to sequence model ... ")

    vocab_size = self.vocab_size
    state_size = self.state_size
    enc_layers = self.enc_layers
    max_enc_bow = self.max_enc_bow
    num_paraphrase = self.num_paraphrase

    # Placeholders
    with tf.name_scope("placeholders"):
      enc_inputs = tf.placeholder(tf.int32, [None, None], "enc_inputs")
      enc_lens = tf.placeholder(tf.int32, [None], "enc_lens")
      self.drop_out = tf.placeholder(tf.float32, (), "drop_out")
      self.max_len = tf.placeholder(tf.int32, (), "max_len")
      dec_bow = tf.placeholder(tf.int32, [None, None], "dec_bow")
      dec_bow_len = tf.placeholder(tf.int32, [None], "dec_bow_len")

      self.enc_inputs = enc_inputs
      self.enc_lens = enc_lens
      self.dec_bow = dec_bow 
      self.dec_bow_len = dec_bow_len

      if(self.mode == "train"):
        enc_targets = tf.placeholder(tf.int32, [None, None], "enc_targets")
        enc_seq2seq_inputs = tf.placeholder(
          tf.int32, [None, num_paraphrase, None], "enc_seq2seq_inputs")
        enc_seq2seq_targets = tf.placeholder(
          tf.int32, [None, num_paraphrase, None], "enc_seq2seq_targets")
        enc_seq2seq_lens = tf.placeholder(
          tf.int32, [None, num_paraphrase], "enc_seq2seq_lens")

        dec_inputs = tf.placeholder(tf.int32, [None, None], "dec_inputs")
        dec_targets = tf.placeholder(tf.int32, [None, None], "dec_targets")
        dec_lens = tf.placeholder(tf.int32, [None], "dec_lens")

        self.enc_targets = enc_targets
        self.enc_seq2seq_inputs = enc_seq2seq_inputs
        self.enc_seq2seq_targets = enc_seq2seq_targets
        self.enc_seq2seq_lens = enc_seq2seq_lens
        self.dec_inputs = dec_inputs
        self.dec_targets = dec_targets
        self.dec_lens = dec_lens

    enc_batch_size = tf.shape(enc_inputs)[0]
    max_len = self.max_len

    dec_batch_size = tf.shape(dec_bow)[0]
    max_dec_bow = tf.shape(dec_bow)[1]

    # Embedding 
    with tf.variable_scope("embeddings"):
      embedding_matrix = tf.get_variable(
        name="embedding_matrix", 
        shape=[vocab_size, state_size],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.05))
      enc_inputs = tf.nn.embedding_lookup(embedding_matrix, enc_inputs)

      if(self.mode == "train"): 
        dec_inputs = tf.nn.embedding_lookup(embedding_matrix, dec_inputs)
        dec_bow = tf.nn.embedding_lookup(embedding_matrix, dec_bow)

    # Encoder
    with tf.variable_scope("encoder"):
      # TODO: residual LSTM, layer normalization
      enc_cell = [create_cell("enc-%d" % i, state_size, self.drop_out) 
        for i in range(enc_layers)]
      enc_cell = tf.nn.rnn_cell.MultiRNNCell(enc_cell)
      enc_outputs, enc_state = tf.nn.dynamic_rnn(enc_cell, enc_inputs,
        sequence_length=enc_lens, dtype=tf.float32)

    # Encoder bow prediction
    with tf.variable_scope("bow_output"):
      if(self.bow_pred_method == "mix_softmax"): 
        bow_topk_prob = bow_predict_mix_softmax(
          enc_batch_size, vocab_size, max_enc_bow, enc_state)

      elif(self.bow_pred_method == "seq_tag"):
        bow_topk_prob, _, _, _ = bow_predict_seq_tag(
          vocab_size, enc_batch_size, enc_outputs, enc_lens, max_len)

      elif(self.bow_pred_method == "seq2seq"):
        bow_topk_prob, enc_seq2seq_loss, enc_infer_pred = \
                                    bow_predict_seq2seq(enc_seq2seq_inputs, 
                                                        enc_seq2seq_targets,
                                                        enc_seq2seq_lens, 
                                                        embedding_matrix,
                                                        enc_outputs,
                                                        enc_state,
                                                        enc_layers,
                                                        num_paraphrase,
                                                        max_len,
                                                        enc_lens,
                                                        enc_batch_size,
                                                        vocab_size,
                                                        state_size, 
                                                        self.drop_out, 
                                                        self.dec_start_id)
      
    with tf.variable_scope("enc_optimizer"):
      enc_optimizer = tf.train.AdamOptimizer(self.learning_rate_enc)

    with tf.name_scope("enc_output"):
      # top k prediction 
      pred_prob, pred_ind = tf.nn.top_k(bow_topk_prob, max_enc_bow)
      pred_prob_unnorm = pred_prob
      pred_prob /= tf.expand_dims(tf.reduce_sum(pred_prob, axis=1), 1)

      pred_prob_dec, pred_ind_dec = tf.nn.top_k(bow_topk_prob, self.sample_size)
      pred_prob_dec /= tf.expand_dims(tf.reduce_sum(pred_prob_dec, axis=1), 1)

      if(self.mode == "train"):
        with tf.name_scope("enc_loss"):
          # loss function 
          enc_targets = _enc_target_list_to_khot(
            enc_targets, vocab_size, self.pad_id)
          enc_bow_loss = enc_loss_fn(
            self.bow_loss_fn, enc_targets, bow_topk_prob, max_enc_bow)
          if(self.bow_pred_method == "seq2seq"): 
            # pure sequence to sequence for now 
            enc_loss = enc_seq2seq_loss + 0.0 * enc_bow_loss
          else: 
            enc_loss = enc_bow_loss
          enc_train_op = enc_optimizer.minimize(enc_loss)

        # prediction performance monitor during training
        # write this in a function 
        # TODO: top 10 recall 
        with tf.name_scope("train_output"):
          # encoder training output
          self.enc_train_output = { "enc_train_op": enc_train_op, 
                                    "enc_bow_loss": enc_bow_loss,
                                    "enc_loss": enc_loss}
          bow_metrics_dict = bow_train_monitor(
            bow_topk_prob, pred_ind, vocab_size, enc_batch_size, enc_targets)
          self.enc_train_output.update(bow_metrics_dict)

          if(self.bow_pred_method == "seq2seq"): 
            self.enc_train_output["enc_seq2seq_loss"] = enc_seq2seq_loss

      # encoder inference output
      with tf.name_scope("infer_output"):
        if(self.bow_pred_method == "seq2seq"): 
          (infer_overlap, infer_pred_support, infer_target_support, infer_prec, 
            infer_recl) = bow_seq2seq_metrics(
              enc_targets, enc_infer_pred, vocab_size, self.pad_id)
          self.enc_infer_output = { 
            "enc_infer_overlap": infer_overlap,
            "enc_infer_pred_support": infer_pred_support,
            "enc_infer_target_support": infer_target_support,
            "enc_infer_precision": infer_prec,
            "enc_infer_recall": infer_recl,
            "enc_infer_pred": enc_infer_pred}
        else:
          self.enc_infer_output = { "pred_prob": pred_prob,
                                    "pred_ind": pred_ind,
                                    "pred_prob_dec": pred_prob_dec,
                                    "pred_ind_dec": pred_ind_dec}
        

    # Decoder bow encoding
    # TODO: sampling from encoder topk prediction
    with tf.variable_scope("dec_bow_encoding"):
      dec_bow_mask = tf.expand_dims(
        tf.sequence_mask(dec_bow_len, max_dec_bow, dtype=tf.float32), 2)

      # TODO: transformer based encoding, but our primary goal is to test the
      # effectiveness of sampling, so we skip it for now
      dec_bow_enc = tf.reduce_mean(dec_bow_mask * dec_bow, axis=1)  # [B, S]

    with tf.variable_scope("decoder"):
      dec_cell = [create_cell("dec-%d" % i, state_size, self.drop_out) 
        for i in range(enc_layers)]
      dec_cell = tf.nn.rnn_cell.MultiRNNCell(dec_cell)

      dec_init_state = tuple(
        LSTMStateTuple(dec_bow_enc, dec_bow_enc) for _ in range(enc_layers))
      dec_proj = tf.layers.Dense(vocab_size, name="dec_proj",
        kernel_initializer=tf.random_normal_initializer(stddev=0.05),
        bias_initializer=tf.constant_initializer(0.))
      dec_memory = dec_bow
      dec_mem_len = dec_bow_len
      dec_max_mem_len = max_dec_bow


      dec_outputs_predict, dec_logits_train = decode( 
        self.dec_start_id, dec_inputs, 
        dec_cell, dec_proj, embedding_matrix, 
        dec_init_state, dec_memory, dec_mem_len, dec_max_mem_len, 
        dec_batch_size, max_len, self.sampling_method, self.topk_sampling_size,
        state_size, multi_source=False)

    all_variables = slim.get_variables_to_restore()
    model_variables = [var for var in all_variables 
      if var.name.split("/")[0] == self.model_name]
    print("%s model, variable list:" % self.model_name)
    for v in model_variables: print("  %s" % v.name)
    self.model_saver = tf.train.Saver(model_variables, max_to_keep=3)  

    with tf.variable_scope("dec_optimizer"):
      dec_optimizer = tf.train.AdamOptimizer(self.learning_rate_dec)

    with tf.name_scope("dec_output"):
      if(self.mode == "train"):
        dec_mask = tf.sequence_mask(dec_lens, max_len, dtype=tf.float32)
        dec_loss = tf.contrib.seq2seq.sequence_loss(
          dec_logits_train, dec_targets, dec_mask)
        dec_train_op = dec_optimizer.minimize(dec_loss)

        self.dec_train_output = { "dec_train_op": dec_train_op, 
                                  "dec_loss": dec_loss}
    
      self.dec_infer_output = {"dec_predict": dec_outputs_predict}
    return 
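In Exemple #18 the decoder is initialized from a masked average of the bag-of-words embeddings, reused as both c and h of every layer. A minimal standalone sketch of that construction (assuming TF 1.x; helper and argument names are illustrative):

import tensorflow as tf

LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple

def bow_init_state(dec_bow_emb, dec_bow_len, max_dec_bow, num_layers):
  # dec_bow_emb: [batch, max_dec_bow, state_size] embedded bag of words
  mask = tf.expand_dims(
      tf.sequence_mask(dec_bow_len, max_dec_bow, dtype=tf.float32), 2)
  pooled = tf.reduce_sum(mask * dec_bow_emb, axis=1)
  pooled /= tf.maximum(
      tf.cast(tf.expand_dims(dec_bow_len, 1), tf.float32), 1.0)
  return tuple(LSTMStateTuple(c=pooled, h=pooled) for _ in range(num_layers))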
Exemple #19
0
            tf.nn.rnn_cell.LSTMCell(
                num_units=no_units,
                initializer=tf.keras.initializers.glorot_normal(),
                state_is_tuple=True)
        ]))

LSTM_outputs, LSTM_fw_state, LSTM_bw_state = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    cells_fw=LSTM_fw, cells_bw=LSTM_bw, inputs=train, dtype=tf.float32)

# LSTM_outputs = tf.concat((LSTM_fw_output, LSTM_bw_output), 2)

LSTM_state_c = tf.concat((LSTM_fw_state[-1][0].c, LSTM_bw_state[-1][0].c), 1)

LSTM_state_h = tf.concat((LSTM_fw_state[-1][0].h, LSTM_bw_state[-1][0].h), 1)

LSTM_final_state = LSTMStateTuple(c=LSTM_state_c, h=LSTM_state_h)
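# LSTM_final_state packs the concatenated forward/backward c and h into one
# LSTMStateTuple. It is not used further in this snippet; a common follow-up
# (a hedged note, not part of the original code) is to pass it as the
# initial_state of a unidirectional decoder cell of width 2 * no_units.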

output = tf.layers.Dense(2)(LSTM_state_h)

# Define the loss function
losss = tf.losses.softmax_cross_entropy(target, output)
trainop = tf.train.AdamOptimizer(learning_rate=0.001).minimize(losss)

# losx=[]
saver = tf.train.Saver()
# # with tf.device('/gpu:0'):
# with tf.Session() as sess:

#     sess.run(tf.global_variables_initializer())