Example #1
def zglobal_encoder(label_input, zsent_sample, seq_len, batch_size):
    """
    Pre-stochastic layer encoder for z1 (latent segment variable)
    Args:
        label_input(tf.Tensor): per-step label features of shape (bs, T, n_labels)
        zsent_sample(tf.Tensor): sentence-level latent sample of shape (bs, D1)
        seq_len(tf.Tensor): sequence lengths of shape (bs,)
        batch_size: batch size used to build the RNN's initial zero state
    Return:
        final_state(tf.Tensor): concatenated (c, h) final state of the first LSTM layer, shape (bs, 2 * hidden)
    """
    # prepare input: broadcast the sentence-level latent across time and concatenate it with the labels

    bs, T = tf.shape(label_input)[0], tf.shape(label_input)[1]
    zsent_sample = tf.tile(tf.expand_dims(zsent_sample, 1), (1, T, 1))
    x_z2 = tf.concat([label_input, zsent_sample], axis=-1)
    encoder_input = x_z2

    if params.base_cell == 'lstm':
        base_cell = tf.contrib.rnn.LSTMCell
    elif params.base_cell == 'rnn':
        base_cell = tf.contrib.rnn.BasicRNNCell
    else:
        base_cell = tf.contrib.rnn.GRUCell

    cell = model.make_rnn_cell([params.encoder_hidden for _ in range(
        params.decoder_rnn_layers)], base_cell=base_cell)


    initial = cell.zero_state(batch_size, dtype=tf.float64)

    if params.keep_rate < 1:
        encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)
    outputs, final_state = tf.nn.dynamic_rnn(cell,
                                             inputs=encoder_input,
                                             sequence_length=seq_len,
                                             initial_state=initial,
                                             swap_memory=True,
                                             dtype=tf.float64,
                                             scope="zglobal_encoder_rnn")
    # concatenate the first layer's final (c, h) LSTMStateTuple -> (bs, 2 * hidden)
    final_state = tf.concat(final_state[0], 1)
    return final_state
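
The tile-and-concat step above broadcasts the per-sentence latent sample to every time step before it is joined with the per-step label features. The same operation in isolation, with illustrative shapes (a minimal sketch, not taken from the original code):

import tensorflow as tf

labels = tf.placeholder(tf.float64, [None, None, 10])  # (bs, T, n_labels)
z_sent = tf.placeholder(tf.float64, [None, 64])         # (bs, D1)

T = tf.shape(labels)[1]
z_rep = tf.tile(tf.expand_dims(z_sent, 1), (1, T, 1))   # (bs, T, D1)
rnn_in = tf.concat([labels, z_rep], axis=-1)             # (bs, T, n_labels + D1)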
Example #2
def q_net(x, seq_len, batch_size=params.batch_size):
    with zs.BayesianNet() as encoder:
        # construct lstm
        # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
        # cells = tf.nn.rnn_cell.MultiRNNCell([cell]*params.rnn_layers)
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=params.base_cell)
        initial = cell.zero_state(batch_size, dtype=tf.float32)
        if params.keep_rate < 1:
            x = tf.nn.dropout(x, params.keep_rate)
        s_l = tf.shape(x)[1]
        # Highway network [S. Semeniuta et al.]
        for i in range(params.highway_lc):
            with tf.variable_scope("hw_layer_enc{0}".format(i)) as scope:
                if i == 0:  # first, input layer
                    x = tf.reshape(x, [-1, params.embed_size])
                    prev_y = tf.layers.dense(x, params.highway_ls, tf.nn.relu)
                elif i == params.highway_lc - 1:  # last, output layer
                    encoder_input = tf.layers.dense(prev_y, params.embed_size)
                    encoder_input = tf.reshape(
                        encoder_input,
                        [params.batch_size, s_l, params.embed_size])
                else:  # hidden layers
                    prev_y = model.highway_network(prev_y, params.highway_ls)

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=encoder_input,
                                                 sequence_length=seq_len,
                                                 initial_state=initial,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        final_state = tf.concat(final_state[0], 1)
        lz_mean = tf.layers.dense(inputs=final_state,
                                  units=params.latent_size,
                                  activation=None)
        lz_logstd = tf.layers.dense(inputs=final_state,
                                    units=params.latent_size,
                                    activation=None)
        # define the latent variable's StochasticTensor
        z = zs.Normal('z', mean=lz_mean, logstd=lz_logstd, group_event_ndims=1)
        tf.summary.histogram('latent_space', z)
        return z
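
zs.Normal turns the two dense heads into a reparameterized StochasticTensor; conceptually, the sample it yields for a diagonal Gaussian is mean + exp(logstd) * noise. A hand-rolled equivalent of just that sampling step, for illustration only:

import tensorflow as tf

def sample_diag_gaussian(mean, logstd):
    """Reparameterized sample from N(mean, exp(logstd)^2)."""
    eps = tf.random_normal(tf.shape(mean), dtype=mean.dtype)
    return mean + tf.exp(logstd) * eps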
Example #3
def q_net(encoder_input, seq_len, batch_size):
    with zs.BayesianNet() as encoder:
        # construct lstm
        # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
        # cells = tf.nn.rnn_cell.MultiRNNCell([cell]*params.rnn_layers)
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        else:
            base_cell = tf.contrib.rnn.GRUCell
        cell = model.make_rnn_cell([params.decoder_hidden for _ in range(
            params.decoder_rnn_layers)], base_cell=base_cell)
        initial = cell.zero_state(batch_size, dtype=tf.float32)
        if params.keep_rate < 1:
            encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=encoder_input,
                                                 sequence_length=seq_len,
                                                 initial_state=initial,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        final_state = tf.concat(final_state[0], 1)
        if params.encode == 'hw':
            # Highway network [S. Semeniuta et al.]
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_enc{0}".format(i)) as scope:
                    if i == 0:  # first, input layer
                        prev_y = tf.layers.dense(final_state, params.highway_ls)
                    elif i == params.highway_lc - 1:  # last, output layer
                        final_state = tf.layers.dense(prev_y,
                                                      params.latent_size * 2)
                    else:  # hidden layers
                        prev_y = model.highway_network(prev_y,
                                                       params.highway_ls)
            lz_mean, lz_logstd = tf.split(final_state, 2, axis=1)
        elif params.encode == 'mlp':
            lz_mean = tf.layers.dense(inputs=final_state,
                                      units=params.latent_size)
            lz_logstd = tf.layers.dense(inputs=final_state,
                                        units=params.latent_size)
        # define the latent variable's StochasticTensor
        z = zs.Normal('z', mean=lz_mean, logstd=lz_logstd, group_event_ndims=1)
        tf.summary.histogram('latent_space', z)
        return z
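
model.highway_network itself is not shown in these examples. The standard highway layer it presumably implements (the form used by Semeniuta et al.) gates a nonlinear transform against a carry of the input, y = t * H(x) + (1 - t) * x. A hedged stand-in, assuming x already has `size` units so the carry is well-formed:

import tensorflow as tf

def highway_layer(x, size, scope="highway"):
    # y = t * H(x) + (1 - t) * x, with a sigmoid transform gate t (assumed form)
    with tf.variable_scope(scope):
        h = tf.layers.dense(x, size, activation=tf.nn.relu, name="transform")
        t = tf.layers.dense(x, size, activation=tf.nn.sigmoid, name="gate")
        return t * h + (1.0 - t) * x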
Example #4
def zsent_encoder(encoder_input, seq_len, batch_size):
    """
    Pre-stochastic layer encoder for z2 (latent sequence variable)
    Args:
        encoder_input(tf.Tensor): input sequence of shape (bs, T, F)
        seq_len(tf.Tensor): sequence lengths of shape (bs,)
        batch_size: batch size used to build the RNN's initial zero state
    Return:
        final_state(tf.Tensor): concatenated (c, h) final state of the first LSTM layer, shape (bs, 2 * hidden)
    """
    # construct lstm
    # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
    # cells = tf.nn.rnn_cell.MultiRNNCell([cell]*params.rnn_layers)
    if params.base_cell == 'lstm':
        base_cell = tf.contrib.rnn.LSTMCell
    elif params.base_cell == 'rnn':
        base_cell = tf.contrib.rnn.BasicRNNCell
    else:
        base_cell = tf.contrib.rnn.GRUCell

    cell = model.make_rnn_cell(
        [params.encoder_hidden for _ in range(params.decoder_rnn_layers)],
        base_cell=base_cell)


    initial = cell.zero_state(batch_size, dtype=tf.float64)

    if params.keep_rate < 1:
        encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)
    outputs, final_state = tf.nn.dynamic_rnn(cell,
                                             inputs=encoder_input,
                                             sequence_length=seq_len,
                                             initial_state=initial,
                                             swap_memory=True,
                                             dtype=tf.float64,
                                             scope="zsent_encoder_rnn")
    final_state = tf.concat(final_state[0], 1)
    return final_state
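
How the two pre-stochastic encoders fit together is not shown here. A rough wiring sketch, assuming the params/model module from these examples is in scope; the dense layer stands in for the actual stochastic layer that would produce zsent_sample, and all shapes are illustrative:

import tensorflow as tf

word_embs   = tf.placeholder(tf.float64, [None, None, 300])  # (bs, T, F)
label_input = tf.placeholder(tf.float64, [None, None, 10])   # (bs, T, n_labels)
seq_len     = tf.placeholder(tf.int32, [None])
batch_size  = tf.shape(word_embs)[0]

zsent_code   = zsent_encoder(word_embs, seq_len, batch_size)          # (bs, 2 * hidden)
zsent_sample = tf.layers.dense(zsent_code, 64)                        # stand-in for the stochastic layer
zglobal_code = zglobal_encoder(label_input, zsent_sample, seq_len, batch_size)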
Example #5
            './PTB_DATA/data')
        data, labels_arr, _, data_dict = data_.prepare_data(
            train_data_raw, params_c)
    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [data_dict.vocab_size, params['embed_size']], dtype=tf.float32)
            vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        keep_rate = tf.placeholder(tf.float32)
        if params['mode_train'] and params['keep_rate'] < 1:
            vect_inputs = tf.nn.dropout(vect_inputs, keep_rate)

        labels = tf.placeholder(shape=[None, None], dtype=tf.int64)
        cell = model.make_rnn_cell([params['num_hidden']]*params['num_layers'],
                                   base_cell=tf.contrib.rnn.GRUCell)

        initial_state = tf.placeholder_with_default(
            input=cell.zero_state(tf.shape(vect_inputs)[0], dtype=tf.float32),
            shape=[None, None, params['num_hidden']])
        zs = cell.zero_state(params['batch_size'], dtype=tf.float32)
        length = tf.placeholder(shape=[None], dtype=tf.float32)
        ins = tf.reshape(initial_state, [-1, params['num_hidden']])
        # TODO: find a way to initialize the state of a 2-layer network
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs=vect_inputs, sequence_length=length,
                                                 initial_state=(ins, )*params['num_layers'],
                                                 swap_memory=False, dtype=tf.float32)
        fc_layer = tf.layers.dense(inputs=outputs, units=data_dict.vocab_size, activation=None)
        prnt = tf.Print(fc_layer, [tf.shape(final_state), tf.shape(zs)])
        # define optimization with learning-rate decay; decay can be used with the SGD optimizer
        global_step = tf.Variable(0, trainable=False)
        # learning_rate = tf.train.exponential_decay(params['learning_rate'], global_step, 500, 0.96)
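
The snippet stops right before the loss and training op. One hedged way the commented-out decay line could be completed (fc_layer, labels, length, global_step, and params come from the code above; everything else is an assumption, not the original implementation):

        mask = tf.sequence_mask(tf.cast(length, tf.int32), tf.shape(fc_layer)[1], dtype=tf.float32)
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=fc_layer)
        loss = tf.reduce_sum(xent * mask) / tf.reduce_sum(mask)
        learning_rate = tf.train.exponential_decay(params['learning_rate'], global_step, 500, 0.96)
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)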
Example #6
def vae_lstm(observed, batch_size, d_seq_l, embed, d_inputs, vocab_size, gen_mode=False):
    with zs.BayesianNet(observed=observed) as decoder:
        # prepare input
        z_mean = tf.zeros([batch_size, params.latent_size])
        z = zs.Normal('z', mean=z_mean, std=0.1, group_event_ndims=0)
        tf.summary.histogram('z|x', z)
        # z = [batch_size, l_s] -> [batch_size, seq_len, l_s]
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not gen_mode:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)
        max_sl = tf.shape(dec_inps)[1]
        # define cell
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        else:
            # not working for now
            base_cell = tf.contrib.rnn.GRUCell
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=base_cell)
        if params.decode == 'hw':
            # Highway network [S. Semeniuta et al.]
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_dec{0}".format(i)) as scope:
                    if i == 0:  # first, input layer
                        prev_y = tf.layers.dense(z,
                                                 params.decoder_hidden * 2)
                    elif i == params.highway_lc - 1:  # last, output layer
                        z_dec = tf.layers.dense(prev_y,
                                                params.decoder_hidden * 2)
                    else:  # hidden layers
                        prev_y = model.highway_network(prev_y,
                                                       params.highway_ls)
            inp_h, inp_c = tf.split(z_dec, 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        elif params.decode == 'concat':
            z_out = tf.reshape(
              tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
              [batch_size, -1, params.latent_size])
            dec_inps = tf.concat([dec_inps, z_out], 2)
            initial_state = rnn_placeholders(
                cell.zero_state(tf.shape(dec_inps)[0], tf.float32))
        elif params.decode == 'mlp':
            # z->decoder initial state
            w1 = tf.get_variable('whl', [params.latent_size, params.highway_ls],
                                 tf.float32,
                                 initializer=tf.truncated_normal_initializer())
            b1 = tf.get_variable('bhl', [params.highway_ls], tf.float32,
                                 initializer=tf.ones_initializer())
            z_dec = tf.matmul(z, w1) + b1
            inp_h, inp_c = tf.split(tf.layers.dense(z_dec,
                                                    params.decoder_hidden * 2),
                                    2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs=dec_inps,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        # define decoder network
        if gen_mode:
            # only interested in the last output
            outputs = outputs[:, -1, :]
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r, units=vocab_size, activation=None)
        if params.beam_search:
            sample = tf.nn.softmax(x_logits)
        else:
            sample = tf.multinomial(x_logits / params.temperature, 1)[0][0]
        return x_logits, (initial_state, final_state), sample
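
The prior defined at the top of this decoder is N(0, 0.1^2); the matching KL term against the encoder's q(z|x) is not shown (ZhuSuan typically estimates the objective from the two BayesianNets' log-probabilities rather than in closed form). For reference, a sketch of the analytic KL between diagonal Gaussians:

import tensorflow as tf

def diag_gauss_kl(mu, logstd, prior_std=0.1):
    """KL( N(mu, exp(logstd)^2) || N(0, prior_std^2) ), summed over latent dimensions."""
    prior_logstd = tf.log(tf.constant(prior_std, mu.dtype))
    var_ratio = tf.exp(2.0 * (logstd - prior_logstd))
    kl = prior_logstd - logstd + 0.5 * (var_ratio + tf.square(mu) / prior_std ** 2 - 1.0)
    return tf.reduce_sum(kl, axis=-1)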
Example #7
def lstm_decoder_words(z_in,
                       d_inputs,
                       label_logits,
                       d_seq_l,
                       batch_size,
                       embed,
                       vocab_size,
                       gen_mode=False,
                       zsent=None,
                       scope=None):

    with tf.variable_scope(scope, "decoder") as sc:
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not gen_mode:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)

        label_logits = tf.nn.softmax(label_logits)
        dep = int(label_logits.shape[1])
        bs, T = tf.shape(dec_inps)[0], tf.shape(dec_inps)[1]
        # reshape the flat per-step label distribution to (bs, T, n_labels) and append it to each word embedding
        label_logits = tf.reshape(label_logits, [bs, T, dep])
        dec_inps = tf.concat([dec_inps, label_logits], axis=-1)
        max_sl = tf.shape(dec_inps)[1]
        # define cell
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        elif params.base_cell == 'rnn':
            base_cell = tf.contrib.rnn.BasicRNNCell
        else:
            # not working for now
            base_cell = tf.contrib.rnn.GRUCell

        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=base_cell)

        if gen_mode:
            z = zsent
        else:
            z = z_in
        if params.decode == 'hw':
            # Highway network [S. Semeniuta et al.]
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_dec{0}".format(i)) as scope:
                    z_dec = fully_connected(
                        z,
                        params.decoder_hidden * 2,
                        activation_fn=tf.nn.sigmoid,
                        weights_initializer=xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        scope="decoder_inp_state")

            inp_h, inp_c = tf.split(z_dec, 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        elif params.decode == 'concat':
            z_out = tf.reshape(tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
                               [batch_size, -1, params.latent_size])
            dec_inps = tf.concat([dec_inps, z_out], 2)
            initial_state = rnn_placeholders(
                cell.zero_state(tf.shape(dec_inps)[0], tf.float64))
        elif params.decode == 'mlp':
            # z->decoder initial state
            w1 = tf.get_variable('whl',
                                 [params.latent_size, params.highway_ls],
                                 tf.float64,
                                 initializer=tf.truncated_normal_initializer())
            b1 = tf.get_variable('bhl', [params.highway_ls],
                                 tf.float64,
                                 initializer=tf.ones_initializer())
            z_dec = tf.matmul(z, w1) + b1
            inp_h, inp_c = tf.split(tf.layers.dense(z_dec,
                                                    params.decoder_hidden * 2),
                                    2,
                                    axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=dec_inps,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float64)
        # define decoder network
        if gen_mode:
            # only interested in the last output
            outputs = outputs[:, -1, :]
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r,
                                   units=vocab_size,
                                   activation=None)
        if params.beam_search:
            sample = tf.nn.softmax(x_logits)
        else:
            sample = tf.multinomial(x_logits / params.temperature, 10)[0]
        return x_logits, (initial_state, final_state), sample
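
In the non-beam branch above, dividing the logits by params.temperature before tf.multinomial controls how peaked the sampling distribution is (temperatures below 1 sharpen it, above 1 flatten it). The sampling step on its own, with illustrative names:

import tensorflow as tf

logits = tf.placeholder(tf.float32, [None, 10000])  # (steps, vocab), e.g. x_logits above
temperature = 0.7
next_ids = tf.multinomial(logits / temperature, num_samples=1)[:, 0]  # one word id per row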
Example #8
def vae_lstm(observed,
             batch_size,
             d_seq_l,
             embed,
             d_inputs,
             vocab_size,
             dropout_off=False):
    with zs.BayesianNet(observed=observed) as decoder:
        # prepare input
        z_mean = tf.zeros([batch_size, params.latent_size])
        z_logstd = tf.zeros([batch_size, params.latent_size])
        z = zs.Normal('z', mean=z_mean, logstd=z_logstd, group_event_ndims=0)
        tf.summary.histogram('z|x', z)
        # z = [batch_size, l_s] -> [batch_size, seq_len, l_s]
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not dropout_off:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)
        max_sl = tf.shape(dec_inps)[1]
        z_out = tf.reshape(tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
                           [batch_size, -1, params.latent_size])
        c_inputs = tf.concat([dec_inps, z_out], 2)
        # z->decoder initial state
        w1 = tf.get_variable('whl', [params.latent_size, params.highway_ls],
                             tf.float32,
                             initializer=tf.truncated_normal_initializer())
        b1 = tf.get_variable('bhl', [params.highway_ls],
                             tf.float32,
                             initializer=tf.ones_initializer())
        z_dec = tf.nn.relu(tf.matmul(z, w1) + b1)
        inp_h = tf.layers.dense(z_dec, params.decoder_hidden)
        inp_c = tf.layers.dense(z_dec, params.decoder_hidden)
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=params.base_cell)
        initial_state = rnn_placeholders(
            (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        for tensor in flatten(initial_state):
            tf.add_to_collection('rnn_decoder_state_input', tensor)
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=c_inputs,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        for tensor in flatten(final_state):
            tf.add_to_collection('rnn_decoder_state_output', tensor)
        # define decoder network
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r,
                                   units=vocab_size,
                                   activation=None)
        print("x_logits", x_logits)
        # take unnormalized log-prob of the last word in sequence and sample from multinomial distibution
        if params.beam_search:
            logits_ = tf.reshape(
                x_logits,
                [tf.shape(outputs)[0],
                 tf.shape(outputs)[1], vocab_size])[:, -1]
            top_k = tf.nn.top_k(logits_, params.beam_size)
            sample = top_k.indices
            norm_log_prob = tf.log(tf.nn.softmax(top_k.values))
        sample_gr = tf.multinomial(
            tf.reshape(
                x_logits,
                [tf.shape(outputs)[0],
                 tf.shape(outputs)[1], vocab_size])[:, -1] /
            params.temperature, 1)[:, 0][:]
        return decoder, x_logits, initial_state, final_state, sample_gr, sample, norm_log_prob
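
What the caller does with sample (the top-k word ids) and norm_log_prob (their normalized log-probabilities) in beam-search mode is outside this example. A hedged sketch of the Python-side bookkeeping for one expansion step, with beams holding (token_ids, score) pairs and all names illustrative:

def expand_beams(beams, topk_ids, topk_logprobs, beam_size):
    """topk_ids / topk_logprobs: arrays of shape (n_beams, beam_size) from one sess.run."""
    candidates = []
    for (tokens, score), ids, logps in zip(beams, topk_ids, topk_logprobs):
        for wid, lp in zip(ids, logps):
            candidates.append((tokens + [int(wid)], score + float(lp)))
    # keep only the beam_size highest-scoring hypotheses
    candidates.sort(key=lambda c: c[1], reverse=True)
    return candidates[:beam_size]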