Example #1
    def collate_fn(self, batch):
        r"""
            Perform preprocessing and create a final data batch:
            1. PAD sequences to the length of the longest sequence in the batch.
            2. Convert audio signals to spectrograms.
            3. PAD spectrogram frames so their length is divisible by r.
            4. Convert Numpy arrays to Torch tensors.
        """

        # Puts each data field into a tensor with outer dimension batch size
        if isinstance(batch[0], collections.Mapping):
            keys = list()

            wav = [d['wav'] for d in batch]
            item_idxs = [d['item_idx'] for d in batch]
            text = [d['text'] for d in batch]

            text_lenghts = np.array([len(x) for x in text])
            max_text_len = np.max(text_lenghts)

            linear = [self.ap.spectrogram(w).astype('float32') for w in wav]
            mel = [self.ap.melspectrogram(w).astype('float32') for w in wav]
            mel_lengths = [m.shape[1] + 1 for m in mel]  # +1 for zero-frame

            # compute 'stop token' targets
            stop_targets = [
                np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
            ]

            # PAD stop targets
            stop_targets = prepare_stop_target(stop_targets,
                                               self.outputs_per_step)

            # PAD sequences with largest length of the batch
            text = prepare_data(text).astype(np.int32)
            wav = prepare_data(wav)

            # PAD features with largest length + a zero frame
            linear = prepare_tensor(linear, self.outputs_per_step)
            mel = prepare_tensor(mel, self.outputs_per_step)
            assert mel.shape[2] == linear.shape[2]
            timesteps = mel.shape[2]

            # B x T x D
            linear = linear.transpose(0, 2, 1)
            mel = mel.transpose(0, 2, 1)

            # convert things to pytorch
            text_lenghts = torch.LongTensor(text_lenghts)
            text = torch.LongTensor(text)
            linear = torch.FloatTensor(linear)
            mel = torch.FloatTensor(mel)
            mel_lengths = torch.LongTensor(mel_lengths)
            stop_targets = torch.FloatTensor(stop_targets)

            return (text, text_lenghts, linear, mel, mel_lengths, stop_targets,
                    item_idxs[0])

        raise TypeError("batch must contain tensors, numbers, dicts or lists;"
                        " found {}".format(type(batch[0])))
Example #2
def create_speech(m, s, CONFIG, use_cuda, ap):
    text_cleaner = [CONFIG.text_cleaner]
    texts = [np.asarray(text_to_sequence(text, text_cleaner), dtype=np.int32) for text in s]
    texts = prepare_data(texts).astype(np.int32)
    
    texts = torch.LongTensor(texts)
    if use_cuda:
        texts = texts.cuda()
    mel_out, linear_outs, alignments, stop_tokens = m.forward(texts.long())
    linear_outs = [linear_out.data.cpu().numpy() for linear_out in linear_outs]
    alignments = [alignment_.cpu().data.numpy() for alignment_ in alignments]
    specs = [ap._denormalize(linear_out) for linear_out in linear_outs]
    wavs = [ap.inv_spectrogram(linear_out.T) for linear_out in linear_outs]
    # wav = wav[:ap.find_endpoint(wav)]
    out = io.BytesIO()
    # ap.save_wav(wav, out)
    return wavs, alignments, specs, stop_tokens
Example #3
                length: [len(input_sent_vect)],
                keep_rate: 1.0}
        # for the first decoder step, the state is None
        if state is not None:
             feed.update({in_state: state})
        index, state = sess.run([sample, out_state], feed)
        sentence += [data_dict.idx2word[idx] for idx in index]
    print(' '.join([word for word in sentence if word not in ['<EOS>',
                                                             '<PAD>', '<BOS>']]))

if __name__ == "__main__":
    if params['input'] == 'GOT':
        # GOT corpus
        corpus_path = "/home/luoyy/datasets_small/got"
        data_raw = data_.got_read(corpus_path)
        data, labels_arr, _, data_dict = data_.prepare_data(data_raw,
                                                                    params_c)
        vocab_size = data_dict.vocab_size
        print("Most common words : {}", [data_dict.idx2word[i] for i in range(vocab_size - 1, vocab_size - 7, -1)])
        del(data_raw)
    elif params['input'] == 'PTB':
        # data in form [data, labels]
        train_data_raw, valid_data_raw, test_data_raw = data_.ptb_read(
            './PTB_DATA/data')
        data, labels_arr, _, data_dict = data_.prepare_data(
            train_data_raw, params_c)
    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
Example #4
import xgboost as xgb
from sklearn.utils import shuffle
from utils.data import prepare_data
from utils.gini import eval_gini, gini_xgb
import numpy as np
import pandas as pd
import time

data_path = './input'

train = pd.read_csv(data_path + '/train.csv')
test = pd.read_csv(data_path + '/test.csv')

prep = prepare_data(train, test)
train, targets, test = prep(True, False)

X, y = train.to_numpy()[:, 1:], targets.to_numpy()
X, y = shuffle(X, y)
cutoff = int(len(X) * 0.9)

train_X, train_y = X[:cutoff], y[:cutoff]
X_test, y_test = X[cutoff:], y[cutoff:]
X_sub = test.to_numpy()[:, 1:]

del X, y, train, targets, test

param = {
    'max_depth': 5,
    'objective': 'binary:logistic',
    'subsample': 0.8,
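
The snippet above is cut off inside the `param` dictionary. As a hedged sketch of how such a parameter dict is typically consumed together with the imported `gini_xgb` metric, training might continue roughly as below; the boosting-round and early-stopping values are assumptions, not taken from the original script:

# Hedged sketch only: shows a typical xgboost training call with the names defined above.
dtrain = xgb.DMatrix(train_X, label=train_y)
dvalid = xgb.DMatrix(X_test, label=y_test)

booster = xgb.train(param, dtrain,
                    num_boost_round=500,               # assumed value
                    evals=[(dtrain, 'train'), (dvalid, 'valid')],
                    feval=gini_xgb,                    # custom Gini metric imported above
                    maximize=True,
                    early_stopping_rounds=50)          # assumed value

sub_preds = booster.predict(xgb.DMatrix(X_sub))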
Example #5
def main(params):
    if params.input_ == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read(
            './DATA/parallel_data_10k/')
        word_data, encoder_word_data, word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(
            train_data_raw, train_label_raw, params,
            './DATA/parallel_data_10k/')

        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read(
            './DATA/parallel_data_10k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(
            train_label_raw, params)

    with tf.Graph().as_default() as graph:

        label_inputs = tf.placeholder(dtype=tf.int32,
                                      shape=[None, None],
                                      name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32,
                                     shape=[None, None],
                                     name="word_inputs")

        d_word_inputs = tf.placeholder(dtype=tf.int32,
                                       shape=[None, None],
                                       name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32,
                                        shape=[None, None],
                                        name="d_label_inputs")

        d_word_labels = tf.placeholder(shape=[None, None],
                                       dtype=tf.int32,
                                       name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None],
                                        dtype=tf.int32,
                                        name="d_label_labels")

        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding",
                    [word_data_dict.vocab_size, params.embed_size],
                    dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding,
                                                     word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="word_embedding",
                    dtype=tf.float64
                )  # creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding,
                                                     word_inputs,
                                                     name="word_lookup")

                label_embedding = tf.Variable(
                    label_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="label_embedding",
                    dtype=tf.float64
                )  # creates a variable that can be used as a tensor

                label_inputs_1 = tf.nn.embedding_lookup(label_embedding,
                                                        label_inputs,
                                                        name="label_lookup")

        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes = word_data_dict.sizes
        word_vocab_size = max(sizes[1], sizes[2], sizes[0])
        label_vocab_size = label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)

        Zsent_distribution, zsent_sample, Zglobal_distribition, zglobal_sample = encoder(
            vect_inputs, label_inputs_1, seq_length, params.batch_size)
        word_logits, label_logits, Zsent_dec_distribution, Zglobal_dec_distribution, _, _, _ = decoder(
            zglobal_sample, d_word_inputs, d_label_inputs, seq_length,
            params.batch_size, label_embedding, word_embedding,
            word_vocab_size, label_vocab_size)

        neg_kld_zsent = -1 * tf.reduce_mean(
            tf.reduce_sum(
                kld(Zsent_distribution[0], Zsent_distribution[1],
                    Zsent_dec_distribution[0], Zsent_dec_distribution[1]),
                axis=1))
        neg_kld_zglobal = -1 * tf.reduce_mean(
            tf.reduce_sum(
                kld(Zglobal_distribition[0], Zglobal_distribition[1],
                    Zglobal_dec_distribution[0], Zglobal_dec_distribution[1]),
                axis=1))

        # label reconstruction loss
        d_label_labels_flat = tf.reshape(d_label_labels, [-1])
        l_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=label_logits, labels=d_label_labels_flat)
        l_mask_labels = tf.sign(tf.cast(d_label_labels_flat, dtype=tf.float64))
        l_masked_losses = l_mask_labels * l_cross_entr
        # reshape again
        l_masked_losses = tf.reshape(l_masked_losses, tf.shape(d_label_labels))
        l_mean_loss_by_example = tf.reduce_sum(
            l_masked_losses, reduction_indices=1) / d_seq_length
        label_rec_loss = tf.reduce_mean(l_mean_loss_by_example)
        label_perplexity = tf.exp(label_rec_loss)

        # Word reconstruction loss
        # print(word_logits.shape)

        d_word_labels_flat = tf.reshape(d_word_labels, [-1])
        print(d_word_labels_flat.shape)
        w_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=word_logits, labels=d_word_labels_flat)
        w_mask_labels = tf.sign(tf.cast(d_word_labels_flat, dtype=tf.float64))
        w_masked_losses_1 = w_mask_labels * w_cross_entr
        w_masked_losses = tf.reshape(w_masked_losses_1,
                                     tf.shape(d_word_labels))
        w_mean_loss_by_example = tf.reduce_sum(
            w_masked_losses, reduction_indices=1) / d_seq_length
        word_rec_loss = tf.reduce_mean(w_mean_loss_by_example)
        word_perplexity = tf.exp(word_rec_loss)

        rec_loss = word_rec_loss + label_rec_loss

        #anneal = tf.placeholder(tf.float64)
        # annealing=tf.to_float(anneal)
        #annealing = (tf.tanh((tf.to_float(anneal) - 5000)/1800) + 1)/2
        # overall loss reconstruction loss - kl_regularization
        #kld_loss = -1*(neg_kld_zglobal + neg_kld_zsent)
        #kl_term_weight = tf.multiply(
        #    tf.cast(annealing, dtype=tf.float64), tf.cast(kld_loss, dtype=tf.float64))

        alpha = tf.placeholder(tf.float64)
        alpha_val = tf.to_float(alpha)
        beta = tf.placeholder(tf.float64)
        beta_val = tf.to_float(beta)
        kl_term_weight = tf.multiply(tf.cast(alpha_val, dtype=tf.float64), tf.cast(neg_kld_zsent, dtype=tf.float64)) \
                         + tf.multiply(tf.cast(beta_val, dtype=tf.float64), tf.cast(neg_kld_zglobal, dtype=tf.float64))

        total_lower_bound = rec_loss - kl_term_weight

        gradients = tf.gradients(total_lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(
            zip(clipped_grad, tf.trainable_variables()))

        saver = tf.train.Saver(max_to_keep=10)

        with tf.Session() as sess:
            print("*********")
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])

            # try:

            #     path="./models_ckpts_"+params.name+"/vae_lstm_model-11900"
            #     # print(path)
            #     # chkp.print_tensors_in_checkpoint_file(path, tensor_name='', all_tensors=True)
            #     saver.restore(sess,path )
            # # saver.restore(sess, "./models_ckpts_1/vae_lstm_model-258600")
            # except:
            #     print("-----exception occurred--------")
            #     exit()
            #     # traceback.print_exc()

            # print("Model Restored")

            total_parameters = 0
            #print_vars("trainable variables")
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                # print(len(shape))
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters, total_parameters)
                total_parameters += variable_parameters
            print(total_parameters)

            # exit()
            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)
            #ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(word_data) // params.batch_size
            cur_it = 0
            iters, tlb_arr, wppl_arr, klw_arr, kld_zg_arr, kld_zs_arr = [], [], [], [], [], []
            alpha_arr, beta_arr = [], []
            for e in range(params.num_epochs):
                epoch_start_time = datetime.datetime.now()
                print("Epoch: {} started at: {}".format(e, epoch_start_time))
                total_tlb = 0
                total_wppl = 0
                total_klw = 0
                total_kld_zg = 0
                total_kld_zs = 0
                for it in tqdm(range(num_iters)):
                    params.is_training = True

                    sent_batch = word_data[it * params.batch_size:(it + 1) *
                                           params.batch_size]
                    label_batch = label_data[it * params.batch_size:(it + 1) *
                                             params.batch_size]

                    sent_dec_l_batch = word_labels_arr[it *
                                                       params.batch_size:(it +
                                                                          1) *
                                                       params.batch_size]

                    sent_l_batch = encoder_word_data[it *
                                                     params.batch_size:(it +
                                                                        1) *
                                                     params.batch_size]
                    label_l_batch = label_labels_arr[it *
                                                     params.batch_size:(it +
                                                                        1) *
                                                     params.batch_size]

                    # zero padding
                    pad = len(max(sent_batch, key=len))
                    # not optimal!!
                    length_ = np.array([len(sent) for sent in sent_batch
                                        ]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    sent_batch = np.array([
                        sent + [0] * (pad - len(sent)) for sent in sent_batch
                    ])
                    label_batch = np.array([
                        sent + [0] * (pad - len(sent)) for sent in label_batch
                    ])

                    sent_dec_l_batch = np.array([
                        (sent + [0] * (pad - len(sent)))
                        for sent in sent_dec_l_batch
                    ])

                    sent_l_batch = np.array([(sent + [0] * (pad - len(sent)))
                                             for sent in sent_l_batch])
                    label_l_batch = np.array([(sent + [0] * (pad - len(sent)))
                                              for sent in label_l_batch])

                    alpha_v = beta_v = 0
                    if e > 2000:
                        alpha_v = beta_v = float(e) / 5000 - 0.4
                    feed = {
                        word_inputs: sent_l_batch,
                        label_inputs: label_l_batch,
                        d_word_inputs: sent_batch,
                        d_label_inputs: label_batch,
                        d_word_labels: sent_dec_l_batch,
                        d_label_labels: label_l_batch,
                        seq_length: length_,
                        d_seq_length: length_,
                        alpha: alpha_v,
                        beta: beta_v
                    }

                    z1a, z1b, z3a, z3b, kzg, kzs, tlb, wppl, lppl, klw, o, alpha_, beta_ = sess.run(
                        [
                            Zsent_distribution[0], Zsent_distribution[1],
                            Zsent_dec_distribution[0],
                            Zsent_dec_distribution[1], neg_kld_zglobal,
                            neg_kld_zsent, total_lower_bound, word_perplexity,
                            label_perplexity, kl_term_weight, optimize,
                            alpha_val, beta_val
                        ],
                        feed_dict=feed)

                    # print(c.shape)
                    # print(d.shape)
                    # print(c,d)
                    # print(e,f)
                    # print(d[69],d[119])
                    ##print('zsent', z1a, z1b)
                    # print('zglobal',z2a,z2b)
                    ##print('zsent dec', z3a, z3b)
                    # if cur_it % 100 == 0 and cur_it != 0:
                    #     print("TotalLB after {} ({}) iterations (epoch): {} Neg_KLD_Zglobal: "
                    #           "{} Neg_KLD_Zsent: {}".format(
                    #               cur_it, e, tlb, kzg, kzs))
                    #     print(
                    #         "Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))

                    cur_it += 1
                    # iters.append(cur_it)
                    # tlb_arr.append(tlb)
                    # wppl_arr.append(wppl)
                    total_tlb += tlb
                    total_wppl += wppl
                    total_klw += klw
                    total_kld_zg += -kzg
                    total_kld_zs += -kzs
                    if cur_it % 100 == 0 and cur_it != 0:
                        path_to_save = os.path.join(params.MODEL_DIR,
                                                    "vae_lstm_model")
                        # print(path_to_save)
                        model_path_name = saver.save(sess,
                                                     path_to_save,
                                                     global_step=cur_it)
                        # print(model_path_name)

                avg_tlb = total_tlb / num_iters
                avg_wppl = total_wppl / num_iters
                avg_klw = total_klw / num_iters
                avg_kld_zg = total_kld_zg / num_iters
                avg_kld_zs = total_kld_zs / num_iters

                iters.append(e)
                tlb_arr.append(avg_tlb)
                wppl_arr.append(avg_wppl)
                klw_arr.append(avg_klw)
                kld_zg_arr.append(avg_kld_zg)
                kld_zs_arr.append(avg_kld_zs)
                alpha_arr.append(alpha_)
                beta_arr.append(beta_)

                print("Time Taken:",
                      datetime.datetime.now() - epoch_start_time)

            import matplotlib as mpl
            mpl.use('Agg')
            import matplotlib.pyplot as plt
            plot_filename = "./plot_values_" + str(params.num_epochs) + ".txt"
            with open(plot_filename, 'w') as wf:
                _ = [wf.write(str(s) + ' ') for s in iters]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in tlb_arr]
                # wf.write('\n')
                # _ = [wf.write(str(s) + ' ') for s in wppl_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in klw_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in kld_zg_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in kld_zs_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in alpha_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in beta_arr]

            plt.plot(iters, tlb_arr, color='blue', label='Total lower bound')
            plt.title("Total Lower Bound vs Epochs")
            plt.xlabel('Epochs')
            plt.ylabel('Total Lower Bound')
            plt.savefig('./graph_elbo_epochs_' + str(params.num_epochs) +
                        '.png')
            plt.clf()

            # plt.plot(iters, wppl_arr, color='red', label='Word Perplexity')
            # plt.title("Word Perplexity vs Epochs")
            # plt.xlabel('Epochs')
            # plt.ylabel('Word Perplexity')
            # plt.savefig('./graph_ppl_epochs_'+str(params.num_epochs)+'.png')

            # plt.legend(bbox_to_anchor=(1.05, 1),
            #           loc=1, borderaxespad=0.)
            # plt.clf()

            plt.plot(iters, klw_arr, color='green', label='KL term Value')
            plt.title("KL Term Value vs Epochs")
            plt.xlabel('Epochs')
            plt.ylabel('KL term Value')
            plt.savefig('./graph_klw_epochs_' + str(params.num_epochs) +
                        '.png')
            plt.clf()

            plt.plot(iters, kld_zg_arr, color='yellow', label='kld_zg')
            plt.title("KLD zg")
            plt.xlabel('Epochs')
            plt.ylabel('KLD zg term Value')
            plt.savefig('./graph_kld_zg_epochs_' + str(params.num_epochs) +
                        '.png')
            plt.clf()

            plt.plot(iters, kld_zs_arr, color='black', label='kld_zs')
            plt.title("KLD zs")
            plt.xlabel('Epochs')
            plt.ylabel('KLD zs term Value')
            plt.savefig('./graph_kld_zs_epochs_' + str(params.num_epochs) +
                        '.png')
            plt.clf()

            plt.plot(iters, alpha_arr, color='blue', label='alpha')
            plt.title("Alpha")
            plt.xlabel('Epochs')
            plt.ylabel('Alpha Value')
            plt.savefig('./graph_alpha_epochs_' + str(params.num_epochs) +
                        '.png')
            plt.clf()
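
Both reconstruction losses above use the same masking trick: `tf.sign` of the integer labels turns every `<PAD>` id (0) into a weight of 0.0 and every real token into 1.0, so padded positions drop out of the per-example sum. A tiny standalone NumPy illustration of that step:

import numpy as np

labels = np.array([17, 4, 92, 0, 0])                # token ids, 0 == <PAD>
token_xent = np.array([2.1, 0.7, 1.3, 0.9, 1.1])    # made-up per-token cross-entropies

mask = np.sign(labels).astype(np.float64)           # [1., 1., 1., 0., 0.]
masked = mask * token_xent                          # padded positions contribute nothing
loss_by_example = masked.sum() / 3.0                # divide by the true sequence length
print(loss_by_example)                              # (2.1 + 0.7 + 1.3) / 3 ≈ 1.367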
Example #6
def main(params):
    if params.input == 'GOT':
        corpus_path = "/home/luoyy/datasets_small/got"
        data_raw = data_.got_read(corpus_path)
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(data_raw,
                                                                    params)
    elif params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, valid_data_raw, test_data_raw = data_.ptb_read(
            './PTB_DATA/data')
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(
            train_data_raw, params)
    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        d_inputs_ps = tf.placeholder(dtype=tf.int32, shape=[None, None])
        labels = tf.placeholder(shape=[None, None], dtype=tf.int32)
        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                embedding = tf.get_variable(
                    "embedding", [data_dict.vocab_size,
                                  params.embed_size], dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                embedding = tf.Variable(
                    embed_arr,
                    trainable=params.fine_tune_embed,
                    name="embedding", dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        vocab_size = data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float32)
        qz = q_net(vect_inputs, seq_length, params.batch_size)
        x_logits, _, _ = vae_lstm({'z': qz}, params.batch_size,
                                  d_seq_length, embedding,
                                  d_inputs_ps, vocab_size=vocab_size)
        # loss, masking <PAD>
        current_len = tf.placeholder_with_default(params.sent_max_size,
                                                  shape=())
        # tf.sequence_mask, tf.contrib.seq2seq.sequence_loss
        labels_flat = tf.reshape(labels, [-1])
        cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=x_logits, labels=labels_flat)
        mask_labels = tf.sign(tf.to_float(labels_flat))
        masked_losses = mask_labels * cross_entr
        # reshape again
        masked_losses = tf.reshape(masked_losses, tf.shape(labels))
        mean_loss_by_example = tf.reduce_sum(masked_losses,
                                             reduction_indices=1) / d_seq_length
        rec_loss = tf.reduce_mean(mean_loss_by_example)
        perplexity = tf.exp(rec_loss)
        # kl divergence calculation
        kld = -0.5 * tf.reduce_mean(
                tf.reduce_sum(
                    1 + tf.log(tf.square(qz.distribution.std) + 0.0001)
                    - tf.square(qz.distribution.mean)
                    - tf.square(qz.distribution.std), 1))
        tf.summary.scalar('kl_divergence', kld)
        # kld weight annealing
        anneal = tf.placeholder(tf.int32)
        annealing = (tf.tanh((tf.to_float(anneal) - 3500)/1000) + 1)/2
        # overall loss reconstruction loss - kl_regularization
        lower_bound = rec_loss + tf.multiply(
            tf.to_float(annealing), tf.to_float(kld)) / 10
        #lower_bound = rec_loss
        sm2 = [tf.summary.scalar('lower_bound', lower_bound),
               tf.summary.scalar('kld_coeff', annealing)]
        gradients = tf.gradients(lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad,
                                           tf.trainable_variables()))
        #sample
        logits, states, smpl = vae_lstm({}, 1, d_seq_length, embedding,
                                        d_inputs_ps, vocab_size=vocab_size,
                                        gen_mode=True)
        init_state = states[0]
        fin_output = states[1]
        # merge summaries
        merged = tf.summary.merge_all()
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)
            #ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(data) // params.batch_size
            cur_it = 0
            iters, kld_arr, coeff = [], [], []
            for e in range(params.num_epochs):
                for it in range(num_iters):
                    params.is_training = True
                    batch = data[it * params.batch_size: (it + 1) * params.batch_size]
                    l_batch = labels_arr[it * params.batch_size:(it + 1) * params.batch_size]
                    # zero padding
                    pad = len(max(batch, key=len))
                    # not optimal!!
                    length_ = np.array([len(sent) for sent in batch]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    batch = np.array([sent + [0] * (pad - len(sent)) for sent in batch])
                    l_batch = np.array([(sent + [0] * (pad - len(sent))) for sent in l_batch])
                    # encoder feed=[....<EOS>], decoder feed=[<BOS>....], labels=[.....<EOS>]
                    feed = {inputs: l_batch, d_inputs_ps: batch, labels: l_batch,
                            seq_length: length_, d_seq_length: length_, anneal: cur_it, current_len: pad}
                    lb, _, kld_, ann_, r_loss, perplexity_ = sess.run([lower_bound, optimize,
                                                                       kld, annealing, rec_loss, perplexity],
                                                                      feed_dict=feed)
                    cur_it += 1
                    iters.append(cur_it)
                    kld_arr.append(kld_)
                    coeff.append(ann_)
                    if cur_it % 100 == 0 and cur_it != 0:
                        print("VLB after {} ({}) iterations (epoch): {} KLD: "
                              "{} Annealing Coeff: {} CE: {}".format(
                                  cur_it, e,lb, kld_, ann_, r_loss))
                        print("Perplexity: {}".format(perplexity_))
                    if cur_it % 150 == 0:
                        if not params.beam_search:
                            params.is_training = False
                            online_inference(sess, data_dict,
                                             sample=smpl, seq=d_inputs_ps,
                                             in_state=init_state,
                                             out_state=fin_output,
                                             length=d_seq_length)
                        else:
                            gen_sentence = beam_search(sess, data_dict, states,
                                                       smpl, (d_inputs_ps,
                                                        d_seq_length), params,
                                                       beam_size=params.beam_size)
                            print(gen_sentence)
                    if cur_it % 400 == 0 and cur_it!=0:
                       # saver = tf.train.Saver()
                        summary = sess.run(merged, feed_dict=feed)
                        summary_writer.add_summary(summary)
                        # saver.save(sess, os.path.join(params.LOG_DIR, "lstmlstm_model.ckpt"), cur_it)
                    if params.visualise:
                        if cur_it % 30000 == 0 and cur_it!=0:
                           import matplotlib.pyplot as plt
                           with open("./run_kld" + str(params.dec_keep_rate), 'w') as wf:
                               _ = [wf.write(str(s) + ' ')for s in iters]
                               wf.write('\n')
                               _ = [wf.write(str(s) + ' ')for s in kld_arr]
                               wf.write('\n')
                               _ = [wf.write(str(s) + ' ') for s in coeff]
                           plt.plot(iters, kld_arr, label='KLD')
                           plt.xlabel('Iterations')
                           plt.legend(bbox_to_anchor=(1.05, 1),
                                      loc=1, borderaxespad=0.)
                           plt.show()
                           plt.plot(iters, coeff, 'r--', label='annealing')
                           plt.legend(bbox_to_anchor=(1.05, 1),
                                      loc=1, borderaxespad=0.)
                           plt.show()
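
The KL coefficient in this example follows the tanh warm-up `(tanh((t - 3500)/1000) + 1)/2`, which ramps smoothly from roughly 0 to roughly 1 around iteration 3500. A quick standalone check of that schedule:

import numpy as np

def kld_coeff(step):
    """The same warm-up used above: ~0 early, 0.5 at step 3500, ~1 late."""
    return (np.tanh((step - 3500) / 1000) + 1) / 2

for step in (0, 1000, 3500, 5000, 8000):
    print(step, round(float(kld_coeff(step)), 4))
# 0 0.0009, 1000 0.0067, 3500 0.5, 5000 0.9526, 8000 0.9999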
Example #7
def main(params):
    if params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read('./DATA/train_untrans_6k/')
        word_data, encoder_word_data,word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(train_data_raw,train_label_raw, params,'./DATA/train_untrans_6k/')

        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read('./DATA/train_untrans_6k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(train_label_raw, params)
        
    with tf.Graph().as_default() as graph:
        
        label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="word_inputs")

        d_word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="d_label_inputs")
        
        
        
        d_word_labels = tf.placeholder(shape=[None, None], dtype=tf.int32,name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None], dtype=tf.int32,name="d_label_labels")
        
        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding", [data_dict.vocab_size,
                                  params.embed_size], dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="word_embedding", dtype=tf.float64) #creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs,name="word_lookup")
                
                label_embedding = tf.Variable(
                    label_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="label_embedding", dtype=tf.float64) #creates a variable that can be used as a tensor

                label_inputs_1=tf.nn.embedding_lookup(label_embedding, label_inputs,name="label_lookup")
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes=word_data_dict.sizes
        word_vocab_size = max(sizes[1],sizes[2],sizes[0])
        label_vocab_size=label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)
        zglobal_sample = tf.placeholder(dtype=tf.float64,shape=[None, params.latent_size])
        zsent_sample=tf.placeholder(dtype=tf.float64,shape=[None,params.latent_size])
        inp_logits=tf.placeholder(dtype=tf.float64,shape=[None,params.label_embed_size])
        word_logits,label_logits,_,_,l_smpl,w_smpl,zs=decoder(zglobal_sample, d_word_inputs, d_label_inputs,seq_length,params.batch_size,label_embedding,word_embedding, word_vocab_size, label_vocab_size,gen_mode=True,zsent=zsent_sample,inp_logits=inp_logits)

        # word_logits,_,_, _,_,w_smpl,_=decoder(zglobal_sample, d_word_inputs, d_label_inputs,seq_length,params.batch_size,label_embedding,word_embedding, word_vocab_size, label_vocab_size,gen_mode_1=False,gen_mode_2=True,zsent=zsent_sample)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            print("here")
            try:

                path="./models_ckpts_"+params.name+"/vae_lstm_model-11900"
                # print(path)
                # chkp.print_tensors_in_checkpoint_file(path, tensor_name='', all_tensors=True)
                saver.restore(sess,path )
            # saver.restore(sess, "./models_ckpts_1/vae_lstm_model-258600")
            except:
                print("-----exception occurred--------")
                exit()
                # traceback.print_exc()

            print("Model Restored")

           
            total_parameters = 0
            #print_vars("trainable variables")
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                #print(len(shape))
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters)
                total_parameters += variable_parameters
            print(total_parameters)

            batch_size=1
            # number_of_samples=params.number_of_samples
            number_of_samples=10000
            same_context_sentences=1
            sentence_file="./r_VACS_kl_10k.txt"
            labels_file="./r_VACS_kl_10k_labels.txt"

            f1=open(sentence_file,'w+')
            f2=open(labels_file,'w+')

            print("----------------------SENTENCES------------------------\n\n")
            for num in range(number_of_samples):
                params.is_training = False
                
                sentence=['<BOS>']
                label_seq = ['3']
                state = None
                input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                # z = tf.random_normal(tf.shape([1, params.latent_size]), name='z',dtype=tf.float64).eval()
                z=np.random.normal(0,1,(1,params.latent_size))

                ###initialising random variables for label decoder
                z_1=np.random.normal(0,1,(1,params.latent_size))
                l_1=np.random.rand(1,params.label_embed_size)
                # print(input_sent_vect)
                # print(z)
                # print(z)

                for i in range(params.gen_length):
                    # generate until <EOS> tag
                    if "4" in label_seq:
                        break
                    input_label_vect = [label_data_dict.word2idx[word] for word in label_seq]
                    feed = {d_label_inputs: np.array(input_label_vect).reshape([1, len(input_label_vect)]),zsent_sample: z_1,inp_logits:l_1,
                            seq_length: [len(input_label_vect)],zglobal_sample:z, d_word_inputs: np.array(input_sent_vect).reshape([1, len(input_sent_vect)]) }
                    # for the first decoder step, the state is None
                    # if state is not None:
                    #      feed.update({in_state: state})
                    a,index = sess.run([label_logits,l_smpl], feed)
                    # print(a,a.shape)
                    if(i==0):
                        logit_arr=np.array(a)
                    else:
                        logit_arr=np.concatenate((logit_arr,a))

                    # print(index)
                    # exit()
                    index=index[0]
                    label_seq += [label_data_dict.idx2word[int(index)]]
                label_seq=[word for word in label_seq if word not in ['3','4']]
                label_out = ' '.join([w for w in label_seq])
                print(label_out,len(label_seq))
                # sizes=word_data_dict.sizes
                # print(logit_arr)
                print(logit_arr.shape)
                # exit()
                

                for num in range(same_context_sentences):
                    print(num)
                    i=0
                    z_sent_sample=sess.run(zs,feed)
                    
                    b1=sizes[0]
                    b2=sizes[0]+sizes[1]
                    b3=sizes[0]+sizes[1]+sizes[2]
                    sentence=['<BOS>']
                    input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                    while(i<len(label_seq)):
                        # for i in range(len(label_seq)):
                        # generate until <EOS> tag
                        
                        input_sent_vect = [word_data_dict.word2idx[word] for word in sentence]
                        feed = {d_label_inputs: np.array(input_label_vect).reshape([1, len(input_label_vect)]),zsent_sample: z_sent_sample,inp_logits:logit_arr[:i+1],
                                seq_length: [len(input_sent_vect)],zglobal_sample:z, d_word_inputs: np.array(input_sent_vect).reshape([1, len(input_sent_vect)]) }
                        tmp=np.array(input_sent_vect).reshape([1, len(input_sent_vect)])
                        # print(tmp, tmp.shape)
                        # print(a,a.shape)

                        w_logits= sess.run(word_logits, feed)
                        # print(w_logits)
                        if(label_seq[i]=='0'):
                            w_logits=w_logits[0][:sizes[1]]
                            w_probs=softmax(w_logits)
                            # index_arr=np.argsort(np.array(w_probs))    
                            index_arr=np.random.choice(len(w_probs),5,p=w_probs)
                            index_arr=index_arr+b1

                        elif(label_seq[i]=='1'):
                            w_logits=w_logits[0][:sizes[2]]
                            w_probs=softmax(w_logits)
                            # index_arr=np.argsort(np.array(w_probs))    
                            index_arr=np.random.choice(len(w_probs),5,p=w_probs)
                            index_arr=index_arr+b2

                        elif(label_seq[i]=='2'):
                            w_logits=w_logits[0][:sizes[0]]
                            w_probs=softmax(w_logits)
                            # index_arr=np.argsort(np.array(w_probs))    
                            index_arr=np.random.choice(len(w_probs),5,p=w_probs)

                        for j in index_arr:
                            index=j
                            word=word_data_dict.idx2word[int(index)]
                            if(word!="<EOS>" and word!="<BOS>"):
                                i+=1
                                # print(i,index)
                                # print(word)
                                sentence += [word]
                                
                                break
                        # print(w_logits)
                        # print(w_logits.shape)
                        # print(min(w_logits[0]),max(w_logits[0]))
                        # exit()
                        # print(label_seq[i])
                        # if(label_seq[i]=='0'):
                        #     # print(label_seq[i])
                        #     req_logits=w_logits[0][sizes[0]:sizes[0]+sizes[1]]
                        #     req_probs=softmax(req_logits)
                        #     req_index=np.argmax(np.array(req_probs))
                        #     index=sizes[0]+req_index
                        # elif (label_seq[i]=='1'):
                        #     # print(label_seq[i])
                        #     req_logits=w_logits[0][(sizes[0]+sizes[1]):(sizes[0]+sizes[1]+sizes[2])]
                        #     req_probs=softmax(req_logits)
                        #     req_index=np.argmax(np.array(req_probs))
                        #     index=sizes[0]+sizes[1]+req_index
                        # elif (label_seq[i]=='2'):
                        #     # print(label_seq[i])
                        #     req_logits=w_logits[0][:sizes[0]]
                        #     req_probs=softmax(req_logits)
                        #     req_index=np.argmax(np.array(req_probs))
                        #     index=req_index
                        # # print(label_seq[i],i,index)
                        # print(b,b.shape)
                        # print(index)
                        
                    sentence=[word for word in sentence if word not in ['<BOS>','<EOS>']]
                    sentence_cm = ' '.join([w for w in sentence])
                    print(sentence_cm,len(sentence))
                    print("\n")
                    f1.write(sentence_cm)
                    f1.write("\n")
                    f2.write(label_out)
                    f2.write("\n")
                print("-----------------------------------------\n")

            f1.close()
            f2.close()
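
The `softmax` helper used above to turn sliced word logits into a sampling distribution for `np.random.choice` is not defined in this snippet. A generic stand-in (an assumption, not the repo's implementation) behaves like this:

import numpy as np

def softmax(logits):
    """Numerically stable softmax over a 1-D logit vector."""
    z = logits - np.max(logits)
    e = np.exp(z)
    return e / e.sum()

w_logits = np.array([2.0, 0.5, -1.0, 1.0])
w_probs = softmax(w_logits)
# draw 5 candidate word indices, weighted by the model's probabilities
index_arr = np.random.choice(len(w_probs), 5, p=w_probs)
print(w_probs.round(3), index_arr)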
Example #8
# model = CoronaVirusPredictor(n_features=1,n_hidden=512,seq_len=seq_len,n_layers=2)
# model,train_hist,_ = train_model(model,X_train,y_train,num_epochs=120)
# DAYS_TO_PREDICT = 12
# predicted_cases,_ = predict_daily_cases(model,X_train,y_train,DAYS_TO_PREDICT,seq_len,scaler)
# predicted_cases = pd.Series(data=predicted_cases,
#     index=pd.date_range(start=diff_daily_cases.index[-1],
#                         periods=DAYS_TO_PREDICT + 1,
#                         closed='right'))
#
# plot_data(predicted_cases,'Predictions',label='Predicted Daily Cases')
# plot_real_predicted(diff_daily_cases,predicted_cases)

if __name__ == '__main__':
    setup_params()

    diff_daily_cases = prepare_data('time_series_19-covid-Confirmed.csv')
    train_data, test_data = split_data(diff_daily_cases, 20)
    train_data, test_data, scaler = scale_data(diff_daily_cases, train_data,
                                               test_data)
    seq_len = 5
    X_train, y_train = create_sequences(train_data, seq_len)
    X_test, y_test = create_sequences(test_data, seq_len)

    model = CoronaVirusPredictor(n_features=1,
                                 n_hidden=512,
                                 seq_len=seq_len,
                                 n_layers=2)

    model, train_hist, test_hist = train_model(model, X_train, y_train, X_test,
                                               y_test)
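
`create_sequences` is not shown in this snippet; a common way to build (X, y) pairs for an LSTM forecaster like this one is a sliding window over the scaled series. The sketch below is an assumption about what that helper might look like, not the original implementation:

import numpy as np
import torch

def create_sequences_sketch(data, seq_len):
    """Sliding window: each X is seq_len consecutive values, y is the next value."""
    xs, ys = [], []
    for i in range(len(data) - seq_len):
        xs.append(data[i:i + seq_len])
        ys.append(data[i + seq_len])
    return (torch.tensor(np.array(xs), dtype=torch.float32),
            torch.tensor(np.array(ys), dtype=torch.float32))

series = np.arange(10, dtype=np.float32)      # toy daily-cases series
X_toy, y_toy = create_sequences_sketch(series, seq_len=5)
print(X_toy.shape, y_toy.shape)               # torch.Size([5, 5]) torch.Size([5])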
Example #9
    def collate_fn(self, batch):
        r"""
            Perform preprocessing and create a final data batch:
            1. Sort batch instances by text length.
            2. Convert audio signals to spectrograms.
            3. PAD sequences with respect to r.
            4. Load everything into Torch tensors.
        """

        # Puts each data field into a tensor with outer dimension batch size
        if isinstance(batch[0], collections.Mapping):

            text_lenghts = np.array([len(d["text"]) for d in batch])

            # sort items with text input length for RNN efficiency
            text_lenghts, ids_sorted_decreasing = torch.sort(
                torch.LongTensor(text_lenghts), dim=0, descending=True)

            wav = [batch[idx]['wav'] for idx in ids_sorted_decreasing]
            item_idxs = [
                batch[idx]['item_idx'] for idx in ids_sorted_decreasing
            ]
            text = [batch[idx]['text'] for idx in ids_sorted_decreasing]
            speaker_name = [
                batch[idx]['speaker_name'] for idx in ids_sorted_decreasing
            ]

            # compute features
            mel = [self.ap.melspectrogram(w).astype('float32') for w in wav]
            linear = [self.ap.spectrogram(w).astype('float32') for w in wav]

            mel_lengths = [m.shape[1] for m in mel]

            # compute 'stop token' targets
            stop_targets = [
                np.array([0.] * (mel_len - 1) + [1.])
                for mel_len in mel_lengths
            ]

            # PAD stop targets
            stop_targets = prepare_stop_target(stop_targets,
                                               self.outputs_per_step)

            # PAD sequences with longest instance in the batch
            text = prepare_data(text).astype(np.int32)
            wav = prepare_data(wav)

            # PAD features with longest instance
            linear = prepare_tensor(linear, self.outputs_per_step)
            mel = prepare_tensor(mel, self.outputs_per_step)
            assert mel.shape[2] == linear.shape[2]

            # B x D x T --> B x T x D
            linear = linear.transpose(0, 2, 1)
            mel = mel.transpose(0, 2, 1)

            # convert things to pytorch
            text_lenghts = torch.LongTensor(text_lenghts)
            text = torch.LongTensor(text)
            linear = torch.FloatTensor(linear).contiguous()
            mel = torch.FloatTensor(mel).contiguous()
            mel_lengths = torch.LongTensor(mel_lengths)
            stop_targets = torch.FloatTensor(stop_targets)

            return text, text_lenghts, speaker_name, linear, mel, mel_lengths, \
                   stop_targets, item_idxs

        raise TypeError("batch must contain tensors, numbers, dicts or lists;"
                        " found {}".format(type(batch[0])))
Example #10
def main(params):
    if params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, train_label_raw = data_.ptb_read('./DATA/train_untrans_6k/')
        word_data, encoder_word_data,word_labels_arr, word_embed_arr, word_data_dict = data_.prepare_data(train_data_raw,train_label_raw, params,'./DATA/train_untrans_6k/')

        train_label_raw, valid_label_raw, test_label_raw = label_data_.ptb_read('./DATA/train_untrans_6k/')
        label_data, label_labels_arr, label_embed_arr, label_data_dict = label_data_.prepare_data(train_label_raw, params)
        
    with tf.Graph().as_default() as graph:
        
        label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="lable_inputs")
        word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="word_inputs")

        d_word_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="d_word_inputs")
        d_label_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],name="d_label_inputs")
        
        
        
        d_word_labels = tf.placeholder(shape=[None, None], dtype=tf.int32,name="d_word_labels")
        d_label_labels = tf.placeholder(shape=[None, None], dtype=tf.int32,name="d_label_labels")
        
        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                word_embedding = tf.get_variable(
                    "word_embedding", [data_dict.vocab_size,
                                  params.embed_size], dtype=tf.float64)
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                word_embedding = tf.Variable(
                    word_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="word_embedding", dtype=tf.float64) #creates a variable that can be used as a tensor
                vect_inputs = tf.nn.embedding_lookup(word_embedding, word_inputs,name="word_lookup")
                
                label_embedding = tf.Variable(
                    label_embed_arr,
                    trainable=params.fine_tune_embed,
                    name="label_embedding", dtype=tf.float64) #creates a variable that can be used as a tensor

                label_inputs_1=tf.nn.embedding_lookup(label_embedding, label_inputs,name="label_lookup")
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        sizes=word_data_dict.sizes
        word_vocab_size = max(sizes[1],sizes[2],sizes[0])
        label_vocab_size=label_data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float64)
        # qz = q_net(word_inputs, seq_length, params.batch_size)
        
        #Encoder and Decoder model
        Zsent_distribution, zsent_sample, Zglobal_distribition, zglobal_sample=encoder(vect_inputs, label_inputs_1, seq_length, params.batch_size)
        word_logits,label_logits,Zsent_dec_distribution, Zglobal_dec_distribution,_,_,_=decoder(zglobal_sample, d_word_inputs, d_label_inputs,seq_length,params.batch_size,label_embedding,word_embedding, word_vocab_size, label_vocab_size)

        # neg_kld_zsent = -1 * tf.reduce_mean(tf.reduce_sum(kld(Zsent_distribution[0], Zsent_distribution[1], Zsent_dec_distribution[0], Zsent_dec_distribution[1]), axis=1))
        # neg_kld_zglobal = -1 * tf.reduce_mean(tf.reduce_sum(kld(Zglobal_distribition[0], Zglobal_distribition[1], Zglobal_dec_distribution[0], Zglobal_dec_distribution[1]), axis=1))

        ###KLD MODIFIED

        neg_kld_zsent = -1 * kld_mod(Zsent_distribution[0], Zsent_distribution[1], Zsent_dec_distribution[0], Zsent_dec_distribution[1])
        neg_kld_zglobal = -1 * kld_mod(Zglobal_distribition[0], Zglobal_distribition[1], Zglobal_dec_distribution[0], Zglobal_dec_distribution[1])

        ##MAXIMUM LIKELIHOOD LOSS WORDS

        # w_probs = tf.nn.softmax(word_logits)
        # d_word_labels_flat = tf.reshape(d_word_labels, [-1])
        # w_mask_labels = tf.sign(tf.cast(d_word_labels_flat,dtype=tf.float64))
        # w_probs_flat=tf.reshape(w_probs, [-1])
        # w_index=tf.range(tf.shape(d_word_labels_flat)[0])*tf.shape(w_probs)[1]+d_word_labels_flat
        # w_index_probs=tf.gather(w_probs_flat,w_index)
        # w_log = -tf.log(w_index_probs+1e-8)
        # w_masked_cost = w_log * w_mask_labels
        # w_cost_1=tf.reshape(w_masked_cost,tf.shape(word_inputs))
        # w_cost = tf.reduce_sum(w_cost_1,axis=1)/(tf.cast(tf.shape(d_seq_length),dtype=tf.float64))

        ##MAXIMUM LIKELIHOOD LOSS LABELS

        # l_probs = tf.nn.softmax(label_logits)
        # d_label_labels_flat = tf.reshape(d_label_labels, [-1])        
        # l_mask_labels = tf.sign(tf.cast(d_label_labels_flat,dtype=tf.float64))
        # l_probs_flat=tf.reshape(l_probs, [-1])
        # l_index=tf.range(tf.shape(d_label_labels_flat)[0])*tf.shape(l_probs)[1]+d_label_labels_flat
        # l_index_probs=tf.gather(l_probs_flat,l_index)
        # l_log = -tf.log(l_index_probs+1e-8)
        # l_masked_cost = l_log * l_mask_labels
        # l_cost_1=tf.reshape(l_masked_cost,tf.shape(label_inputs))
        # l_cost = tf.reduce_sum(l_cost_1,axis=1)/(tf.cast(tf.shape(d_seq_length),dtype=tf.float64))

        # x=1/(tf.cast(tf.shape(d_seq_length),dtype=tf.float64))

        #######label reconstruction loss
        d_label_labels_flat = tf.reshape(d_label_labels, [-1])
        l_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=label_logits, labels=d_label_labels_flat)
        l_mask_labels = tf.sign(tf.cast(d_label_labels_flat,dtype=tf.float64))
        l_masked_losses = l_mask_labels * l_cross_entr
        # reshape again
        l_masked_losses = tf.reshape(l_masked_losses, tf.shape(d_label_labels))
        l_mean_loss_by_example = tf.reduce_sum(l_masked_losses,reduction_indices=1) / d_seq_length
        label_rec_loss = tf.reduce_mean(l_mean_loss_by_example)
        label_perplexity = tf.exp(label_rec_loss)

        ######Word reconstruction loss
        # print(word_logits.shape)

        d_word_labels_flat = tf.reshape(d_word_labels, [-1])
        print(d_word_labels_flat.shape)
        w_cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=word_logits, labels=d_word_labels_flat)
        w_mask_labels = tf.sign(tf.cast(d_word_labels_flat,dtype=tf.float64))
        w_masked_losses_1 = w_mask_labels * w_cross_entr
        w_masked_losses = tf.reshape(w_masked_losses_1, tf.shape(d_word_labels))
        w_mean_loss_by_example = tf.reduce_sum(w_masked_losses,reduction_indices=1) / d_seq_length
        word_rec_loss = tf.reduce_mean(w_mean_loss_by_example)
        word_perplexity = tf.exp(word_rec_loss)


        # using maximum likelihood
        # total_lower_bound = -1 * (w_cost + l_cost + neg_kld_zglobal + neg_kld_zsent)

        # using reconstruction loss
        # total_lower_bound = word_rec_loss + label_rec_loss - neg_kld_zglobal - neg_kld_zsent

        rec_loss = word_rec_loss + label_rec_loss
        kld_loss = -1 * (neg_kld_zglobal + neg_kld_zsent)

        # KL annealing weight, fed as a constant each step
        # (a smooth schedule such as (tf.tanh((step - 3500) / 1000) + 1) / 2 is an alternative)
        anneal = tf.placeholder(tf.float64)
        # overall loss = reconstruction loss + annealed KL regularisation
        kl_term_weight = tf.multiply(anneal, tf.cast(kld_loss, dtype=tf.float64))

        total_lower_bound = rec_loss + kl_term_weight
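        # total_lower_bound is the quantity that gets *minimised*: the negative ELBO with an
        # annealed KL term, i.e. (word NLL + label NLL) + anneal * KL(q || p).
        # Feeding anneal = 0 drops the KL penalty entirely; anneal = 1 recovers the full
        # (negative) ELBO.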
        #lower_bound = rec_loss
        # sm2 = [tf.summary.scalar('lower_bound', lower_bound),
        #        tf.summary.scalar('kld_coeff', annealing)]
        # gradients = tf.gradients(lower_bound, tf.trainable_variables())
        # opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        # clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        # optimize = opt.apply_gradients(zip(clipped_grad,
        #                                    tf.trainable_variables()))
        # #sample

        gradients = tf.gradients(total_lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad,
                                           tf.trainable_variables()))
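        # Gradients are clipped to a global norm of 5 before the Adam update, a common way to
        # keep LSTM-VAE training stable when the KL term spikes early in training.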

        saver = tf.train.Saver(max_to_keep=10)

        with tf.Session() as sess:
            print("*********")
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])

            # Uncomment and specify the checkpoint path to restore a saved model
            # try:

            #     path="./models_ckpts_"+params.name+"/vae_lstm_model-11900"
            #     # print(path)
            #     # chkp.print_tensors_in_checkpoint_file(path, tensor_name='', all_tensors=True)
            #     saver.restore(sess,path )
            # # saver.restore(sess, "./models_ckpts_1/vae_lstm_model-258600")
            # except:
            #     print("-----exception occurred--------")
            #     exit()
            #     # traceback.print_exc()

            # print("Model Restored")

            # total number of trainable parameters
            total_parameters = 0
            # print_vars("trainable variables")
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                print(shape, variable.name)
                #print(len(shape))
                variable_parameters = 1
                for dim in shape:
                    print(dim)
                    variable_parameters *= dim.value
                print(variable_parameters, total_parameters)
                total_parameters += variable_parameters
            print(total_parameters)
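            # The loop above multiplies out every trainable variable's static shape to report
            # the total parameter count; np.prod(shape.as_list()) per variable gives the same
            # number in one call.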
            
            # exit()
            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)
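            # Event files land in params.LOG_DIR and can be inspected with TensorBoard; note
            # that passing sess.graph to the FileWriter constructor already records the graph,
            # so the extra add_graph call is redundant but harmless.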
            #ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(word_data) // params.batch_size
            cur_it = 0
            iters, tlb_arr, wppl_arr, klw_arr = [], [], [], []
            print("Number of iterations: " + str(num_iters))
            # training the model
            for e in range(params.num_epochs):
                total_tlb = 0
                total_wppl = 0
                total_klw = 0
                for it in range(num_iters):
                    params.is_training = True

                    sent_batch = word_data[it * params.batch_size: (it + 1) * params.batch_size]
                    label_batch = label_data[it * params.batch_size: (it + 1) * params.batch_size]

                    sent_dec_l_batch = word_labels_arr[it * params.batch_size: (it + 1) * params.batch_size]

                    sent_l_batch = encoder_word_data[it * params.batch_size: (it + 1) * params.batch_size]
                    label_l_batch = label_labels_arr[it * params.batch_size: (it + 1) * params.batch_size]
                    
                    # zero padding
                    pad = len(max(sent_batch, key=len))
                    # not optimal!!
                    length_ = np.array([len(sent) for sent in sent_batch]).reshape(params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    sent_batch = np.array([sent + [0] * (pad - len(sent)) for sent in sent_batch])
                    label_batch = np.array([sent + [0] * (pad - len(sent)) for sent in label_batch])
                    
                    sent_dec_l_batch = np.array([(sent + [0] * (pad - len(sent))) for sent in sent_dec_l_batch])

                    sent_l_batch = np.array([(sent + [0] * (pad - len(sent))) for sent in sent_l_batch])
                    label_l_batch = np.array([(sent + [0] * (pad - len(sent))) for sent in label_l_batch])

                    feed = {word_inputs: sent_l_batch, label_inputs: label_l_batch, d_word_inputs: sent_batch,
                            d_label_inputs: label_batch, d_word_labels: sent_dec_l_batch, d_label_labels: label_l_batch,
                            seq_length: length_, d_seq_length: length_, anneal: params.anneal_value}
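                    # Illustration (hypothetical values): with sent_batch = [[4, 9], [7, 2, 5]]
                    # the pad length is 3, so the batch becomes [[4, 9, 0], [7, 2, 5]] and
                    # length_ = [2, 3], which is fed as seq_length / d_seq_length so the true
                    # lengths are available to the model and to the loss normalisation above.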

                    # a,b=sess.run([w_masked_losses,w_mean_loss_by_example ],feed_dict=feed)
                    # z1a,z1b,z2a,z2b,z3a,z3b,kzg,kzs,tlb,wppl,lppl, klw,o=sess.run([Zsent_distribution[0],Zsent_distribution[1],Zglobal_distribition[0],Zglobal_distribition[1],Zglobal_dec_distribution[0],Zglobal_dec_distribution[1] ,neg_kld_zglobal,neg_kld_zsent,total_lower_bound,word_perplexity, label_perplexity,kl_term_weight, optimize],feed_dict=feed)
                    # print("============")
                    z1a, z1b, z3a, z3b, kzg, kzs, tlb, wppl, lppl, klw, o = sess.run(
                        [Zsent_distribution[0], Zsent_distribution[1],
                         Zsent_dec_distribution[0], Zsent_dec_distribution[1],
                         neg_kld_zglobal, neg_kld_zsent, total_lower_bound,
                         word_perplexity, label_perplexity, kl_term_weight, optimize],
                        feed_dict=feed)

                    # print(c.shape)
                    # print(d.shape)
                    # print(c,d)
                    # print(e,f)
                    # print(d[69],d[119])
                    # print('zsent',z1a,z1b)
                    # # print('zglobal',z2a,z2b)
                    # print('zsent dec',z3a,z3b)
                    if cur_it % 100 == 0 and cur_it != 0:
                        print("TotalLB after {} iterations (epoch {}): {} Neg_KLD_Zglobal: "
                              "{} Neg_KLD_Zsent: {}".format(cur_it, e, tlb, kzg, kzs))
                        print("Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))
                    
                    cur_it += 1
                    # iters.append(cur_it)
                    # tlb_arr.append(tlb)
                    # wppl_arr.append(wppl)
                    total_tlb += tlb
                    total_wppl += wppl
                    total_klw += klw
                    if cur_it % 100 == 0 and cur_it != 0:
                        path_to_save = os.path.join(params.MODEL_DIR, "vae_lstm_model")
                        # print(path_to_save)
                        model_path_name = saver.save(sess, path_to_save, global_step=cur_it)
                        # print(model_path_name)

                avg_tlb = total_tlb / num_iters
                avg_wppl = total_wppl / num_iters
                avg_klw = total_klw / num_iters

                print("----------> After epoch {}: TLB: {}, Word PPL: {}, KLD: {}\n".format(
                    e, avg_tlb, avg_wppl, avg_klw))
                # wppl / lppl below come from the last batch of the epoch, not epoch averages
                print("Word Perplexity: {}, Label Perplexity: {}".format(wppl, lppl))


                iters.append(e)
                tlb_arr.append(avg_tlb)
                wppl_arr.append(avg_wppl)
                klw_arr.append(avg_klw)
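                # per-epoch averages accumulate here; they are written to disk and plotted
                # once training finishes (see below)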


            # Save the values and plot the graph
            import matplotlib.pyplot as plt
            plot_filename = "./plot_values_" + str(params.anneal_value) + ".txt"
            with open(plot_filename, 'w') as wf:
                _ = [wf.write(str(s) + ' ') for s in iters]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in tlb_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in wppl_arr]
                wf.write('\n')
                _ = [wf.write(str(s) + ' ') for s in klw_arr]

            plt.subplot(3, 1, 1, title="Total Lower Bound vs Epochs")
            plt.plot(iters, tlb_arr, color='blue', label='Total lower bound')
            plt.xlabel('Epochs')
            # plt.title('Lower bound and Word ppl vs iterations')
            plt.ylabel('Total Lower Bound')

            plt.subplot(3, 1, 2, title="Word Perplexity vs Epochs")
            plt.plot(iters, wppl_arr, color='red', label='Word Perplexity')
            plt.xlabel('Epochs')
            plt.ylabel('Word Perplexity')

            # plt.legend(bbox_to_anchor=(1.05, 1),
            #           loc=1, borderaxespad=0.)

            plt.subplot(3, 1, 3, title="KL Term Value vs Epochs")
            plt.plot(iters, klw_arr, color='green', label='KL term Value')
            plt.xlabel('Epochs')
            plt.ylabel('KL term Value')

            figure_name = './graph_' + str(params.anneal_value) + '.png'
            # plt.plot(iters, coeff, 'r--', label='annealing')
            plt.savefig(figure_name)