rnn.py

# -*- coding: utf-8 -*-
__author__ = 'keven'

import numpy as np
import tensorflow as tf
import networkx as nx
from tensorflow.python.ops import tensor_array_ops, control_flow_ops
import gan_seq_tree
import utils


class Generator(object):
    def __init__(self, num_emb, batch_size, emb_dim, hidden_dim, sequence_length, start_token, input_length=5,
                 learning_rate=0.01, num_epochs=100):
        self.num_emb = num_emb
        self.batch_size = batch_size
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.sequence_length = sequence_length
        self.input_length = input_length
        self.start_token = tf.constant([start_token] * self.batch_size, dtype=tf.int32)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.num_epochs = num_epochs
        self.params = []
        self.temperature = 1.0
        self.grad_clip = 5.0

        with tf.variable_scope('generator'):
            self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
            self.params.append(self.embeddings)
            self.recurrent_unit = self.create_recurrent_unit(self.params)  # maps h_tm1 to h_t for generator
            self.output_unit = self.create_output_unit(self.params)  # maps h_t to o_t (output token logits)

        # placeholder definition
        self.x = tf.placeholder(tf.int32, shape=[self.batch_size, self.sequence_length]) # sequence of tokens generated by generator

        # processed for batch
        self.processed_x = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.x), perm=[1, 0, 2])  # seq_length x batch_size x emb_dim
        self.x_seq = tf.transpose(self.x, perm=[1, 0])

        # Initial states
        self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
        self.h0 = tf.stack([self.h0, self.h0])

        self.generate()
        self.update()


    def generate(self,):
        # supervised pretraining for generator
        ta_emb_x = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=self.sequence_length)
        ta_emb_x = ta_emb_x.unstack(self.processed_x)

        ta_ind_x = tensor_array_ops.TensorArray(
            dtype=tf.int32, size=self.sequence_length)
        ta_ind_x = ta_ind_x.unstack(self.x_seq)


        def _pretrain_recurrence_2(i, x_t, h_tm1, g_predictions):
            h_t = self.recurrent_unit(x_t, h_tm1)
            o_t = self.output_unit(h_t)
            prob = tf.nn.softmax(o_t)
            prob = tf.nn.l2_normalize(prob, dim=1)
            g_predictions = g_predictions.write(i, prob)  # batch x vocab_size
            x_tp1 = ta_emb_x.read(i)
            x_ind = ta_ind_x.read(i)
            return i + 1, x_tp1, h_t, g_predictions

        def _g_recurrence_2(i, x_t, h_tm1, gen_o, gen_x, g_predictions):
            h_t = self.recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
            o_t = self.output_unit(h_t)
            prob = tf.nn.softmax(o_t)
            prob = tf.nn.l2_normalize(prob, dim=1)
            g_predictions = g_predictions.write(i + self.input_length, prob)  # batch x vocab_size
            log_prob = tf.log(prob)
            next_token = tf.cast(
                tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
            x_tp1 = tf.nn.embedding_lookup(self.embeddings, next_token)  # batch x emb_dim
            gen_o = gen_o.write(i, tf.reduce_sum(tf.multiply(tf.one_hot(next_token, self.num_emb, 1.0, 0.0),
                                                             tf.nn.softmax(o_t)), 1))  # [batch_size] , prob
            gen_x = gen_x.write(i, next_token)  # indices, batch_size
            return i + 1, x_tp1, h_t, gen_o, gen_x, g_predictions

        predictions = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=self.sequence_length,
            dynamic_size=False, infer_shape=True)

        gen_o_f2 = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length - self.input_length,
                                                dynamic_size=False, infer_shape=True)
        gen_x_f2 = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.sequence_length - self.input_length,
                                                dynamic_size=False, infer_shape=True)


        _, x_f2, h_f2, predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3: i < self.input_length,
            body=_pretrain_recurrence_2,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.nn.embedding_lookup(self.embeddings, self.start_token),
                       self.h0, predictions))
        _, _, _, _, gen_seq, self.predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3, _4, _5: i < self.sequence_length - self.input_length,
            body=_g_recurrence_2,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       x_f2, h_f2, gen_o_f2, gen_x_f2, predictions))

        self.predictions = tf.transpose(self.predictions.stack(), perm=[1, 0, 2])  # batch_size x seq_length x vocab_size
        self.gen_seq = tf.transpose(gen_seq.stack(), perm=[1, 0])  # batch_size x seq_length / 2
        self.gen_x = tf.concat([self.x[:, :self.input_length], self.gen_seq], axis=1)

        def compute_accuracy(x, y):
            intersection = tf.sets.set_intersection(x, y)
            union = tf.sets.set_union(x, y)
            correct_number = tf.cast(tf.sets.set_size(intersection), tf.float32)
            total_number = tf.cast(tf.sets.set_size(union), tf.float32)
            return tf.reduce_mean(correct_number * 1.0 / total_number)

        ground_truth = self.x[:, self.sequence_length - self.input_length:]
        self.accuracy = compute_accuracy(self.gen_seq, ground_truth)

        self.train_loss = -tf.reduce_sum(
            tf.one_hot(tf.to_int32(tf.reshape(self.x, [-1])), self.num_emb, 1.0, 0.0) * tf.log(
                tf.clip_by_value(tf.reshape(self.predictions, [-1, self.num_emb]), 1e-20, 1.0)
            )
        ) / (self.sequence_length * self.batch_size)


    def update(self,):
        self.loss = -tf.reduce_sum(
            tf.one_hot(tf.to_int32(tf.reshape(self.x, [-1])), self.num_emb, 1.0, 0.0) * tf.log(
                tf.clip_by_value(tf.reshape(self.predictions, [-1, self.num_emb]), 1e-20, 1.0)
            )
        ) / (self.sequence_length * self.batch_size)

        opt = self.g_optimizer(self.learning_rate)
        self.grad, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.params), self.grad_clip)
        self.updates = opt.apply_gradients(zip(self.grad, self.params))


    def generate_step(self, sess, x):
        feed_dict = {self.x: x}
        generate_sequence, generate_prob_table = sess.run([self.gen_x, self.predictions], feed_dict=feed_dict)
        return generate_sequence, generate_prob_table


    def get_accuracy(self, sess, x):
        feed_dict = {self.x: x}
        accuracy, loss = sess.run([self.accuracy, self.train_loss], feed_dict=feed_dict)
        return accuracy, loss


    def update_step(self, sess, x):
        feed_dict = {self.x: x}
        _ = sess.run(self.updates, feed_dict)


    def init_matrix(self, shape):
        return tf.random_normal(shape, stddev=0.1)

    def init_vector(self, shape):
        return tf.zeros(shape)

    def create_recurrent_unit(self, params):
        # Weights and Bias for input and hidden tensor
        self.Wi = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
        self.Ui = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
        self.bi = tf.Variable(self.init_matrix([self.hidden_dim]))

        self.Wf = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
        self.Uf = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
        self.bf = tf.Variable(self.init_matrix([self.hidden_dim]))

        self.Wog = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
        self.Uog = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
        self.bog = tf.Variable(self.init_matrix([self.hidden_dim]))

        self.Wc = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
        self.Uc = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
        self.bc = tf.Variable(self.init_matrix([self.hidden_dim]))
        params.extend([
            self.Wi, self.Ui, self.bi,
            self.Wf, self.Uf, self.bf,
            self.Wog, self.Uog, self.bog,
            self.Wc, self.Uc, self.bc])

        def unit(x, hidden_memory_tm1):
            previous_hidden_state, c_prev = tf.unstack(hidden_memory_tm1)

            # Input Gate
            i = tf.sigmoid(
                tf.matmul(x, self.Wi) +
                tf.matmul(previous_hidden_state, self.Ui) + self.bi
            )

            # Forget Gate
            f = tf.sigmoid(
                tf.matmul(x, self.Wf) +
                tf.matmul(previous_hidden_state, self.Uf) + self.bf
            )

            # Output Gate
            o = tf.sigmoid(
                tf.matmul(x, self.Wog) +
                tf.matmul(previous_hidden_state, self.Uog) + self.bog
            )

            # New Memory Cell
            c_ = tf.nn.tanh(
                tf.matmul(x, self.Wc) +
                tf.matmul(previous_hidden_state, self.Uc) + self.bc
            )

            # Final Memory cell
            c = f * c_prev + i * c_

            # Current Hidden state
            current_hidden_state = o * tf.nn.tanh(c)

            return tf.stack([current_hidden_state, c])

        return unit

    def create_output_unit(self, params):
        self.Wo = tf.Variable(self.init_matrix([self.hidden_dim, self.num_emb]))
        self.bo = tf.Variable(self.init_matrix([self.num_emb]))
        params.extend([self.Wo, self.bo])

        def unit(hidden_memory_tuple):
            hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
            # hidden_state : batch x hidden_dim
            logits = tf.matmul(hidden_state, self.Wo) + self.bo
            # output = tf.nn.softmax(logits)
            return logits

        return unit

    def g_optimizer(self, *args, **kwargs):
        return tf.train.AdamOptimizer(*args, **kwargs)


def main():
    args = utils.parse_args_new()
    #  Data Parameters
    origin_data_file = args.data_file
    graph_file = args.graph_file
    generated_num = args.num_train_sample
    generated_num_test = args.num_test_sample
    seq_length = args.seq_length
    vocab_size = args.num_node
    batch_size = args.batch_size
    num_epochs = args.num_epochs

    #  Generator  Hyper-parameters
    g_emb_dim = args.g_dim_emb
    g_hidden_size = args.g_hidden_size
    train_percent = args.train_percent
    g_num_expend = args.g_num_expend

    input_length = int(seq_length * train_percent)
    train_batch = int(generated_num / batch_size)
    test_batch = int(generated_num_test / batch_size)

    # Model
    START_TOKEN = 0

    utils.prepare_data(origin_data_file)
    graph = nx.read_edgelist(graph_file, nodetype=int, create_using=nx.DiGraph())
    adjacency_matrix = np.asarray(nx.adjacency_matrix(graph).todense()).transpose()


    generator = Generator(vocab_size, batch_size, g_emb_dim, g_hidden_size, seq_length, START_TOKEN, input_length)

    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    print 'Start training...'
    for epoch in range(num_epochs):
        for it in range(train_batch):
            batch = utils.train_next_batch(generator.batch_size, hard=True)
            generator.update_step(sess, batch)
        if epoch % 5 == 0:
            accuracy, test_loss, p_n, n_n = gan_seq_tree.test_accuracy_epoch(sess, generator, generator.batch_size,
                                                                             test_batch, generator.input_length,
                                                                             adjacency_matrix, g_num_expend)
            print 'training epoch:%d loss:%.5f jaccard:%.5f p@n:%.5f, n@n:%.5f' % (epoch, test_loss, accuracy, p_n, n_n)


if __name__ == '__main__':
    main()