    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 edges,
                 log_fact_k,
                 hde,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.edges = edges
        self.count = 0
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.hde = hde

        # For masking, the likelihood is computed as follows.
        def masked_ll(weight_temp, weight_negative, posscore, posweightscore,
                      temp_pos_score, temp):
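            # A hedged reading of this routine: for every ground-truth edge
            # (u, v, w) in the current graph, score the edge in both
            # directions under the running indicator mask, accumulate the
            # log-probabilities, and then tighten the mask so chemically
            # invalid continuations (valence overflow, repeated double
            # bonds) can no longer be scored; the bin_dim columns of
            # indicator are assumed to index bond multiplicities.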

            degree = np.zeros([self.n], dtype=np.float32)
            indicator = np.ones([self.n, self.bin_dim], dtype=np.float32)
            indicator_bridge = np.ones([self.n, self.n], dtype=np.float32)
            #ring_indicator = np.ones([self.n])
            ll = 0.0
            adj = np.zeros([self.n, self.n], dtype=np.float32)

            #for (u, v, w) in self.edges[self.count]:
            for i in range(len(self.edges[self.count])):

                (u, v, w) = self.edges[self.count][i]

                degree[u] += w
                degree[v] += w

                modified_weight = tf.reduce_sum(
                    tf.multiply(np.multiply(indicator[u], indicator[v]),
                                weight_temp[u][v])) / weight_negative[u][v]
                modified_posscore_weighted = modified_weight * posscore[u][
                    v] * indicator_bridge[u][v] * 1.0

                currentscore = modified_posscore_weighted * 1.0 / (
                    temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                modified_weight = tf.reduce_sum(
                    tf.multiply(np.multiply(indicator[v], indicator[u]),
                                weight_temp[v][u])) / weight_negative[v][u]
                modified_posscore_weighted = modified_weight * posscore[v][
                    u] * indicator_bridge[v][u] * 1.0

                currentscore = modified_posscore_weighted * 1.0 / (
                    temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                #indicator = np.ones([3], dtype = np.float32)

                #if degree[u] >=5 :
                #    indicator[u][0] = 0

                if degree[u] >= 4:
                    indicator[u][0] = 0
                    indicator[u][1] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                    indicator[u][2] = 0

                #if degree[v] >=5 :
                #    indicator[v][0] = 0

                if degree[v] >= 4:
                    indicator[v][0] = 0
                    indicator[v][1] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                    indicator[v][2] = 0

                # From here on, no further double bond is allowed at these
                # endpoints; this enforces alternating bonds and ensures
                # there is no bridge.
                if w == 2:
                    indicator[u][1] = 0
                    indicator[v][1] = 0

                # If negative sampling is not wanted, the following block can be uncommented:
                '''
                    for i in range(self.n): 
                        modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[u][i])) / weight_negative[u][i]
                        modified_posscore_weighted = modified_weight * posscore[u][i] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[u][i] + modified_posscore_weighted
                        #posweightscore[u][i] = modified_posscore_weighted
                        #temp_posscore[u][i] = tf.reduce_sum(-posweightscore[u][i] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[i][u])) / weight_negative[i][u]
                        modified_posscore_weighted = modified_weight * posscore[i][u] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[i][u] + modified_posscore_weighted
                        #posweightscore[i][u] = modified_posscore_weighted
                        #temp_posscore[i][u] = tf.reduce_sum(-posweightscore[i][u] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[v][i])) / weight_negative[v][i]
                        modified_posscore_weighted = modified_weight * posscore[v][i] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[v][i] + modified_posscore_weighted
                        #posweightscore[v][i] = modified_posscore_weighted
                        #temp_posscore[v][i] = tf.reduce_sum(-posweightscore[v][i] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[i][v])) / weight_negative[i][v]
                        modified_posscore_weighted = modified_weight * posscore[i][v] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[i][v] + modified_posscore_weighted
                    '''
            return ll

        def neg_loglikelihood(prob_dict, w_edge):
            '''
            negative loglikelihood of the edges
            '''
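            # Flow of this block: decoder scores are exponentiated with a
            # clip at e^10 for numerical stability; the positive mass is the
            # score of actual edges (self.adj), the negative mass is the
            # total score of non-edges, and each edge probability is
            # pos / (pos + neg), weighted per bond type via w_score.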
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
                w_edge_new = tf.reshape(w_edge, [self.n, self.n, self.bin_dim])

                #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]),tf.fill([self.n, self.n], 10.0)))
                weight_negative = []
                weight_stack = []

                w_edge_new = tf.exp(
                    tf.minimum(w_edge_new,
                               tf.fill([self.n, self.n, self.bin_dim], 10.0)))
                weight_temp = tf.multiply(self.weight_bin, w_edge_new)

                for i in range(self.n):
                    for j in range(self.n):
                        weight_negative.append(tf.reduce_sum(w_edge_new[i][j]))
                        weight_stack.append(tf.reduce_sum(weight_temp[i][j]))

                weight_stack = tf.reshape(weight_stack, [self.n, self.n])
                weight_negative = tf.reshape(weight_negative, [self.n, self.n])

                w_score = tf.truediv(weight_stack, weight_negative)
                weight_comp = tf.subtract(tf.fill([self.n, self.n], 1.0),
                                          w_score)

                dec_mat = tf.exp(
                    tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))
                dec_mat = tf.Print(dec_mat, [dec_mat],
                                   message="my decscore values:")

                comp = tf.subtract(tf.ones([self.n, self.n], tf.float32),
                                   self.adj)
                comp = tf.Print(comp, [comp], message="my comp values:")

                temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
                negscore = tf.multiply(tf.fill([self.n, self.n], temp + 1e-9),
                                       weight_comp)
                negscore = tf.Print(negscore, [negscore],
                                    message="my negscore values:")

                posscore = tf.multiply(self.adj, dec_mat)
                posscore = tf.Print(posscore, [posscore],
                                    message="my posscore values:")

                posweightscore = tf.multiply(posscore, w_score)
                temp_pos_score = tf.reduce_sum(posweightscore)
                posweightscore = tf.Print(posweightscore, [posweightscore],
                                          message="my weighted posscore")

                softmax_out = tf.truediv(posweightscore,
                                         tf.add(posweightscore, negscore))

                if self.mask_weight:
                    #print("Mask weight option")
                    ll = masked_ll(weight_temp, weight_negative, posscore,
                                   posweightscore, temp_pos_score, temp)
                else:
                    ll = tf.reduce_sum(
                        tf.log(
                            tf.add(tf.multiply(self.adj, softmax_out),
                                   tf.fill([self.n, self.n], 1e-9))))
                ll = tf.Print(ll, [ll], message="My loss")

            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback leibler divergence for two gaussian distributions
            '''
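            # Per-node quantity computed below (diagonal covariances; mu_2
            # and sigma_2 are accepted for a general prior but go unused, so
            # a standard-normal prior is implicitly assumed):
            #   0.5 * (tr(sigma_1^2) + ||mu_1||^2 - z_dim
            #          + log(prod(debug_sigma^2) + 1e-9))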
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])

                #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))

                return 0.5 * tf.add(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)

        def ll_poisson(lambda_, x):
            # Negative Poisson log-likelihood:
            #   -(x * log(lambda) - lambda - log(x!)),
            # with log(x!) taken from the precomputed log_fact_k table.
            return -(x * np.log(lambda_) - lambda_ - self.log_fact_k[x - 1])

        def label_loss_predict(label, predicted_label):
            predicted_label_new = tf.reshape(predicted_label, [self.n, self.d])
            return tf.nn.softmax_cross_entropy_with_logits(
                labels=label, logits=predicted_label_new)

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out, w_edge, label):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out,
                                                w_edge)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss
            # For ZINC
            lambda_e = 31
            lambda_n = 30
            #lambda_hde = 5
            #lambda_e = 24
            #lambda_n = 24
            edgeprob = ll_poisson(lambda_e, len(self.edges[self.count]))
            nodeprob = ll_poisson(lambda_n, self.n)
            label_loss = label_loss_predict(self.features, label)

            #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
            return tf.reduce_mean(
                kl_loss + label_loss) + edgeprob + nodeprob + likelihood_loss
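
        # The total cost combines mean(KL + label cross-entropy) with the
        # Poisson penalties on edge/node counts and the edge NLL above.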

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n, self.n, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')

        self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim,
                             self.bin_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label

        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out, w_edge, label)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        self.sess = tf.Session()
Example #2
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 edges,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.count = 0
        self.edges = edges
        self.mask_weight = hparams.mask_weight

        #self.edges, self.non_edges = edges, non_edges
        #logger.info("Building model starts...")
        def masked_gen(posscore, negscore):
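            # Hedged reading: score each ground-truth edge in both
            # directions against the remaining allowed slots, then shrink
            # each endpoint's indicator by the other endpoint's adjacency
            # row (so already-connected neighbourhoods stop contributing)
            # and update the running positive mass accordingly.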
            indicator = []
            for i in range(self.n):
                indicator.append(tf.ones(self.n))
            temp_posscore = tf.reduce_sum(posscore)
            ll = 0.0
            for (u, v) in self.edges[self.count]:
                print("Debug", posscore[0].shape, indicator[0].shape)
                #tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v]
                ll += tf.log(
                    tf.multiply(tf.reshape(posscore[u], [1, self.n]),
                                indicator[u])[0][v] /
                    (temp_posscore + negscore[u][v]) + 1e-09)
                ll += tf.log(
                    tf.multiply(tf.reshape(posscore[v], [1, self.n]),
                                indicator[v])[0][u] /
                    (temp_posscore + negscore[v][u]) + 1e-09)

                indicator[u] = tf.multiply(
                    tf.subtract(tf.ones([1, self.n]), self.adj[v]),
                    indicator[u])
                indicator[v] = tf.multiply(
                    tf.subtract(tf.ones([1, self.n]), self.adj[u]),
                    indicator[v])

                temp_posscore = temp_posscore - tf.reduce_sum(posscore[u])
                temp = tf.multiply(indicator[u],
                                   tf.reshape(posscore[u], [self.n]))

                temp_posscore += tf.reduce_sum(temp)
                temp_posscore = temp_posscore - \
                    tf.reduce_sum(posscore[v]) + \
                    tf.reduce_sum(tf.multiply(indicator[v], posscore[v]))

                temp_posscore = temp_posscore - \
                    tf.reduce_sum(tf.transpose(posscore)[
                                  u]) + tf.reduce_sum(tf.multiply(indicator[u], tf.transpose(posscore)[u]))
                temp_posscore = temp_posscore - \
                    tf.reduce_sum(tf.transpose(posscore)[
                                  v]) + tf.reduce_sum(tf.multiply(indicator[v], tf.transpose(posscore)[v]))

            return ll

        def neg_loglikelihood(prob_dict):
            '''
            negative loglikelihood of the edges
            '''
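            # Same construction as the weighted version above, but without
            # bond-type weighting: each edge scores pos / (pos + neg), with
            # the negative mass shared across all entries.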
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):

                dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
                '''
                dec_mat_temp = np.zeros((self.n, self.n))
                for i in range(self.n):
                    for j in range(i+1, self.n):
                        print("Debug", prob_dict[k])
                        dec_mat_temp[i][j] = prob_dict[k][0]
                        dec_mat_temp[j][i] = prob_dict[k][0]
                        k+=1
                #'''

                #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]),tf.fill([self.n, self.n], 10.0)))
                dec_mat = tf.exp(
                    tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))

                dec_mat = tf.Print(dec_mat, [dec_mat],
                                   message="my decscore values:")

                print("Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat)
                comp = tf.subtract(tf.ones([self.n, self.n], tf.float32),
                                   self.adj)
                comp = tf.Print(comp, [comp], message="my comp values:")

                temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
                negscore = tf.fill([self.n, self.n], temp + 1e-9)
                negscore = tf.Print(negscore, [negscore],
                                    message="my negscore values:")

                posscore = tf.multiply(self.adj, dec_mat)
                posscore = tf.Print(posscore, [posscore],
                                    message="my posscore values:")

                #dec_out = tf.multiply(self.adj, dec_mat)
                softmax_out = tf.truediv(posscore, tf.add(posscore, negscore))
                ll = tf.reduce_sum(
                    tf.log(
                        tf.add(tf.multiply(self.adj, softmax_out),
                               tf.fill([self.n, self.n], 1e-9))), 1)
                if hparams.mask_weight:
                    ll = masked_gen(posscore, negscore)
                    #ll = masked_ll(posscore, negscore)
            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback leibler divergence for two gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])

                #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))

                temp_stack = []
                # for i in range(self.n):
                #    temp_stack.append(tf.log(tf.truediv(tf.matrix_determinant(sigma_2[i]),tf.add(tf.matrix_determinant(sigma_1[i]), tf.fill([self.d, self.d], 1e-9)))))

                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))

                print("Debug", tf.stack(temp_stack).shape)
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))

                print("debug KL", first_term.shape, second_term.shape, k.shape,
                      third_term.shape, sigma_1[0].shape)
                # return 0.5 *tf.reduce_sum((
                return 0.5 * tf.add(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss
            return tf.reduce_mean(kl_loss + likelihood_loss)

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')

        self.cell = VAEGCell(self.adj, self.features, self.z_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
        self.prob = dec_out
        print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        #self.capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grad]
        #self.tgv = [self.grad]
        # self.apply_transform_op = self.train_op.apply_gradients(self.grad_placeholder)
        #self.apply_transform_op = self.train_op.apply_gradients(self.capped_gvs)
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        #self.gradient = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).compute_gradients(self.cost)
        #self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).minimize(self.cost)
        #self.check_op = tf.add_check_numerics_ops()
        self.sess = tf.Session()
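Example #3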
class VAEG(VAEGConfig):
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 edges,
                 log_fact_k,
                 hde,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.edges = edges
        self.count = 0
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.hde = hde

        # For masking, the likelihood is computed as follows.
        def masked_ll(weight_temp, weight_negative, posscore, posweightscore,
                      temp_pos_score, temp):

            degree = np.zeros([self.n], dtype=np.float32)
            indicator = np.ones([self.n, self.bin_dim], dtype=np.float32)
            indicator_bridge = np.ones([self.n, self.n], dtype=np.float32)
            #ring_indicator = np.ones([self.n])
            ll = 0.0
            adj = np.zeros([self.n, self.n], dtype=np.float32)

            #for (u, v, w) in self.edges[self.count]:
            for i in range(len(self.edges[self.count])):

                (u, v, w) = self.edges[self.count][i]

                degree[u] += w
                degree[v] += w

                modified_weight = tf.reduce_sum(
                    tf.multiply(np.multiply(indicator[u], indicator[v]),
                                weight_temp[u][v])) / weight_negative[u][v]
                modified_posscore_weighted = modified_weight * posscore[u][
                    v] * indicator_bridge[u][v] * 1.0

                currentscore = modified_posscore_weighted * 1.0 / (
                    temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                modified_weight = tf.reduce_sum(
                    tf.multiply(np.multiply(indicator[v], indicator[u]),
                                weight_temp[v][u])) / weight_negative[v][u]
                modified_posscore_weighted = modified_weight * posscore[v][
                    u] * indicator_bridge[v][u] * 1.0

                currentscore = modified_posscore_weighted * 1.0 / (
                    temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                #indicator = np.ones([3], dtype = np.float32)

                #if degree[u] >=5 :
                #    indicator[u][0] = 0

                if degree[u] >= 4:
                    indicator[u][0] = 0
                    indicator[u][1] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                    indicator[u][2] = 0

                #if degree[v] >=5 :
                #    indicator[v][0] = 0

                if degree[v] >= 4:
                    indicator[v][0] = 0
                    indicator[v][1] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                    indicator[v][2] = 0

                # From here on, no further double bond is allowed at these
                # endpoints; this enforces alternating bonds and ensures
                # there is no bridge.
                if w == 2:
                    indicator[u][1] = 0
                    indicator[v][1] = 0

                # If negative sampling is not wanted, the following block can be uncommented:
                '''
                    for i in range(self.n): 
                        modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[u][i])) / weight_negative[u][i]
                        modified_posscore_weighted = modified_weight * posscore[u][i] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[u][i] + modified_posscore_weighted
                        #posweightscore[u][i] = modified_posscore_weighted
                        #temp_posscore[u][i] = tf.reduce_sum(-posweightscore[u][i] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[i][u])) / weight_negative[i][u]
                        modified_posscore_weighted = modified_weight * posscore[i][u] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[i][u] + modified_posscore_weighted
                        #posweightscore[i][u] = modified_posscore_weighted
                        #temp_posscore[i][u] = tf.reduce_sum(-posweightscore[i][u] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[v][i])) / weight_negative[v][i]
                        modified_posscore_weighted = modified_weight * posscore[v][i] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[v][i] + modified_posscore_weighted
                        #posweightscore[v][i] = modified_posscore_weighted
                        #temp_posscore[v][i] = tf.reduce_sum(-posweightscore[v][i] + modified_posscore_weighted)

                        modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[i][v])) / weight_negative[i][v]
                        modified_posscore_weighted = modified_weight * posscore[i][v] * 1.0  
                        temp_pos_score = temp_pos_score - posweightscore[i][v] + modified_posscore_weighted
                    '''
            return ll

        def neg_loglikelihood(prob_dict, w_edge):
            '''
            negative loglikelihood of the edges
            '''
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
                w_edge_new = tf.reshape(w_edge, [self.n, self.n, self.bin_dim])

                #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]),tf.fill([self.n, self.n], 10.0)))
                weight_negative = []
                weight_stack = []

                w_edge_new = tf.exp(
                    tf.minimum(w_edge_new,
                               tf.fill([self.n, self.n, self.bin_dim], 10.0)))
                weight_temp = tf.multiply(self.weight_bin, w_edge_new)

                for i in range(self.n):
                    for j in range(self.n):
                        weight_negative.append(tf.reduce_sum(w_edge_new[i][j]))
                        weight_stack.append(tf.reduce_sum(weight_temp[i][j]))

                weight_stack = tf.reshape(weight_stack, [self.n, self.n])
                weight_negative = tf.reshape(weight_negative, [self.n, self.n])

                w_score = tf.truediv(weight_stack, weight_negative)
                weight_comp = tf.subtract(tf.fill([self.n, self.n], 1.0),
                                          w_score)

                dec_mat = tf.exp(
                    tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))
                dec_mat = tf.Print(dec_mat, [dec_mat],
                                   message="my decscore values:")

                comp = tf.subtract(tf.ones([self.n, self.n], tf.float32),
                                   self.adj)
                comp = tf.Print(comp, [comp], message="my comp values:")

                temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
                negscore = tf.multiply(tf.fill([self.n, self.n], temp + 1e-9),
                                       weight_comp)
                negscore = tf.Print(negscore, [negscore],
                                    message="my negscore values:")

                posscore = tf.multiply(self.adj, dec_mat)
                posscore = tf.Print(posscore, [posscore],
                                    message="my posscore values:")

                posweightscore = tf.multiply(posscore, w_score)
                temp_pos_score = tf.reduce_sum(posweightscore)
                posweightscore = tf.Print(posweightscore, [posweightscore],
                                          message="my weighted posscore")

                softmax_out = tf.truediv(posweightscore,
                                         tf.add(posweightscore, negscore))

                if self.mask_weight:
                    #print("Mask weight option")
                    ll = masked_ll(weight_temp, weight_negative, posscore,
                                   posweightscore, temp_pos_score, temp)
                else:
                    ll = tf.reduce_sum(
                        tf.log(
                            tf.add(tf.multiply(self.adj, softmax_out),
                                   tf.fill([self.n, self.n], 1e-9))))
                ll = tf.Print(ll, [ll], message="My loss")

            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback leibler divergence for two gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])

                #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))

                return 0.5 * tf.add(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)

        def ll_poisson(lambda_, x):
            # Negative Poisson log-likelihood:
            #   -(x * log(lambda) - lambda - log(x!)),
            # with log(x!) taken from the precomputed log_fact_k table.
            return -(x * np.log(lambda_) - lambda_ - self.log_fact_k[x - 1])

        def label_loss_predict(label, predicted_label):
            predicted_label_new = tf.reshape(predicted_label, [self.n, self.d])
            return tf.nn.softmax_cross_entropy_with_logits(
                labels=label, logits=predicted_label_new)

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out, w_edge, label):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out,
                                                w_edge)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss
            # For ZINC
            lambda_e = 31
            lambda_n = 30
            #lambda_hde = 5
            #lambda_e = 24
            #lambda_n = 24
            edgeprob = ll_poisson(lambda_e, len(self.edges[self.count]))
            nodeprob = ll_poisson(lambda_n, self.n)
            label_loss = label_loss_predict(self.features, label)

            #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
            return tf.reduce_mean(
                kl_loss + label_loss) + edgeprob + nodeprob + likelihood_loss

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n, self.n, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')

        self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim,
                             self.bin_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label

        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out, w_edge, label)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        self.sess = tf.Session()

    def initialize(self):
        logger.info("Initialization of parameters")
        #self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt is None or ckpt.model_checkpoint_path is None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, weight, weight_bin, features):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        with open(hparams.out_dir + '/iteration.txt', 'r') as f1:
            iteration = int(f1.read().strip())
        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)

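        # One graph per training step: gradients are first evaluated to fill
        # the gradient placeholders (kept for optional gradient
        # manipulation), and the parameter update itself is performed by
        # apply_transform_op.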
        for epoch in range(num_epochs):
            start = time.time()
            for i in range(len(adj)):
                self.count = i
                if len(self.edges[self.count]) == 0:
                    continue
                # Learning rate decay
                #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d,
                                                decay, placeholders)
                feed_dict.update({self.adj: adj[i]})
                # print("Debug", features[i].shape)

                eps = np.random.randn(self.n, self.z_dim, 1)
                #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)

                feed_dict.update({self.features: features[i]})
                feed_dict.update({self.weight_bin: weight_bin[i]})
                feed_dict.update({self.weight: weight[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})

                grad_vals = self.sess.run([g[0] for g in self.grad],
                                          feed_dict=feed_dict)
                for j in range(len(self.grad_placeholder)):
                    feed_dict.update(
                        {self.grad_placeholder[j][0]: grad_vals[j]})
                input_, train_loss, _, probdict, cx, w_edge = self.sess.run(
                    [
                        self.input_data, self.cost, self.apply_transform_op,
                        self.prob, self.c_x, self.w_edge
                    ],
                    feed_dict=feed_dict)

                iteration += 1

                if iteration % hparams.log_every == 0 and iteration > 0:
                    print(train_loss)
                    print("{}/{}(epoch {}), train_loss = {:.6f}".format(
                        iteration, num_epochs, epoch + 1, train_loss))
                    #print(probdict)
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess,
                               checkpoint_path,
                               global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
            end = time.time()
            print("Time taken for a batch: ", end - start)
        with open(hparams.out_dir + '/iteration.txt', 'w') as f1:
            f1.write(str(iteration))

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin,
                      weight):
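        # Returns the encoder's latent sample (self.z_encoded) for a single
        # graph; prob/ll/kl/w_edge are fetched alongside but not returned.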

        eps = np.random.randn(self.n, self.z_dim, 1)

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})

        prob, ll, kl, w_edge, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded],
            feed_dict=feed_dict)
        return embedding

    def sample_graph_slerp(self,
                           hparams,
                           placeholders,
                           s_num,
                           G_good,
                           G_bad,
                           inter,
                           ratio,
                           index,
                           num=10):
        # Args:
        #   G_good: embedding of the training graph (a good sample)
        #   G_bad: embedding of the bad graph
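        # Sketch of the procedure: build the candidate edge list (every node
        # pair with bond weights 1-3), interpolate the embedding of the node
        # at `index` (lerp or slerp) between the good and bad graphs, decode
        # edge and bond-type probabilities, then grow the edge set
        # sequentially under the valence indicator mask.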

        list_edges = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                #list_edges.append((i,j))
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        list_weights = [1, 2, 3]

        #for sample in range(s_num):
        new_graph = []
        for i in range(self.n):
            #for i in range(index, index+1):
            node_good = G_good[i]
            node_bad = G_bad[i]
            if i == index:
                if inter == "lerp":
                    new_graph.append(
                        lerp(np.reshape(node_good, -1),
                             np.reshape(node_bad, -1), ratio))
                else:
                    new_graph.append(
                        slerp(np.reshape(node_good, -1),
                              np.reshape(node_bad, -1), ratio))
            else:
                new_graph.append(np.reshape(node_good, -1))
        eps = np.array(new_graph)
        eps = eps.reshape(eps.shape + (1, ))
        hparams.sample = True
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)

        # TODO: adj and deg are fillers and are not required while sampling; clean this up.
        adj = np.zeros([self.n, self.n])
        deg = np.zeros([self.n, 1], dtype=float)
        weight_bin = np.zeros([self.n, self.n, self.bin_dim])
        weight = np.zeros([self.n, self.n])
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})

        prob, ll, kl, w_edge = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge], feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, self.bin_dim])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)

        candidate_edges = [
            list_edges[i] for i in np.random.choice(
                range(len(list_edges)), [1], p=p, replace=False)
        ]

        probtotal = 1.0
        degree = np.zeros([self.n])

        for i in range(hparams.edges - 1):
            (u, v, w) = candidate_edges[i]
            #(u,v) = candidate_edges[i]
            #w = weight_lists[i]
            degree[u] += w
            degree[v] += w

            if degree[u] >= 4:
                indicator[u][0] = 0
                indicator[u][1] = 0

            if degree[u] >= 3:
                indicator[u][1] = 0
                indicator[u][2] = 0

            #if degree[v] >=5 :
            #    indicator[v][0] = 0
            if degree[v] >= 4:
                indicator[v][0] = 0
                indicator[v][1] = 0
            if degree[v] >= 3:
                indicator[v][1] = 0
                indicator[v][2] = 0

            p, list_edges, w_new = normalise(prob, w_edge, self.n,
                                             self.bin_dim, candidate_edges,
                                             list_edges, indicator)
            candidate_edges.extend([
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [1], p=p, replace=False)
            ])

        for (u, v, w) in candidate_edges:
            with open(
                    hparams.sample_file + '/inter/' + str(index) + inter +
                    str(s_num) + '.txt', 'a') as f:
                #f.write(str(u)+'\t'+str(v)+'\n')
                f.write(
                    str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n')

        with open(
                hparams.z_dir + '/inter/' + str(index) + inter + str(s_num) +
                '.txt', 'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        return new_graph

    def get_stat(self, hparams, placeholders, num=10, outdir=None):
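        # For each graph, average the reconstruction log-likelihood and the
        # ELBO over 10 posterior samples of eps, and accumulate the log
        # probability of the true edges under the normalised decoder output.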

        adj, features = load_data(hparams.graph_file, hparams.nodes)
        hparams.sample = True
        eps = np.random.randn(self.n, self.z_dim, 1)
        for i in range(len(adj)):
            ll_total = 0.0
            loss_total = 0.0
            prob_derived = 0.0

            for j in range(10):
                eps = np.random.randn(self.n, self.z_dim, 1)
                feed_dict = construct_feed_dict(hparams.learning_rate,
                                                hparams.dropout_rate, self.k,
                                                self.n, self.d,
                                                hparams.decay_rate,
                                                placeholders)
                feed_dict.update({self.adj: adj[i]})
                feed_dict.update({self.features: features[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                prob, ll, z_encoded, enc_mu, enc_sigma, loss, kl = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.kl
                    ],
                    feed_dict=feed_dict)
                ll_total += np.mean(ll)
                loss_total += np.mean(loss)

                prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
                prob = np.divide(prob, np.sum(prob))

                for k in range(self.n):
                    for l in range(k + 1, self.n):
                        if adj[i][k][l] == 1:
                            prob_derived += log(prob[k][l])

            with open(hparams.sample_file + '/reconstruction_ll.txt',
                      'a') as f:
                f.write(str(-ll_total / 10) + '\n')

            #with open(hparams.graph_file+'/kl.txt', 'a') as f:
            #    f.write(str(-np.mean(kl))+'\n')

            with open(hparams.sample_file + '/elbo.txt', 'a') as f:
                f.write(str(-loss_total / 10) + '\n')

            with open(hparams.sample_file + '/prob_derived.txt', 'a') as f:
                f.write(str(prob_derived / 10) + '\n')

    def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge,
                                                 atom_count, num_edges, hde):
        #node_list = defaultdict()
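        # atom_count is read here as a per-node valence target (1 -> H,
        # 2 -> O, 3 or 5 -> N, 4 -> C, judging by the checks below); nodes
        # are processed in the order H, C, O, N, and edges incident to each
        # node are sampled until its target degree is met.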
        rest = range(self.n)
        '''
        p_temp = prob[0]
        nodes = []
        sorted_index = np.argsort(np.array(p_temp))
        hn = sorted_index[:atom_count[0]]
        on = sorted_index[atom_count[0]: atom_count[0] + atom_count[1]]
        nn = sorted_index[atom_count[1] + atom_count[0]: atom_count[1] + atom_count[0] + atom_count[2]]
        cn = sorted_index[-atom_count[3]:]
        '''
        nodes = []
        hn = []
        on = []
        nn = []
        cn = []

        for i in range(self.n):
            if atom_count[i] == 1:
                hn.append(i)
            if atom_count[i] == 2:
                on.append(i)
            if atom_count[i] == 3 or atom_count[i] == 5:
                nn.append(i)
            if atom_count[i] == 4:
                cn.append(i)

        nodes.extend(hn)
        nodes.extend(cn)
        nodes.extend(on)
        nodes.extend(nn)

        node_list = atom_count
        print("Debug nodelist", node_list)

        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)

        for node in hn:
            indicator[node][1] = 0
            indicator[node][2] = 0
        for node in on:
            indicator[node][2] = 0

        # Two hydrogen atoms cannot share an edge.
        for n1 in hn:
            for n2 in hn:
                edge_mask[n1][n2] = 0
        candidate_edges = []
        # First generate edges incident to hydrogen atoms, sequentially.
        print("Debug atom ratio", hn, on, nn, cn)
        print("Debug_degree", node_list)
        print("Debug nodes", nodes)
        index = 0
        i = 0
        hydro_sat = np.zeros(self.n)
        # First handle hydrogen: once a node has taken node_list[i] - 1
        # hydrogen neighbours, mask any further hydrogen edges to it.
        try:
            for node in nodes:
                deg_req = node_list[node]
                d = degree[node]
                list_edges = get_candidate_neighbor_edges(node, self.n)
                #for (u,v,w) in list_edges:
                #    print("list edges", u, node_list[u], degree[u], indicator[u], v, node_list[v], degree[v], indicator[v])
                #print("Debug list edges", node, list_edges)
                #print("Edge mask", edge_mask[node])
                if node in hn:
                    for i1 in range(self.n):
                        if hydro_sat[i1] == node_list[i1] - 1:
                            edge_mask[i1][node] = 0
                            edge_mask[node][i1] = 0
                while d < deg_req:
                    p = normalise_h1(prob, w_edge, self.bin_dim, indicator,
                                     edge_mask, node)
                    #print("Debug p", p)

                    #list_edges = get_candidate_neighbor_edges(node, self.n)
                    #for (u,v,w) in list_edges:
                    #    print("Debug list edges", u, v, node_list[u], node_list[v])

                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])

                    (u, v, w) = candidate_edges[i]
                    degree[u] += w
                    degree[v] += w
                    d += w
                    if u in hn:
                        hydro_sat[v] += 1
                    if v in hn:
                        hydro_sat[u] += 1
                    edge_mask[u][v] = 0
                    edge_mask[v][u] = 0

                    if (node_list[u] - degree[u]) == 0:
                        indicator[u][0] = 0
                    if (node_list[u] - degree[u]) <= 1:
                        indicator[u][1] = 0
                    if (node_list[u] - degree[u]) <= 2:
                        indicator[u][2] = 0

                    if (node_list[v] - degree[v]) == 0:
                        indicator[v][0] = 0
                    if (node_list[v] - degree[v]) <= 1:
                        indicator[v][1] = 0
                    if (node_list[v] - degree[v]) <= 2:
                        indicator[v][2] = 0

                    # check for disconnected components

                    i += 1
                    print("Debug candidate_edges", candidate_edges[i - 1])
                    #    print("change state", el, degree[el], node_list[el], indicator[el])
                    #'''
            #list_edges = get_candidate_edges(self.n)
            #if abs(len(candidate_edges) - num_edges) > 1 :
            #    return ''
            #'''
            candidate_rest = ''
            candidate_edges_new = ''
            for (u, v, w) in candidate_edges:
                if u < v:
                    candidate_edges_new += ' ' + str(u) + '-' + str(
                        v) + '-' + str(w)
                else:
                    candidate_edges_new += ' ' + str(v) + '-' + str(
                        u) + '-' + str(w)
            print("Candidate_edges_new", candidate_edges_new)
            return candidate_edges_new + ' ' + candidate_rest
        except Exception:
            # Sampling can fail when the mask eliminates every candidate edge.
            return ''

    def get_masked_candidate(self,
                             list_edges,
                             prob,
                             w_edge,
                             num_edges,
                             hde,
                             indicator=[],
                             degree=[]):
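        # Repeatedly sample an edge sequence under the valence indicator,
        # tally each sampled structure (as a sorted 'u-v-w' string), and
        # return the one drawn most often.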

        list_edges_original = copy.copy(list_edges)
        n = len(prob[0])
        # Sample candidate structures repeatedly and keep the most frequent one.
        count = 0
        structure_list = defaultdict(int)

        #while(count < 50):
        while (count < 1):
            applyrules = False
            list_edges = copy.copy(list_edges_original)
            if len(indicator) == 0:
                print("Debug indi new assign")
                indicator = np.ones([self.n, self.bin_dim])
            reach = np.ones([n, n])

            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [1], p=p, replace=False)
            ]
            #if degree == None:
            if len(degree) == 0:
                print("Debug degree new assign")
                degree = np.zeros([self.n])
            G = None
            saturation = 0

            for i1 in range(num_edges - 1):
                (u, v, w) = candidate_edges[i1]
                for j in range(n):

                    if reach[u][j] == 0:
                        reach[v][j] = 0
                        reach[j][v] = 0
                    if reach[v][j] == 0:
                        reach[u][j] = 0
                        reach[j][u] = 0

                reach[u][v] = 0
                reach[v][u] = 0

                degree[u] += w
                degree[v] += w

                if degree[u] >= 4:
                    indicator[u][0] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                if degree[u] >= 2:
                    indicator[u][2] = 0

                if degree[v] >= 4:
                    indicator[v][0] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                if degree[v] >= 2:
                    indicator[v][2] = 0

                # there will be no bridge
                p, list_edges, w = normalise(prob, w_edge, self.n,
                                             self.bin_dim, candidate_edges,
                                             list_edges, indicator)

                try:
                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])
                except:
                    #candidate_edges = []
                    continue
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges)
            ])] += 1
            count += 1

        #return the element which has been sampled the maximum number of times
        return max(structure_list.items(), key=itemgetter(1))[0]
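
    # A minimal sketch of the consensus step above (hypothetical helper
    # names, not part of this class): repeat the constrained sampling,
    # serialise each draw canonically, and keep the most frequent string.
    #
    #   from collections import defaultdict
    #   counts = defaultdict(int)
    #   for _ in range(n_trials):
    #       counts[canonical_string(sample_once())] += 1
    #   best = max(counts.items(), key=lambda kv: kv[1])[0]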

    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        #while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [num_edges], p=p, replace=False)
            ]
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges, key=itemgetter(0))
            ])] += 1

            #structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1

        # return the element which has been sampled the maximum number of times
        return max(structure_list.items(), key=itemgetter(1))[0]
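
    # The unmasked variant draws a whole edge set in one shot. Assuming p is
    # a normalised probability vector over list_edges, the core draw is just:
    #
    #   idx = np.random.choice(len(list_edges), size=num_edges, p=p,
    #                          replace=False)
    #   candidate_edges = [list_edges[k] for k in idx]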

    def sample_graph_posterior_new(self,
                                   hparams,
                                   placeholders,
                                   adj,
                                   features,
                                   weight_bins,
                                   weights,
                                   embeddings,
                                   k=0):
        list_edges = get_candidate_edges(self.n)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: features})
        feed_dict.update({self.weight_bin: weight_bins})
        feed_dict.update({self.weight: weights})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: embeddings})
        hparams.sample = True

        prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
        prob = np.reshape(prob, (self.n, self.n))

        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        #indicator = np.ones([self.n, self.bin_dim])
        #p, list_edges_new, w_new = normalise(prob, w_edge, self.n, hparams.bin_dim, [], list_edges_new, indicator)
        #(val_arr, atom_list) = self.getatoms(hparams.nodes, labels)
        #atom_list = [16,2,1,11]
        #atom_list = [4, 4, 2, 4, 3, 1, 4, 4, 4, 4, 1, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        #atom_list = [4, 4, 4, 4, 1, 4, 4, 3, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        #atom_list = [4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 4, 2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        atom_list = [
            4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ]
        #self.getatoms(atom_list)
        if not hparams.mask_weight:
            candidate_edges = self.get_unmasked_candidate(
                list_edges, prob, w_edge, hparams.edges)
        else:
            i = 0
            hde = 1
            #while (i < 1000):
            candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                prob, w_edge, atom_list, hparams.edges, hde)
            #if len(candidate_edges) > 0:
            #        break
            #    i += 1

            #candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde)
        with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f:
            for uvw in candidate_edges.split():
                [u, v, w] = uvw.split("-")
                u = int(u)
                v = int(v)
                w = int(w)
                if (u >= 0 and v >= 0):
                    #with open(hparams.sample_file + 'temp.txt', 'a') as f:
                    f.write(
                        str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) +
                        '}\n')
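
    # The file written above follows networkx's edge-list format, one edge
    # per line as "u v {'weight': w}", so it can be read back with, e.g.:
    #
    #   import networkx as nx
    #   G = nx.read_edgelist(hparams.sample_file + 'temp.txt' + str(k),
    #                        nodetype=int)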

    def getatoms(self, node, label):
        label_new = np.reshape(label, (node, self.d))
        print("Debug label original shape:", label_new)

        label_new = np.exp(label_new)
        s = label_new.shape[0]
        print("Debug label shape:", label_new.shape, s)

        label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1))
        print("Debug label sum:", label_new_sum.shape)

        prob_label = label_new / label_new_sum
        pred_label = np.zeros(4)
        valency_arr = np.zeros(node)

        print("Debug prob label shape:", prob_label.shape, prob_label)

        #print("Debug label", label_new)
        for i in range(node):
            valency = np.random.choice(range(4), [1], p=prob_label[i])
            pred_label[valency] += 1
            valency_arr[i] = valency + 1

        print("Debug pred_label", pred_label, valency_arr)
        return (pred_label, valency_arr)
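
    # getatoms applies a row-wise softmax to the label logits and then draws
    # one valency per node. A minimal equivalent, assuming four label classes
    # as in the loop above:
    #
    #   probs = np.exp(logits)
    #   probs /= probs.sum(axis=1, keepdims=True)
    #   valency = np.array([np.random.choice(4, p=row) for row in probs]) + 1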

    def sample_graph_neighborhood(self,
                                  hparams,
                                  placeholders,
                                  adj,
                                  features,
                                  weights,
                                  weight_bins,
                                  s_num,
                                  node,
                                  ratio,
                                  hde,
                                  num=10,
                                  outdir=None):
        list_edges = get_candidate_edges(self.n)

        #eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim)
        eps = np.random.randn(self.n, self.z_dim, 1)

        train_mu = []
        train_sigma = []
        hparams.sample = False

        # approach 1
        for i in range(len(adj)):
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update({self.weight_bin: weight_bins[i]})
            feed_dict.update({self.weight: weights[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            hparams.sample = False
            prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost, self.w_edge
                ],
                feed_dict=feed_dict)

            with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt',
                      'a') as f:
                for z_i in z_encoded:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt',
                      'a') as f:
                for z_i in enc_mu:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt',
                      'a') as f:
                for x in range(self.n):
                    for z_i in enc_sigma[x]:
                        f.write('[' + ','.join([str(el)
                                                for el in z_i]) + ']\n')
                    f.write("\n")

            hparams.sample = True

            #for j in range(self.n):
            #for j in [1, 5, 15]:
            for j in [1]:
                z_encoded_neighborhood = copy.copy(z_encoded)
                feed_dict.update({self.eps: z_encoded_neighborhood})
                prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.w_edge, self.label
                    ],
                    feed_dict=feed_dict)
                # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
                with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt',
                          'a') as f:
                    for z_i in z_encoded:
                        f.write('[' + ','.join([str(el[0])
                                                for el in z_i]) + ']\n')
                    f.write("\n")

                prob = np.reshape(prob, (self.n, self.n))
                w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
                with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([str(el)
                                                for el in prob[x]]) + ']\n')
                    f.write("\n")
                with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([
                            str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2])
                            for el in w_edge[x]
                        ]) + ']\n')
                    f.write("\n")

                if not hparams.mask_weight:
                    print("Non mask")
                    candidate_edges = self.get_unmasked_candidate(
                        list_edges, prob, w_edge, hparams.edges)
                else:
                    print("Mask")
                    (atom_list,
                     valency_arr) = self.getatoms(hparams.nodes, labels)
                    candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                        prob, w_edge, valency_arr, hparams.edges, hde)

                for uvw in candidate_edges.split():
                    [u, v, w] = uvw.split("-")
                    u = int(u)
                    v = int(v)
                    w = int(w)
                    if (u >= 0 and v >= 0):
                        with open(
                                hparams.sample_file + "approach_1_node_" +
                                str(j) + "_" + str(s_num) + '.txt', 'a') as f:
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
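
    # "Approach 1" above perturbs the posterior rather than the prior: the
    # encoded z of the input graph is fed back through self.eps, so decoded
    # samples stay in the neighbourhood of that graph's embedding.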

    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     weights,
                     weight_bins,
                     s_num,
                     node,
                     hde,
                     num=10,
                     outdir=None):
        '''
        Args :
            num - int
                number of edges to be sampled (default 10)
            outdir - string
                output directory
        '''
        list_edges = []

        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        #list_edges.append((-1, -1, 0))

        list_weight = [1, 2, 3]

        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})

        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, 3])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)

        if not hparams.mask_weight:
            trial = 0
            while trial < 5000:
                candidate_edges = [
                    list_edges[i] for i in np.random.choice(range(
                        len(list_edges)), [hparams.edges],
                                                            p=p,
                                                            replace=False)
                ]
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                f = open(hparams.sample_file + 'test.txt')
                G = nx.read_edgelist(f, nodetype=int)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            with open(
                                    hparams.sample_file + "approach_2_" +
                                    str(trial) + "_" + str(s_num) + '.txt',
                                    'a') as f:
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                trial += 1

        else:
            trial = 0
            while trial < 5000:
                candidate_edges = self.get_masked_candidate(
                    list_edges, prob, w_edge, hparams.edges, hde)
                #print("Debug candidate", candidate_edges)
                if len(candidate_edges) > 0:
                    with open(hparams.sample_file + 'test.txt', 'w') as f:
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                    f = open(hparams.sample_file + 'test.txt')
                    #try:
                    G = nx.read_edgelist(f, nodetype=int)
                    #except:
                    #continue

                    if nx.is_connected(G):
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                with open(
                                        hparams.sample_file + "approach_2_" +
                                        str(trial) + "_" + str(s_num) + '.txt',
                                        'a') as f:
                                    f.write(
                                        str(u) + ' ' + str(v) +
                                        ' {\'weight\':' + str(w) + '}\n')
                trial += 1
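
    # Both branches above act as rejection samplers: up to 5000 candidate
    # edge sets are drawn, and a sample is only kept when the resulting graph
    # is connected, i.e. (assumed names)
    #
    #   G = nx.read_edgelist(open(tmp_path), nodetype=int)
    #   if nx.is_connected(G):
    #       accept(candidate_edges)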
Example #4
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 log_fact_k,
                 input_size,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.neg_sample_size = hparams.neg_sample_size
        self.input_size = input_size
        self.combination = hparams.node_sample * hparams.bfs_sample

        def neg_loglikelihood(prob_dicts, w_edges):
            '''
            negative loglikelihood of the edges
            '''
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                for i in range(self.combination):
                    prob_dict = prob_dicts[i]
                    w_edge = w_edges[i]

                    prob_dict = tf.Print(prob_dict, [prob_dict],
                                         message="my prob dict values:")
                    print("Debug prob dict shape", tf.shape(prob_dict))
                    prob_dict_resized = tf.reshape(prob_dict, [-1])

                    prob_dict_resized = tf.Print(
                        prob_dict_resized, [prob_dict_resized],
                        message="my prob dict resized values:")
                    w_edge_size = tf.stack([tf.shape(w_edge)[0]])[0]
                    w_edge_size = tf.Print(w_edge_size, [w_edge_size],
                                           message="my size values:")
                    print("Debug w_edge_shape", tf.shape(w_edge),
                          w_edge.get_shape(),
                          tf.stack([tf.shape(w_edge)[0]])[0])
                    w_edge_resized = tf.reshape(w_edge, [-1, self.bin_dim])

                    if self.neg_sample_size > 0:
                        w_edge_resized = tf.reshape(
                            w_edge[:-self.bin_dim * self.neg_sample_size],
                            [-1, self.bin_dim])
                    w_edge_size_r = tf.stack([tf.shape(w_edge_resized)[0]])[0]

                    w_edge_size_r = tf.Print(w_edge_size_r, [w_edge_size_r],
                                             message="my size values r:")
                    w_edge_exp = tf.exp(
                        tf.minimum(
                            w_edge_resized,
                            tf.fill([w_edge_size_r, self.bin_dim], 10.0)))
                    w_edge_pos = tf.reduce_sum(tf.multiply(
                        self.weight_bin[i], w_edge_exp),
                                               axis=1)
                    w_edge_total = tf.reduce_sum(w_edge_exp, axis=1)
                    w_edge_score = tf.divide(w_edge_pos, w_edge_total)

                    w_edge_score = tf.Print(w_edge_score, [w_edge_score],
                                            message="my w_edge_score values:")

                    prob_dict_resized_shape = tf.stack(
                        [tf.shape(prob_dict_resized)[0]])[0]
                    prob_dict_resized_shape = tf.Print(
                        prob_dict_resized_shape, [prob_dict_resized_shape],
                        message="my prob dict size values:")
                    prob_dict_exp = tf.exp(
                        tf.minimum(prob_dict_resized,
                                   tf.fill([prob_dict_resized_shape], 10.0)))
                    prob_dict_exp = tf.Print(prob_dict_exp, [prob_dict_exp],
                                             message="my decscore values:")
                    pos_score = prob_dict_exp
                    if self.neg_sample_size > 0:
                        pos_score = prob_dict_exp[:-self.neg_sample_size]
                    st = tf.stack([tf.shape(pos_score)[0]])[0]
                    st = tf.Print(st, [st], message="my st values:")
                    pos_score = tf.Print(pos_score, [pos_score],
                                         message="my posscore values:")
                    #pos_weight_score = tf.multiply(tf.reshape(pos_score,[st, 1]), w_edge_score)
                    pos_weight_score = tf.multiply(
                        pos_score, tf.reshape(w_edge_score, [1, -1]))
                    neg_score = tf.cumsum(prob_dict_exp, reverse=True)
                    if self.neg_sample_size > 0:
                        neg_score = tf.cumsum(
                            prob_dict_exp[1:],
                            reverse=True)[:-self.neg_sample_size + 1]
                    softmax_out = tf.divide(pos_weight_score, neg_score)

                    ll += tf.reduce_sum(
                        tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9))))
                    #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9))))
                ll = ll / self.combination
                ll = tf.Print(ll, [ll], message="My loss")

            return (-ll)
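
        # The tf.minimum(..., 10.0) guards above clip scores before
        # exponentiation, i.e. exp(min(x, 10)), so a single large logit
        # cannot overflow the softmax-style normalisation that follows.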

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback-Leibler divergence between two Gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))
                return 0.5 * tf.add(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)
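
        # For reference, the quantity assembled term-by-term above tracks the
        # textbook closed form for a diagonal Gaussian posterior against a
        # standard-normal prior:
        #   KL(N(mu, sigma^2 I) || N(0, I))
        #     = 0.5 * (sum_i sigma_i^2 + mu^T mu - k - log prod_i sigma_i^2)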

        def ll_poisson(lambda_, x):
            #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32)
            x = tf.Print(x, [x], message="My debug_x_tf")
            log_fact_tf = tf.convert_to_tensor([self.log_fact_k[x - 1]],
                                               dtype=tf.float32)
            return -tf.subtract(
                tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_),
                log_fact_tf)
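
        # ll_poisson returns the negative Poisson log-likelihood,
        #   -log P(x | lambda) = -(x * log(lambda) - lambda - log(x!)),
        # with log(x!) taken from the precomputed self.log_fact_k table.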

        def label_loss_predict(label, predicted_labels, label1):
            loss = 0.0
            #for i in range(self.combination):
            predicted_label = predicted_labels

            predicted_label_resized = tf.reshape(predicted_label,
                                                 [self.n, self.d])
            n_class_labels = tf.fill([self.n, 1], tf.cast(4, tf.float32))

            #predicted_label_resized_new = tf.concat(values =(predicted_label_resized, n_class_labels), axis=1)
            loss += tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label1, logits=predicted_label_resized)
            return loss
            #return loss/self.combination

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out, w_edge, label, lambda_n, lambda_e):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out,
                                                w_edge)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss

            lambda_e = tf.Print(lambda_e, [lambda_e], message="My edge_lambda")
            lambda_n = tf.Print(lambda_n, [lambda_n], message="My node_lambda")

            #print("Debug self count", self.count, self.edges[self.count])
            edgeprob = ll_poisson(
                lambda_e,
                tf.cast(
                    tf.subtract(
                        tf.shape(self.edges[0])[0], self.neg_sample_size),
                    tf.float32))
            nodeprob = ll_poisson(
                lambda_n, tf.cast(tf.convert_to_tensor([self.n]), tf.float32))

            edgeprob = tf.Print(edgeprob, [edgeprob],
                                message="My edge_prob_loss")
            nodeprob = tf.Print(nodeprob, [nodeprob],
                                message="My node_prob_loss")

            label_loss = label_loss_predict(self.features, label,
                                            self.features1)
            label_loss = tf.Print(label_loss, [label_loss],
                                  message="My label_loss")

            loss_1 = tf.reduce_mean(kl_loss + label_loss)
            loss_1 = tf.Print(loss_1, [loss_1], message="My label_loss1")

            total_loss = loss_1 + tf.reduce_mean(edgeprob + nodeprob +
                                                 likelihood_loss)
            #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
            total_loss = tf.Print(total_loss, [total_loss],
                                  message="My total_loss")
            return total_loss

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.features1 = tf.placeholder(dtype=tf.int32,
                                        shape=[self.n],
                                        name='features1')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.combination, None, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')
        #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index')
        self.edges = tf.placeholder(dtype=tf.int32,
                                    shape=[self.combination, None, 2],
                                    name='edges')
        self.count = tf.placeholder(dtype=tf.int32)

        #node_count = [len(edge_list) for edge_list in self.edges]
        print("Debug Input size", self.input_size)
        node_count_tf = tf.fill([1, self.input_size],
                                tf.cast(self.n, tf.float32))
        node_count_tf = tf.Print(node_count_tf, [node_count_tf],
                                 message="My node_count_tf")
        print("Debug size node_count", node_count_tf.get_shape())

        #tf.convert_to_tensor(node_count, dtype=tf.int32)
        self.cell = VAEGCell(self.adj, self.weight, self.features,
                             self.z_dim, self.bin_dim,
                             tf.to_float(node_count_tf), self.edges)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label
        self.lambda_n = lambda_n
        self.lambda_e = lambda_e
        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out, w_edge, label, lambda_n,
                                 lambda_e)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        self.sess = tf.Session()
Example #5
class VAEG(VAEGConfig):
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 edges,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.count = 0
        self.edges = edges
        self.mask_weight = hparams.mask_weight

        #self.edges, self.non_edges = edges, non_edges
        #logger.info("Building model starts...")
        def masked_gen(posscore, negscore):
            indicator = []
            for i in range(self.n):
                indicator.append(tf.ones(self.n))
            temp_posscore = tf.reduce_sum(posscore)
            ll = 0.0
            for (u, v) in self.edges[self.count]:
                print("Debug", posscore[0].shape, indicator[0].shape)
                #tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v]
                ll += tf.log(
                    tf.multiply(tf.reshape(posscore[u], [1, self.n]),
                                indicator[u])[0][v] /
                    (temp_posscore + negscore[u][v]) + 1e-09)
                ll += tf.log(
                    tf.multiply(tf.reshape(posscore[v], [1, self.n]),
                                indicator[v])[0][u] /
                    (temp_posscore + negscore[v][u]) + 1e-09)

                indicator[u] = tf.multiply(
                    tf.subtract(tf.ones([1, self.n]), self.adj[v]),
                    indicator[u])
                indicator[v] = tf.multiply(
                    tf.subtract(tf.ones([1, self.n]), self.adj[u]),
                    indicator[v])

                temp_posscore = temp_posscore - tf.reduce_sum(posscore[u])
                temp = tf.multiply(indicator[u],
                                   tf.reshape(posscore[u], [self.n]))

                temp_posscore += tf.reduce_sum(temp)
                temp_posscore = temp_posscore - \
                    tf.reduce_sum(posscore[v]) + \
                    tf.reduce_sum(tf.multiply(indicator[v], posscore[v]))

                temp_posscore = temp_posscore - \
                    tf.reduce_sum(tf.transpose(posscore)[
                                  u]) + tf.reduce_sum(tf.multiply(indicator[u], tf.transpose(posscore)[u]))
                temp_posscore = temp_posscore - \
                    tf.reduce_sum(tf.transpose(posscore)[
                                  v]) + tf.reduce_sum(tf.multiply(indicator[v], tf.transpose(posscore)[v]))

            return ll
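
        # masked_gen scores each ground-truth edge against a running
        # normaliser temp_posscore; after every edge it zeroes the indicator
        # entries for the endpoints' existing neighbourhoods (so those pairs
        # cannot be scored again) and updates temp_posscore incrementally
        # instead of re-summing the full score matrix.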

        def neg_loglikelihood(prob_dict):
            '''
            negative loglikelihood of the edges
            '''
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):

                dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
                '''
                dec_mat_temp = np.zeros((self.n, self.n))
                for i in range(self.n):
                    for j in range(i+1, self.n):
                        print("Debug", prob_dict[k])
                        dec_mat_temp[i][j] = prob_dict[k][0]
                        dec_mat_temp[j][i] = prob_dict[k][0]
                        k+=1
                #'''

                #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]),tf.fill([self.n, self.n], 10.0)))
                dec_mat = tf.exp(
                    tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))

                dec_mat = tf.Print(dec_mat, [dec_mat],
                                   message="my decscore values:")

                print("Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat)
                comp = tf.subtract(tf.ones([self.n, self.n], tf.float32),
                                   self.adj)
                comp = tf.Print(comp, [comp], message="my comp values:")

                temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
                negscore = tf.fill([self.n, self.n], temp + 1e-9)
                negscore = tf.Print(negscore, [negscore],
                                    message="my negscore values:")

                posscore = tf.multiply(self.adj, dec_mat)
                posscore = tf.Print(posscore, [posscore],
                                    message="my posscore values:")

                #dec_out = tf.multiply(self.adj, dec_mat)
                softmax_out = tf.truediv(posscore, tf.add(posscore, negscore))
                ll = tf.reduce_sum(
                    tf.log(
                        tf.add(tf.multiply(self.adj, softmax_out),
                               tf.fill([self.n, self.n], 1e-9))), 1)
                if hparams.mask_weight:
                    ll = masked_gen(posscore, negscore)
                    #ll = masked_ll(posscore, negscore)
            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback-Leibler divergence between two Gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))

                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])

                #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))

                temp_stack = []
                # for i in range(self.n):
                #    temp_stack.append(tf.log(tf.truediv(tf.matrix_determinant(sigma_2[i]),tf.add(tf.matrix_determinant(sigma_1[i]), tf.fill([self.d, self.d], 1e-9)))))

                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))

                print("Debug", tf.stack(temp_stack).shape)
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))

                print("debug KL", first_term.shape, second_term.shape, k.shape,
                      third_term.shape, sigma_1[0].shape)
                # return 0.5 *tf.reduce_sum((
                return 0.5 * tf.add(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss
            return tf.reduce_mean(kl_loss + likelihood_loss)
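
        # The cost is therefore the per-node mean of KL + NLL, i.e. a
        # negative evidence lower bound:
        #   -ELBO = KL(q(z|x) || p(z)) - E_q[log p(x|z)]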

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')

        self.cell = VAEGCell(self.adj, self.features, self.z_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
        self.prob = dec_out
        print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        #self.capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grad]
        #self.tgv = [self.grad]
        # self.apply_transform_op = self.train_op.apply_gradients(self.grad_placeholder)
        #self.apply_transform_op = self.train_op.apply_gradients(self.capped_gvs)
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        #self.gradient = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).compute_gradients(self.cost)
        #self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).minimize(self.cost)
        #self.check_op = tf.add_check_numerics_ops()
        self.sess = tf.Session()

    def initialize(self):
        logger.info("Initialization of parameters")
        # self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, features):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)

        #f = open(hparams.out_dir+"iteration.txt")
        iteration = 10000
        # 1000
        for epoch in range(num_epochs):
            for i in range(len(adj)):
                self.count = i
                # Learning rate decay

                #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d,
                                                decay, placeholders)
                feed_dict.update({self.adj: adj[i]})
                # print "Debug", features[i].shape
                eps = np.random.randn(self.n, self.z_dim, 1)
                #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)
                feed_dict.update({self.features: features[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                grad_vals = self.sess.run([g[0] for g in self.grad],
                                          feed_dict=feed_dict)
                for j in range(len(self.grad_placeholder)):
                    feed_dict.update(
                        {self.grad_placeholder[j][0]: grad_vals[j]})
                input_, train_loss, _, probdict, cx = self.sess.run(
                    [
                        self.input_data, self.cost, self.apply_transform_op,
                        self.prob, self.c_x
                    ],
                    feed_dict=feed_dict)

                iteration += 1
                # print "Debug Grad", grad_vals[0]
                # print "Debug CX", cx
                if iteration % hparams.log_every == 0 and iteration > 0:
                    print("{}/{}(epoch {}), train_loss = {:.6f}".format(
                        iteration, num_epochs, epoch + 1, train_loss))
                    # print(probdict)
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess,
                               checkpoint_path,
                               global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
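
    # Note: the step above feeds self.grad_placeholder with the evaluated
    # gradients, but apply_transform_op applies self.grad directly, so the
    # placeholder feed serves only as a numerical check of the gradients.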

    def plot_hspace(self, hparams, placeholders, num):
        # plot the coordinate in hspace

        adj, deg = load_data(hparams.graph_file, num)

        hparams.sample = False
        #'''
        for i in range(len(adj)):
            eps = np.random.randn(self.n, hparams.z_dim, 1)
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: deg[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            prob, ll, z = self.sess.run([self.prob, self.ll, self.z_encoded],
                                        feed_dict=feed_dict)
            with open(hparams.z_dir + 'train' + str(i) + '.txt', 'a') as f:
                for z_i in z:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
        #hparams.sample = True
        #'''
        adj, deg = load_data(hparams.sample_file, num)
        for i in range(len(adj)):
            eps = np.random.randn(self.n, 5, 1)
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: deg[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            prob, ll, z = self.sess.run([self.prob, self.ll, self.z_encoded],
                                        feed_dict=feed_dict)
            with open(hparams.z_dir + 'test_' + str(i) + '.txt', 'a') as f:
                for z_i in z:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
        #'''

    def sample_graph_slerp(self,
                           hparams,
                           placeholders,
                           s_num,
                           G_good,
                           G_bad,
                           inter,
                           ratio,
                           num=10):
        # Args :
        # G_good : embedding of the train graph or good sample
        # G_bad : embedding of the bad graph

        list_edges = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j))

        # for sample in range(s_num):
        new_graph = []
        for i in range(self.n):
            node_good = G_good[i]
            node_bad = G_bad[i]
            if inter == "lerp":
                new_graph.append(
                    lerp(np.reshape(node_good, -1), np.reshape(node_bad, -1),
                         ratio))
            else:
                new_graph.append(
                    slerp(np.reshape(node_good, -1), np.reshape(node_bad, -1),
                          ratio))

        eps = np.array(new_graph)
        eps = eps.reshape(eps.shape + (1, ))

        hparams.sample = True
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        # TODO adj and deg are fillers and not required while sampling. Need to clean this part

        adj = np.zeros([self.n, self.n])
        deg = np.zeros([self.n, 1], dtype=float)

        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, kl = self.sess.run([self.prob, self.ll, self.kl],
                                     feed_dict=feed_dict)

        prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
        prob = np.divide(prob, np.sum(prob))

        print("Debug", prob)

        problist = []
        try:
            for i in range(self.n):
                for j in range(i + 1, self.n):
                    problist.append(prob[i][j])
            p = np.array(problist)
            # list-to-numpy conversion can lose negligible precision, so it
            # is desirable to normalise again
            p /= p.sum()
            max_prob = max(p)
            min_prob = min(p)
            diff = min_prob + (max_prob - min_prob) * 0.1
            print("Debug max prob", max_prob, p)
            #candidate_edges = [ list_edges[i] for i in np.random.choice(range(len(list_edges)),[num], p=p, replace=False)]
            candidate_edges = [
                list_edges[i] for i in range(len(list_edges)) if p[i] >= diff
            ]
        except:
            return
        #adj = np.zeros([self.n, self.n])
        probmul = 1.0

        for (u, v) in candidate_edges:
            #adj[u][v] = 1
            #adj[v][u] = 1
            probmul *= prob[u][v]
            with open(
                    hparams.sample_file + '/inter/' + inter + str(s_num) +
                    '.txt', 'a') as f:
                f.write(str(u) + '\t' + str(v) + '\n')

        with open(hparams.z_dir + '/inter/' + inter + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        #kl_gaussian_mul(np.mean(G_good, axis=0), np.diag(np.var(G_good, axis=0)), np.mean(G_bad, axis = 0), np.diag(np.var(G_bad, axis = 0)))
        #ll1 = log(probmul)

        # with open(hparams.sample_file+'/inter/ll.txt', 'a') as f:
        #    f.write(str(ll1)+'\n')

        #kl1 = np.mean(kl)
        # with open(hparams.sample_file+'/inter/kl.txt', 'a') as f:
        #    f.write(str(kl1)+'\n')
        #G_bad = new_graph
        return new_graph
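
    # The two interpolants used above have the standard forms (Omega being
    # the angle between the endpoint vectors):
    #   lerp(a, b, t)  = (1 - t) * a + t * b
    #   slerp(a, b, t) = (sin((1 - t) * Omega) * a
    #                     + sin(t * Omega) * b) / sin(Omega)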

    def kl_gaussian_mul(self, mu_1, sigma_1, mu_2, sigma_2):
        '''
            Kullback-Leibler divergence between two Gaussian distributions
        '''
        #print("Debug sigma1", debug_sigma_1, len(debug_sigma_1[0]))
        # print sigma_1.shape, sigma_2.shape
        n = self.n
        temp_stack_1 = []
        temp_stack_2 = []
        #debug_sigma_1 = np.diag(sigma_1)
        #debug_sigma_2 = np.diag(sigma_2)
        for i in range(n):
            #print("DEBUG i", i)
            temp_stack_1.append(np.prod(sigma_1[i].diagonal()))
            temp_stack_2.append(np.prod(sigma_2[i].diagonal()))

        # Inverse of the diagonal covariance
        ones = np.ones(sigma_2.shape)
        inverse_sigma_2 = np.subtract(
            ones, np.true_divide(ones, np.add(ones, sigma_2)))
        #inverse_sigma_2 = tf.matrix_diag(np.true_divide(np.ones(np.shape(debug_sigma_2)), debug_sigma_2))

        term_2 = []
        print("DEBUG2", len(inverse_sigma_2))
        for i in range(n):
            term_2.append(np.trace(np.matmul(inverse_sigma_2[i], sigma_1[i])))
        # Difference between the mean
        term_3 = []
        k = np.zeros([self.n])
        k.fill(mu_1.shape[1])
        diff_mean = np.subtract(mu_2, mu_1)

        for i in range(self.n):
            term_3.append(
                np.matmul(
                    np.matmul(np.transpose(diff_mean[i]), inverse_sigma_2[i]),
                    diff_mean[i]))

        term1 = np.log(np.true_divide(temp_stack_2, temp_stack_1))
        # term2 = np.trace(term_2[])
        # print "Debug", len(term1), len(term_2), len(term_3), len(term_2), len(term_2[0][0])

        KL = 0.5 * np.subtract(np.add(np.add(term1, term_2), term_3), k)
        #KL = tf.Print(KL, [KL], message="my KL values:")

        #print("Debug mu1", tf.shape(mu_1)[1])
        return np.sum(KL)
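
    # kl_gaussian_mul evaluates, per node, the general closed form for the
    # KL divergence between two Gaussians and sums the results:
    #   KL(N1 || N2) = 0.5 * (log(|S2| / |S1|) + tr(S2^{-1} S1)
    #                         + (m2 - m1)^T S2^{-1} (m2 - m1) - k)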

    def get_stat(self, hparams, placeholders, num=10, outdir=None):

        adj, features, edges = load_data(hparams.graph_file, hparams.nodes)

        # for i in range(self.n):
        #    deg[i][0] = 2 * np.sum(adj[i])/(self.n*(self.n - 1))
        hparams.sample = True
        eps = np.random.randn(self.n, self.z_dim, 1)
        if hparams.sample:
            print("Debug Sample", hparams.sample)
        for i in range(len(adj)):
            ll_total = 0.0
            loss_total = 0.0
            prob_derived = 0.0

            for j in range(10):
                eps = np.random.randn(self.n, self.z_dim, 1)
                feed_dict = construct_feed_dict(hparams.learning_rate,
                                                hparams.dropout_rate, self.k,
                                                self.n, self.d,
                                                hparams.decay_rate,
                                                placeholders)
                feed_dict.update({self.adj: adj[i]})
                feed_dict.update({self.features: features[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                prob, ll, z_encoded, enc_mu, enc_sigma, loss, kl = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.kl
                    ],
                    feed_dict=feed_dict)
                ll_total += np.mean(ll)
                loss_total += np.mean(loss)

                prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
                prob = np.divide(prob, np.sum(prob))

                for k in range(self.n):
                    for l in range(k + 1, self.n):
                        if adj[i][k][l] == 1:
                            prob_derived += log(prob[k][l] + 0.1)

            # with open(hparams.sample_file+'/reconstruction_ll.txt', 'a') as f:
            with open(hparams.out_dir + '/reconstruction_ll1.txt', 'a') as f:
                f.write(str(-np.mean(ll_total) / 10) + '\n')

                # with open(hparams.graph_file+'/kl.txt', 'a') as f:
                #    f.write(str(-np.mean(kl))+'\n')

            # with open(hparams.sample_file+'/elbo.txt', 'a') as f:
            with open(hparams.out_dir + '/elbo1.txt', 'a') as f:
                f.write(str(-np.mean(loss_total) / 10) + '\n')

            # with open(hparams.sample_file+'/prob_derived.txt', 'a') as f:
            with open(hparams.out_dir + '/prob_derived1.txt', 'a') as f:
                f.write(str(-np.mean(loss_total) / 10) + '\n')
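
    # get_stat therefore reports Monte-Carlo estimates: each graph's
    # reconstruction log-likelihood and ELBO are averaged over 10 prior
    # draws of eps before being written out.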

    def zspace_analysis(self, hparams, placeholders, num=10, outdir=None):
        adj, features = load_data(hparams.graph_file, hparams.nodes)
        eps = np.random.randn(self.n, self.z_dim, 1)
        train_z = []
        list_edges = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j))
        for i in range(len(adj)):
            hparams.sample = False
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})

            prob, ll, z_encoded, enc_mu, enc_sigma, elbo = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost
                ],
                feed_dict=feed_dict)
            train_z.append(z_encoded)

            with open(hparams.z_dir + 'train_' + str(i) + '.txt', 'a') as f:
                for z_i in z_encoded:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

            prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
            prob = np.divide(prob, np.sum(prob))

            problist = []
            for k in range(self.n):
                for l in range(k + 1, self.n):
                    problist.append(prob[k][l])
            p = np.array(problist)
            p /= p.sum()
            if i < 20:
                num = 32
            else:
                num = 78
            candidate_edges = [
                list_edges[k]
                for k in np.random.choice(range(len(list_edges)), [num], p=p)
            ]

            probtotal = 1.0
            adjnew = np.zeros([self.n, self.n])
            featuresnew = np.zeros([self.n, 1])

            for (u, v) in candidate_edges:
                probtotal *= prob[u][v]
                adjnew[u][v] = 1
                adjnew[v][u] = 1
                featuresnew[u][0] += 1.0 / self.n
                featuresnew[v][0] += 1.0 / self.n
                if i < 20:
                    with open(
                            hparams.sample_file + "type_1_test" + "_" +
                            str(i) + '.txt', 'a') as f:
                        f.write(str(u) + '\t' + str(v) + '\n')
                else:
                    with open(
                            hparams.sample_file + "type_2_test" + "_" +
                            str(i) + '.txt', 'a') as f:
                        f.write(str(u) + '\t' + str(v) + '\n')
            # hparams.sample=False
            eps1 = np.random.randn(self.n, self.z_dim, 1)
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adjnew})
            feed_dict.update({self.features: featuresnew})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps1})
            prob, z_encoded = self.sess.run([self.prob, self.z_encoded],
                                            feed_dict=feed_dict)
            print("DebugZ", len(z_encoded), len(z_encoded[0]))
            if i < 20:
                with open(hparams.z_dir + 'type_1_test_' + str(i) + '.txt',
                          'a') as f:
                    for z_i in z_encoded:
                        f.write('[' + ','.join([str(el[0])
                                                for el in z_i]) + ']\n')

            else:
                with open(hparams.z_dir + 'type_2_test_' + str(i) + '.txt',
                          'a') as f:
                    for z_i in z_encoded:
                        f.write('[' + ','.join([str(el[0])
                                                for el in z_i]) + ']\n')
            with open(hparams.sample_file + 'll_.txt', 'a') as f:
                f.write(str(-np.mean(prob)) + '\n')

        # Interpolation Finding the likelihood
        count = 0
        for i in range(20):
            for j in range(20, 40):
                self.sample_graph_slerp(hparams, placeholders, count,
                                        train_z[i], train_z[j], "slerp", 50)
                count += 1
                self.sample_graph_slerp(hparams, placeholders, count,
                                        train_z[i], train_z[j], "lerp", 50)
                count += 1
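        # Interpolation between encoded graphs: for latent codes z1, z2 and
        # t in [0, 1],
        #     lerp(z1, z2, t)  = (1 - t) * z1 + t * z2
        #     slerp(z1, z2, t) = (sin((1 - t) * omega) * z1
        #                         + sin(t * omega) * z2) / sin(omega),
        # with omega = arccos(<z1, z2> / (||z1|| * ||z2||)). This assumes
        # sample_graph_slerp implements the standard interpolants; 50 is the
        # number of interpolation steps.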

    def getcandidate(self, num, n, p, prob, list_edges):
        print("Inside gencanidate")
        adj = np.zeros([n, n])
        candidate_edges = [
            list_edges[i]
            for i in np.random.choice(range(len(list_edges)), [1], p=p)
        ]
        indicator = np.ones([n, n])
        unseen = np.ones(n)
        probnew = prob
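        # Masked sequential sampling: after accepting an edge (u, v), zero out
        # in `indicator` any candidate that would join u to an existing
        # neighbour of v (and vice versa), then renormalise what remains; this
        # appears to keep the sampled graph triangle-free. `unseen` stays
        # all-ones because the updates to it are commented out.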
        for k in range(num - 1):
            (u, v) = candidate_edges[k]
            adj[u][v] = 1
            adj[v][u] = 1
            #unseen[u] = 0
            #unseen[v] = 0
            indicator[u] = np.multiply(
                np.multiply(np.subtract(np.ones(n), adj[v]), indicator[u]),
                unseen)
            indicator[v] = np.multiply(
                np.multiply(np.subtract(np.ones(n), adj[u]), indicator[v]),
                unseen)
            probnew = np.multiply(np.multiply(probnew, indicator),
                                  np.transpose(indicator))
            problist = []
            for i in range(self.n):
                for j in range(i + 1, self.n):
                    if (i, j) in candidate_edges:
                        if (i, j) in list_edges:
                            list_edges.remove((i, j))
                        continue
                    problist.append(probnew[i][j])
            p = np.array(problist)
            p /= p.sum()
            print("Debug p", p)
            candidate_edges.extend([
                list_edges[i]
                for i in np.random.choice(range(len(list_edges)), [1], p=p)
            ])

        return candidate_edges

    def getembeddings(self, hparams, placeholders, adj, deg):

        eps = np.random.randn(self.n, self.z_dim, 1)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        prob, ll, kl, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.z_encoded], feed_dict=feed_dict)
        return embedding

    def sample_graph(self,
                     hparams,
                     placeholders,
                     s_num,
                     node,
                     num=10,
                     outdir=None,
                     eps_passed=None):
        '''
        Args:
            num (int): number of edges to be sampled (default 10)
            outdir (str): output directory
        '''

        list_edges = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j))
        adj, features, edges = load_data(hparams.graph_file, node)
        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)
        train_mu = []
        train_sigma = []
        hparams.sample = False
        for i in range(len(adj)):
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})

            prob, ll, z_encoded, enc_mu, enc_sigma, elbo = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost
                ],
                feed_dict=feed_dict)

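            # turn the decoder logits into a normalised distribution over the
            # upper triangle of the adjacency matrix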
            prob = np.triu(np.exp(np.reshape(prob, [self.n, self.n])), 1)
            prob = np.divide(prob, np.sum(prob))

            problist = []
            # use k, l here: reusing i would clobber the outer graph index
            # that names the sample files below
            for k in range(self.n):
                for l in range(k + 1, self.n):
                    problist.append(prob[k][l])
            p = np.array(problist)
            p /= p.sum()

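            # with mask_weight, edges are drawn sequentially under validity
            # masks (getcandidate); otherwise num edges are drawn directly
            # from the normalised edge distribution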
            if hparams.mask_weight:
                candidate_edges = self.getcandidate(num, self.n, p, prob,
                                                    list_edges)
            else:
                candidate_edges = [
                    list_edges[i]
                    for i in np.random.choice(range(len(list_edges)), [num],
                                              p=p)
                ]

            probtotal = 1.0

            for (u, v) in candidate_edges:
                probtotal *= prob[u][v]
                with open(
                        hparams.sample_file + "approach_1_train" + str(i) +
                        "_" + str(s_num) + '.txt', 'a') as f:
                    f.write(str(u) + ' ' + str(v) + ' {}' + '\n')

            #ll1 = np.mean(ll)
            ll1 = log(probtotal)
            with open(
                    hparams.sample_file + "/approach_1_train" + str(i) +
                    '_ll.txt', 'a') as f:
                f.write(
                    str(ll1) + "\t" + str(np.mean(ll)) + "\t" +
                    str(np.mean(elbo)) + '\n')

        # approach 2
        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)

        if eps_passed is not None:
            eps = eps_passed

        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost
            ],
            feed_dict=feed_dict)

        prob = np.triu(np.exp((np.reshape(prob, (self.n, self.n)))), 1)
        prob = np.divide(prob, np.sum(prob))

        problist = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                problist.append(prob[i][j])
        p = np.array(problist)
        p /= p.sum()
        if hparams.mask_weight:
            candidate_edges = self.getcandidate(num, self.n, p, prob,
                                                list_edges)
        else:
            candidate_edges = [
                list_edges[i] for i in np.random.choice(
                    range(len(list_edges)), [num], p=p, replace=False)
            ]

        probtotal = 1.0
        adj = np.zeros([self.n, self.n])
        deg = np.zeros([self.n, 1])

        for (u, v) in candidate_edges:
            #adj[u][v] += 1
            #adj[v][u] += 1
            probtotal *= prob[u][v]
            with open(
                    hparams.sample_file + "approach_2" + "_" + str(s_num) +
                    '.txt', 'a') as f:
                f.write(str(u) + ' ' + str(v) + ' {}' + '\n')
        ll1 = log(probtotal)

        with open(hparams.sample_file + '/reconstruction_ll.txt', 'a') as f:
            f.write(str(np.mean(ll)) + '\n')

        with open(hparams.sample_file + '/elbo.txt', 'a') as f:
            f.write(str(np.mean(loss)) + '\n')

Example #7
class VAEG(VAEGConfig):
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 log_fact_k,
                 input_size,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.neg_sample_size = hparams.neg_sample_size
        self.input_size = input_size
        self.combination = hparams.node_sample * hparams.bfs_sample
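        # combination = node_sample * bfs_sample: the number of sampled node
        # orderings that the likelihood below is averaged over (a reading of
        # the hparams, not stated in the source)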

        def neg_loglikelihood(prob_dicts, w_edges):
            '''
            negative loglikelihood of the edges
            '''
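            # Sampled-softmax style NLL: scores are exponentiated (clipped at
            # exp(10) for stability), each positive edge is divided by a
            # cumulative negative-sample score and weighted by the softmax
            # probability of its weight bin, and the result is averaged over
            # the sampled node orderings.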
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                for i in range(self.combination):
                    prob_dict = prob_dicts[i]
                    w_edge = w_edges[i]

                    prob_dict = tf.Print(prob_dict, [prob_dict],
                                         message="my prob dict values:")
                    print("Debug prob dict shape", tf.shape(prob_dict))
                    prob_dict_resized = tf.reshape(prob_dict, [-1])

                    prob_dict_resized = tf.Print(
                        prob_dict_resized, [prob_dict_resized],
                        message="my prob dict resized values:")
                    w_edge_size = tf.stack([tf.shape(w_edge)[0]])[0]
                    w_edge_size = tf.Print(w_edge_size, [w_edge_size],
                                           message="my size values:")
                    print("Debug w_edge_shape", tf.shape(w_edge),
                          w_edge.get_shape(),
                          tf.stack([tf.shape(w_edge)[0]])[0])
                    w_edge_resized = tf.reshape(w_edge, [-1, self.bin_dim])

                    if self.neg_sample_size > 0:
                        w_edge_resized = tf.reshape(
                            w_edge[:-self.bin_dim * self.neg_sample_size],
                            [-1, self.bin_dim])
                    w_edge_size_r = tf.stack([tf.shape(w_edge_resized)[0]])[0]

                    w_edge_size_r = tf.Print(w_edge_size_r, [w_edge_size_r],
                                             message="my size values r:")
                    w_edge_exp = tf.exp(
                        tf.minimum(
                            w_edge_resized,
                            tf.fill([w_edge_size_r, self.bin_dim], 10.0)))
                    w_edge_pos = tf.reduce_sum(tf.multiply(
                        self.weight_bin[i], w_edge_exp),
                                               axis=1)
                    w_edge_total = tf.reduce_sum(w_edge_exp, axis=1)
                    w_edge_score = tf.divide(w_edge_pos, w_edge_total)

                    w_edge_score = tf.Print(w_edge_score, [w_edge_score],
                                            message="my w_edge_score values:")

                    prob_dict_resized_shape = tf.stack(
                        [tf.shape(prob_dict_resized)[0]])[0]
                    prob_dict_resized_shape = tf.Print(
                        prob_dict_resized_shape, [prob_dict_resized_shape],
                        message="my prob dict size values:")
                    prob_dict_exp = tf.exp(
                        tf.minimum(prob_dict_resized,
                                   tf.fill([prob_dict_resized_shape], 10.0)))
                    prob_dict_exp = tf.Print(prob_dict_exp, [prob_dict_exp],
                                             message="my decscore values:")
                    pos_score = prob_dict_exp
                    if self.neg_sample_size > 0:
                        pos_score = prob_dict_exp[:-self.neg_sample_size]
                    st = tf.stack([tf.shape(pos_score)[0]])[0]
                    st = tf.Print(st, [st], message="my st values:")
                    pos_score = tf.Print(pos_score, [pos_score],
                                         message="my posscore values:")
                    #pos_weight_score = tf.multiply(tf.reshape(pos_score,[st, 1]), w_edge_score)
                    pos_weight_score = tf.multiply(
                        pos_score, tf.reshape(w_edge_score, [1, -1]))
                    neg_score = tf.cumsum(prob_dict_exp, reverse=True)
                    if self.neg_sample_size > 0:
                        neg_score = tf.cumsum(
                            prob_dict_exp[1:],
                            reverse=True)[:-self.neg_sample_size + 1]
                    softmax_out = tf.divide(pos_weight_score, neg_score)

                    ll += tf.reduce_sum(
                        tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9))))
                    #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9))))
                ll = ll / self.combination
                ll = tf.Print(ll, [ll], message="My loss")

            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
                Kullback leibler divergence for two gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussian"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]),
                                                mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(
                        debug_sigma[i])))
                third_term = tf.log(
                    tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))
                # ln det(Sigma) enters the closed-form KL with a negative
                # sign; the original added it, which flips the direction of
                # the regulariser
                return 0.5 * tf.subtract(
                    tf.subtract(tf.add(first_term, second_term), k),
                    third_term)
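        # Closed form used above, with a standard-normal prior:
        #   KL(N(mu, Sigma) || N(0, I))
        #       = 0.5 * (tr(Sigma) + mu^T mu - k - ln det(Sigma)),
        # where k = z_dim and det(Sigma) = prod_d sigma_d^2.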

        def ll_poisson(lambda_, x):
            #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32)
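            # Poisson log-likelihood: ln P(x; lambda) = x ln(lambda) - lambda
            # - ln(x!), with ln(x!) read from the precomputed log_fact_k
            # table; negated below so it can be used directly as a loss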
            x = tf.Print(x, [x], message="My debug_x_tf")
            log_fact_tf = tf.convert_to_tensor([self.log_fact_k[x - 1]],
                                               dtype=tf.float32)
            return -tf.subtract(
                tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_),
                log_fact_tf)

        def label_loss_predict(label, predicted_labels, label1):
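            # per-node atom-type loss: softmax cross-entropy between the
            # integer labels (features1) and the decoder's label logits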
            loss = 0.0
            #for i in range(self.combination):
            predicted_label = predicted_labels

            predicted_label_resized = tf.reshape(predicted_label,
                                                 [self.n, self.d])
            n_class_labels = tf.fill([self.n, 1], tf.cast(4, tf.float32))

            #predicted_label_resized_new = tf.concat(values =(predicted_label_resized, n_class_labels), axis=1)
            loss += tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label1, logits=predicted_label_resized)
            return loss
            #return loss/self.combination

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma,
                         dec_out, w_edge, label, lambda_n, lambda_e):
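            # Total loss = mean(KL + label cross-entropy)
            #              + mean(edge-count Poisson NLL + node-count Poisson
            #                NLL + edge reconstruction NLL)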
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                  prior_sigma)  # KL_divergence loss
            likelihood_loss = neg_loglikelihood(dec_out,
                                                w_edge)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss

            lambda_e = tf.Print(lambda_e, [lambda_e], message="My edge_lambda")
            lambda_n = tf.Print(lambda_n, [lambda_n], message="My node_lambda")

            #print("Debug self count", self.count, self.edges[self.count])
            edgeprob = ll_poisson(
                lambda_e,
                tf.cast(
                    tf.subtract(
                        tf.shape(self.edges[0])[0], self.neg_sample_size),
                    tf.float32))
            nodeprob = ll_poisson(
                lambda_n, tf.cast(tf.convert_to_tensor([self.n]), tf.float32))

            edgeprob = tf.Print(edgeprob, [edgeprob],
                                message="My edge_prob_loss")
            nodeprob = tf.Print(nodeprob, [nodeprob],
                                message="My node_prob_loss")

            label_loss = label_loss_predict(self.features, label,
                                            self.features1)
            label_loss = tf.Print(label_loss, [label_loss],
                                  message="My label_loss")

            loss_1 = tf.reduce_mean(kl_loss + label_loss)
            loss_1 = tf.Print(loss_1, [loss_1], message="My label_loss1")

            total_loss = loss_1 + tf.reduce_mean(edgeprob + nodeprob +
                                                 likelihood_loss)
            #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
            total_loss = tf.Print(total_loss, [total_loss],
                                  message="My total_loss")
            return total_loss

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.features1 = tf.placeholder(dtype=tf.int32,
                                        shape=[self.n],
                                        name='features1')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.combination, None, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')
        #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index')
        self.edges = tf.placeholder(dtype=tf.int32,
                                    shape=[self.combination, None, 2],
                                    name='edges')
        self.count = tf.placeholder(dtype=tf.int32)

        #node_count = [len(edge_list) for edge_list in self.edges]
        print("Debug Input size", self.input_size)
        node_count_tf = tf.fill([1, self.input_size],
                                tf.cast(self.n, tf.float32))
        node_count_tf = tf.Print(node_count_tf, [node_count_tf],
                                 message="My node_count_tf")
        print("Debug size node_count", node_count_tf.get_shape())

        #tf.convert_to_tensor(node_count, dtype=tf.int32)
        self.cell = VAEGCell(self.adj, self.weight, self.features,
                             self.z_dim, self.bin_dim,
                             tf.to_float(node_count_tf), self.edges)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label
        self.lambda_n = lambda_n
        self.lambda_e = lambda_e
        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu,
                                 prior_sigma, dec_out, w_edge, label, lambda_n,
                                 lambda_e)

        print_vars("trainable_variables")
        # self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float",
                                                 shape=gr[1].get_shape()),
                                  gr[1]) for gr in self.grad]
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)

        #self.lr = tf.Variable(self.lr, trainable=False)
        self.sess = tf.Session()

    def initialize(self):
        logger.info("Initialization of parameters")
        #self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt is None or ckpt.model_checkpoint_path is None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, weight, weight_bin, features,
              edges, neg_edges, features1):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        with open(hparams.out_dir + '/iteration.txt', 'r') as f1:
            iteration = int(f1.read().strip())
        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)

        start_before_epoch = time.time()
        for epoch in range(num_epochs):
            start = time.time()
            for i in range(len(adj)):
                #self.count = i
                if len(edges[i]) == 0:
                    continue
                # Learning rate decay
                #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d,
                                                decay, placeholders)
                feed_dict.update({self.adj: adj[i]})

                eps = np.random.randn(self.n, self.z_dim, 1)
                #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)

                feed_dict.update({self.features: features[i]})
                feed_dict.update({self.features1: features1[i]})
                feed_dict.update({self.weight_bin: weight_bin[i]})
                feed_dict.update({self.weight: weight[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                neg_indices = np.random.choice(range(len(neg_edges[i])),
                                               hparams.neg_sample_size,
                                               replace=False)
                combined_edges = []
                neg_edges_to_be_extended = [
                    neg_edges[i][index] for index in neg_indices
                ]
                copy_edge = copy.deepcopy(edges[i])
                for j in range(len(edges[i])):
                    #print("Debug edge_list", edge)
                    copy_edge[j].extend(neg_edges_to_be_extended)
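                # every sampled ordering gets the same negative edges
                # appended, so positives and negatives are scored together in
                # the NLL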

                #print("Debug edge_list_combined", combined_edges)
                print("Debug feed edges", i, len(edges[i][0]),
                      len(copy_edge[0]))
                feed_dict.update({self.edges: copy_edge})
                input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n = self.sess.run(
                    [
                        self.input_data, self.cost, self.apply_transform_op,
                        self.prob, self.c_x, self.w_edge, self.lambda_e,
                        self.lambda_n
                    ],
                    feed_dict=feed_dict)

                iteration += 1
                #print("Lambda_e, lambda_n", lambda_e, lambda_n, i)
                if iteration % hparams.log_every == 0 and iteration > 0:
                    #print(train_loss)
                    print("{}/{}(epoch {}), train_loss = {:.6f}".format(
                        iteration, num_epochs, epoch + 1, train_loss))
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess,
                               checkpoint_path,
                               global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
            end = time.time()
            print("Time taken for a batch: ", end - start)
        end_after_epoch = time.time()
        print("Time taken to complete all epochs",
              end_after_epoch - start_before_epoch)
        with open(hparams.out_dir + '/iteration.txt', 'w') as f1:
            f1.write(str(iteration))

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin,
                      weight, edges, features1):
        eps = np.random.randn(self.n, self.z_dim, 1)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})
        feed_dict.update({self.edges: edges})
        feed_dict.update({self.features1: features1})

        prob, ll, kl, w_edge, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded],
            feed_dict=feed_dict)
        return embedding

    def get_masked_candidate_new(self, prob, w_edge, n_edges, labels):
        list_edges = get_candidate_edges(self.n)
        max_node = np.argmax(labels)
        #max_node = np.argmin(labels)
        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)
        candidate_edges = get_weighted_edges_connected(indicator, prob,
                                                       edge_mask, w_edge,
                                                       n_edges, labels, degree,
                                                       max_node)
        candidate_edges_new = []
        for (u, v, w) in candidate_edges:
            if u < v:
                candidate_edges_new.append(
                    str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}")
            else:
                candidate_edges_new.append(
                    str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}")
        return candidate_edges_new

    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample 50 edge sets and keep the most frequent structure
        count = 0
        structure_list = defaultdict(int)

        #while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [num_edges], p=p, replace=False)
            ]
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges, key=itemgetter(0))
            ])] += 1
            #structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1

        # return the element which has been sampled maximum time
        return max(structure_list.items(), key=itemgetter(1))[0]

    def getatoms(self, node, label, edges):
        label_new = np.reshape(label, (node, self.d))

        label_new_exp = np.exp(label_new)
        s = label_new_exp.shape[0]

        label_new_sum = np.reshape(np.sum(label_new_exp, axis=1), (s, 1))

        prob_label = label_new_exp / label_new_sum
        pred_label = np.zeros(4)
        valency_arr = np.zeros(node)

        n_c = 0
        n_h = 0
        n_n = 0
        n_o = 0

        for x in range(1000):
            pred_label = np.zeros(4)
            valency_arr = np.zeros(node)

            for i in range(node):
                valency = int(np.random.choice(4, p=prob_label[i]))
                if valency == 0:
                    n_h += 1
                if valency == 1:
                    n_o += 1
                if valency == 2:
                    n_n += 1
                if valency == 3:
                    n_c += 1
                pred_label[valency] += 1
                valency_arr[i] = valency + 1

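            # accept the draw once total valence >= 2 * (n - 1): a connected
            # graph on n nodes needs at least n - 1 edges, i.e. 2(n - 1) bond
            # endpoints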
            if (pred_label[0] + pred_label[1] * 2 + pred_label[2] * 3 +
                    pred_label[3] * 4) >= 2 * (node - 1):
                break
        return (pred_label, valency_arr)

    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     features1,
                     weights,
                     weight_bins,
                     edges,
                     k=0,
                     outdir=None):
        '''
        Sample molecular graphs from the model and return their SMILES
        strings.

        Args:
            adj, features, features1, weights, weight_bins, edges: encoder
                inputs for the seed graph
            k (int): sample index (default 0)
            outdir (str): output directory
        '''
        list_edges = []

        # eps is consumed by the feed dict below but was never defined here
        eps = np.random.randn(self.n, self.z_dim, 1)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        #feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.features1: features1[0]})
        # assumption: the weight_bins argument was intended here; weight_bin
        # is not defined in this scope
        feed_dict.update({self.weight_bin: [weight_bins[0]]})
        feed_dict.update({self.edges: [edges]})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
        smiles = []
        trial = 0
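        # Rejection loop: sample a masked edge set, keep it only if it forms
        # a connected graph, write it as a NetworkX edge list, convert it to
        # a molecule with guess_correct_molecules, and collect the RDKit
        # SMILES.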
        while trial < 1000:
            atom_list = [4 for x in range(self.n)]
            candidate_edges = self.get_masked_candidate_new(
                prob, w_edge, hparams.edges, atom_list)
            if len(candidate_edges) == 0:
                smiles.append('None')
                trial += 1
                continue
            G = nx.parse_edgelist(candidate_edges, nodetype=int)
            edges = G.edges(data=True)
            if not nx.is_connected(G):
                smiles.append('None')
            else:
                with open(hparams.sample_file + 'temp.txt' + str(trial),
                          'w') as f:

                    for (u, v, w) in edges:
                        #for (u, v, w) in candidate_edges:
                        u = int(u)
                        v = int(v)
                        #w = int(w)
                        w = w['weight']
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                if guess_correct_molecules(
                        hparams.sample_file + 'temp.txt' + str(trial),
                        hparams.sample_file + 'temp.txt', self.n, 1):
                    m1 = Chem.MolFromMol2File(hparams.sample_file + 'temp.txt')
                    s = 'None'
                    if m1 is not None:
                        s = Chem.MolToSmiles(m1)
                        smiles.append(s)
                else:
                    print("Reason: Wrong mol")

            trial += 1
        return smiles

class VAEGRL(VAEGConfig):
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 edges,
                 log_fact_k,
                 hde,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.edges = edges
        self.count = 0
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.hde = hde
        self.temperature = hparams.temperature

        def neg_loglikelihood(prob_dict, w_edge, edge_list):
            '''
            negative loglikelihood of the edges
            '''
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                w_edge_new = tf.exp(
                    tf.minimum(w_edge,
                               tf.fill([self.n, self.n, self.bin_dim], 10.0)))
                weight_temp = tf.multiply(self.weight_bin, w_edge_new)
                len_logits = prob_dict.shape[0]
                print "Debug len_logits", len_logits, prob_dict.shape
                dec_mat = tf.exp(
                    tf.minimum(prob_dict, tf.fill([len_logits, 1], 10.0)))
                dec_mat = tf.Print(dec_mat, [dec_mat],
                                   message="my decscore values:")

                posscoremat = dec_mat[:2 * len(self.edges[self.count])]
                print "Posscore softmax", posscoremat.shape

                negscore = tf.reduce_sum(dec_mat[2 *
                                                 len(self.edges[self.count]):])
                print "Negative softmax", negscore.shape

                negscore = tf.Print(negscore, [negscore],
                                    message="my negscore values:")
                negscoremat = tf.fill([2 * len(self.edges[self.count])],
                                      negscore)
                print "negscore", negscoremat.shape

                softmax_out = tf.truediv(posscoremat, negscore)
                print "Shape softmax", softmax_out.shape

                for i in range(len(edge_list)):
                    (u, v, w) = edge_list[i]
                    ll += tf.log(softmax_out[i] * w_edge[i][w - 1] + 1e-10)
                ll = tf.Print(ll, [ll], message="My loss")
            return (-ll)

        def get_trajectories(p_theta, w_theta, node_list, n_edges):
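            # Build per-node valence indicators and an edge mask from the
            # current ground-truth edge list, then rejection-sample weighted
            # edge sets (up to 500 trials) until one yields a connected graph.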

            indicator = np.ones([self.n, self.bin_dim])
            edge_mask = np.ones([self.n, self.n])
            degree = np.zeros(self.n)

            for (u, v, w) in self.edges[self.count]:
                edge_mask[u][v] = 0
                edge_mask[v][u] = 0
                degree[u] += 1
                degree[v] += 1
                if (node_list[u] - degree[u]) == 0:
                    indicator[u][0] = 0
                if (node_list[u] - degree[u]) <= 1:
                    indicator[u][1] = 0
                if (node_list[u] - degree[u]) <= 2:
                    indicator[u][2] = 0

                if (node_list[v] - degree[v]) == 0:
                    indicator[v][0] = 0
                if (node_list[v] - degree[v]) <= 1:
                    indicator[v][1] = 0
                if (node_list[v] - degree[v]) <= 2:
                    indicator[v][2] = 0

            trial = 0
            candidate_edges = []
            G = nx.Graph()

            while trial < 500:
                candidate_edges = get_weighted_edges(indicator, p_theta,
                                                     edge_mask, w_theta,
                                                     n_edges, node_list,
                                                     degree)
                G = nx.Graph()
                G.add_weighted_edges_from(candidate_edges)
                if nx.is_connected(G):
                    break
                trial += 1
            return candidate_edges, G

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n, self.n, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.index = tf.placeholder(dtype=tf.float32,
                                    shape=[self.n * (self.n - 1) // 2],
                                    name='index')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')

        self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim,
                             self.bin_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)

        self.rlcell = VAEGRLCell(self.adj, self.weight, self.features,
                                 self.z_dim, self.bin_dim, enc_mu, enc_sigma,
                                 self.edges, self.index)
        #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index
        rl_dec_out, rl_w_edge = self.rlcell.call(self.input_data, self.n,
                                                 self.d, self.k, self.eps,
                                                 hparams.sample)

        # We are considering 10 trajectories only
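        # Importance-weighted update: each trajectory's gradient is scaled by
        # exp(cost(G) / temperature) * (ll / ll_rl) and the ten scaled
        # gradients are averaged before apply_gradients.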
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = []
        for j in range(10):
            trajectory, G = get_trajectories(dec_out, w_edge, label,
                                             len(self.edges[self.count]))
            ll_rl = neg_loglikelihood(rl_dec_out, rl_w_edge, trajectory)
            # assumption: the base decoder's logits were intended here; the
            # original passed debug_sigma (the encoder's sigma) as prob_dict
            ll = neg_loglikelihood(dec_out, w_edge, trajectory)
            importance_weight = tf.exp(
                1.0 / self.temperature * compute_cost(G)) * (ll / ll_rl)
            self.cost = ll_rl * importance_weight
            grad = self.train_op.compute_gradients(ll_rl)
            for i in range(len(grad)):
                # compute_gradients returns (gradient, variable) pairs; scale
                # the gradient (index 0), not the variable as the original
                # indexing did, and leave variables without a gradient alone
                if grad[i][0] is None:
                    if len(self.grad) <= i:
                        self.grad.append(grad[i])
                    continue
                g = grad[i][0] * importance_weight
                if len(self.grad) > i:
                    self.grad[i] = (self.grad[i][0] + g / 10, grad[i][1])
                else:
                    # first trajectory: start the running average
                    self.grad.append((g / 10, grad[i][1]))

        self.prob = dec_out
        # print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label

        print_vars("trainable_variables")
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)
        self.sess = tf.Session()

    def initialize(self):
        logger.info("Initialization of parameters")
        # self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt is None or ckpt.model_checkpoint_path is None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def copy_weight(self, copydir):
        self.initialize()
        # restore every non-RL variable from the pretrained checkpoint; the
        # original kept only the first one ([0]), which looks like a slip
        var_old = [v for v in tf.global_variables() if "RL" not in v.name]
        saver = tf.train.Saver(var_old)
        ckpt = tf.train.get_checkpoint_state(copydir)
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, weight, weight_bin, features):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        with open(hparams.out_dir + '/iteration.txt', 'r') as f1:
            iteration = int(f1.read().strip())
        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)

        for epoch in range(num_epochs):
            start = time.time()
            for i in range(len(adj)):
                self.count = i
                if len(self.edges[self.count]) == 0:
                    continue
                # Learning rate decay
                # self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d,
                                                decay, placeholders)
                feed_dict.update({self.adj: adj[i]})
                # print "Debug", features[i].shape

                eps = np.random.randn(self.n, self.z_dim, 1)
                # tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)

                feed_dict.update({self.features: features[i]})
                feed_dict.update({self.weight_bin: weight_bin[i]})
                feed_dict.update({self.weight: weight[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})

                grad_vals = self.sess.run([g[0] for g in self.grad],
                                          feed_dict=feed_dict)
                for j in range(len(self.grad_placeholder)):
                    feed_dict.update(
                        {self.grad_placeholder[j][0]: grad_vals[j]})
                input_, train_loss, _, probdict, cx, w_edge = self.sess.run(
                    [
                        self.input_data, self.cost, self.apply_transform_op,
                        self.prob, self.c_x, self.w_edge
                    ],
                    feed_dict=feed_dict)
                iteration += 1
                if iteration % hparams.log_every == 0 and iteration > 0:
                    print(train_loss)
                    print("{}/{}(epoch {}), train_loss = {:.6f}".format(
                        iteration, num_epochs, epoch + 1, train_loss))
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess,
                               checkpoint_path,
                               global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
            end = time.time()
            print("Time taken for a batch: ", end - start)
        with open(hparams.out_dir + '/iteration.txt', 'w') as f1:
            f1.write(str(iteration))

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin,
                      weight):

        eps = np.random.randn(self.n, self.z_dim, 1)

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})

        prob, ll, kl, w_edge, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded],
            feed_dict=feed_dict)
        return embedding

    def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge,
                                                 atom_count, num_edges, hde):
        # node_list = defaultdict()
        rest = range(self.n)
        nodes = []
        hn = []
        on = []
        nn = []
        cn = []

        for i in range(self.n):
            if atom_count[i] == 1:
                hn.append(i)
            if atom_count[i] == 2:
                on.append(i)
            if atom_count[i] == 3 or atom_count[i] == 5:
                nn.append(i)
            if atom_count[i] == 4:
                cn.append(i)

        nodes.extend(hn)
        nodes.extend(cn)
        nodes.extend(on)
        nodes.extend(nn)

        node_list = atom_count
        print("Debug nodelist", node_list)

        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)

        for node in hn:
            indicator[node][1] = 0
            indicator[node][2] = 0
        for node in on:
            indicator[node][2] = 0

        # two hydrogen atoms cannot have an edge between them
        for n1 in hn:
            for n2 in hn:
                edge_mask[n1][n2] = 0
        candidate_edges = []
        # first generate edges joining with Hydrogen atoms sequentially
        print("Debug atom ratio", hn, on, nn, cn)
        print("Debug_degree", node_list)
        print("Debug nodes", nodes)
        index = 0
        i = 0
        hydro_sat = np.zeros(self.n)
        # first handle hydrogen nodes
        try:
            for node in nodes:
                deg_req = node_list[node]
                d = degree[node]
                list_edges = get_candidate_neighbor_edges(node, self.n)
                # for (u,v,w) in list_edges:
                #    print("list edges", u, node_list[u], degree[u], indicator[u], v, node_list[v], degree[v], indicator[v])
                # print("Debug list edges", node, list_edges)
                # print("Edge mask", edge_mask[node])
                if node in hn:
                    for i1 in range(self.n):
                        if hydro_sat[i1] == node_list[i1] - 1:
                            edge_mask[i1][node] = 0
                            edge_mask[node][i1] = 0
                while d < deg_req:
                    p = normalise_h1(prob, w_edge, self.bin_dim, indicator,
                                     edge_mask, node)
                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])

                    (u, v, w) = candidate_edges[i]
                    degree[u] += w
                    degree[v] += w
                    d += w
                    if u in hn:
                        hydro_sat[v] += 1
                    if v in hn:
                        hydro_sat[u] += 1
                    edge_mask[u][v] = 0
                    edge_mask[v][u] = 0
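
                    # Disable bond orders that would exceed the remaining
                    # valence (node_list holds each node's target valence).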

                    if (node_list[u] - degree[u]) == 0:
                        indicator[u][0] = 0
                    if (node_list[u] - degree[u]) <= 1:
                        indicator[u][1] = 0
                    if (node_list[u] - degree[u]) <= 2:
                        indicator[u][2] = 0

                    if (node_list[v] - degree[v]) == 0:
                        indicator[v][0] = 0
                    if (node_list[v] - degree[v]) <= 1:
                        indicator[v][1] = 0
                    if (node_list[v] - degree[v]) <= 2:
                        indicator[v][2] = 0

                    # check for disconnected components

                    i += 1
                    print("Debug candidate_edges", candidate_edges[i - 1])
                    #    print("change state", el, degree[el], node_list[el], indicator[el])
                    # '''
            # list_edges = get_candidate_edges(self.n)
            # if abs(len(candidate_edges) - num_edges) > 1 :
            #    return ''
            # '''
            candidate_rest = ''
            candidate_edges_new = ''
            for (u, v, w) in candidate_edges:
                if u < v:
                    candidate_edges_new += ' ' + str(u) + '-' + str(
                        v) + '-' + str(w)
                else:
                    candidate_edges_new += ' ' + str(v) + '-' + str(
                        u) + '-' + str(w)
            print("Candidate_edges_new", candidate_edges_new)
            return candidate_edges_new + ' ' + candidate_rest
        except Exception:
            # sampling can fail when no feasible edge remains; return an empty result
            return ''

    def get_masked_candidate(self,
                             list_edges,
                             prob,
                             w_edge,
                             num_edges,
                             hde,
                             indicator=[],
                             degree=[]):

        list_edges_original = copy.copy(list_edges)
        n = len(prob[0])
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        # while(count < 50):
        while (count < 1):
            applyrules = False
            list_edges = copy.copy(list_edges_original)
            if len(indicator) == 0:
                print("Debug indi new assign")
                indicator = np.ones([self.n, self.bin_dim])
            reach = np.ones([n, n])
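            # reach[u][v] stays 1 until u and v fall into the same connected
            # component of the partially sampled graph (updated below).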

            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [1], p=p, replace=False)
            ]
            # if degree == None:
            if len(degree) == 0:
                print("Debug degree new assign")
                degree = np.zeros([self.n])
            G = None
            saturation = 0

            for i1 in range(num_edges - 1):
                (u, v, w) = candidate_edges[i1]
                for j in range(n):

                    if reach[u][j] == 0:
                        reach[v][j] = 0
                        reach[j][v] = 0
                    if reach[v][j] == 0:
                        reach[u][j] = 0
                        reach[j][u] = 0

                reach[u][v] = 0
                reach[v][u] = 0

                degree[u] += w
                degree[v] += w

                if degree[u] >= 4:
                    indicator[u][0] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                if degree[u] >= 2:
                    indicator[u][2] = 0

                if degree[v] >= 4:
                    indicator[v][0] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                if degree[v] >= 2:
                    indicator[v][2] = 0

                # there will be no bridge
                p, list_edges, w = normalise(prob, w_edge, self.n,
                                             self.bin_dim, candidate_edges,
                                             list_edges, indicator)

                try:
                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])
                except Exception:
                    # no feasible edge left to sample at this step; skip it
                    continue
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges)
            ])] += 1
            count += 1

        # return the structure that was sampled most often
        return max(structure_list.items(), key=itemgetter(1))[0]

    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        # while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [num_edges], p=p, replace=False)
            ]
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges, key=itemgetter(0))
            ])] += 1

            # structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1

        # return the structure that was sampled most often
        return max(structure_list.items(), key=itemgetter(1))[0]
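
    # Minimal numpy sketch of the majority-vote sampling above (hypothetical,
    # for illustration; p and num_edges as in get_unmasked_candidate):
    #   votes = defaultdict(int)
    #   for _ in range(50):
    #       idx = np.random.choice(len(p), size=num_edges, replace=False, p=p)
    #       votes[tuple(sorted(idx))] += 1
    #   best = max(votes.items(), key=itemgetter(1))[0]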

    def sample_graph_posterior_new(self,
                                   hparams,
                                   placeholders,
                                   adj,
                                   features,
                                   weight_bins,
                                   weights,
                                   embeddings,
                                   k=0):
        list_edges = get_candidate_edges(self.n)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: features})
        feed_dict.update({self.weight_bin: weight_bins})
        feed_dict.update({self.weight: weights})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: embeddings})
        hparams.sample = True

        prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
        prob = np.reshape(prob, (self.n, self.n))

        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        atom_list = [
            4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ]
        # self.getatoms(atom_list)
        if not hparams.mask_weight:
            candidate_edges = self.get_unmasked_candidate(
                list_edges, prob, w_edge, hparams.edges)
        else:
            i = 0
            hde = 1
            # while (i < 1000):
            candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                prob, w_edge, atom_list, hparams.edges, hde)
            # if len(candidate_edges) > 0:
            #        break
            #    i += 1

            # candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde)
        with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f:
            for uvw in candidate_edges.split():
                [u, v, w] = uvw.split("-")
                u = int(u)
                v = int(v)
                w = int(w)
                if (u >= 0 and v >= 0):
                    # with open(hparams.sample_file + 'temp.txt', 'a') as f:
                    f.write(
                        str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) +
                        '}\n')

    def getatoms(self, node, label):
        label_new = np.reshape(label, (node, self.d))
        print("Debug label original shape:", label_new)

        label_new = np.exp(label_new)
        s = label_new.shape[0]
        print("Debug label shape:", label_new.shape, s)

        label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1))
        print("Debug label sum:", label_new_sum.shape)

        prob_label = label_new / label_new_sum
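        # Row-wise softmax over the label logits: each row of prob_label is a
        # per-node distribution over the (assumed) four valence classes.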
        pred_label = np.zeros(4)
        valency_arr = np.zeros(node)

        print("Debug prob label shape:", prob_label.shape, prob_label)

        # print("Debug label", label_new)
        for i in range(node):
            valency = np.random.choice(range(4), [1], p=prob_label[i])
            pred_label[valency] += 1
            valency_arr[i] = valency + 1

        print("Debug pred_label", pred_label, valency_arr)
        return (pred_label, valency_arr)

    def sample_graph_neighborhood(self,
                                  hparams,
                                  placeholders,
                                  adj,
                                  features,
                                  weights,
                                  weight_bins,
                                  s_num,
                                  node,
                                  ratio,
                                  hde,
                                  num=10,
                                  outdir=None):
        list_edges = get_candidate_edges(self.n)

        # eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim)
        eps = np.random.randn(self.n, self.z_dim, 1)

        train_mu = []
        train_sigma = []
        hparams.sample = False

        # approach 1
        for i in range(len(adj)):
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update({self.weight_bin: weight_bins[i]})
            feed_dict.update({self.weight: weights[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            hparams.sample = False
            prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost, self.w_edge
                ],
                feed_dict=feed_dict)

            with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt',
                      'a') as f:
                for z_i in z_encoded:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt',
                      'a') as f:
                for z_i in enc_mu:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt',
                      'a') as f:
                for x in range(self.n):
                    for z_i in enc_sigma[x]:
                        f.write('[' + ','.join([str(el)
                                                for el in z_i]) + ']\n')
                    f.write("\n")

            hparams.sample = True

            # for j in range(self.n):
            # for j in [1, 5, 15]:
            for j in [1]:
                z_encoded_neighborhood = copy.copy(z_encoded)
                feed_dict.update({self.eps: z_encoded_neighborhood})
                prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.w_edge, self.label
                    ],
                    feed_dict=feed_dict)
                # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
                with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt',
                          'a') as f:
                    for z_i in z_encoded:
                        f.write('[' + ','.join([str(el[0])
                                                for el in z_i]) + ']\n')
                    f.write("\n")

                prob = np.reshape(prob, (self.n, self.n))
                w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
                with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([str(el)
                                                for el in prob[x]]) + ']\n')
                    f.write("\n")
                with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([
                            str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2])
                            for el in w_edge[x]
                        ]) + ']\n')
                    f.write("\n")

                if not hparams.mask_weight:
                    print("Non mask")
                    candidate_edges = self.get_unmasked_candidate(
                        list_edges, prob, w_edge, hparams.edges)
                else:
                    print("Mask")
                    (atom_list,
                     valency_arr) = self.getatoms(hparams.nodes, labels)
                    candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                        prob, w_edge, valency_arr, hparams.edges, hde)

                for uvw in candidate_edges.split():
                    [u, v, w] = uvw.split("-")
                    u = int(u)
                    v = int(v)
                    w = int(w)
                    if (u >= 0 and v >= 0):
                        with open(
                                hparams.sample_file + "approach_1_node_" +
                                str(j) + "_" + str(s_num) + '.txt', 'a') as f:
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')

    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     weights,
                     weight_bins,
                     s_num,
                     node,
                     hde,
                     num=10,
                     outdir=None):
        '''
        Args:
            num - int (default 10)
                number of edges to be sampled
            outdir - string
                output directory
        '''
        list_edges = []

        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        # list_edges.append((-1, -1, 0))

        list_weight = [1, 2, 3]

        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})

        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, 3])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
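        # normalise (imported helper; behavior inferred from usage) flattens the
        # edge and bond-order scores into one probability vector p over list_edges.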

        if not hparams.mask_weight:
            trial = 0
            while trial < 5000:
                candidate_edges = [
                    list_edges[i] for i in np.random.choice(range(
                        len(list_edges)), [hparams.edges],
                                                            p=p,
                                                            replace=False)
                ]
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                with open(hparams.sample_file + 'test.txt') as f:
                    G = nx.read_edgelist(f, nodetype=int)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            with open(
                                    hparams.sample_file + "approach_2_" +
                                    str(trial) + "_" + str(s_num) + '.txt',
                                    'a') as f:
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                trial += 1

        else:
            trial = 0
            while trial < 5000:
                candidate_edges = self.get_masked_candidate(
                    list_edges, prob, w_edge, hparams.edges, hde)
                # print("Debug candidate", candidate_edges)
                if len(candidate_edges) > 0:
                    with open(hparams.sample_file + 'test.txt', 'w') as f:
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                    with open(hparams.sample_file + 'test.txt') as f:
                        G = nx.read_edgelist(f, nodetype=int)

                    if nx.is_connected(G):
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                with open(
                                        hparams.sample_file + "approach_2_" +
                                        str(trial) + "_" + str(s_num) + '.txt',
                                        'a') as f:
                                    f.write(
                                        str(u) + ' ' + str(v) +
                                        ' {\'weight\':' + str(w) + '}\n')
                trial += 1
Example #9
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 log_fact_k,
                 input_size,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.neg_sample_size = hparams.neg_sample_size
        self.input_size = input_size
        self.combination = hparams.node_sample * hparams.bfs_sample
        self.temperature = hparams.temperature
        self.E = 20

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.features1 = tf.placeholder(dtype=tf.int32,
                                        shape=[self.n],
                                        name='features1')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin1 = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n, self.n, hparams.bin_dim],
            name="weight_bin1")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.combination, None, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')
        #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index')
        self.edges = tf.placeholder(dtype=tf.int32,
                                    shape=[self.combination, None, 2],
                                    name='edges')
        self.all_edges = tf.placeholder(dtype=tf.int32,
                                        shape=[self.combination, None, 2],
                                        name='all_edges')
        self.n_fill_edges = tf.placeholder(dtype=tf.int32)
        #self.known_edges = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='known_edges')

        #node_count = [len(edge_list) for edge_list in self.edges]
        print("Debug Input size", self.input_size)
        node_count_tf = tf.fill([1, self.input_size],
                                tf.cast(self.n, tf.float32))
        node_count_tf = tf.Print(node_count_tf, [node_count_tf],
                                 message="My node_count_tf")
        print("Debug size node_count", node_count_tf.get_shape())

        #tf.convert_to_tensor(node_count, dtype=tf.int32)
        self.cell = VAEGCell(self.adj, self.weight, self.features,
                             self.z_dim, self.bin_dim,
                             tf.to_float(node_count_tf), self.all_edges)
        self.c_x, enc_mu, enc_sigma, self.debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label
        self.lambda_n = lambda_n
        self.lambda_e = lambda_e

        #adj, weight, features, z_dim, bin_dim, node_count, edges, enc_mu, enc_sigma
        self.rlcell = VAEGRLCell(self.adj, self.weight, self.features,
                                 self.z_dim, self.bin_dim, self.all_edges,
                                 enc_mu, enc_sigma)
        #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index
        self.rl_dec_out, self.rl_w_edge = self.rlcell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.sess = tf.Session()
Example #10
class VAEGRL(VAEGConfig):
    def __init__(self,
                 hparams,
                 placeholders,
                 num_nodes,
                 num_features,
                 log_fact_k,
                 input_size,
                 istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.neg_sample_size = hparams.neg_sample_size
        self.input_size = input_size
        self.combination = hparams.node_sample * hparams.bfs_sample
        self.temperature = hparams.temperature
        self.E = 20

        self.adj = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.n],
                                  name='adj')
        self.features = tf.placeholder(dtype=tf.float32,
                                       shape=[self.n, self.d],
                                       name='features')
        self.features1 = tf.placeholder(dtype=tf.int32,
                                        shape=[self.n],
                                        name='features1')
        self.weight = tf.placeholder(dtype=tf.float32,
                                     shape=[self.n, self.n],
                                     name="weight")
        self.weight_bin1 = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n, self.n, hparams.bin_dim],
            name="weight_bin1")
        self.weight_bin = tf.placeholder(
            dtype=tf.float32,
            shape=[self.combination, None, hparams.bin_dim],
            name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[self.k, self.n, self.d],
                                         name='input')
        self.eps = tf.placeholder(dtype=tf.float32,
                                  shape=[self.n, self.z_dim, 1],
                                  name='eps')
        #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index')
        self.edges = tf.placeholder(dtype=tf.int32,
                                    shape=[self.combination, None, 2],
                                    name='edges')
        self.all_edges = tf.placeholder(dtype=tf.int32,
                                        shape=[self.combination, None, 2],
                                        name='all_edges')
        self.n_fill_edges = tf.placeholder(dtype=tf.int32)
        #self.known_edges = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='known_edges')

        #node_count = [len(edge_list) for edge_list in self.edges]
        print("Debug Input size", self.input_size)
        node_count_tf = tf.fill([1, self.input_size],
                                tf.cast(self.n, tf.float32))
        node_count_tf = tf.Print(node_count_tf, [node_count_tf],
                                 message="My node_count_tf")
        print("Debug size node_count", node_count_tf.get_shape())

        #tf.convert_to_tensor(node_count, dtype=tf.int32)
        self.cell = VAEGCell(self.adj, self.weight, self.features,
                             self.z_dim, self.bin_dim,
                             tf.to_float(node_count_tf), self.all_edges)
        self.c_x, enc_mu, enc_sigma, self.debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label
        self.lambda_n = lambda_n
        self.lambda_e = lambda_e

        #adj, weight, features, z_dim, bin_dim, node_count, edges, enc_mu, enc_sigma
        self.rlcell = VAEGRLCell(self.adj, self.weight, self.features,
                                 self.z_dim, self.bin_dim, self.all_edges,
                                 enc_mu, enc_sigma)
        #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index
        self.rl_dec_out, self.rl_w_edge = self.rlcell.call(
            self.input_data, self.n, self.d, self.k, self.combination,
            self.eps, hparams.sample)
        self.sess = tf.Session()
        # We are considering 10 trajectories only
    def likelihood(self, prob_dict, w_edge, edge_list):
        '''
        negative log-likelihood of the edges
        '''
        ll = 0
        k = 0
        with tf.variable_scope('NLL'):
            dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])

            w_edge_exp = tf.exp(
                tf.minimum(tf.reshape(w_edge, [self.n, self.n, self.bin_dim]),
                           tf.fill([self.n, self.n, self.bin_dim], 10.0)))
            w_edge_pos = tf.multiply(self.weight_bin1, w_edge_exp)

            w_edge_total = tf.reduce_sum(w_edge_exp, axis=1)
            w_edge_score = tf.divide(w_edge_pos, w_edge_total)

            dec_mat = tf.exp(
                tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))
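            # Logits are clipped at 10.0 before exponentiation so tf.exp cannot overflow.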

            dec_mat = tf.Print(dec_mat, [dec_mat],
                               message="my decscore values:")

            print "Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat
            comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), self.adj)
            comp = tf.Print(comp, [comp], message="my comp values:")

            temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
            negscore = tf.fill([self.n, self.n], temp + 1e-9)
            negscore = tf.Print(negscore, [negscore],
                                message="my negscore values:")

            posscore = tf.multiply(self.adj, dec_mat)
            posscore = tf.Print(posscore, [posscore],
                                message="my posscore values:")

            #dec_out = tf.multiply(self.adj, dec_mat)
            softmax_out = tf.divide(posscore, tf.add(posscore, negscore))
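            # softmax_out[u][v]: mass of observed edge (u, v) relative to the
            # total mass on non-edges, used as an edge-probability surrogate.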
            #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9))),1)
            ll = 1.0
            for i in range(len(edge_list)):
                (u, v, w) = edge_list[i]
                ll += softmax_out[u][v] * w_edge_score[u][v][w - 1] + 1e-10
            ll = tf.Print(ll, [ll], message="My loss")
        return (ll)

    def get_trajectories(self, p_theta, w_theta, edges, weight, n_fill_edges,
                         atom_list):

        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)
        #print("Debug known edges", tf.shape(self.known_edges),self.known_edges.get_shape())
        #N = tf.stack([tf.shape(self.known_edges)[0]])[0]
        #known_edges = tf.unstack(self.known_edges)
        # For the time being make the number of known edges a constant E
        #'''
        known_edges = []
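        # Seed the sampler with the first E known edges: mark them used,
        # accumulate degrees and disable bond orders that would push a node
        # past the assumed maximum valence of 4.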
        for k in range(self.E):
            (u, v) = edges[k]
            edge_mask[u][v] = 0
            edge_mask[v][u] = 0
            degree[u] += weight[u][v]
            degree[v] += weight[v][u]
            known_edges.append((u, v, weight[u][v]))
            if (4 - degree[u]) == 0:
                indicator[u][0] = 0
            if (4 - degree[u]) <= 1:
                indicator[u][1] = 0
            if (4 - degree[u]) <= 2:
                indicator[u][2] = 0

            if (4 - degree[v]) == 0:
                indicator[v][0] = 0
            if (4 - degree[v]) <= 1:
                indicator[v][1] = 0
            if (4 - degree[v]) <= 2:
                indicator[v][2] = 0
        #'''
        trial = 0
        candidate_edges = []
        G = nx.Graph()
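
        # Resample the fill-in edges up to 5 times, keeping the first completion
        # that yields a connected graph.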

        while trial < 5:
            #candidate_edges =
            #candidate_edges =
            #self.get_masked_candidate_with_atom_ratio_new(p_theta, w_theta, node_list, self.n_fill_edges, 1)
            #get_weighted_edges(indicator, p_theta, edge_mask, w_theta, self.n_fill_edges, node_list, degree)
            candidate_edges = get_masked_candidate_new(p_theta, w_theta,
                                                       n_fill_edges, atom_list,
                                                       indicator, edge_mask,
                                                       degree)
            candidate_edges.extend(known_edges)
            G = nx.Graph()
            G.add_nodes_from(range(self.n))
            G.add_weighted_edges_from(candidate_edges)
            if nx.is_connected(G):
                print("Debug trial", trial)
                break
            trial += 1
            print("Trial", trial)
        return candidate_edges, G

    def compute_loss(self, prob, w_edge, rl_dec_out, rl_w_edge, edges, weight,
                     n_fill_edges, atom_list):
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = []
        tvars = tf.trainable_variables()
        g_vars = [var for var in tvars if 'RL' in var.name]
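        # Only RL-scoped variables would be updated here; note the gradient
        # application below is currently commented out.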
        for j in range(1):
            trajectory, G = self.get_trajectories(rl_dec_out[0], rl_w_edge[0],
                                                  edges, weight, n_fill_edges,
                                                  atom_list)
            print("Debug trajectory", trajectory)
            #trajectory, G = get_trajectories(rl_dec_out, rl_w_edge, label, self.edges[0])
            ll_rl = self.likelihood(self.rl_dec_out[0], self.rl_w_edge[0],
                                    trajectory)
            ll_rl = tf.Print(ll_rl, [ll_rl], message="my ll_rl values:")
            ll = 1
            #self.likelihood(self.prob[0], self.w_edge[0], trajectory)
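            # Off-policy importance weight: exp(reward / temperature) times the
            # likelihood ratio ll / ll_rl (ll, the model likelihood, is fixed to 1 above).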
            importance_weight = tf.exp(
                1 / self.temperature * compute_cost(G)) * (ll / ll_rl)
            importance_weight = tf.Print(
                importance_weight, [importance_weight],
                message="my importance_weight values:")

            print("Debug importance weight", importance_weight)

            self.cost = ll_rl * importance_weight
            '''
            tensor = tf.constant([1], dtype=tf.float32)

            grad = self.train_op.compute_gradients(tf.log(ll_rl))
            #grad = self.train_op.compute_gradients(tensor, var_list=g_vars)
            #grad = tf.Print(grad, [grad], message="my grad values:")
            print("Debug grad", len(grad), grad, ll_rl)
            for i in range(len(grad)):
                g = grad[i][0] * importance_weight
                if len(self.grad) > i:
                    self.grad[i] = (self.grad[i][0] + g / 10, grad[i][1])
                else:
                    self.grad.append(grad[i])

            '''
        '''
        print_vars("trainable_variables")
        print("Debug self grads", self.grad)

        
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)
        '''

    def initialize(self):
        logger.info("Initialization of parameters")
        # self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt == None or ckpt.model_checkpoint_path == None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def copy_weight(self, copydir):
        self.initialize()
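        # Restore only the pretrained (non-RL) variables from copydir; variables
        # in the RL scope keep their fresh initialization.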
        print("Debug all", tf.global_variables())
        var_old = [v for v in tf.global_variables() if "RL" not in v.name]
        print("Debug var_old", var_old)
        saver = tf.train.Saver(var_old)
        ckpt = tf.train.get_checkpoint_state(copydir)
        print_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path,
                                         tensor_name='',
                                         all_tensors=False)
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, weight, weight_bin,
              weight_bin1, features, edges, all_edges, features1, atom_list):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        with open(hparams.out_dir + '/iteration.txt', 'r') as f1:
            iteration = int(f1.read().strip())

        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)
        start_before_epoch = time.time()
        for epoch in range(num_epochs):
            start = time.time()
            for i in range(len(adj)):
                #self.count = i

                if len(edges[i]) == 0:
                    continue
                # Learning rate decay
                #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d,
                                                decay, placeholders)
                feed_dict.update({self.adj: adj[i]})

                eps = np.random.randn(self.n, self.z_dim, 1)
                #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32)

                feed_dict.update({self.features: features[i]})
                feed_dict.update({self.features1: features1[i]})
                feed_dict.update({self.weight_bin: weight_bin[i]})
                feed_dict.update({self.weight_bin1: weight_bin1[i]})
                feed_dict.update({self.weight: weight[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                feed_dict.update({self.n_fill_edges: len(edges[i][0]) - 20})
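                # The first self.E (= 20) edges are treated as known context;
                # the remaining edges are to be generated by the model.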
                #neg_indices = np.random.choice(range(len(neg_edges[i])), hparams.neg_sample_size, replace=False)
                #combined_edges = []
                #neg_edges_to_be_extended = [neg_edges[i][index] for index in neg_indices]
                #copy_edge = copy.deepcopy(edges[i])
                #for j in range(len(edges[i])):
                #    #print("Debug edge_list", edge)
                #    copy_edge[j].extend(neg_edges_to_be_extended)

                #print("Debug edge_list_combined", combined_edges)
                #print("Debug feed edges", i, len(edges[i][0]), len(copy_edge[0]))
                feed_dict.update({self.edges: edges[i]})
                feed_dict.update({self.all_edges: [all_edges[i]]})
                #feed_dict.update({self.known_edges:copy_edge})

                #input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n= self.sess.run([self.input_data ,self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge, self.lambda_e, self.lambda_n], feed_dict=feed_dict)
                prob, w_edge, rl_prob, rl_w_edge, lambda_e, lambda_n = self.sess.run(
                    [
                        self.prob, self.w_edge, self.rl_dec_out,
                        self.rl_w_edge, self.lambda_e, self.lambda_n
                    ],
                    feed_dict=feed_dict)
                print("Debug shapes", rl_prob[0].shape, rl_w_edge[0].shape)
                self.compute_loss(prob, w_edge, rl_prob, rl_w_edge,
                                  edges[i][0], weight[i],
                                  len(edges[i][0]) - 20, atom_list)
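                # compute_loss rebuilds self.cost for this batch from freshly
                # sampled trajectories; the cost is then evaluated below.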
                #train_loss, _ = self.sess.run([self.cost, self.apply_transform_op])
                train_loss = self.sess.run(self.cost, feed_dict=feed_dict)
                #input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n= self.sess.run([self.input_data ,self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge, self.lambda_e, self.lambda_n], feed_dict=feed_dict)

                iteration += 1
                #print("Lambda_e, lambda_n", lambda_e, lambda_n, i)
                if iteration % hparams.log_every == 0 and iteration > 0:
                    #print(train_loss)
                    print("{}/{}(epoch {}), train_loss = {:.6f}".format(
                        iteration, num_epochs, epoch + 1, train_loss))
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess,
                               checkpoint_path,
                               global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
            end = time.time()
            print("Time taken for a batch: ", end - start)
        end_after_epoch = time.time()
        print("Time taken to completed all epochs",
              -start_before_epoch + end_after_epoch)
        with open(hparams.out_dir + '/iteration.txt', 'w') as f1:
            f1.write(str(iteration))

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin,
                      weight):

        eps = np.random.randn(self.n, self.z_dim, 1)

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})

        prob, ll, kl, w_edge, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded],
            feed_dict=feed_dict)
        return embedding

    def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge,
                                                 atom_count, num_edges, hde):
        rest = range(self.n)
        nodes = []
        hn = []
        on = []
        nn = []
        cn = []

        for i in range(self.n):
            if atom_count[i] == 1:
                hn.append(i)
            if atom_count[i] == 2:
                on.append(i)
            if atom_count[i] == 3 or atom_count[i] == 5:
                nn.append(i)
            if atom_count[i] == 4:
                cn.append(i)

        nodes.extend(hn)
        nodes.extend(cn)
        nodes.extend(on)
        nodes.extend(nn)

        node_list = atom_count
        print("Debug nodelist", node_list)

        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)

        for node in hn:
            indicator[node][1] = 0
            indicator[node][2] = 0
        for node in on:
            indicator[node][2] = 0

        # two hydrogen atoms cannot have an edge between them
        for n1 in hn:
            for n2 in hn:
                edge_mask[n1][n2] = 0
        candidate_edges = []
        # first generate edges joining with Hydrogen atoms sequentially
        index = 0
        i = 0
        hydro_sat = np.zeros(self.n)
        # first handle hydrogen nodes
        try:
            for node in nodes:
                deg_req = node_list[node]
                d = degree[node]
                list_edges = get_candidate_neighbor_edges(node, self.n)
                if node in hn:
                    for i1 in range(self.n):
                        if hydro_sat[i1] == node_list[i1] - 1:
                            edge_mask[i1][node] = 0
                            edge_mask[node][i1] = 0
                while d < deg_req:
                    p = normalise_h1(prob, w_edge, self.bin_dim, indicator,
                                     edge_mask, node)

                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])

                    (u, v, w) = candidate_edges[i]
                    degree[u] += w
                    degree[v] += w
                    d += w
                    if u in hn:
                        hydro_sat[v] += 1
                    if v in hn:
                        hydro_sat[u] += 1
                    edge_mask[u][v] = 0
                    edge_mask[v][u] = 0

                    if (node_list[u] - degree[u]) == 0:
                        indicator[u][0] = 0
                    if (node_list[u] - degree[u]) <= 1:
                        indicator[u][1] = 0
                    if (node_list[u] - degree[u]) <= 2:
                        indicator[u][2] = 0

                    if (node_list[v] - degree[v]) == 0:
                        indicator[v][0] = 0
                    if (node_list[v] - degree[v]) <= 1:
                        indicator[v][1] = 0
                    if (node_list[v] - degree[v]) <= 2:
                        indicator[v][2] = 0

                    i += 1
                    print("Debug candidate_edges", candidate_edges[i - 1])
                    #    print("change state", el, degree[el], node_list[el], indicator[el])
                    #'''
        except Exception:
            # sampling can fail midway; keep whatever candidate edges were built
            pass
        candidate_edges_new = []
        for (u, v, w) in candidate_edges:
            if u < v:
                candidate_edges_new.append(
                    str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}")
            else:
                candidate_edges_new.append(
                    str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}")
        print("Candidate_edges_new", candidate_edges_new)
        return candidate_edges_new

    def get_masked_candidate(self,
                             list_edges,
                             prob,
                             w_edge,
                             num_edges,
                             hde,
                             indicator=[],
                             degree=[]):

        list_edges_original = copy.copy(list_edges)
        n = len(prob[0])
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        # while(count < 50):
        while (count < 1):
            applyrules = False
            list_edges = copy.copy(list_edges_original)
            if len(indicator) == 0:
                print("Debug indi new assign")
                indicator = np.ones([self.n, self.bin_dim])
            reach = np.ones([n, n])

            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [1], p=p, replace=False)
            ]
            # if degree == None:
            if len(degree) == 0:
                print("Debug degree new assign")
                degree = np.zeros([self.n])
            G = None
            saturation = 0

            for i1 in range(num_edges - 1):
                (u, v, w) = candidate_edges[i1]
                for j in range(n):

                    if reach[u][j] == 0:
                        reach[v][j] = 0
                        reach[j][v] = 0
                    if reach[v][j] == 0:
                        reach[u][j] = 0
                        reach[j][u] = 0

                reach[u][v] = 0
                reach[v][u] = 0

                degree[u] += w
                degree[v] += w

                if degree[u] >= 4:
                    indicator[u][0] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                if degree[u] >= 2:
                    indicator[u][2] = 0

                if degree[v] >= 4:
                    indicator[v][0] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                if degree[v] >= 2:
                    indicator[v][2] = 0

                # there will be no bridge
                p, list_edges, w = normalise(prob, w_edge, self.n,
                                             self.bin_dim, candidate_edges,
                                             list_edges, indicator)

                try:
                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])
                except Exception:
                    # no feasible edge left to sample at this step; skip it
                    continue
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges)
            ])] += 1
            count += 1

        # return the structure that was sampled most often
        return max(structure_list.items(), key=itemgetter(1))[0]

    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        # while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)
            candidate_edges = [
                list_edges[k] for k in np.random.choice(
                    range(len(list_edges)), [num_edges], p=p, replace=False)
            ]
            structure_list[' '.join([
                str(u) + '-' + str(v) + '-' + str(w)
                for (u, v, w) in sorted(candidate_edges, key=itemgetter(0))
            ])] += 1

            # structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1

        # return the structure that was sampled most often
        return max(structure_list.items(), key=itemgetter(1))[0]

    def sample_graph_posterior_new(self,
                                   hparams,
                                   placeholders,
                                   adj,
                                   features,
                                   weight_bins,
                                   weights,
                                   embeddings,
                                   k=0):
        list_edges = get_candidate_edges(self.n)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: features})
        feed_dict.update({self.weight_bin: weight_bins})
        feed_dict.update({self.weight: weights})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: embeddings})
        hparams.sample = True

        prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
        prob = np.reshape(prob, (self.n, self.n))

        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        atom_list = [
            4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ]
        # self.getatoms(atom_list)
        if not hparams.mask_weight:
            candidate_edges = self.get_unmasked_candidate(
                list_edges, prob, w_edge, hparams.edges)
        else:
            i = 0
            hde = 1
            # while (i < 1000):
            candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                prob, w_edge, atom_list, hparams.edges, hde)
            # if len(candidate_edges) > 0:
            #        break
            #    i += 1

            # candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde)
        with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f:
            for uvw in candidate_edges.split():
                [u, v, w] = uvw.split("-")
                u = int(u)
                v = int(v)
                w = int(w)
                if (u >= 0 and v >= 0):
                    # with open(hparams.sample_file + 'temp.txt', 'a') as f:
                    f.write(
                        str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) +
                        '}\n')

    def getatoms(self, node, label):
        label_new = np.reshape(label, (node, self.d))
        print("Debug label original shape:", label_new)

        label_new = np.exp(label_new)
        s = label_new.shape[0]
        print("Debug label shape:", label_new.shape, s)

        label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1))
        print("Debug label sum:", label_new_sum.shape)

        prob_label = label_new / label_new_sum
        pred_label = np.zeros(4)
        valency_arr = np.zeros(node)

        print("Debug prob label shape:", prob_label.shape, prob_label)

        # print("Debug label", label_new)
        for i in range(node):
            valency = np.random.choice(range(4), [1], p=prob_label[i])
            pred_label[valency] += 1
            valency_arr[i] = valency + 1

        print("Debug pred_label", pred_label, valency_arr)
        return (pred_label, valency_arr)
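
    # getatoms() treats each row of the label matrix as logits over 4 valence
    # classes, softmax-normalises them, and samples valency i + 1 for class i.
    # A NumPy sketch of just the normalisation step (illustrative only):
    @staticmethod
    def _valence_softmax(logits):
        # logits: shape (n, 4); each returned row sums to 1
        e = np.exp(logits)
        return e / np.sum(e, axis=1, keepdims=True)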

    def sample_graph_neighborhood(self,
                                  hparams,
                                  placeholders,
                                  adj,
                                  features,
                                  weights,
                                  weight_bins,
                                  s_num,
                                  node,
                                  ratio,
                                  hde,
                                  num=10,
                                  outdir=None):
        list_edges = get_candidate_edges(self.n)

        # eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim)
        eps = np.random.randn(self.n, self.z_dim, 1)

        train_mu = []
        train_sigma = []
        hparams.sample = False

        # approach 1
        for i in range(len(adj)):
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d, hparams.decay_rate,
                                            placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update({self.weight_bin: weight_bins[i]})
            feed_dict.update({self.weight: weights[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            hparams.sample = False
            prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost, self.w_edge
                ],
                feed_dict=feed_dict)

            with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt',
                      'a') as f:
                for z_i in z_encoded:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt',
                      'a') as f:
                for z_i in enc_mu:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
                f.write("\n")

            with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt',
                      'a') as f:
                for x in range(self.n):
                    for z_i in enc_sigma[x]:
                        f.write('[' + ','.join([str(el)
                                                for el in z_i]) + ']\n')
                    f.write("\n")

            hparams.sample = True

            # for j in range(self.n):
            # for j in [1, 5, 15]:
            for j in [1]:
                z_encoded_neighborhood = copy.copy(z_encoded)
                feed_dict.update({self.eps: z_encoded_neighborhood})
                prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.w_edge, self.label
                    ],
                    feed_dict=feed_dict)
                # prob = np.triu(np.reshape(prob,(self.n,self.n)),1)
                with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt',
                          'a') as f:
                    for z_i in z_encoded:
                        f.write('[' + ','.join([str(el[0])
                                                for el in z_i]) + ']\n')
                    f.write("\n")

                prob = np.reshape(prob, (self.n, self.n))
                w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
                with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([str(el)
                                                for el in prob[x]]) + ']\n')
                    f.write("\n")
                with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt',
                          'a') as f:
                    for x in range(self.n):
                        f.write('[' + ','.join([
                            str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2])
                            for el in w_edge[x]
                        ]) + ']\n')
                    f.write("\n")

                if not hparams.mask_weight:
                    print("Non mask")
                    candidate_edges = self.get_unmasked_candidate(
                        list_edges, prob, w_edge, hparams.edges)
                else:
                    print("Mask")
                    (atom_list,
                     valency_arr) = self.getatoms(hparams.nodes, labels)
                    candidate_edges = self.get_masked_candidate_with_atom_ratio_new(
                        prob, w_edge, valency_arr, hparams.edges, hde)

                for uvw in candidate_edges.split():
                    [u, v, w] = uvw.split("-")
                    u = int(u)
                    v = int(v)
                    w = int(w)
                    if (u >= 0 and v >= 0):
                        with open(
                                hparams.sample_file + "approach_1_node_" +
                                str(j) + "_" + str(s_num) + '.txt', 'a') as f:
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')

    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     weights,
                     weight_bins,
                     s_num,
                     node,
                     hde,
                     num=10,
                     outdir=None):
        '''
        Args:
            num (int): number of edges to sample (default 10).
            outdir (str): output directory.
        '''
        list_edges = []

        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        # list_edges.append((-1, -1, 0))

        list_weight = [1, 2, 3]

        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})

        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, 3])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)

        if not hparams.mask_weight:
            trial = 0
            while trial < 5000:
                candidate_edges = [
                    list_edges[i] for i in np.random.choice(
                        range(len(list_edges)), [hparams.edges], p=p, replace=False)
                ]
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                with open(hparams.sample_file + 'test.txt') as f:
                    G = nx.read_edgelist(f, nodetype=int)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            with open(
                                    hparams.sample_file + "approach_2_" +
                                    str(trial) + "_" + str(s_num) + '.txt',
                                    'a') as f:
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                trial += 1

        else:
            trial = 0
            while trial < 5000:
                candidate_edges = self.get_masked_candidate(
                    list_edges, prob, w_edge, hparams.edges, hde)
                # print("Debug candidate", candidate_edges)
                if len(candidate_edges) > 0:
                    with open(hparams.sample_file + 'test.txt', 'w') as f:
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                    with open(hparams.sample_file + 'test.txt') as f:
                        G = nx.read_edgelist(f, nodetype=int)

                    if nx.is_connected(G):
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                with open(
                                        hparams.sample_file + "approach_2_" +
                                        str(trial) + "_" + str(s_num) + '.txt',
                                        'a') as f:
                                    f.write(
                                        str(u) + ' ' + str(v) +
                                        ' {\'weight\':' + str(w) + '}\n')
                trial += 1
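
    # sample_graph() above is plain rejection sampling: draw a candidate edge
    # set, keep it only if the resulting graph is connected, and retry up to
    # 5000 times. A compact sketch of that accept/reject loop (hypothetical
    # helper; sample_fn is assumed to return a networkx graph or None):
    @staticmethod
    def _sample_until_connected(sample_fn, max_trials=5000):
        for _ in range(max_trials):
            G = sample_fn()
            if G is not None and nx.is_connected(G):
                return G
        return None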
Example #12
class VAEGRL(VAEGConfig):
    def __init__(self, hparams, placeholders, num_nodes, num_features, log_fact_k, input_size, istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.neg_sample_size = hparams.neg_sample_size
        self.input_size = input_size
        self.combination = hparams.node_sample * hparams.bfs_sample
        self.temperature = hparams.temperature
        self.E = hparams.E
        self.no_traj = hparams.no_traj

        self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj')
        self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features')
        self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input')
        self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps')
        
        #For every trajectory
        self.edges = tf.placeholder(dtype=tf.int32, shape=[self.no_traj, None, 2], name='edges') 
        self.weight_bin = tf.placeholder(dtype=tf.float32, shape=[self.no_traj, self.n, self.n, hparams.bin_dim], name="weight_bin")
        self.neg_edges = tf.placeholder(dtype=tf.int32, shape=[self.no_traj, None, 2], name='neg_edges') 
        self.all_edges = tf.placeholder(dtype=tf.int32, shape=[self.combination, None, 2], name='all_edges')
        
        # one placeholder slot per sampled trajectory (self.no_traj of them; 5 in the current runs)
        self.trajectories = tf.placeholder(dtype=tf.float32, shape=[self.no_traj, self.n, self.n], name="trajectories")
        self.properties = tf.placeholder(dtype=tf.float32, shape=[self.no_traj], name="properties")
        self.n_fill_edges = tf.placeholder(dtype=tf.int32)
        self.n_edges = tf.placeholder(dtype=tf.float32)
        self.penalty = tf.placeholder(shape=[self.no_traj],dtype=tf.float32)

        #self.known_edges = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='known_edges') 
        #node_count = [len(edge_list) for edge_list in self.edges]
        #print("Debug Input size", self.input_size)
        
        node_count_tf = tf.fill([1, self.input_size], tf.cast(self.n, tf.float32))
        #node_count_tf = tf.Print(node_count_tf, [node_count_tf], message="My node_count_tf")
        #print("Debug size node_count", node_count_tf.get_shape())
        #tf.convert_to_tensor(node_count, dtype=tf.int32)
        
        self.cell = VAEGCell(self.adj, self.features, self.z_dim, self.bin_dim, node_count_tf, self.all_edges)
        self.c_x, dec_out, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample)
        self.prob = dec_out
        
        # print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        #self.enc_mu = enc_mu
        #self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        
        #self.label = label
        #self.lambda_n = lambda_n
        #self.lambda_e = lambda_e
        #adj, weight, features, z_dim, bin_dim, node_count, edges, enc_mu, enc_sigma
        self.rlcell = VAEGRLCell(self.adj, self.features, self.z_dim, self.bin_dim, self.all_edges)
        #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index
        self.rl_dec_out, self.rl_w_edge, self.lambda_e, self.label = self.rlcell.call(self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample)
        print_vars("trainable_variables")
        total_cost = 0.0
        #self.lr = tf.Print(self.lr, [self.lr], message="my lr-values:")
        #self.train_op = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-06)
        ll = []
        #self.grad = []
        self.grad_placeholder = []
        ll_rl = []
        self.apply_transform_op = []
        tvars = tf.trainable_variables()
        g_vars = [var for var in tvars if 'RL' in var.name]
        print("Debug gvars", g_vars)
        V = []
        ll = []
        ll_rl = []
        w_list = []
        loss = 0.0
        ll_loss = 0.0
        
        for i in range(self.no_traj):
            #if self.properties[i] == 0:
            #    continue
            ll_temp = self.likelihood(self.trajectories[i], self.edges[i], self.neg_edges[i], self.weight_bin[i], self.prob[0], self.w_edge[0], self.penalty[i])
            #ll_temp = tf.Print(ll_temp, [ll_temp], message="my ll-values:")
            ll_poisson = self.likelihood_poisson(lambda_e, self.n_edges)
            label_pred = self.label_loss_predict(self.features, label)
            #label_pred = tf.Print(label_pred, [label_pred], message="my label-ll-values:")
            ll.append(ll_temp + ll_poisson + label_pred)

            ll_rl_temp = self.likelihood(self.trajectories[i], self.edges[i], self.neg_edges[i], self.weight_bin[i], self.rl_dec_out[0], self.rl_w_edge[0], self.penalty[i])
            #ll_rl_temp = tf.Print(ll_rl_temp,[ll_rl_temp], message="my ll_rl-values:")
            ll_rl_poisson = self.likelihood_poisson(self.lambda_e, self.n_edges)
            label_pred_rl = self.label_loss_predict(self.features, self.label)
            #label_pred_rl = tf.Print(label_pred_rl, [label_pred_rl], message="my label-ll-rl-values:")

            ll_rl.append(ll_rl_temp + ll_rl_poisson + label_pred_rl)

            # w_list.append(self.temperature * tf.subtract(ll_rl[i], ll[i])+self.properties[i])
            w_list.append(tf.subtract(ll_rl[i], ll[i]) + self.temperature * self.properties[i] + 1.0)
            ll_loss += (ll_rl[i] - ll[i])
            loss += (ll_rl[i] - ll[i]) + self.temperature * self.properties[i]
        w_total = tf.add_n(w_list)
        w_total = tf.Print(w_total, [w_total], message="my wtotal-values:")
        
        self.ll_loss = ll_loss / self.no_traj
        self.loss = loss / self.no_traj
        temp_grad = []
        temp_c_grad = []
        grad_val = []
        grad_c_val =[]
        grad_shape = []
        grad_c_shape = []
        grad_comparison = self.train_op.compute_gradients(self.loss)
        
        for x in range(len(g_vars)):
            if grad_comparison[x][0] is not None:
                g = grad_comparison[x]
            else:
                g = (tf.fill(tf.shape(g_vars[x]), tf.cast(0.0, tf.float32)), grad_comparison[x][1])
            #if i == 0:
            grad_c_val.append(g[0])
            grad_c_shape.append(g[0].get_shape().as_list())
                        
        for i in range(self.no_traj):
            grad = self.train_op.compute_gradients(ll_rl[i], var_list=g_vars)
            w = w_list[i]
            #w = tf.divide(w_list[i], w_total)
            w = tf.Print(w, [w], message="my Imp weight-values:")

            for x in range(len(g_vars)):
                if grad[x][0] is not None:
                    g = grad[x]
                else:
                    g = (tf.fill(tf.shape(g_vars[x]), tf.cast(0.0, tf.float32)), grad[x][1])
                if i == 0:
                    temp_grad.append((w * g[0] / (self.no_traj * 50), g[1]))
                    grad_val.append(w * g[0])
                    grad_shape.append(g[0].get_shape().as_list())
                else:
                    temp_grad[x] = (tf.add(temp_grad[x][0], w * g[0]) / (self.no_traj * 50), g[1])
                    grad_val[x] = tf.add(grad_val[x], w * g[0])
                    #grad_shape.append(g[0].get_shape().as_list())
        print("Debug Grad length", len(temp_grad), len(g_vars))
        self.grad = temp_grad
        self.apply_transform_op = self.train_op.apply_gradients(temp_grad)
        #self.grad = temp_grad
        self.sess = tf.Session()
        #self.error = error
        # We are considering 10 trajectories only
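
    # The per-trajectory importance weight assembled in __init__ above is
    #   w_i = (ll_rl_i - ll_i) + temperature * property_i + 1.0,
    # so the RL decoder is rewarded both for likelihood gains over the frozen
    # VAE decoder and for the molecular property score; the gradient of each
    # ll_rl_i is scaled by w_i and averaged over the no_traj trajectories.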

    def label_loss_predict(self, label, predicted_labels):
        loss = 0.0
        #for i in range(self.combination):
        predicted_label = predicted_labels
        predicted_label_resized = tf.reshape(predicted_label, [self.n, self.d])
        predicted_label_exp = tf.exp(tf.minimum(predicted_label_resized, tf.fill([self.n, self.d], 10.0)))
        predicted_label_pos = tf.reduce_sum(tf.multiply(label, predicted_label_exp), axis=1)
        predicted_label_total = tf.reduce_sum(predicted_label_exp, axis=1)
        predicted_label_prob = tf.divide(predicted_label_pos, predicted_label_total)
        ll = tf.reduce_sum(tf.log(tf.add(predicted_label_prob, tf.fill([self.n], 1e-9))))
        return ll
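
    # label_loss_predict computes sum_v log softmax(logits_v)[label_v], with
    # logits clipped at 10 before exponentiation for numerical stability. A
    # NumPy cross-check of the same quantity (a sketch; assumes `label` is a
    # one-hot matrix of shape (n, d)):
    def _label_ll_numpy(self, label, logits):
        e = np.exp(np.minimum(np.reshape(logits, (self.n, self.d)), 10.0))
        prob = np.sum(label * e, axis=1) / np.sum(e, axis=1)
        return np.sum(np.log(prob + 1e-9))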

    def likelihood_poisson(self, lambda_, x):
        # Poisson log-likelihood: x * log(lambda) - lambda - log(x!)
        #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32)
        x = tf.Print(x, [x], message="My debug_x_tf")
        # x is a tensor here, so gather the precomputed log(x!) entry rather
        # than indexing the Python-side table directly
        log_fact_tf = tf.gather(tf.convert_to_tensor(self.log_fact_k, dtype=tf.float32), tf.cast(x, tf.int32) - 1)
        return tf.subtract(tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_), log_fact_tf)
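
    # likelihood_poisson is the Poisson log-likelihood
    #   log P(x | lambda) = x * log(lambda) - lambda - log(x!),
    # with log(x!) read from the precomputed log_fact_k table. A NumPy sketch
    # of the same formula (hypothetical helper; assumes log_fact_k[k - 1]
    # stores log(k!)):
    def _poisson_ll_numpy(self, lambda_, x):
        return x * np.log(lambda_ + 1e-9) - lambda_ - self.log_fact_k[int(x) - 1]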

    def likelihood(self, adj, edges, neg_edges, weight_bin, prob_dict, w_edge, penalty):
        '''
        Negative log-likelihood of the edges, approximated with negative sampling.
        '''
        ll = 0
        k = 0
        with tf.variable_scope('NLL'):
            dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
            dec_mat = tf.exp(tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], tf.cast(10.0, dtype=tf.float32))))
            dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:")
            min_val = tf.reduce_mean(dec_mat)
            penalty = tf.exp(penalty)
            w_edge_resized = tf.reshape(w_edge, [self.n, self.n, self.bin_dim])
            w_edge_exp = tf.exp(tf.minimum(w_edge_resized, tf.fill([self.n, self.n, self.bin_dim], 10.0)))
            w_edge_pos = tf.reduce_sum(tf.multiply(weight_bin, w_edge_exp), axis=2)
            # print("Debug w_edge posscore", w_edge_pos.shape, dec_mat.shape)
            w_edge_total = tf.reduce_sum(w_edge_exp, axis=2)
            w_edge_score = tf.gather_nd(tf.divide(w_edge_pos, w_edge_total), edges)
            w_edge_score = tf.Print(w_edge_score, [w_edge_score], message="my w_edge_score values:")
            # print("Debug w_edge_score", w_edge_score.shape)

            comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), adj)
            comp = tf.Print(comp, [comp], message="my comp values:")

            negscore = tf.multiply(comp, dec_mat)
            negscore = tf.Print(negscore, [negscore], message="my negscore values:")
            negscore = tf.gather_nd(negscore, neg_edges)
            negscore_sum = tf.reduce_sum(negscore)

            posscore = tf.gather_nd(dec_mat, edges)
            # print("Debug posscore", posscore.shape)
            posscore = tf.Print(posscore, [posscore], message="my posscore values:")
            pos_weight_score = tf.multiply(posscore, w_edge_score)
            st = tf.stack([tf.shape(pos_weight_score)[0]])[0]

            softmax_out = tf.divide(pos_weight_score, negscore_sum)
            penalty = tf.log(tf.divide(penalty, negscore_sum))
            comp = tf.Print(comp, [comp], message="my comp values:")

            ll += tf.reduce_sum(tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9)))) + penalty
            ll = tf.Print(ll, [ll], message="My loss")

        return ll
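
    # likelihood() scores a positive edge (u, v) as
    #   exp(s_uv) * P(w_uv | u, v) / sum over sampled negative edges of exp(s),
    # i.e. a negative-sampling approximation of the softmax partition function,
    # with the penalty folded through the same normaliser. One edge's
    # contribution in NumPy (illustrative only):
    @staticmethod
    def _edge_ll_numpy(pos_score, weight_prob, neg_scores):
        return np.log(pos_score * weight_prob / np.sum(neg_scores) + 1e-9)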

    def get_trajectories_nevae(self, p_theta, w_theta, edges, weight, n_fill_edges, atom_list):
            indicator = np.ones([self.n, self.bin_dim])
            list_edges = []
            degree = np.zeros(self.n)
            for i in range(self.n):
                for j in range(i+1, self.n):
                    # removing the possibility of a hydrogen-hydrogen bond and an oxygen-oxygen bond
                    if (atom_list[i] > 1 or atom_list[j] > 1) and (atom_list[i]!=2 or atom_list[j]!=2):
                        list_edges.append((i,j,1))
                        list_edges.append((i,j,2))
                        list_edges.append((i,j,3))

            known_edges = []
            for i in range(self.n):
                # the atom is hydrogen
                if atom_list[i] <= 1:
                    indicator[i][1] = 0
                if atom_list[i] <= 2:
                    indicator[i][2] = 0

            for k in range(self.E):
                (u, v) = edges[k]
                w = weight[u][v]
                degree[u] += w
                degree[v] += w
                if (atom_list[u] - degree[u]) == 0:
                    indicator[u][0] = 0
                if (atom_list[u] - degree[u]) <= 1:
                    indicator[u][1] = 0
                if (atom_list[u] - degree[u]) <= 2:
                    indicator[u][2] = 0

                if (atom_list[v] - degree[v]) == 0:
                    indicator[v][0] = 0
                if (atom_list[v] - degree[v]) <= 1:
                    indicator[v][1] = 0
                if (atom_list[v] - degree[v]) <= 2:
                    indicator[v][2] = 0

                if u < v:
                    list_edges.remove((u, v, 1))
                    list_edges.remove((u, v, 2))
                    list_edges.remove((u, v, 3))
                    known_edges.append((u, v, w))
                else:
                    list_edges.remove((v, u, 1))
                    list_edges.remove((v, u, 2))
                    list_edges.remove((v, u, 3))
                    known_edges.append((v, u, w))

            trial = 0
            G_list = []
            adj_list = []
            G_best = ''
            for j in range(1000):
                # allocate a fresh adjacency matrix for every accepted candidate
                adj = np.zeros((self.n, self.n))
                prob = np.reshape(p_theta, [self.n, self.n])
                w_edge = np.reshape(w_theta, [self.n, self.n, 3])
                edges = self.get_masked_candidate_with_atom_ratio_new(prob, w_edge, atom_count=atom_list, num_edges=n_fill_edges, hde=1)
                G = nx.parse_edgelist(edges, nodetype=int)
                if nx.is_connected(G):
                    print("Connected")
                    for (u, v) in G.edges():
                        adj[int(u)][int(v)] = 1  # int(G[u][v]["weight"])
                        adj[int(v)][int(u)] = 1  # int(G[u][v]["weight"])
                    adj_list.append(adj)
                    G_list.append(G)

            #rest = range(self.n)
            candidate_edges_list = get_masked_candidate(self.n, list_edges, known_edges, p_theta, w_theta, n_fill_edges, indicator, degree, atom_list)
            for candidate_edges in candidate_edges_list:
                adj = np.zeros((self.n, self.n))
                if len(candidate_edges) > 0:
                    candidate_edges_weighted = []
                    for (u, v, w) in candidate_edges:

                        if int(u) < int(v):
                            candidate_edges_weighted.append(str(u) + ' ' + str(v) + ' ' + "{'weight':"+str(w)+"}")
                        else:
                            candidate_edges_weighted.append(str(v) + ' ' + str(u) + ' ' + "{'weight':"+str(w)+"}")
                    
                    G = nx.parse_edgelist(candidate_edges_weighted, nodetype=int)

                    for i in range(self.n):
                        if i not in G.nodes():
                            G.add_node(i)

                    if nx.is_connected(G): 
                        for (u, v, w) in candidate_edges:
                            adj[int(u)][int(v)] = int(w)
                            adj[int(v)][int(u)] = int(w)
                        adj_list.append(adj)
                        G_list.append(G)
            return adj_list, G_list
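
    # In get_trajectories_nevae the indicator matrix tracks remaining valence:
    # bin b of node u is switched off once atom_list[u] - degree[u] <= b, so no
    # sampled bond of weight b + 1 can exceed the atom's free valence. A sketch
    # of that update rule (hypothetical helper):
    @staticmethod
    def _update_indicator(indicator, atom_list, degree, u):
        free = atom_list[u] - degree[u]
        for b in range(3):
            if free <= b:
                indicator[u][b] = 0
        return indicator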


    def initialize(self):
        logger.info("Initialization of parameters")
        # self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    
    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables(), max_to_keep = 20)
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt is None or ckpt.model_checkpoint_path is None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def partial_restore(self, copydir):
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)
        self.initialize()
        print("Debug all", tf.global_variables())
        var_old = [v for v in tf.global_variables() if "RL" not in v.name]
        print("Debug var_old", var_old)
        saver = tf.train.Saver(var_old)
        ckpt = tf.train.get_checkpoint_state(copydir)

        #print_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path, tensor_name='', all_tensors='')
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        #print_tensors_in_checkpoint_file(ckpt, all_tensors=True, tensor_name='')
        saver.restore(self.sess, ckpt.model_checkpoint_path)
        var_new = [v for v in tf.global_variables() if ("RL" in v.name and "Poisson" in v.name) ]
        for v in var_new:
            v_old_temp = [v_old for v_old in tf.global_variables() if v_old.name == v.name.replace("RL", "") ]
            if len(v_old_temp) == 0:
                continue
            v_old = v_old_temp[0]
            print("v_old", v_old.value(), v_old.name)
            #if v_old  in var_old
            assign = tf.assign(v, v_old)
            self.sess.run(assign)
            #v = tf.Variable(v.name.replace("RL", ""))
            print("v_new", v, v.name)
 

    def copy_weight(self, copydir):
        self.initialize()
        print("Debug all", tf.global_variables())
        var_old = [v for v in tf.global_variables() if "RL" not in v.name]
        print("Debug var_old", var_old)
        saver_old = tf.train.Saver(var_old, max_to_keep=20)
        ckpt = tf.train.get_checkpoint_state(copydir)
        
        #print_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path, tensor_name='', all_tensors='')
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        #print_tensors_in_checkpoint_file(ckpt, all_tensors=True, tensor_name='')
        saver_old.restore(self.sess, ckpt.model_checkpoint_path)
        var_new = [v for v in tf.global_variables() if "RL" in v.name]
        print("Debug var_new", var_new)
        for v in var_new:
            v_old_temp = [v_old for v_old in tf.global_variables() if v_old.name == v.name.replace("RL", "")]
            if len(v_old_temp) == 0:
                continue
            v_old = v_old_temp[0]
            print("v_old", v_old.value(), v_old.name)
            #if v_old in var_old
            assign = tf.assign(v, v_old)
            self.sess.run(assign)
            #v = tf.Variable(v.name.replace("RL", ""))
            print("v_new", v, v.name)
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)

    def train(self, placeholders, hparams, adj, weight, weight_bin, weight_bin1, features, edges, all_edges, features1, atom_list):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        with open(hparams.out_dir + '/iteration.txt', 'r') as f1:
            iter1 = int(f1.read().strip())
        iteration = iter1
        
        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)
        
        start_before_epoch = time.time()
        importance_weight = 0.0
        tvars = tf.trainable_variables()
        g_vars = [var for var in tvars if 'RL' in var.name]
        print("Debug g_vars", g_vars)
        
        grad_local = []
        for x in range(len(g_vars)):
            a = np.zeros(shape=g_vars[x].get_shape().as_list(), dtype=float)
            #a.fill(0.0)
            print("Debug a", a, a.shape)
            grad_local.append(a)
        print("Debug gradlocal", grad_local, g_vars[0].get_shape().as_list())
        all_edges_local = []
        for i in range(self.n):
            for j in range(self.n):
                all_edges_local.append((i,j))
        prev_loss = 10000
        epoch = 0
        
        
        #print "Debug props logp", mean_logp, std_logp, "SAS: ", mean_sas, std_sas, "Cycle :", mean_cycle, std_cycle
       
        while (epoch < num_epochs):
        #for epoch in range(num_epochs):
                i = 0
            	#print "Debug inside loop", epoch
                start = time.time()
                start1 = time.time()
                feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders)
                # we will sample 50 z values here
                count = 0
                total_train_loss = 0.0
                total_ll_loss = 0.0
                while count < 30:
                    eps = np.random.randn(self.n, self.z_dim, 1)  
                    feed_dict.update({self.input_data: np.zeros([self.k,self.n,self.d])})
                    feed_dict.update({self.eps: eps})
                    feed_dict.update({self.all_edges: [all_edges_local]})
                    list_adj = []
                    list_prop = []
                    list_edge = []
                    list_neg_edge = []
                    prob, w_edge, rl_prob, rl_w_edge, lambda_e, z_encoded, label = self.sess.run([self.prob, self.w_edge, self.rl_dec_out, self.rl_w_edge, self.lambda_e,  self.z_encoded, self.label], feed_dict=feed_dict)
                    features, atom_list = self.getatoms(self.n, label)
                    if len(atom_list) == 0:
                        print "getatom not satisfied bad Z"
                        end2 = time.time()
                        continue
                    max_edges_possible = int(sum(atom_list)/2)
                    n_edges = max_edges_possible + 1
                
                    while(n_edges > max_edges_possible or n_edges < (self.n - 1) ):
                        n_edges = np.random.poisson(lambda_e) 
                
                    end1 = time.time()
                    weights = []
                    weight_bins = []
                    properties = []
                    pos_edges = []
                    neg_edges = []
                    list_penalty = []
                    qed_list = []
                    t_list, G_list = self.get_trajectories_nevae(rl_prob, rl_w_edge, edges[i][0], weight[i], n_edges - self.E, atom_list)
                    edge_len = []
                    for j in range(len(t_list)):
                        t = t_list[j]
                        G = G_list[j]
                        qed = compute_cost_qed(G, hparams.out_dir + "temp.txt")
                        qed_list.append(qed)
                        properties.append(qed)
                        edge_len.append(len(G.edges()))
                    index_list = np.argsort(properties)[:hparams.no_traj]
                    if len(index_list) < hparams.no_traj or properties[index_list[0]] == 2.0:
                        continue
                    max_edge = max(edge_len)
                    properties_new = []
                    candidate_edges = []
                    for j in range(hparams.no_traj):
                        index = index_list[j]
                        t = t_list[index]
                        G = G_list[index]
                        rl_prob_reshape = np.reshape(rl_prob, [self.n, self.n])
                        minval = min(rl_prob[0])
                        penalty = 0.0
                        penalty_index = np.unravel_index(np.argmin(rl_prob_reshape, axis=None), rl_prob_reshape.shape)

                        penalty_edges = []
                        if len(G.edges()) < max_edge:
                            diff = max_edge - len(G.edges())
                            while diff > 0:
                                # charge the minimum edge score once per missing edge
                                penalty += minval
                                penalty_edges.append(penalty_index)
                                diff -= 1
                    
                        weights.append(t)
                        weight_bins.append(get_weight_bins(self.n, self.bin_dim, G))
                        properties_new.append(properties[index])
                        candidate_edges.append(list(G.edges_iter(data='weight')))
                        # print("Debug penalty edges", penalty_edges)
                        list_penalty.append(penalty)
                        penalty_edges.extend(list(G.edges()))
                        pos_edges.append(penalty_edges)
                        G_comp = nx.complement(G) 
                        comp_edges = list(G_comp.edges())
                        neg_indices = np.random.choice(range(len(comp_edges)), hparams.neg_sample_size, replace=False)
                        neg_edges_to_be_extended = [comp_edges[index] for index in neg_indices]
                        neg_edges.append(neg_edges_to_be_extended)
                
                    #print("Debug shapes pos_edge", pos_edge)
                    feed_dict.update({self.trajectories: weights})
                    feed_dict.update({self.properties:properties_new})
                    feed_dict.update({self.neg_edges: neg_edges})
                    feed_dict.update({self.edges:np.array(pos_edges)})
                    feed_dict.update({self.n_edges:n_edges})
                    feed_dict.update({self.features: features})
                    feed_dict.update({self.penalty: list_penalty})
                    
                    feed_dict.update({self.weight_bin: weight_bins})
                    
                    _, grad,  train_loss, ll_loss = self.sess.run([self.apply_transform_op,  self.grad, self.loss, self.ll_loss], feed_dict=feed_dict)
                    
                    print("Time size of graph", len(tf.get_default_graph().get_operations()))
                    properties_original = [1.0 - x for x in properties_new]
                    total_train_loss += train_loss 
                    total_ll_loss += ll_loss
                    
                    print("LOSS ",count, train_loss, ll_loss,  properties_original)
                    print("candiadte1", candidate_edges[0])
		    print("candiadte2", candidate_edges[1])
		    print("candidate3", candidate_edges[2])
                    end2 = time.time()
                    count += 1
                iteration += 1
                prev_loss = train_loss
                epoch += 1
                if iteration % hparams.log_every == 0 and iteration > 0:
                    #print(train_loss)
                    print("{}/{}(epoch {}), train_loss = {}, ll_loss={}".format(iteration, num_epochs, epoch + 1, total_train_loss, total_ll_loss))
                    checkpoint_path = os.path.join(savedir, 'model.ckpt')
                    saver.save(self.sess, checkpoint_path, global_step=iteration)
                    logger.info("model saved to {}".format(checkpoint_path))
                end = time.time()
                print("Time taken for a batch: ", end - start, end2 - start1)
        end_after_epoch = time.time()
        print("Time taken to completed all epochs", -start_before_epoch + end_after_epoch)
        with open(hparams.out_dir + '/iteration.txt', 'w') as f1:
            f1.write(str(iteration))
        

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight):

        eps = np.random.randn(self.n, self.z_dim, 1)

        feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d,
                                        hparams.decay_rate, placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})

        prob, ll, kl, w_edge, embedding = self.sess.run([self.prob, self.ll, self.kl, self.w_edge, self.z_encoded],
                                                        feed_dict=feed_dict)
        return embedding

    def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge, atom_count, num_edges, hde):
        rest = range(self.n)
        nodes = []
        hn = []
        on = []
        nn = []
        cn = []

        for i in range(self.n):
            if atom_count[i] == 1:
                hn.append(i)
            if atom_count[i] == 2:
                on.append(i)
            if atom_count[i] == 3 or atom_count[i] == 5:
                nn.append(i)
            if atom_count[i] == 4:
                cn.append(i)


        nodes.extend(hn)
        nodes.extend(cn)
        nodes.extend(on)
        nodes.extend(nn)

        node_list = atom_count
        print("Debug nodelist", node_list)
        
        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)

        for node in hn:
            indicator[node][1] = 0
            indicator[node][2] = 0
        for node in on:
            indicator[node][2] = 0

        # two hydrogen atoms cannot share an edge
        for n1 in hn:
            for n2 in hn:
                edge_mask[n1][n2] = 0
        candidate_edges = []
        # first generate the edges incident to hydrogen atoms, sequentially
        index = 0
        i = 0
        hydro_sat = np.zeros(self.n)
        # handle hydrogen first
        try:
            for node in nodes:
                deg_req = node_list[node]
                d = degree[node]
                list_edges = get_candidate_neighbor_edges(node, self.n)
                if node in hn:
                    for i1 in range(self.n):
                        if hydro_sat[i1] == node_list[i1] - 1:
                            edge_mask[i1][node] = 0
                            edge_mask[node][i1] = 0
                while d < deg_req:
                    p = normalise_h1(prob, w_edge, self.bin_dim, indicator, edge_mask, node)

                    candidate_edges.extend([
                        list_edges[k] for k in np.random.choice(
                            range(len(list_edges)), [1], p=p, replace=False)
                    ])

                    (u, v, w) = candidate_edges[i]
                    degree[u] += w
                    degree[v] += w
                    d += w
                    if u in hn:
                        hydro_sat[v] += 1
                    if v in hn:
                        hydro_sat[u] += 1
                    edge_mask[u][v] = 0
                    edge_mask[v][u] = 0

                    if (node_list[u] - degree[u]) == 0:
                        indicator[u][0] = 0
                    if (node_list[u] - degree[u]) <= 1:
                        indicator[u][1] = 0
                    if (node_list[u] - degree[u]) <= 2:
                        indicator[u][2] = 0

                    if (node_list[v] - degree[v]) == 0:
                        indicator[v][0] = 0
                    if (node_list[v] - degree[v]) <= 1:
                        indicator[v][1] = 0
                    if (node_list[v] - degree[v]) <= 2:
                        indicator[v][2] = 0

                    i += 1
                    # print("Debug candidate_edges", candidate_edges[i - 1])
                    #    print("change state", el, degree[el], node_list[el], indicator[el])
                    #'''
        except:
            if len(candidate_edges) < 1:
                candidate_edges = []
        candidate_edges_new = []
        for (u, v, w) in candidate_edges:
            if u < v:
                candidate_edges_new.append(str(u) + ' ' + str(v) + ' ' + "{'weight':"+str(w)+"}")
            else:
                candidate_edges_new.append(str(v) + ' ' + str(u) + ' ' + "{'weight':"+str(w)+"}")
        print("Candidate_edges_new", candidate_edges_new)
        return candidate_edges_new
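
    # The routine above fills valences greedily: nodes are visited hydrogen
    # first, then carbon, oxygen and nitrogen, and for each node edges are
    # drawn from normalise_h1's distribution until the sampled degree d reaches
    # the required valence deg_req, with indicator and edge_mask pruning
    # chemically invalid choices (saturated atoms, hydrogen-hydrogen bonds).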


    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample 1000 times
        count = 0
        structure_list = defaultdict(int)

        # while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator)
            candidate_edges = [list_edges[k] for k in
                               np.random.choice(range(len(list_edges)), [num_edges], p=p, replace=False)]
            structure_list[' '.join([str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in
                                     sorted(candidate_edges, key=itemgetter(0))])] += 1

            # structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1

        # return the element which has been sampled maximum time
        return max(structure_list.items(), key=itemgetter(1))[0]


    def getatoms(self, node, label):
        label_new = np.reshape(label, (node, self.d))
        #print("Debug label original shape:", label_new)
        temp = np.zeros((node, self.d))
        temp.fill(50)
        # print(temp, label_new.shape)
        minval = np.minimum(label_new, temp)
        label_new = np.exp(minval)
        #print("Debug label exp shape:", label_new)
        s = label_new.shape[0]
        #print("Debug label shape:", label_new.shape, s)

        label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1))
        #print("Debug label sum:", label_new_sum.shape, label_new_sum)

        prob_label = label_new / label_new_sum
        
        count = 500
        while (count > 0):
            pred_label = []
            #np.zeros(4)
            valency_arr = np.zeros(node)
            h_c = 0
            o_c = 0
            n_c = 0
            c_c = 0
            for i in range(node):
                valency = np.random.choice(range(4), [1], p=prob_label[i])
                temp = np.zeros(4)
                temp[valency] += 1
                pred_label.append(temp)
                valency_arr[i] = valency + 1
                if valency == 0:
                    h_c += 1
                if valency == 1:
                    o_c += 1
                if valency == 2:
                    n_c += 1
                if valency == 3:
                    c_c += 1
            if sum(valency_arr) >= 2 * (self.n - 1):
                break
            count -= 1
        if sum(valency_arr) < 2 * (self.n - 1):
            valency_arr = []
        return (pred_label, valency_arr)
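
    # The acceptance test sum(valency_arr) >= 2 * (self.n - 1) is the handshake
    # lemma applied to a spanning tree: a connected graph on n nodes needs at
    # least n - 1 edges and every edge consumes two units of valence, so any
    # assignment with a smaller valence total can never give a connected
    # molecule; such draws are retried up to 500 times and then rejected.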