def __init__(self, hparams, placeholders, num_nodes, num_features, edges, istest=False):
    self.features_dim = num_features
    self.input_dim = num_nodes
    self.dropout = placeholders['dropout']
    self.k = hparams.random_walk
    self.lr = placeholders['lr']
    self.decay = placeholders['decay']
    self.n = num_nodes
    self.d = num_features
    self.z_dim = hparams.z_dim
    self.count = 0
    self.edges = edges
    self.mask_weight = hparams.mask_weight
    #self.edges, self.non_edges = edges, non_edges
    #logger.info("Building model starts...")

    def masked_gen(posscore, negscore):
        indicator = []
        for i in range(self.n):
            indicator.append(tf.ones(self.n))
        temp_posscore = tf.reduce_sum(posscore)
        ll = 0.0
        for (u, v) in self.edges[self.count]:
            print("Debug", posscore[0].shape, indicator[0].shape)
            #tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v]
            ll += tf.log(tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v] / (temp_posscore + negscore[u][v]) + 1e-09)
            ll += tf.log(tf.multiply(tf.reshape(posscore[v], [1, self.n]), indicator[v])[0][u] / (temp_posscore + negscore[v][u]) + 1e-09)
            indicator[u] = np.multiply(tf.subtract(tf.ones([1, self.n]), self.adj[v]), indicator[u])
            indicator[v] = np.multiply(tf.subtract(tf.ones([1, self.n]), self.adj[u]), indicator[v])
            temp_posscore = temp_posscore - tf.reduce_sum(posscore[u])
            temp = tf.multiply(indicator[u], tf.reshape(posscore[u], [self.n]))
            temp_posscore += tf.reduce_sum(temp)
            temp_posscore = temp_posscore - tf.reduce_sum(posscore[v]) + tf.reduce_sum(tf.multiply(indicator[v], posscore[v]))
            temp_posscore = temp_posscore - tf.reduce_sum(tf.transpose(posscore)[u]) + tf.reduce_sum(tf.multiply(indicator[u], tf.transpose(posscore)[u]))
            temp_posscore = temp_posscore - tf.reduce_sum(tf.transpose(posscore)[v]) + tf.reduce_sum(tf.multiply(indicator[v], tf.transpose(posscore)[v]))
        return ll

    def neg_loglikelihood(prob_dict):
        '''
        negative loglikelihood of the edges
        '''
        ll = 0
        k = 0
        with tf.variable_scope('NLL'):
            dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
            '''
            dec_mat_temp = np.zeros((self.n, self.n))
            for i in range(self.n):
                for j in range(i+1, self.n):
                    print("Debug", prob_dict[k])
                    dec_mat_temp[i][j] = prob_dict[k][0]
                    dec_mat_temp[j][i] = prob_dict[k][0]
                    k += 1
            #'''
            #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]), tf.fill([self.n, self.n], 10.0)))
            dec_mat = tf.exp(tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))
            dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:")
            print("Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat)
            comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), self.adj)
            comp = tf.Print(comp, [comp], message="my comp values:")
            temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
            negscore = tf.fill([self.n, self.n], temp + 1e-9)
            negscore = tf.Print(negscore, [negscore], message="my negscore values:")
            posscore = tf.multiply(self.adj, dec_mat)
            posscore = tf.Print(posscore, [posscore], message="my posscore values:")
            #dec_out = tf.multiply(self.adj, dec_mat)
            softmax_out = tf.truediv(posscore, tf.add(posscore, negscore))
            ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n, self.n], 1e-9))), 1)
            if hparams.mask_weight:
                ll = masked_gen(posscore, negscore)
                #ll = masked_ll(posscore, negscore)
        return (-ll)

    def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
        '''
        Kullback-Leibler divergence between two Gaussian distributions
        '''
        print(sigma_1.shape, sigma_2.shape)
        with tf.variable_scope("kl_gaussisan"):
            temp_stack = []
            for i in range(self.n):
                temp_stack.append(tf.square(sigma_1[i]))
            first_term = tf.trace(tf.stack(temp_stack))
            temp_stack = []
            for i in range(self.n):
                temp_stack.append(tf.matmul(tf.transpose(mu_1[i]), mu_1[i]))
            second_term = tf.reshape(tf.stack(temp_stack), [self.n])
            #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
            k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))
            temp_stack = []
            #for i in range(self.n):
            #    temp_stack.append(tf.log(tf.truediv(tf.matrix_determinant(sigma_2[i]), tf.add(tf.matrix_determinant(sigma_1[i]), tf.fill([self.d, self.d], 1e-9)))))
            for i in range(self.n):
                temp_stack.append(tf.reduce_prod(tf.square(debug_sigma[i])))
            print("Debug", tf.stack(temp_stack).shape)
            third_term = tf.log(tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))
            print("debug KL", first_term.shape, second_term.shape, k.shape, third_term.shape, sigma_1[0].shape)
            #return 0.5 * tf.reduce_sum((
            return 0.5 * tf.add(tf.subtract(tf.add(first_term, second_term), k), third_term)

    def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out):
        kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma)  # KL divergence loss
        likelihood_loss = neg_loglikelihood(dec_out)  # Cross entropy loss
        self.ll = likelihood_loss
        self.kl = kl_loss
        return tf.reduce_mean(kl_loss + likelihood_loss)

    self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj')
    self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features')
    self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input')
    self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps')
    self.cell = VAEGCell(self.adj, self.features, self.z_dim)
    self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded = self.cell.call(self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
    self.prob = dec_out
    print('Debug', dec_out.shape)
    self.z_encoded = z_encoded
    self.enc_mu = enc_mu
    self.enc_sigma = enc_sigma
    self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out)
    print_vars("trainable_variables")
    #self.lr = tf.Variable(self.lr, trainable=False)
    self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.grad = self.train_op.compute_gradients(self.cost)
    self.grad_placeholder = [(tf.placeholder("float", shape=gr[1].get_shape()), gr[1]) for gr in self.grad]
    #self.capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grad]
    #self.tgv = [self.grad]
    #self.apply_transform_op = self.train_op.apply_gradients(self.grad_placeholder)
    #self.apply_transform_op = self.train_op.apply_gradients(self.capped_gvs)
    self.apply_transform_op = self.train_op.apply_gradients(self.grad)
    #self.lr = tf.Variable(self.lr, trainable=False)
    #self.gradient = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).compute_gradients(self.cost)
    #self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).minimize(self.cost)
    #self.check_op = tf.add_check_numerics_ops()
    self.sess = tf.Session()
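# Reference note (added for clarity, not part of the original computation): for a
# diagonal Gaussian posterior N(mu, diag(sigma^2)) measured against a standard normal
# prior N(0, I), the standard closed-form KL divergence per node is
#     KL = 0.5 * (sum_j sigma_j^2 + ||mu||^2 - z_dim - log prod_j sigma_j^2).
# kl_gaussian above assembles the analogous per-node pieces: first_term = sum of
# sigma^2, second_term = ||mu||^2, k = z_dim, and third_term = log prod(debug_sigma^2).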
class VAEG(VAEGConfig):
    def __init__(self, hparams, placeholders, num_nodes, num_features, edges, log_fact_k, hde, istest=False):
        self.features_dim = num_features
        self.input_dim = num_nodes
        self.dropout = placeholders['dropout']
        self.k = hparams.random_walk
        self.lr = placeholders['lr']
        self.decay = placeholders['decay']
        self.n = num_nodes
        self.d = num_features
        self.z_dim = hparams.z_dim
        self.bin_dim = hparams.bin_dim
        self.edges = edges
        self.count = 0
        self.mask_weight = hparams.mask_weight
        self.log_fact_k = log_fact_k
        self.hde = hde

        # For masking, the likelihood is calculated as follows
        def masked_ll(weight_temp, weight_negative, posscore, posweightscore, temp_pos_score, temp):
            degree = np.zeros([self.n], dtype=np.float32)
            indicator = np.ones([self.n, self.bin_dim], dtype=np.float32)
            indicator_bridge = np.ones([self.n, self.n], dtype=np.float32)
            #ring_indicator = np.ones([self.n])
            ll = 0.0
            adj = np.zeros([self.n, self.n], dtype=np.float32)
            #for (u, v, w) in self.edges[self.count]:
            for i in range(len(self.edges[self.count])):
                (u, v, w) = self.edges[self.count][i]
                degree[u] += w
                degree[v] += w
                modified_weight = tf.reduce_sum(tf.multiply(np.multiply(indicator[u], indicator[v]), weight_temp[u][v])) / weight_negative[u][v]
                modified_posscore_weighted = modified_weight * posscore[u][v] * indicator_bridge[u][v] * 1.0
                currentscore = modified_posscore_weighted * 1.0 / (temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                modified_weight = tf.reduce_sum(tf.multiply(np.multiply(indicator[v], indicator[u]), weight_temp[v][u])) / weight_negative[v][u]
                modified_posscore_weighted = modified_weight * posscore[v][u] * indicator_bridge[v][u] * 1.0
                currentscore = modified_posscore_weighted * 1.0 / (temp_pos_score + temp)
                ll += tf.log(currentscore + 1e-9)

                #indicator = np.ones([3], dtype=np.float32)
                #if degree[u] >= 5:
                #    indicator[u][0] = 0
                if degree[u] >= 4:
                    indicator[u][0] = 0
                    indicator[u][1] = 0
                if degree[u] >= 3:
                    indicator[u][1] = 0
                    indicator[u][2] = 0
                #if degree[v] >= 5:
                #    indicator[v][0] = 0
                if degree[v] >= 4:
                    indicator[v][0] = 0
                    indicator[v][1] = 0
                if degree[v] >= 3:
                    indicator[v][1] = 0
                    indicator[v][2] = 0
                # From the next step there will be no double bond, which ensures
                # alternating bonds and that there will be no bridge
                if w == 2:
                    indicator[u][1] = 0
                    indicator[v][1] = 0
            # If we don't want negative sampling we can uncomment the following
            '''
            for i in range(self.n):
                modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[u][i])) / weight_negative[u][i]
                modified_posscore_weighted = modified_weight * posscore[u][i] * 1.0
                temp_pos_score = temp_pos_score - posweightscore[u][i] + modified_posscore_weighted
                #posweightscore[u][i] = modified_posscore_weighted
                #temp_posscore[u][i] = tf.reduce_sum(-posweightscore[u][i] + modified_posscore_weighted)

                modified_weight = tf.reduce_sum(tf.multiply(indicator[u], weight_temp[i][u])) / weight_negative[i][u]
                modified_posscore_weighted = modified_weight * posscore[i][u] * 1.0
                temp_pos_score = temp_pos_score - posweightscore[i][u] + modified_posscore_weighted
                #posweightscore[i][u] = modified_posscore_weighted
                #temp_posscore[i][u] = tf.reduce_sum(-posweightscore[i][u] + modified_posscore_weighted)

                modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[v][i])) / weight_negative[v][i]
                modified_posscore_weighted = modified_weight * posscore[v][i] * 1.0
                temp_pos_score = temp_pos_score - posweightscore[v][i] + modified_posscore_weighted
                #posweightscore[v][i] = modified_posscore_weighted
                #temp_posscore[v][i] = tf.reduce_sum(-posweightscore[v][i] + modified_posscore_weighted)

                modified_weight = tf.reduce_sum(tf.multiply(indicator[v], weight_temp[i][v])) / weight_negative[i][v]
                modified_posscore_weighted = modified_weight * posscore[i][v] * 1.0
                temp_pos_score = temp_pos_score - posweightscore[i][v] + modified_posscore_weighted
            '''
            return ll

        def neg_loglikelihood(prob_dict, w_edge):
            '''
            negative loglikelihood of the edges
            '''
            ll = 0
            k = 0
            with tf.variable_scope('NLL'):
                dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n])
                w_edge_new = tf.reshape(w_edge, [self.n, self.n, self.bin_dim])
                #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]), tf.fill([self.n, self.n], 10.0)))
                weight_negative = []
                weight_stack = []
                w_edge_new = tf.exp(tf.minimum(w_edge_new, tf.fill([self.n, self.n, self.bin_dim], 10.0)))
                weight_temp = tf.multiply(self.weight_bin, w_edge_new)
                for i in range(self.n):
                    for j in range(self.n):
                        weight_negative.append(tf.reduce_sum(w_edge_new[i][j]))
                        weight_stack.append(tf.reduce_sum(weight_temp[i][j]))
                weight_stack = tf.reshape(weight_stack, [self.n, self.n])
                weight_negative = tf.reshape(weight_negative, [self.n, self.n])
                w_score = tf.truediv(weight_stack, weight_negative)
                weight_comp = tf.subtract(tf.fill([self.n, self.n], 1.0), w_score)
                dec_mat = tf.exp(tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0)))
                dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:")
                comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), self.adj)
                comp = tf.Print(comp, [comp], message="my comp values:")
                temp = tf.reduce_sum(tf.multiply(comp, dec_mat))
                negscore = tf.multiply(tf.fill([self.n, self.n], temp + 1e-9), weight_comp)
                negscore = tf.Print(negscore, [negscore], message="my negscore values:")
                posscore = tf.multiply(self.adj, dec_mat)
                posscore = tf.Print(posscore, [posscore], message="my posscore values:")
                posweightscore = tf.multiply(posscore, w_score)
                temp_pos_score = tf.reduce_sum(posweightscore)
                posweightscore = tf.Print(posweightscore, [posweightscore], message="my weighted posscore")
                softmax_out = tf.truediv(posweightscore, tf.add(posweightscore, negscore))
                if self.mask_weight:
                    #print("Mask weight option")
                    ll = masked_ll(weight_temp, weight_negative, posscore, posweightscore, temp_pos_score, temp)
                else:
                    ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n, self.n], 1e-9))))
                ll = tf.Print(ll, [ll], message="My loss")
            return (-ll)

        def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
            '''
            Kullback-Leibler divergence between two Gaussian distributions
            '''
            print(sigma_1.shape, sigma_2.shape)
            with tf.variable_scope("kl_gaussisan"):
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.square(sigma_1[i]))
                first_term = tf.trace(tf.stack(temp_stack))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.matmul(tf.transpose(mu_1[i]), mu_1[i]))
                second_term = tf.reshape(tf.stack(temp_stack), [self.n])
                #k = tf.fill([self.n], tf.cast(self.d, tf.float32))
                k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))
                temp_stack = []
                for i in range(self.n):
                    temp_stack.append(tf.reduce_prod(tf.square(debug_sigma[i])))
                third_term = tf.log(tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))
                return 0.5 * tf.add(tf.subtract(tf.add(first_term, second_term), k), third_term)

        def ll_poisson(lambda_, x):
            return -(x * np.log(lambda_) - lambda_ * np.log(2.72) - self.log_fact_k[x - 1])

        def label_loss_predict(label, predicted_label):
            predicted_label_new = tf.reshape(predicted_label, [self.n, self.d])
            return tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=predicted_label_new)

        def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label):
            kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma)  # KL divergence loss
            likelihood_loss = neg_loglikelihood(dec_out, w_edge)  # Cross entropy loss
            self.ll = likelihood_loss
            self.kl = kl_loss
            # For ZINC
            lambda_e = 31
            lambda_n = 30
            #lambda_hde = 5
            #lambda_e = 24
            #lambda_n = 24
            edgeprob = ll_poisson(lambda_e, len(self.edges[self.count]))
            nodeprob = ll_poisson(lambda_n, self.n)
            label_loss = label_loss_predict(self.features, label)
            #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
            return tf.reduce_mean(kl_loss + label_loss) + edgeprob + nodeprob + likelihood_loss

        self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj')
        self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features')
        self.weight = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name="weight")
        self.weight_bin = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n, hparams.bin_dim], name="weight_bin")
        self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input')
        self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps')
        self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim)
        self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label = self.cell.call(self.input_data, self.n, self.d, self.k, self.eps, hparams.sample)
        self.prob = dec_out
        #print('Debug', dec_out.shape)
        self.z_encoded = z_encoded
        self.enc_mu = enc_mu
        self.enc_sigma = enc_sigma
        self.w_edge = w_edge
        self.label = label
        self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label)
        print_vars("trainable_variables")
        #self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.grad = self.train_op.compute_gradients(self.cost)
        self.grad_placeholder = [(tf.placeholder("float", shape=gr[1].get_shape()), gr[1]) for gr in self.grad]
        self.apply_transform_op = self.train_op.apply_gradients(self.grad)
        #self.lr = tf.Variable(self.lr, trainable=False)
        self.sess = tf.Session()

    def initialize(self):
        logger.info("Initialization of parameters")
        #self.sess.run(tf.initialize_all_variables())
        self.sess.run(tf.global_variables_initializer())

    def restore(self, savedir):
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(savedir)
        if ckpt is None or ckpt.model_checkpoint_path is None:
            self.initialize()
        else:
            print("Load the model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def train(self, placeholders, hparams, adj, weight, weight_bin, features):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate
        f1 = open(hparams.out_dir + '/iteration.txt', 'r')
        iteration = int(f1.read().strip())
        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables())
        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)
        for epoch in range(num_epochs):
            start = time.time()
            for i in range(len(adj)):
                self.count = i
                if len(self.edges[self.count]) == 0:
                    continue
                # Learning rate decay
                #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch)))
                feed_dict = \
construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) feed_dict.update({self.adj: adj[i]}) #print "Debug", features[i].shape eps = np.random.randn(self.n, self.z_dim, 1) #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) feed_dict.update({self.features: features[i]}) feed_dict.update({self.weight_bin: weight_bin[i]}) feed_dict.update({self.weight: weight[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) grad_vals = self.sess.run([g[0] for g in self.grad], feed_dict=feed_dict) for j in xrange(len(self.grad_placeholder)): feed_dict.update( {self.grad_placeholder[j][0]: grad_vals[j]}) input_, train_loss, _, probdict, cx, w_edge = self.sess.run( [ self.input_data, self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge ], feed_dict=feed_dict) iteration += 1 if iteration % hparams.log_every == 0 and iteration > 0: print(train_loss) print("{}/{}(epoch {}), train_loss = {:.6f}".format( iteration, num_epochs, epoch + 1, train_loss)) #print(probdict) checkpoint_path = os.path.join(savedir, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) end = time.time() print("Time taken for a batch: ", end - start) f1 = open(hparams.out_dir + '/iteration.txt', 'w') f1.write(str(iteration)) def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.weight_bin: weight_bin}) feed_dict.update({self.weight: weight}) prob, ll, kl, w_edge, embedding = self.sess.run( [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded], feed_dict=feed_dict) return embedding def sample_graph_slerp(self, hparams, placeholders, s_num, G_good, G_bad, inter, ratio, index, num=10): # Agrs : # G_good : embedding of the train graph or good sample # G_bad : embedding of the bad graph list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): #list_edges.append((i,j)) list_edges.append((i, j, 1)) list_edges.append((i, j, 2)) list_edges.append((i, j, 3)) list_weights = [1, 2, 3] #for sample in range(s_num): new_graph = [] for i in range(self.n): #for i in range(index, index+1): node_good = G_good[i] node_bad = G_bad[i] if i == index: if inter == "lerp": new_graph.append( lerp(np.reshape(node_good, -1), np.reshape(node_bad, -1), ratio)) else: new_graph.append( slerp(np.reshape(node_good, -1), np.reshape(node_bad, -1), ratio)) else: new_graph.append(np.reshape(node_good, -1)) eps = np.array(new_graph) eps = eps.reshape(eps.shape + (1, )) hparams.sample = True feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) #TODO adj and deg are filler and does not required while sampling. 
Need to clean this part adj = np.zeros([self.n, self.n]) deg = np.zeros([self.n, 1], dtype=np.float) weight_bin = np.zeros([self.n, self.n, self.bin_dim]) weight = np.zeros([self.n, self.n]) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.weight_bin: weight_bin}) feed_dict.update({self.weight: weight}) prob, ll, kl, w_edge = self.sess.run( [self.prob, self.ll, self.kl, self.w_edge], feed_dict=feed_dict) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) indicator = np.ones([self.n, self.bin_dim]) p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[i] for i in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ] probtotal = 1.0 degree = np.zeros([self.n]) for i in range(hparams.edges - 1): (u, v, w) = candidate_edges[i] #(u,v) = candidate_edges[i] #w = weight_lists[i] degree[u] += w degree[v] += w if degree[u] >= 4: indicator[u][0] = 0 indicator[u][1] = 0 if degree[u] >= 3: indicator[u][1] = 0 indicator[u][2] = 0 #if degree[v] >=5 : # indicator[v][0] = 0 if degree[v] >= 4: indicator[v][0] = 0 indicator[v][1] = 0 if degree[v] >= 3: indicator[v][1] = 0 indicator[v][2] = 0 p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, candidate_edges, list_edges, indicator) candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) for (u, v, w) in candidate_edges: with open( hparams.sample_file + '/inter/' + str(index) + inter + str(s_num) + '.txt', 'a') as f: #f.write(str(u)+'\t'+str(v)+'\n') f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') with open( hparams.z_dir + '/inter/' + str(index) + inter + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') return new_graph def get_stat(self, hparams, placeholders, num=10, outdir=None): adj, features = load_data(hparams.graph_file, hparams.nodes) hparams.sample = True eps = np.random.randn(self.n, self.z_dim, 1) for i in range(len(adj)): ll_total = 0.0 loss_total = 0.0 prob_derived = 0.0 for j in range(10): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: features[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, enc_mu, enc_sigma, loss, kl = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.kl ], feed_dict=feed_dict) ll_total += np.mean(ll) loss_total += np.mean(loss) prob = np.triu(np.reshape(prob, (self.n, self.n)), 1) prob = np.divide(prob, np.sum(prob)) for k in range(self.n): for l in range(k + 1, self.n): if adj[i][k][l] == 1: prob_derived += log(prob[k][l]) with open(hparams.sample_file + '/reconstruction_ll.txt', 'a') as f: f.write(str(-np.mean(ll_total) // 10) + '\n') #with open(hparams.graph_file+'/kl.txt', 'a') as f: # f.write(str(-np.mean(kl))+'\n') with open(hparams.sample_file + '/elbo.txt', 'a') as f: f.write(str(-np.mean(loss_total) // 10) + '\n') with open(hparams.sample_file + '/prob_derived.txt', 'a') as f: f.write(str(-np.mean(loss_total) // 10) + '\n') def 
get_masked_candidate_with_atom_ratio_new(self, prob, w_edge, atom_count, num_edges, hde): #node_list = defaultdict() rest = range(self.n) ''' p_temp = prob[0] nodes = [] sorted_index = np.argsort(np.array(p_temp)) hn = sorted_index[:atom_count[0]] on = sorted_index[atom_count[0]: atom_count[0] + atom_count[1]] nn = sorted_index[atom_count[1] + atom_count[0]: atom_count[1] + atom_count[0] + atom_count[2]] cn = sorted_index[-atom_count[3]:] ''' nodes = [] hn = [] on = [] nn = [] cn = [] for i in range(self.n): if atom_count[i] == 1: hn.append(i) if atom_count[i] == 2: on.append(i) if atom_count[i] == 3 or atom_count[i] == 5: nn.append(i) if atom_count[i] == 4: cn.append(i) nodes.extend(hn) nodes.extend(cn) nodes.extend(on) nodes.extend(nn) node_list = atom_count print("Debug nodelist", node_list) indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) for node in hn: indicator[node][1] = 0 indicator[node][2] = 0 for node in on: indicator[node][2] = 0 # two hydrogen atom cannot have an edge between them for n1 in hn: for n2 in hn: edge_mask[n1][n2] = 0 candidate_edges = [] # first generate edges joining with Hydrogen atoms sequentially print("Debug atom ratio", hn, on, nn, cn) print("Debug_degree", node_list) print("Debug nodes", nodes) index = 0 i = 0 hydro_sat = np.zeros(self.n) #first handle hydro try: for node in nodes: deg_req = node_list[node] d = degree[node] list_edges = get_candidate_neighbor_edges(node, self.n) #for (u,v,w) in list_edges: # print("list edges", u, node_list[u], degree[u], indicator[u], v, node_list[v], degree[v], indicator[v]) #print("Debug list edges", node, list_edges) #print("Edge mask", edge_mask[node]) if node in hn: for i1 in range(self.n): if hydro_sat[i1] == node_list[i1] - 1: edge_mask[i1][node] = 0 edge_mask[node][i1] = 0 while d < deg_req: p = normalise_h1(prob, w_edge, self.bin_dim, indicator, edge_mask, node) #print("Debug p", p) #list_edges = get_candidate_neighbor_edges(node, self.n) #for (u,v,w) in list_edges: # print("Debug list edges", u, v, node_list[u], node_list[v]) candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) (u, v, w) = candidate_edges[i] degree[u] += w degree[v] += w d += w if u in hn: hydro_sat[v] += 1 if v in hn: hydro_sat[u] += 1 edge_mask[u][v] = 0 edge_mask[v][u] = 0 if (node_list[u] - degree[u]) == 0: indicator[u][0] = 0 if (node_list[u] - degree[u]) <= 1: indicator[u][1] = 0 if (node_list[u] - degree[u]) <= 2: indicator[u][2] = 0 if (node_list[v] - degree[v]) == 0: indicator[v][0] = 0 if (node_list[v] - degree[v]) <= 1: indicator[v][1] = 0 if (node_list[v] - degree[v]) <= 2: indicator[v][2] = 0 #check_diconnected i += 1 print("Debug candidate_edges", candidate_edges[i - 1]) # print("change state", el, degree[el], node_list[el], indicator[el]) #''' #list_edges = get_candidate_edges(self.n) #if abs(len(candidate_edges) - num_edges) > 1 : # return '' #''' candidate_rest = '' candidate_edges_new = '' for (u, v, w) in candidate_edges: if u < v: candidate_edges_new += ' ' + str(u) + '-' + str( v) + '-' + str(w) else: candidate_edges_new += ' ' + str(v) + '-' + str( u) + '-' + str(w) print("Candidate_edges_new", candidate_edges_new) return candidate_edges_new + ' ' + candidate_rest except: return '' def get_masked_candidate(self, list_edges, prob, w_edge, num_edges, hde, indicator=[], degree=[]): list_edges_original = copy.copy(list_edges) n = len(prob[0]) #sample 1000 times count = 0 structure_list = defaultdict(int) 
#while(count < 50): while (count < 1): applyrules = False list_edges = copy.copy(list_edges_original) if len(indicator) == 0: print("Debug indi new assign") indicator = np.ones([self.n, self.bin_dim]) reach = np.ones([n, n]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ] #if degree == None: if len(degree) == 0: print("Debug degree new assign") degree = np.zeros([self.n]) G = None saturation = 0 for i1 in range(num_edges - 1): (u, v, w) = candidate_edges[i1] for j in range(n): if reach[u][j] == 0: reach[v][j] = 0 reach[j][v] = 0 if reach[v][j] == 0: reach[u][j] = 0 reach[j][u] = 0 reach[u][v] = 0 reach[v][u] = 0 degree[u] += w degree[v] += w if degree[u] >= 4: indicator[u][0] = 0 if degree[u] >= 3: indicator[u][1] = 0 if degree[u] >= 2: indicator[u][2] = 0 if degree[v] >= 4: indicator[v][0] = 0 if degree[v] >= 3: indicator[v][1] = 0 if degree[v] >= 2: indicator[v][2] = 0 # there will ne bo bridge p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, candidate_edges, list_edges, indicator) try: candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) except: #candidate_edges = [] continue structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges) ])] += 1 count += 1 #return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges): # sample 1000 times count = 0 structure_list = defaultdict(int) #while (count < 1000): while (count < 50): indicator = np.ones([self.n, self.bin_dim]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [num_edges], p=p, replace=False) ] structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges, key=itemgetter(0)) ])] += 1 #structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def sample_graph_posterior_new(self, hparams, placeholders, adj, features, weight_bins, weights, embeddings, k=0): list_edges = get_candidate_edges(self.n) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: features}) feed_dict.update({self.weight_bin: weight_bins}) feed_dict.update({self.weight: weights}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: embeddings}) hparams.sample = True prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) #indicator = np.ones([self.n, self.bin_dim]) #p, list_edges_new, w_new = normalise(prob, w_edge, self.n, hparams.bin_dim, [], list_edges_new, indicator) #(val_arr, atom_list) = self.getatoms(hparams.nodes, labels) #atom_list = [16,2,1,11] #atom_list = [4, 4, 2, 4, 3, 
1, 4, 4, 4, 4, 1, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] #atom_list = [4, 4, 4, 4, 1, 4, 4, 3, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] #atom_list = [4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 4, 2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] atom_list = [ 4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] #self.getatoms(atom_list) if not hparams.mask_weight: candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: i = 0 hde = 1 #while (i < 1000): candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, atom_list, hparams.edges, hde) #if len(candidate_edges) > 0: # break # i += 1 #candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde) with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): #with open(hparams.sample_file + 'temp.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def getatoms(self, node, label): label_new = np.reshape(label, (node, self.d)) print("Debug label original shape:", label_new) label_new = np.exp(label_new) s = label_new.shape[0] print("Debug label shape:", label_new.shape, s) label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1)) print("Debug label sum:", label_new_sum.shape) prob_label = label_new / label_new_sum pred_label = np.zeros(4) valency_arr = np.zeros(node) print("Debug prob label shape:", prob_label.shape, prob_label) #print("Debug label", label_new) for i in range(node): valency = np.random.choice(range(4), [1], p=prob_label[i]) pred_label[valency] += 1 valency_arr[i] = valency + 1 print("Debug pred_label", pred_label, valency_arr) return (pred_label, valency_arr) def sample_graph_neighborhood(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, ratio, hde, num=10, outdir=None): list_edges = get_candidate_edges(self.n) #eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim) eps = np.random.randn(self.n, self.z_dim, 1) train_mu = [] train_sigma = [] hparams.sample = False # approach 1 for i in range(len(adj)): feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: features[i]}) feed_dict.update({self.weight_bin: weight_bins[i]}) feed_dict.update({self.weight: weights[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) hparams.sample = False prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge ], feed_dict=feed_dict) with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt', 'a') as f: for z_i in enc_mu: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt', 'a') as f: for x in range(self.n): for z_i in enc_sigma[x]: f.write('[' + ','.join([str(el) for el in z_i]) + ']\n') f.write("\n") hparams.sample = True #for j in range(self.n): #for j in [1, 5, 15]: for j in [1]: z_encoded_neighborhood = 
copy.copy(z_encoded) feed_dict.update({self.eps: z_encoded_neighborhood}) prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([str(el) for el in prob[x]]) + ']\n') f.write("\n") with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([ str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2]) for el in w_edge[x] ]) + ']\n') f.write("\n") if not hparams.mask_weight: print("Non mask") candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: print("Mask") (atom_list, valency_arr) = self.getatoms(hparams.nodes, labels) candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, valency_arr, hparams.edges, hde) for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_1_node_" + str(j) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def sample_graph(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, hde, num=10, outdir=None): ''' Args : num - int 10 number of edges to be sampled outdir - string output dir ''' list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): list_edges.append((i, j, 1)) list_edges.append((i, j, 2)) list_edges.append((i, j, 3)) #list_edges.append((-1, -1, 0)) list_weight = [1, 2, 3] hparams.sample = True eps = np.random.randn(self.n, self.z_dim, 1) with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[0]}) feed_dict.update({self.features: features[0]}) feed_dict.update({self.weight_bin: weight_bins[0]}) feed_dict.update({self.weight: weights[0]}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) indicator = np.ones([self.n, 3]) p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) if not hparams.mask_weight: trial = 0 while trial < 5000: candidate_edges = [ list_edges[i] for i in np.random.choice(range( len(list_edges)), [hparams.edges], p=p, replace=False) ] with open(hparams.sample_file + 'test.txt', 'w') as f: for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') G = nx.read_edgelist(f, 
nodetype=int) if nx.is_connected(G): for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1 else: trial = 0 while trial < 5000: candidate_edges = self.get_masked_candidate( list_edges, prob, w_edge, hparams.edges, hde) #print("Debug candidate", candidate_edges) if len(candidate_edges) > 0: with open(hparams.sample_file + 'test.txt', 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') #try: G = nx.read_edgelist(f, nodetype=int) #except: #continue if nx.is_connected(G): for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1
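# Note (added): the unmasked branch of sample_graph above is a simple rejection
# sampler; a sketch of the loop, assuming candidate_edges holds (u, v, weight) triples:
#   1. draw hparams.edges edges without replacement from the normalised distribution p;
#   2. write them to <sample_file>/test.txt and reload the graph with nx.read_edgelist;
#   3. keep the sample only if nx.is_connected(G) is True, otherwise retry (up to 5000 trials).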
def __init__(self, hparams, placeholders, num_nodes, num_features, log_fact_k, input_size, istest=False):
    self.features_dim = num_features
    self.input_dim = num_nodes
    self.dropout = placeholders['dropout']
    self.k = hparams.random_walk
    self.lr = placeholders['lr']
    self.decay = placeholders['decay']
    self.n = num_nodes
    self.d = num_features
    self.z_dim = hparams.z_dim
    self.bin_dim = hparams.bin_dim
    self.mask_weight = hparams.mask_weight
    self.log_fact_k = log_fact_k
    self.neg_sample_size = hparams.neg_sample_size
    self.input_size = input_size
    self.combination = hparams.node_sample * hparams.bfs_sample

    def neg_loglikelihood(prob_dicts, w_edges):
        '''
        negative loglikelihood of the edges
        '''
        ll = 0
        k = 0
        with tf.variable_scope('NLL'):
            for i in range(self.combination):
                prob_dict = prob_dicts[i]
                w_edge = w_edges[i]
                prob_dict = tf.Print(prob_dict, [prob_dict], message="my prob dict values:")
                print("Debug prob dict shape", tf.shape(prob_dict))
                prob_dict_resized = tf.reshape(prob_dict, [-1])
                prob_dict_resized = tf.Print(prob_dict_resized, [prob_dict_resized], message="my prob dict resized values:")
                w_edge_size = tf.stack([tf.shape(w_edge)[0]])[0]
                w_edge_size = tf.Print(w_edge_size, [w_edge_size], message="my size values:")
                print("Debug w_edge_shape", tf.shape(w_edge), w_edge.get_shape(), tf.stack([tf.shape(w_edge)[0]])[0])
                w_edge_resized = tf.reshape(w_edge, [-1, self.bin_dim])
                if self.neg_sample_size > 0:
                    w_edge_resized = tf.reshape(w_edge[:-self.bin_dim * self.neg_sample_size], [-1, self.bin_dim])
                w_edge_size_r = tf.stack([tf.shape(w_edge_resized)[0]])[0]
                w_edge_size_r = tf.Print(w_edge_size_r, [w_edge_size_r], message="my size values r:")
                w_edge_exp = tf.exp(tf.minimum(w_edge_resized, tf.fill([w_edge_size_r, self.bin_dim], 10.0)))
                w_edge_pos = tf.reduce_sum(tf.multiply(self.weight_bin[i], w_edge_exp), axis=1)
                w_edge_total = tf.reduce_sum(w_edge_exp, axis=1)
                w_edge_score = tf.divide(w_edge_pos, w_edge_total)
                w_edge_score = tf.Print(w_edge_score, [w_edge_score], message="my w_edge_score values:")
                prob_dict_resized_shape = tf.stack([tf.shape(prob_dict_resized)[0]])[0]
                prob_dict_resized_shape = tf.Print(prob_dict_resized_shape, [prob_dict_resized_shape], message="my prob dict size values:")
                prob_dict_exp = tf.exp(tf.minimum(prob_dict_resized, tf.fill([prob_dict_resized_shape], 10.0)))
                prob_dict_exp = tf.Print(prob_dict_exp, [prob_dict_exp], message="my decscore values:")
                pos_score = prob_dict_exp
                if self.neg_sample_size > 0:
                    pos_score = prob_dict_exp[:-self.neg_sample_size]
                st = tf.stack([tf.shape(pos_score)[0]])[0]
                st = tf.Print(st, [st], message="my st values:")
                pos_score = tf.Print(pos_score, [pos_score], message="my posscore values:")
                #pos_weight_score = tf.multiply(tf.reshape(pos_score, [st, 1]), w_edge_score)
                pos_weight_score = tf.multiply(pos_score, tf.reshape(w_edge_score, [1, -1]))
                neg_score = tf.cumsum(prob_dict_exp, reverse=True)
                if self.neg_sample_size > 0:
                    neg_score = tf.cumsum(prob_dict_exp[1:], reverse=True)[:-self.neg_sample_size + 1]
                softmax_out = tf.divide(pos_weight_score, neg_score)
                ll += tf.reduce_sum(tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9))))
                #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n, self.n], 1e-9))))
            ll = ll / self.combination
            ll = tf.Print(ll, [ll], message="My loss")
        return (-ll)

    def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2):
        '''
        Kullback-Leibler divergence between two Gaussian distributions
        '''
        print(sigma_1.shape, sigma_2.shape)
        with tf.variable_scope("kl_gaussisan"):
            temp_stack = []
            for i in range(self.n):
                temp_stack.append(tf.square(sigma_1[i]))
            first_term = tf.trace(tf.stack(temp_stack))
            temp_stack = []
            for i in range(self.n):
                temp_stack.append(tf.matmul(tf.transpose(mu_1[i]), mu_1[i]))
            second_term = tf.reshape(tf.stack(temp_stack), [self.n])
            k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32))
            temp_stack = []
            for i in range(self.n):
                temp_stack.append(tf.reduce_prod(tf.square(debug_sigma[i])))
            third_term = tf.log(tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09)))
            return 0.5 * tf.add(tf.subtract(tf.add(first_term, second_term), k), third_term)

    def ll_poisson(lambda_, x):
        #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32)
        x = tf.Print(x, [x], message="My debug_x_tf")
        log_fact_tf = tf.convert_to_tensor([self.log_fact_k[x - 1]], dtype=tf.float32)
        return -tf.subtract(tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_), log_fact_tf)

    def label_loss_predict(label, predicted_labels, label1):
        loss = 0.0
        #for i in range(self.combination):
        predicted_label = predicted_labels
        predicted_label_resized = tf.reshape(predicted_label, [self.n, self.d])
        n_class_labels = tf.fill([self.n, 1], tf.cast(4, tf.float32))
        #predicted_label_resized_new = tf.concat(values=(predicted_label_resized, n_class_labels), axis=1)
        loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label1, logits=predicted_label_resized)
        return loss
        #return loss / self.combination

    def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label, lambda_n, lambda_e):
        kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma)  # KL divergence loss
        likelihood_loss = neg_loglikelihood(dec_out, w_edge)  # Cross entropy loss
        self.ll = likelihood_loss
        self.kl = kl_loss
        lambda_e = tf.Print(lambda_e, [lambda_e], message="My edge_lambda")
        lambda_n = tf.Print(lambda_n, [lambda_n], message="My node_lambda")
        #print("Debug self count", self.count, self.edges[self.count])
        edgeprob = ll_poisson(lambda_e, tf.cast(tf.subtract(tf.shape(self.edges[0])[0], self.neg_sample_size), tf.float32))
        nodeprob = ll_poisson(lambda_n, tf.cast(tf.convert_to_tensor([self.n]), tf.float32))
        edgeprob = tf.Print(edgeprob, [edgeprob], message="My edge_prob_loss")
        nodeprob = tf.Print(nodeprob, [nodeprob], message="My node_prob_loss")
        label_loss = label_loss_predict(self.features, label, self.features1)
        label_loss = tf.Print(label_loss, [label_loss], message="My label_loss")
        loss_1 = tf.reduce_mean(kl_loss + label_loss)
        loss_1 = tf.Print(loss_1, [loss_1], message="My label_loss1")
        total_loss = loss_1 + tf.reduce_mean(edgeprob + nodeprob + likelihood_loss)
        #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss
        total_loss = tf.Print(total_loss, [total_loss], message="My total_loss")
        return total_loss

    self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj')
    self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features')
    self.features1 = tf.placeholder(dtype=tf.int32, shape=[self.n], name='features1')
    self.weight = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name="weight")
    self.weight_bin = tf.placeholder(dtype=tf.float32, shape=[self.combination, None, hparams.bin_dim], name="weight_bin")
    self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input')
    self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps')
    #self.neg_index = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_index')
    self.edges = tf.placeholder(dtype=tf.int32, shape=[self.combination, None, 2], name='edges')
    self.count = tf.placeholder(dtype=tf.int32)
    #node_count = [len(edge_list) for edge_list in self.edges]
    print("Debug Input size", self.input_size)
    node_count_tf = tf.fill([1, self.input_size], tf.cast(self.n, tf.float32))
    node_count_tf = tf.Print(node_count_tf, [node_count_tf], message="My node_count_tf")
    print("Debug size node_count", node_count_tf.get_shape())
    #tf.convert_to_tensor(node_count, dtype=tf.int32)
    self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim, tf.to_float(node_count_tf), self.edges)
    self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample)
    self.prob = dec_out
    #print('Debug', dec_out.shape)
    self.z_encoded = z_encoded
    self.enc_mu = enc_mu
    self.enc_sigma = enc_sigma
    self.w_edge = w_edge
    self.label = label
    self.lambda_n = lambda_n
    self.lambda_e = lambda_e
    self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label, lambda_n, lambda_e)
    print_vars("trainable_variables")
    #self.lr = tf.Variable(self.lr, trainable=False)
    self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.grad = self.train_op.compute_gradients(self.cost)
    self.grad_placeholder = [(tf.placeholder("float", shape=gr[1].get_shape()), gr[1]) for gr in self.grad]
    self.apply_transform_op = self.train_op.apply_gradients(self.grad)
    #self.lr = tf.Variable(self.lr, trainable=False)
    self.sess = tf.Session()
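# Reference note (added): ll_poisson above is the negative Poisson log-likelihood
#     -log P(x; lambda) = -(x * log(lambda) - lambda - log(x!)),
# with log(x!) taken from the precomputed log-factorial table self.log_fact_k. The
# NumPy variant earlier in this file writes the "- lambda" term as lambda * np.log(2.72),
# i.e. it approximates ln(e) = 1.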
class VAEG(VAEGConfig): def __init__(self, hparams, placeholders, num_nodes, num_features, edges, istest=False): self.features_dim = num_features self.input_dim = num_nodes self.dropout = placeholders['dropout'] self.k = hparams.random_walk self.lr = placeholders['lr'] self.decay = placeholders['decay'] self.n = num_nodes self.d = num_features self.z_dim = hparams.z_dim self.count = 0 self.edges = edges self.mask_weight = hparams.mask_weight #self.edges, self.non_edges = edges, non_edges #logger.info("Building model starts...") def masked_gen(posscore, negscore): indicator = [] for i in range(self.n): indicator.append(tf.ones(self.n)) temp_posscore = tf.reduce_sum(posscore) ll = 0.0 for (u, v) in self.edges[self.count]: print("Debug", posscore[0].shape, indicator[0].shape) #tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v] ll += tf.log( tf.multiply(tf.reshape(posscore[u], [1, self.n]), indicator[u])[0][v] / (temp_posscore + negscore[u][v]) + 1e-09) ll += tf.log( tf.multiply(tf.reshape(posscore[v], [1, self.n]), indicator[v])[0][u] / (temp_posscore + negscore[v][u]) + 1e-09) indicator[u] = np.multiply( tf.subtract(tf.ones([1, self.n]), self.adj[v]), indicator[u]) indicator[v] = np.multiply( tf.subtract(tf.ones([1, self.n]), self.adj[u]), indicator[v]) temp_posscore = temp_posscore - tf.reduce_sum(posscore[u]) temp = tf.multiply(indicator[u], tf.reshape(posscore[u], [self.n])) temp_posscore += tf.reduce_sum(temp) temp_posscore = temp_posscore - \ tf.reduce_sum(posscore[v]) + \ tf.reduce_sum(tf.multiply(indicator[v], posscore[v])) temp_posscore = temp_posscore - \ tf.reduce_sum(tf.transpose(posscore)[ u]) + tf.reduce_sum(tf.multiply(indicator[u], tf.transpose(posscore)[u])) temp_posscore = temp_posscore - \ tf.reduce_sum(tf.transpose(posscore)[ v]) + tf.reduce_sum(tf.multiply(indicator[v], tf.transpose(posscore)[v])) return ll def neg_loglikelihood(prob_dict): ''' negative loglikelihood of the edges ''' ll = 0 k = 0 with tf.variable_scope('NLL'): dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n]) ''' dec_mat_temp = np.zeros((self.n, self.n)) for i in range(self.n): for j in range(i+1, self.n): print("Debug", prob_dict[k]) dec_mat_temp[i][j] = prob_dict[k][0] dec_mat_temp[j][i] = prob_dict[k][0] k+=1 #''' #dec_mat = tf.exp(tf.minimum(tf.reshape(prob_dict, [self.n, self.n]),tf.fill([self.n, self.n], 10.0))) dec_mat = tf.exp( tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0))) dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:") print("Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat) comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), self.adj) comp = tf.Print(comp, [comp], message="my comp values:") temp = tf.reduce_sum(tf.multiply(comp, dec_mat)) negscore = tf.fill([self.n, self.n], temp + 1e-9) negscore = tf.Print(negscore, [negscore], message="my negscore values:") posscore = tf.multiply(self.adj, dec_mat) posscore = tf.Print(posscore, [posscore], message="my posscore values:") #dec_out = tf.multiply(self.adj, dec_mat) softmax_out = tf.truediv(posscore, tf.add(posscore, negscore)) ll = tf.reduce_sum( tf.log( tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n, self.n], 1e-9))), 1) if hparams.mask_weight: ll = masked_gen(posscore, negscore) #ll = masked_ll(posscore, negscore) return (-ll) def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2): ''' Kullback leibler divergence for two gaussian distributions ''' print(sigma_1.shape, sigma_2.shape) with tf.variable_scope("kl_gaussisan"): temp_stack = [] for i in range(self.n): 
temp_stack.append(tf.square(sigma_1[i])) first_term = tf.trace(tf.stack(temp_stack)) temp_stack = [] for i in range(self.n): temp_stack.append(tf.matmul(tf.transpose(mu_1[i]), mu_1[i])) second_term = tf.reshape(tf.stack(temp_stack), [self.n]) #k = tf.fill([self.n], tf.cast(self.d, tf.float32)) k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32)) temp_stack = [] # for i in range(self.n): # temp_stack.append(tf.log(tf.truediv(tf.matrix_determinant(sigma_2[i]),tf.add(tf.matrix_determinant(sigma_1[i]), tf.fill([self.d, self.d], 1e-9))))) for i in range(self.n): temp_stack.append(tf.reduce_prod(tf.square( debug_sigma[i]))) print("Debug", tf.stack(temp_stack).shape) third_term = tf.log( tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09))) print("debug KL", first_term.shape, second_term.shape, k.shape, third_term.shape, sigma_1[0].shape) # return 0.5 *tf.reduce_sum(( return 0.5 * tf.add( tf.subtract(tf.add(first_term, second_term), k), third_term) def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out): kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma) # KL_divergence loss likelihood_loss = neg_loglikelihood(dec_out) # Cross entropy loss self.ll = likelihood_loss self.kl = kl_loss return tf.reduce_mean(kl_loss + likelihood_loss) self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj') self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features') self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input') self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps') self.cell = VAEGCell(self.adj, self.features, self.z_dim) self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded = self.cell.call( self.input_data, self.n, self.d, self.k, self.eps, hparams.sample) self.prob = dec_out print('Debug', dec_out.shape) self.z_encoded = z_encoded self.enc_mu = enc_mu self.enc_sigma = enc_sigma self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out) print_vars("trainable_variables") # self.lr = tf.Variable(self.lr, trainable=False) self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr) self.grad = self.train_op.compute_gradients(self.cost) self.grad_placeholder = [(tf.placeholder("float", shape=gr[1].get_shape()), gr[1]) for gr in self.grad] #self.capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grad] #self.tgv = [self.grad] # self.apply_transform_op = self.train_op.apply_gradients(self.grad_placeholder) #self.apply_transform_op = self.train_op.apply_gradients(self.capped_gvs) self.apply_transform_op = self.train_op.apply_gradients(self.grad) #self.lr = tf.Variable(self.lr, trainable=False) #self.gradient = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).compute_gradients(self.cost) #self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-4).minimize(self.cost) #self.check_op = tf.add_check_numerics_ops() self.sess = tf.Session() def initialize(self): logger.info("Initialization of parameters") # self.sess.run(tf.initialize_all_variables()) self.sess.run(tf.global_variables_initializer()) def restore(self, savedir): saver = tf.train.Saver(tf.global_variables()) ckpt = tf.train.get_checkpoint_state(savedir) print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def train(self, placeholders, hparams, adj, features): savedir = hparams.out_dir lr = 
hparams.learning_rate dr = hparams.dropout_rate decay = hparams.decay_rate # training num_epochs = hparams.num_epochs create_dir(savedir) ckpt = tf.train.get_checkpoint_state(savedir) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) print("Load the model from %s" % ckpt.model_checkpoint_path) #f = open(hparams.out_dir+"iteration.txt") iteration = 10000 # 1000 for epoch in range(num_epochs): for i in range(len(adj)): self.count = i # Learning rate decay #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch))) feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) feed_dict.update({self.adj: adj[i]}) # print "Debug", features[i].shape eps = np.random.randn(self.n, self.z_dim, 1) #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) feed_dict.update({self.features: features[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) grad_vals = self.sess.run([g[0] for g in self.grad], feed_dict=feed_dict) for j in range(len(self.grad_placeholder)): feed_dict.update( {self.grad_placeholder[j][0]: grad_vals[j]}) input_, train_loss, _, probdict, cx = self.sess.run( [ self.input_data, self.cost, self.apply_transform_op, self.prob, self.c_x ], feed_dict=feed_dict) iteration += 1 # print "Debug Grad", grad_vals[0] # print "Debug CX", cx if iteration % hparams.log_every == 0 and iteration > 0: print("{}/{}(epoch {}), train_loss = {:.6f}".format( iteration, num_epochs, epoch + 1, train_loss)) # print(probdict) checkpoint_path = os.path.join(savedir, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) def plot_hspace(self, hparams, placeholders, num): # plot the coordinate in hspace adj, deg = load_data(hparams.graph_file, num) hparams.sample = False #''' for i in range(len(adj)): eps = np.random.randn(self.n, hparams.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: deg[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z = self.sess.run([self.prob, self.ll, self.z_encoded], feed_dict=feed_dict) with open(hparams.z_dir + 'train' + str(i) + '.txt', 'a') as f: for z_i in z: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') #hparams.sample = True #''' adj, deg = load_data(hparams.sample_file, num) for i in range(len(adj)): eps = np.random.randn(self.n, 5, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: deg[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z = self.sess.run([self.prob, self.ll, self.z_encoded], feed_dict=feed_dict) with open(hparams.z_dir + 'test_' + str(i) + '.txt', 'a') as f: for z_i in z: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') #''' def sample_graph_slerp(self, hparams, placeholders, s_num, G_good, G_bad, inter, ratio, num=10): # Agrs : # G_good : embedding of the train graph or good sample # G_bad : embedding of the bad graph list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): list_edges.append((i, j)) # for sample in range(s_num): 
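        # Interpolation step: for each node embedding we blend the "good" and
        # "bad" latent vectors. Assuming the usual definitions of the lerp and
        # slerp helpers used below (defined elsewhere in this repo):
        #   lerp(p, q, t)  = (1 - t) * p + t * q
        #   slerp(p, q, t) = sin((1 - t) * omega) / sin(omega) * p
        #                    + sin(t * omega) / sin(omega) * q,
        # where omega is the angle between p and q; slerp follows the great
        # circle between the two latent points instead of the straight line.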
new_graph = [] for i in range(self.n): node_good = G_good[i] node_bad = G_bad[i] if inter == "lerp": new_graph.append( lerp(np.reshape(node_good, -1), np.reshape(node_bad, -1), ratio)) else: new_graph.append( slerp(np.reshape(node_good, -1), np.reshape(node_bad, -1), ratio)) eps = np.array(new_graph) eps = eps.reshape(eps.shape + (1, )) hparams.sample = True feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) # TODO adj and deg are filler and does not required while sampling. Need to clean this part adj = np.zeros([self.n, self.n]) deg = np.zeros([self.n, 1], dtype=np.float) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, kl = self.sess.run([self.prob, self.ll, self.kl], feed_dict=feed_dict) prob = np.triu(np.reshape(prob, (self.n, self.n)), 1) prob = np.divide(prob, np.sum(prob)) print("Debug", prob) problist = [] try: for i in range(self.n): for j in range(i + 1, self.n): problist.append(prob[i][j]) p = np.array(problist) # list to numpy conversion can change negligible precision. so it is # desirable to further normalise it p /= p.sum() max_prob = max(p) min_prob = min(p) diff = min_prob + (max_prob - min_prob) * 0.1 print("Debug max prob", max_prob, p) #candidate_edges = [ list_edges[i] for i in np.random.choice(range(len(list_edges)),[num], p=p, replace=False)] candidate_edges = [ list_edges[i] for i in range(len(list_edges)) if p[i] >= diff ] except: return #adj = np.zeros([self.n, self.n]) probmul = 1.0 for (u, v) in candidate_edges: #adj[u][v] = 1 #adj[v][u] = 1 probmul *= prob[u][v] with open( hparams.sample_file + '/inter/' + inter + str(s_num) + '.txt', 'a') as f: f.write(str(u) + '\t' + str(v) + '\n') with open(hparams.z_dir + '/inter/' + inter + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') #kl_gaussian_mul(np.mean(G_good, axis=0), np.diag(np.var(G_good, axis=0)), np.mean(G_bad, axis = 0), np.diag(np.var(G_bad, axis = 0))) #ll1 = log(probmul) # with open(hparams.sample_file+'/inter/ll.txt', 'a') as f: # f.write(str(ll1)+'\n') #kl1 = np.mean(kl) # with open(hparams.sample_file+'/inter/kl.txt', 'a') as f: # f.write(str(kl1)+'\n') #G_bad = new_graph return new_graph def kl_gaussian_mul(self, mu_1, sigma_1, mu_2, sigma_2): ''' Kullback leibler divergence for two gaussian distributions ''' #print("Debug sigma1", debug_sigma_1, len(debug_sigma_1[0])) # print sigma_1.shape, sigma_2.shape n = self.n temp_stack_1 = [] temp_stack_2 = [] #debug_sigma_1 = np.diag(sigma_1) #debug_sigma_2 = np.diag(sigma_2) for i in range(n): #print("DEBUG i", i) temp_stack_1.append(np.prod(sigma_1[i].diagonal())) temp_stack_2.append(np.prod(sigma_2[i].diagonal())) # Inverse of diaginal covariance ones = np.ones(sigma_2.shape) inverse_sigma_2 = np.subtract( ones, np.true_divide(ones, np.add(ones, sigma_2))) #inverse_sigma_2 = tf.matrix_diag(np.true_divide(np.ones(np.shape(debug_sigma_2)), debug_sigma_2)) term_2 = [] print("DEBUG2", len(inverse_sigma_2)) for i in range(n): term_2.append(np.trace(np.matmul(inverse_sigma_2[i], sigma_1[i]))) # Difference between the mean term_3 = [] k = np.zeros([self.n]) k.fill(mu_1.shape[1]) diff_mean = np.subtract(mu_2, mu_1) for i in range(self.n): term_3.append( np.matmul( np.matmul(np.transpose(diff_mean[i]), inverse_sigma_2[i]), diff_mean[i])) term1 = np.log(np.true_divide(temp_stack_2, 
temp_stack_1))
        # term2 = np.trace(term_2[])
        # print("Debug", len(term1), len(term_2), len(term_3), len(term_2), len(term_2[0][0]))
        KL = 0.5 * np.subtract(np.add(np.add(term1, term_2), term_3), k)
        #KL = tf.Print(KL, [KL], message="my KL values:")
        #print("Debug mu1", tf.shape(mu_1)[1])
        return np.sum(KL)

    def get_stat(self, hparams, placeholders, num=10, outdir=None):
        adj, features, edges = load_data(hparams.graph_file, hparams.nodes)
        # for i in range(self.n):
        #     deg[i][0] = 2 * np.sum(adj[i])/(self.n*(self.n - 1))
        hparams.sample = True
        eps = np.random.randn(self.n, self.z_dim, 1)
        if hparams.sample:
            print("Debug Sample", hparams.sample)
        for i in range(len(adj)):
            ll_total = 0.0
            loss_total = 0.0
            prob_derived = 0.0
            # average the estimates over 10 posterior samples per graph
            for j in range(10):
                eps = np.random.randn(self.n, self.z_dim, 1)
                feed_dict = construct_feed_dict(hparams.learning_rate,
                                                hparams.dropout_rate, self.k,
                                                self.n, self.d,
                                                hparams.decay_rate,
                                                placeholders)
                feed_dict.update({self.adj: adj[i]})
                feed_dict.update({self.features: features[i]})
                feed_dict.update(
                    {self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                prob, ll, z_encoded, enc_mu, enc_sigma, loss, kl = self.sess.run(
                    [
                        self.prob, self.ll, self.z_encoded, self.enc_mu,
                        self.enc_sigma, self.cost, self.kl
                    ],
                    feed_dict=feed_dict)
                ll_total += np.mean(ll)
                loss_total += np.mean(loss)
                prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
                prob = np.divide(prob, np.sum(prob))
                for k in range(self.n):
                    for l in range(k + 1, self.n):
                        if adj[i][k][l] == 1:
                            prob_derived += log(prob[k][l] + 0.1)
            # with open(hparams.sample_file+'/reconstruction_ll.txt', 'a') as f:
            with open(hparams.out_dir + '/reconstruction_ll1.txt', 'a') as f:
                f.write(str(-ll_total / 10.0) + '\n')
            # with open(hparams.graph_file+'/kl.txt', 'a') as f:
            #     f.write(str(-np.mean(kl))+'\n')
            # with open(hparams.sample_file+'/elbo.txt', 'a') as f:
            with open(hparams.out_dir + '/elbo1.txt', 'a') as f:
                f.write(str(-loss_total / 10.0) + '\n')
            # with open(hparams.sample_file+'/prob_derived.txt', 'a') as f:
            with open(hparams.out_dir + '/prob_derived1.txt', 'a') as f:
                f.write(str(prob_derived / 10.0) + '\n')

    def zspace_analysis(self, hparams, placeholders, num=10, outdir=None):
        adj, features = load_data(hparams.graph_file, hparams.nodes)
        eps = np.random.randn(self.n, self.z_dim, 1)
        train_z = []
        list_edges = []
        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j))
        for i in range(len(adj)):
            hparams.sample = False
            feed_dict = construct_feed_dict(hparams.learning_rate,
                                            hparams.dropout_rate, self.k,
                                            self.n, self.d,
                                            hparams.decay_rate, placeholders)
            feed_dict.update({self.adj: adj[i]})
            feed_dict.update({self.features: features[i]})
            feed_dict.update(
                {self.input_data: np.zeros([self.k, self.n, self.d])})
            feed_dict.update({self.eps: eps})
            prob, ll, z_encoded, enc_mu, enc_sigma, elbo = self.sess.run(
                [
                    self.prob, self.ll, self.z_encoded, self.enc_mu,
                    self.enc_sigma, self.cost
                ],
                feed_dict=feed_dict)
            train_z.append(z_encoded)
            with open(hparams.z_dir + 'train_' + str(i) + '.txt', 'a') as f:
                for z_i in z_encoded:
                    f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')
            prob = np.triu(np.reshape(prob, (self.n, self.n)), 1)
            prob = np.divide(prob, np.sum(prob))
            problist = []
            for k in range(self.n):
                for l in range(k + 1, self.n):
                    problist.append(prob[k][l])
            p = np.array(problist)
            p /= p.sum()
            if i < 20:
                num = 32
            else:
                num = 78
            candidate_edges = [
                list_edges[k]
                for k in np.random.choice(range(len(list_edges)), [num], p=p)
            ]
            probtotal = 1.0
            adjnew = np.zeros([self.n, self.n])
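            # Reconstruction pass: rebuild an adjacency matrix and a normalised
            # degree feature vector from the sampled candidate edges, dump the
            # edge list to disk, then re-encode the sampled graph so that its
            # embedding can be compared with the training embedding saved above.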
featuresnew = np.zeros([self.n, 1]) for (u, v) in candidate_edges: probtotal *= prob[u][v] adjnew[u][v] = 1 adjnew[v][u] = 1 featuresnew[u][0] += 1 // self.n featuresnew[v][0] += 1 // self.n if i < 20: with open( hparams.sample_file + "type_1_test" + "_" + str(i) + '.txt', 'a') as f: f.write(str(u) + '\t' + str(v) + '\n') else: with open( hparams.sample_file + "type_2_test" + "_" + str(i) + '.txt', 'a') as f: f.write(str(u) + '\t' + str(v) + '\n') # hparams.sample=False eps1 = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adjnew}) feed_dict.update({self.features: featuresnew}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps1}) prob, z_encoded = self.sess.run([self.prob, self.z_encoded], feed_dict=feed_dict) print("DebugZ", len(z_encoded), len(z_encoded[0])) if i < 20: with open(hparams.z_dir + 'type_1_test_' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') else: with open(hparams.z_dir + 'type_2_test_' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') with open(hparams.sample_file + 'll_' + '.txt', 'a') as f: f.write(str(-np.mean(prob)) + '\n') # Interpolation Finding the likelihood count = 0 for i in range(20): for j in range(20, 40): self.sample_graph_slerp(hparams, placeholders, count, train_z[i], train_z[j], "slerp", 50) count += 1 self.sample_graph_slerp(hparams, placeholders, count, train_z[i], train_z[j], "lerp", 50) count += 1 def getcandidate(self, num, n, p, prob, list_edges): print("Inside gencanidate") adj = np.zeros([n, n]) candidate_edges = [ list_edges[i] for i in np.random.choice(range(len(list_edges)), [1], p=p) ] indicator = np.ones([n, n]) unseen = np.ones(n) probnew = prob for k in range(num - 1): (u, v) = candidate_edges[k] adj[u][v] = 1 adj[v][u] = 1 #unseen[u] = 0 #unseen[v] = 0 indicator[u] = np.multiply( np.multiply(np.subtract(np.ones(n), adj[v]), indicator[u]), unseen) indicator[v] = np.multiply( np.multiply(np.subtract(np.ones(n), adj[u]), indicator[v]), unseen) probnew = np.multiply(np.multiply(probnew, indicator), np.transpose(indicator)) problist = [] for i in range(self.n): for j in range(i + 1, self.n): if (i, j) in candidate_edges: if (i, j) in list_edges: list_edges.remove((i, j)) continue problist.append(probnew[i][j]) p = np.array(problist) p /= p.sum() print("Debug p", p) candidate_edges.extend([ list_edges[i] for i in np.random.choice(range(len(list_edges)), [1], p=p) ]) return candidate_edges def getembeddings(self, hparams, placeholders, adj, deg): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, kl, embedding = self.sess.run( [self.prob, self.ll, self.kl, self.z_encoded], feed_dict=feed_dict) return embedding def sample_graph(self, hparams, placeholders, s_num, node, num=10, outdir=None, eps_passed=None): ''' Args : num - int 10 number of edges to be sampled outdir - string output dir ''' list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): list_edges.append((i, j)) adj, features, 
edges = load_data(hparams.graph_file, node) eps = np.random.randn(self.n, self.z_dim, 1) with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) train_mu = [] train_sigma = [] hparams.sample = False for i in range(len(adj)): feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: features[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, enc_mu, enc_sigma, elbo = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost ], feed_dict=feed_dict) prob = np.triu(np.exp(np.reshape(prob, [self.n, self.n])), 1) prob = np.divide(prob, np.sum(prob)) problist = [] for i in range(self.n): for j in range(i + 1, self.n): problist.append(prob[i][j]) p = np.array(problist) p /= p.sum() if hparams.mask_weight: candidate_edges = self.getcandidate(num, self.n, p, prob, list_edges) else: candidate_edges = [ list_edges[i] for i in np.random.choice(range(len(list_edges)), [num], p=p) ] probtotal = 1.0 for (u, v) in candidate_edges: probtotal *= prob[u][v] with open( hparams.sample_file + "approach_1_train" + str(i) + "_" + str(s_num) + '.txt', 'a') as f: f.write(str(u) + ' ' + str(v) + ' {}' + '\n') #ll1 = np.mean(ll) ll1 = log(probtotal) with open( hparams.sample_file + "/approach_1_train" + str(i) + '_ll.txt', 'a') as f: f.write( str(ll1) + "\t" + str(np.mean(ll)) + "\t" + str(np.mean(elbo)) + '\n') # approach 2 hparams.sample = True eps = np.random.randn(self.n, self.z_dim, 1) if eps_passed != None: eps = eps_passed with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[0]}) feed_dict.update({self.features: features[0]}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu, self.enc_sigma, self.cost ], feed_dict=feed_dict) prob = np.triu(np.exp((np.reshape(prob, (self.n, self.n)))), 1) prob = np.divide(prob, np.sum(prob)) problist = [] for i in range(self.n): for j in range(i + 1, self.n): problist.append(prob[i][j]) p = np.array(problist) p /= p.sum() if hparams.mask_weight: candidate_edges = self.getcandidate(num, self.n, p, prob, list_edges) else: candidate_edges = [ list_edges[i] for i in np.random.choice( range(len(list_edges)), [num], p=p, replace=False) ] probtotal = 1.0 adj = np.zeros([self.n, self.n]) deg = np.zeros([self.n, 1]) for (u, v) in candidate_edges: #adj[u][v] += 1 #adj[v][u] += 1 probtotal *= prob[u][v] with open( hparams.sample_file + "approach_2" + "_" + str(s_num) + '.txt', 'a') as f: f.write(str(u) + ' ' + str(v) + ' {}' + '\n') ll1 = log(probtotal) with open(hparams.sample_file + '/reconstruction_ll.txt', 'a') as f: f.write(str(np.mean(ll)) + '\n') with open(hparams.sample_file + '/elbo.txt', 'a') as f: f.write(str(np.mean(loss)) + '\n')
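# ---------------------------------------------------------------------------
# Reference sketch (illustrative only, not the method above): both sampling
# approaches reduce the decoded n x n score matrix to a categorical
# distribution over the n * (n - 1) / 2 node pairs and then draw edges from it,
# optionally without replacement. The core step, stripped of the masking
# logic, is just:
import numpy as np


def sample_edges_from_scores(scores, num_edges):
    """Draw num_edges distinct (i, j) pairs, i < j, with probability
    proportional to the upper-triangular entries of a non-negative score
    matrix (a stand-in for the exponentiated decoder output)."""
    n = scores.shape[0]
    pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]
    p = np.array([scores[i][j] for (i, j) in pairs], dtype=np.float64)
    p /= p.sum()
    idx = np.random.choice(range(len(pairs)), size=num_edges, replace=False,
                           p=p)
    return [pairs[c] for c in idx]

# Example: sample_edges_from_scores(np.triu(np.random.rand(6, 6), 1), 4)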
class VAEG(VAEGConfig): def __init__(self, hparams, placeholders, num_nodes, num_features, log_fact_k, input_size, istest=False): self.features_dim = num_features self.input_dim = num_nodes self.dropout = placeholders['dropout'] self.k = hparams.random_walk self.lr = placeholders['lr'] self.decay = placeholders['decay'] self.n = num_nodes self.d = num_features self.z_dim = hparams.z_dim self.bin_dim = hparams.bin_dim self.mask_weight = hparams.mask_weight self.log_fact_k = log_fact_k self.neg_sample_size = hparams.neg_sample_size self.input_size = input_size self.combination = hparams.node_sample * hparams.bfs_sample def neg_loglikelihood(prob_dicts, w_edges): ''' negative loglikelihood of the edges ''' ll = 0 k = 0 with tf.variable_scope('NLL'): for i in range(self.combination): prob_dict = prob_dicts[i] w_edge = w_edges[i] prob_dict = tf.Print(prob_dict, [prob_dict], message="my prob dict values:") print("Debug prob dict shape", tf.shape(prob_dict)) prob_dict_resized = tf.reshape(prob_dict, [-1]) prob_dict_resized = tf.Print( prob_dict_resized, [prob_dict_resized], message="my prob dict resized values:") w_edge_size = tf.stack([tf.shape(w_edge)[0]])[0] w_edge_size = tf.Print(w_edge_size, [w_edge_size], message="my size values:") print("Debug w_edge_shape", tf.shape(w_edge), w_edge.get_shape(), tf.stack([tf.shape(w_edge)[0]])[0]) w_edge_resized = tf.reshape(w_edge, [-1, self.bin_dim]) if self.neg_sample_size > 0: w_edge_resized = tf.reshape( w_edge[:-self.bin_dim * self.neg_sample_size], [-1, self.bin_dim]) w_edge_size_r = tf.stack([tf.shape(w_edge_resized)[0]])[0] w_edge_size_r = tf.Print(w_edge_size_r, [w_edge_size_r], message="my size values r:") w_edge_exp = tf.exp( tf.minimum( w_edge_resized, tf.fill([w_edge_size_r, self.bin_dim], 10.0))) w_edge_pos = tf.reduce_sum(tf.multiply( self.weight_bin[i], w_edge_exp), axis=1) w_edge_total = tf.reduce_sum(w_edge_exp, axis=1) w_edge_score = tf.divide(w_edge_pos, w_edge_total) w_edge_score = tf.Print(w_edge_score, [w_edge_score], message="my w_edge_score values:") prob_dict_resized_shape = tf.stack( [tf.shape(prob_dict_resized)[0]])[0] prob_dict_resized_shape = tf.Print( prob_dict_resized_shape, [prob_dict_resized_shape], message="my prob dict size values:") prob_dict_exp = tf.exp( tf.minimum(prob_dict_resized, tf.fill([prob_dict_resized_shape], 10.0))) prob_dict_exp = tf.Print(prob_dict_exp, [prob_dict_exp], message="my decscore values:") pos_score = prob_dict_exp if self.neg_sample_size > 0: pos_score = prob_dict_exp[:-self.neg_sample_size] st = tf.stack([tf.shape(pos_score)[0]])[0] st = tf.Print(st, [st], message="my st values:") pos_score = tf.Print(pos_score, [pos_score], message="my posscore values:") #pos_weight_score = tf.multiply(tf.reshape(pos_score,[st, 1]), w_edge_score) pos_weight_score = tf.multiply( pos_score, tf.reshape(w_edge_score, [1, -1])) neg_score = tf.cumsum(prob_dict_exp, reverse=True) if self.neg_sample_size > 0: neg_score = tf.cumsum( prob_dict_exp[1:], reverse=True)[:-self.neg_sample_size + 1] softmax_out = tf.divide(pos_weight_score, neg_score) ll += tf.reduce_sum( tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9)))) #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9)))) ll = ll / self.combination ll = tf.Print(ll, [ll], message="My loss") return (-ll) def kl_gaussian(mu_1, sigma_1, debug_sigma, mu_2, sigma_2): ''' Kullback leibler divergence for two gaussian distributions ''' print sigma_1.shape, sigma_2.shape with tf.variable_scope("kl_gaussisan"): temp_stack = [] 
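            # Closed form being assembled below (hedged reading of this code):
            # for diagonal Gaussians, KL(N(mu, diag(sigma^2)) || N(0, I)) =
            #   0.5 * sum_j (sigma_j^2 + mu_j^2 - 1 - log sigma_j^2),
            # i.e. a trace term, a squared-mean term, the dimension k = z_dim,
            # and a log-variance term; the four tensors computed next map onto
            # those pieces, with debug_sigma supplying the variance product.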
for i in range(self.n): temp_stack.append(tf.square(sigma_1[i])) first_term = tf.trace(tf.stack(temp_stack)) temp_stack = [] for i in range(self.n): temp_stack.append(tf.matmul(tf.transpose(mu_1[i]), mu_1[i])) second_term = tf.reshape(tf.stack(temp_stack), [self.n]) k = tf.fill([self.n], tf.cast(self.z_dim, tf.float32)) temp_stack = [] for i in range(self.n): temp_stack.append(tf.reduce_prod(tf.square( debug_sigma[i]))) third_term = tf.log( tf.add(tf.stack(temp_stack), tf.fill([self.n], 1e-09))) return 0.5 * tf.add( tf.subtract(tf.add(first_term, second_term), k), third_term) def ll_poisson(lambda_, x): #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32) x = tf.Print(x, [x], message="My debug_x_tf") log_fact_tf = tf.convert_to_tensor([self.log_fact_k[x - 1]], dtype=tf.float32) return -tf.subtract( tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_), log_fact_tf) def label_loss_predict(label, predicted_labels, label1): loss = 0.0 #for i in range(self.combination): predicted_label = predicted_labels predicted_label_resized = tf.reshape(predicted_label, [self.n, self.d]) n_class_labels = tf.fill([self.n, 1], tf.cast(4, tf.float32)) #predicted_label_resized_new = tf.concat(values =(predicted_label_resized, n_class_labels), axis=1) loss += tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label1, logits=predicted_label_resized) return loss #return loss/self.combination def get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label, lambda_n, lambda_e): kl_loss = kl_gaussian(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma) # KL_divergence loss likelihood_loss = neg_loglikelihood(dec_out, w_edge) # Cross entropy loss self.ll = likelihood_loss self.kl = kl_loss lambda_e = tf.Print(lambda_e, [lambda_e], message="My edge_lambda") lambda_n = tf.Print(lambda_n, [lambda_n], message="My node_lambda") #print("Debug self count", self.count, self.edges[self.count]) edgeprob = ll_poisson( lambda_e, tf.cast( tf.subtract( tf.shape(self.edges[0])[0], self.neg_sample_size), tf.float32)) nodeprob = ll_poisson( lambda_n, tf.cast(tf.convert_to_tensor([self.n]), tf.float32)) edgeprob = tf.Print(edgeprob, [edgeprob], message="My edge_prob_loss") nodeprob = tf.Print(nodeprob, [nodeprob], message="My node_prob_loss") label_loss = label_loss_predict(self.features, label, self.features1) label_loss = tf.Print(label_loss, [label_loss], message="My label_loss") loss_1 = tf.reduce_mean(kl_loss + label_loss) loss_1 = tf.Print(loss_1, [loss_1], message="My label_loss1") total_loss = loss_1 + tf.reduce_mean(edgeprob + nodeprob + likelihood_loss) #return tf.reduce_mean(kl_loss) + edgeprob + nodeprob + likelihood_loss total_loss = tf.Print(total_loss, [total_loss], message="My total_loss") return total_loss self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj') self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features') self.features1 = tf.placeholder(dtype=tf.int32, shape=[self.n], name='features1') self.weight = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name="weight") self.weight_bin = tf.placeholder( dtype=tf.float32, shape=[self.combination, None, hparams.bin_dim], name="weight_bin") self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input') self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps') #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index') self.edges = tf.placeholder(dtype=tf.int32, 
shape=[self.combination, None, 2], name='edges') self.count = tf.placeholder(dtype=tf.int32) #node_count = [len(edge_list) for edge_list in self.edges] print("Debug Input size", self.input_size) node_count_tf = tf.fill([1, self.input_size], tf.cast(self.n, tf.float32)) node_count_tf = tf.Print(node_count_tf, [node_count_tf], message="My node_count_tf") print("Debug size node_count", node_count_tf.get_shape()) #tf.convert_to_tensor(node_count, dtype=tf.int32) self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim, tf.to_float(node_count_tf), self.edges) self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call( self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample) self.prob = dec_out #print('Debug', dec_out.shape) self.z_encoded = z_encoded self.enc_mu = enc_mu self.enc_sigma = enc_sigma self.w_edge = w_edge self.label = label self.lambda_n = lambda_n self.lambda_e = lambda_e self.cost = get_lossfunc(enc_mu, enc_sigma, debug_sigma, prior_mu, prior_sigma, dec_out, w_edge, label, lambda_n, lambda_e) print_vars("trainable_variables") # self.lr = tf.Variable(self.lr, trainable=False) self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr) self.grad = self.train_op.compute_gradients(self.cost) self.grad_placeholder = [(tf.placeholder("float", shape=gr[1].get_shape()), gr[1]) for gr in self.grad] self.apply_transform_op = self.train_op.apply_gradients(self.grad) #self.lr = tf.Variable(self.lr, trainable=False) self.sess = tf.Session() def initialize(self): logger.info("Initialization of parameters") #self.sess.run(tf.initialize_all_variables()) self.sess.run(tf.global_variables_initializer()) def restore(self, savedir): saver = tf.train.Saver(tf.global_variables()) ckpt = tf.train.get_checkpoint_state(savedir) if ckpt == None or ckpt.model_checkpoint_path == None: self.initialize() else: print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def train(self, placeholders, hparams, adj, weight, weight_bin, features, edges, neg_edges, features1): savedir = hparams.out_dir lr = hparams.learning_rate dr = hparams.dropout_rate decay = hparams.decay_rate f1 = open(hparams.out_dir + '/iteration.txt', 'r') iteration = int(f1.read().strip()) # training num_epochs = hparams.num_epochs create_dir(savedir) ckpt = tf.train.get_checkpoint_state(savedir) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) print("Load the model from %s" % ckpt.model_checkpoint_path) start_before_epoch = time.time() for epoch in range(num_epochs): start = time.time() for i in range(len(adj)): #self.count = i if len(edges[i]) == 0: continue # Learning rate decay #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch))) feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) feed_dict.update({self.adj: adj[i]}) eps = np.random.randn(self.n, self.z_dim, 1) #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) feed_dict.update({self.features: features[i]}) feed_dict.update({self.features1: features1[i]}) feed_dict.update({self.weight_bin: weight_bin[i]}) feed_dict.update({self.weight: weight[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) neg_indices = np.random.choice(range(len(neg_edges[i])), hparams.neg_sample_size, replace=False) combined_edges = [] 
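                # Negative sampling: draw hparams.neg_sample_size non-edges for
                # this graph and append the same sample to every BFS edge list,
                # so the decoder's softmax denominator also sees pairs that are
                # not connected; the Poisson edge-count term in the loss
                # subtracts neg_sample_size for the same reason.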
neg_edges_to_be_extended = [ neg_edges[i][index] for index in neg_indices ] copy_edge = copy.deepcopy(edges[i]) for j in range(len(edges[i])): #print("Debug edge_list", edge) copy_edge[j].extend(neg_edges_to_be_extended) #print("Debug edge_list_combined", combined_edges) print("Debug feed edges", i, len(edges[i][0]), len(copy_edge[0])) feed_dict.update({self.edges: copy_edge}) input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n = self.sess.run( [ self.input_data, self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge, self.lambda_e, self.lambda_n ], feed_dict=feed_dict) iteration += 1 #print("Lambda_e, lambda_n", lambda_e, lambda_n, i) if iteration % hparams.log_every == 0 and iteration > 0: #print(train_loss) print("{}/{}(epoch {}), train_loss = {:.6f}".format( iteration, num_epochs, epoch + 1, train_loss)) checkpoint_path = os.path.join(savedir, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) end = time.time() print("Time taken for a batch: ", end - start) end_after_epoch = time.time() print("Time taken to completed all epochs", -start_before_epoch + end_after_epoch) f1 = open(hparams.out_dir + '/iteration.txt', 'w') f1.write(str(iteration)) def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight, edges, features1): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.weight_bin: weight_bin}) feed_dict.update({self.weight: weight}) feed_dict.update({self.edges: edges}) feed_dict.update({self.features1: features1}) prob, ll, kl, w_edge, embedding = self.sess.run( [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded], feed_dict=feed_dict) return embedding def get_masked_candidate_new(self, prob, w_edge, n_edges, labels): list_edges = get_candidate_edges(self.n) max_node = np.argmax(labels) #max_node = np.argmin(labels) indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) candidate_edges = get_weighted_edges_connected(indicator, prob, edge_mask, w_edge, n_edges, labels, degree, max_node) candidate_edges_new = [] for (u, v, w) in candidate_edges: if u < v: candidate_edges_new.append( str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}") else: candidate_edges_new.append( str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}") return candidate_edges_new def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges): # sample 1000 times count = 0 structure_list = defaultdict(int) #while (count < 1000): while (count < 50): indicator = np.ones([self.n, self.bin_dim]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [num_edges], p=p, replace=False) ] structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges, key=itemgetter(0)) ])] += 1 #structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def getatoms(self, node, label, edges): label_new = np.reshape(label, (node, 
self.d))
        label_new_exp = np.exp(label_new)
        s = label_new_exp.shape[0]
        label_new_sum = np.reshape(np.sum(label_new_exp, axis=1), (s, 1))
        prob_label = label_new_exp / label_new_sum
        pred_label = np.zeros(4)
        valency_arr = np.zeros(node)
        n_c = 0
        n_h = 0
        n_n = 0
        n_o = 0
        for x in range(1000):
            pred_label = np.zeros(4)
            valency_arr = np.zeros(node)
            for i in range(node):
                valency = np.random.choice(range(0, 4), [1], p=prob_label[i])
                if valency == 0:
                    n_h += 1
                if valency == 1:
                    n_o += 1
                if valency == 2:
                    n_n += 1
                if valency == 3:
                    n_c += 1
                pred_label[valency] += 1
                valency_arr[i] = valency + 1
            # stop once the total valency is enough to connect all the nodes
            if (pred_label[0] + pred_label[1] * 2 + pred_label[2] * 3 +
                    pred_label[3] * 4) >= 2 * (node - 1):
                break
        return (pred_label, valency_arr)

    def sample_graph(self, hparams, placeholders, adj, features, features1,
                     weights, weight_bins, edges, k=0, outdir=None):
        '''
        Args :
            num - int 10 number of edges to be sampled
            outdir - string output dir
        '''
        list_edges = []
        #for i in range(self.n):
        eps = np.random.randn(self.n, self.z_dim, 1)
        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight: weights[0]})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.features1: features1[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.edges: [edges]})
        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
        smiles = []
        trial = 0
        while trial < 1000:
            atom_list = [4 for x in range(self.n)]
            candidate_edges = self.get_masked_candidate_new(
                prob, w_edge, hparams.edges, atom_list)
            if len(candidate_edges) == 0:
                smiles.append('None')
                trial += 1
                continue
            G = nx.parse_edgelist(candidate_edges, nodetype=int)
            edges = G.edges(data=True)
            if not nx.is_connected(G):
                smiles.append('None')
            else:
                with open(hparams.sample_file + 'temp.txt' + str(trial),
                          'w') as f:
                    for (u, v, w) in edges:
                        #for (u, v, w) in candidate_edges:
                        u = int(u)
                        v = int(v)
                        #w = int(w)
                        w = w['weight']
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                if guess_correct_molecules(
                        hparams.sample_file + 'temp.txt' + str(trial),
                        hparams.sample_file + 'temp.txt', self.n, 1):
                    m1 = Chem.MolFromMol2File(hparams.sample_file + 'temp.txt')
                    s = 'None'
                    if m1 is not None:
                        s = Chem.MolToSmiles(m1)
                    smiles.append(s)
                else:
                    print("Reason: Wrong mol")
            trial += 1
        return smiles
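# ---------------------------------------------------------------------------
# Reference sketch (illustrative only): sample_graph() above keeps a candidate
# edge set only if the resulting graph is connected, and only then hands it to
# RDKit through a temp file. The connectivity gate on its own reduces to a few
# lines of networkx; edge weights play the role of bond orders here.
import networkx as nx


def is_connected_molecule_graph(weighted_edges, num_nodes):
    """Return True when the weighted edge list covers all nodes and forms a
    single connected component (the minimal validity gate used above)."""
    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    G.add_weighted_edges_from(weighted_edges)  # (u, v, w) triples
    return nx.is_connected(G)

# Example: a 4-node path with single bonds passes, two disjoint pairs do not.
# is_connected_molecule_graph([(0, 1, 1), (1, 2, 1), (2, 3, 1)], 4)  -> True
# is_connected_molecule_graph([(0, 1, 1), (2, 3, 1)], 4)             -> False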
class VAEGRL(VAEGConfig): def __init__(self, hparams, placeholders, num_nodes, num_features, edges, log_fact_k, hde, istest=False): self.features_dim = num_features self.input_dim = num_nodes self.dropout = placeholders['dropout'] self.k = hparams.random_walk self.lr = placeholders['lr'] self.decay = placeholders['decay'] self.n = num_nodes self.d = num_features self.z_dim = hparams.z_dim self.bin_dim = hparams.bin_dim self.edges = edges self.count = 0 self.mask_weight = hparams.mask_weight self.log_fact_k = log_fact_k self.hde = hde self.temperature = hparams.temperature def neg_loglikelihood(prob_dict, w_edge, edge_list): ''' negative loglikelihood of the edges ''' ll = 0 k = 0 with tf.variable_scope('NLL'): w_edge_new = tf.exp( tf.minimum(w_edge, tf.fill([self.n, self.n, self.bin_dim], 10.0))) weight_temp = tf.multiply(self.weight_bin, w_edge_new) len_logits = prob_dict.shape[0] print "Debug len_logits", len_logits, prob_dict.shape dec_mat = tf.exp( tf.minimum(prob_dict, tf.fill([len_logits, 1], 10.0))) dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:") posscoremat = dec_mat[:2 * len(self.edges[self.count])] print "Posscore softmax", posscoremat.shape negscore = tf.reduce_sum(dec_mat[2 * len(self.edges[self.count]):]) print "Negative softmax", negscore.shape negscore = tf.Print(negscore, [negscore], message="my negscore values:") negscoremat = tf.fill([2 * len(self.edges[self.count])], negscore) print "negscore", negscoremat.shape softmax_out = tf.truediv(posscoremat, negscore) print "Shape softmax", softmax_out.shape for i in range(len(edge_list)): (u, v, w) = edge_list[i] ll += tf.log(softmax_out[i] * w_edge[i][w - 1] + 1e-10) ll = tf.Print(ll, [ll], message="My loss") return (-ll) def get_trajectories(p_theta, w_theta, node_list, n_edges): indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) for (u, v, w) in self.edges[self.count]: edge_mask[u][v] = 0 edge_mask[v][u] = 0 degree[u] += 1 degree[v] += 1 if (node_list[u] - degree[u]) == 0: indicator[u][0] = 0 if (node_list[u] - degree[u]) <= 1: indicator[u][1] = 0 if (node_list[u] - degree[u]) <= 2: indicator[u][2] = 0 if (node_list[v] - degree[v]) == 0: indicator[v][0] = 0 if (node_list[v] - degree[v]) <= 1: indicator[v][1] = 0 if (node_list[v] - degree[v]) <= 2: indicator[v][2] = 0 trial = 0 candidate_edges = [] G = nx.Graph() while trial < 500: candidate_edges = get_weighted_edges(indicator, p_theta, edge_mask, w_theta, n_edges, node_list, degree) G = nx.Graph() G.add_weighted_edges_from(candidate_edges) if nx.is_connected(G): break trial += 1 return candidate_edges, G self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj') self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features') self.weight = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name="weight") self.weight_bin = tf.placeholder( dtype=tf.float32, shape=[self.n, self.n, hparams.bin_dim], name="weight_bin") self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input') self.index = tf.placeholder(dtype=tf.float32, shape=[self.n * (self.n - 1) / 2], name='index') self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps') self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim) self.c_x, enc_mu, enc_sigma, debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label = self.cell.call( self.input_data, self.n, self.d, self.k, self.eps, 
hparams.sample) self.rlcell = VAEGRLCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim, enc_mu, enc_sigma, self.edges, self.index) #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index rl_dec_out, rl_w_edge = self.rlcell.call(self.input_data, self.n, self.d, self.k, self.eps, hparams.sample) # We are considering 10 trajectories only self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr) self.grad = [] for j in range(10): trajectory, G = get_trajectories(dec_out, w_edge, label, len(self.edges[self.count])) ll_rl = neg_loglikelihood(rl_dec_out, rl_w_edge, trajectory) ll = neg_loglikelihood(debug_sigma, w_edge, trajectory) importance_weight = tf.exp( 1 / self.temperature * compute_cost(G)) * (ll / ll_rl) self.cost = ll_rl * importance_weight grad = self.train_op.compute_gradients(ll_rl) for i in range(len(grad)): g = grad[i][1] * importance_weight if len(self.grad) > i: self.grad[i] = (grad[i][0], self.grad[i][1] + g / 10) else: self.grad[i] = grad[i] self.prob = dec_out # print('Debug', dec_out.shape) self.z_encoded = z_encoded self.enc_mu = enc_mu self.enc_sigma = enc_sigma self.w_edge = w_edge self.label = label print_vars("trainable_variables") self.apply_transform_op = self.train_op.apply_gradients(self.grad) self.sess = tf.Session() def initialize(self): logger.info("Initialization of parameters") # self.sess.run(tf.initialize_all_variables()) self.sess.run(tf.global_variables_initializer()) def restore(self, savedir): saver = tf.train.Saver(tf.global_variables()) ckpt = tf.train.get_checkpoint_state(savedir) if ckpt == None or ckpt.model_checkpoint_path == None: self.initialize() else: print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def copy_weight(self, copydir): self.initialize() var_old = [v for v in tf.global_variables() if "RL" not in v.name][0] saver = tf.train.Saver(var_old) ckpt = tf.train.get_checkpoint_state(copydir) print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def train(self, placeholders, hparams, adj, weight, weight_bin, features): savedir = hparams.out_dir lr = hparams.learning_rate dr = hparams.dropout_rate decay = hparams.decay_rate f1 = open(hparams.out_dir + '/iteration.txt', 'r') iteration = int(f1.read().strip()) # training num_epochs = hparams.num_epochs create_dir(savedir) ckpt = tf.train.get_checkpoint_state(savedir) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) print("Load the model from %s" % ckpt.model_checkpoint_path) for epoch in range(num_epochs): start = time.time() for i in range(len(adj)): self.count = i if len(self.edges[self.count]) == 0: continue # Learning rate decay # self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch))) feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) feed_dict.update({self.adj: adj[i]}) # print "Debug", features[i].shape np.random eps = np.random.randn(self.n, self.z_dim, 1) # tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) feed_dict.update({self.features: features[i]}) feed_dict.update({self.weight_bin: weight_bin[i]}) feed_dict.update({self.weight: weight[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) grad_vals = self.sess.run([g[0] for g in self.grad], feed_dict=feed_dict) for j in xrange(len(self.grad_placeholder)): feed_dict.update( 
{self.grad_placeholder[j][0]: grad_vals[j]}) input_, train_loss, _, probdict, cx, w_edge = self.sess.run( [ self.input_data, self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge ], feed_dict=feed_dict) iteration += 1 if iteration % hparams.log_every == 0 and iteration > 0: print(train_loss) print("{}/{}(epoch {}), train_loss = {:.6f}".format( iteration, num_epochs, epoch + 1, train_loss)) checkpoint_path = os.path.join(savedir, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) end = time.time() print("Time taken for a batch: ", end - start) f1 = open(hparams.out_dir + '/iteration.txt', 'w') f1.write(str(iteration)) def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.weight_bin: weight_bin}) feed_dict.update({self.weight: weight}) prob, ll, kl, w_edge, embedding = self.sess.run( [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded], feed_dict=feed_dict) return embedding def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge, atom_count, num_edges, hde): # node_list = defaultdict() rest = range(self.n) nodes = [] hn = [] on = [] nn = [] cn = [] for i in range(self.n): if atom_count[i] == 1: hn.append(i) if atom_count[i] == 2: on.append(i) if atom_count[i] == 3 or atom_count[i] == 5: nn.append(i) if atom_count[i] == 4: cn.append(i) nodes.extend(hn) nodes.extend(cn) nodes.extend(on) nodes.extend(nn) node_list = atom_count print("Debug nodelist", node_list) indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) for node in hn: indicator[node][1] = 0 indicator[node][2] = 0 for node in on: indicator[node][2] = 0 # two hydrogen atom cannot have an edge between them for n1 in hn: for n2 in hn: edge_mask[n1][n2] = 0 candidate_edges = [] # first generate edges joining with Hydrogen atoms sequentially print("Debug atom ratio", hn, on, nn, cn) print("Debug_degree", node_list) print("Debug nodes", nodes) index = 0 i = 0 hydro_sat = np.zeros(self.n) # first handle hydro try: for node in nodes: deg_req = node_list[node] d = degree[node] list_edges = get_candidate_neighbor_edges(node, self.n) # for (u,v,w) in list_edges: # print("list edges", u, node_list[u], degree[u], indicator[u], v, node_list[v], degree[v], indicator[v]) # print("Debug list edges", node, list_edges) # print("Edge mask", edge_mask[node]) if node in hn: for i1 in range(self.n): if hydro_sat[i1] == node_list[i1] - 1: edge_mask[i1][node] = 0 edge_mask[node][i1] = 0 while d < deg_req: p = normalise_h1(prob, w_edge, self.bin_dim, indicator, edge_mask, node) candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) (u, v, w) = candidate_edges[i] degree[u] += w degree[v] += w d += w if u in hn: hydro_sat[v] += 1 if v in hn: hydro_sat[u] += 1 edge_mask[u][v] = 0 edge_mask[v][u] = 0 if (node_list[u] - degree[u]) == 0: indicator[u][0] = 0 if (node_list[u] - degree[u]) <= 1: indicator[u][1] = 0 if (node_list[u] - degree[u]) <= 2: indicator[u][2] = 0 if (node_list[v] - degree[v]) == 0: indicator[v][0] = 0 if (node_list[v] - 
degree[v]) <= 1: indicator[v][1] = 0 if (node_list[v] - degree[v]) <= 2: indicator[v][2] = 0 # check_diconnected i += 1 print("Debug candidate_edges", candidate_edges[i - 1]) # print("change state", el, degree[el], node_list[el], indicator[el]) # ''' # list_edges = get_candidate_edges(self.n) # if abs(len(candidate_edges) - num_edges) > 1 : # return '' # ''' candidate_rest = '' candidate_edges_new = '' for (u, v, w) in candidate_edges: if u < v: candidate_edges_new += ' ' + str(u) + '-' + str( v) + '-' + str(w) else: candidate_edges_new += ' ' + str(v) + '-' + str( u) + '-' + str(w) print("Candidate_edges_new", candidate_edges_new) return candidate_edges_new + ' ' + candidate_rest except: return '' def get_masked_candidate(self, list_edges, prob, w_edge, num_edges, hde, indicator=[], degree=[]): list_edges_original = copy.copy(list_edges) n = len(prob[0]) # sample 1000 times count = 0 structure_list = defaultdict(int) # while(count < 50): while (count < 1): applyrules = False list_edges = copy.copy(list_edges_original) if len(indicator) == 0: print("Debug indi new assign") indicator = np.ones([self.n, self.bin_dim]) reach = np.ones([n, n]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ] # if degree == None: if len(degree) == 0: print("Debug degree new assign") degree = np.zeros([self.n]) G = None saturation = 0 for i1 in range(num_edges - 1): (u, v, w) = candidate_edges[i1] for j in range(n): if reach[u][j] == 0: reach[v][j] = 0 reach[j][v] = 0 if reach[v][j] == 0: reach[u][j] = 0 reach[j][u] = 0 reach[u][v] = 0 reach[v][u] = 0 degree[u] += w degree[v] += w if degree[u] >= 4: indicator[u][0] = 0 if degree[u] >= 3: indicator[u][1] = 0 if degree[u] >= 2: indicator[u][2] = 0 if degree[v] >= 4: indicator[v][0] = 0 if degree[v] >= 3: indicator[v][1] = 0 if degree[v] >= 2: indicator[v][2] = 0 # there will ne bo bridge p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, candidate_edges, list_edges, indicator) try: candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) except: # candidate_edges = [] continue structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges) ])] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges): # sample 1000 times count = 0 structure_list = defaultdict(int) # while (count < 1000): while (count < 50): indicator = np.ones([self.n, self.bin_dim]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [num_edges], p=p, replace=False) ] structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges, key=itemgetter(0)) ])] += 1 # structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def sample_graph_posterior_new(self, hparams, placeholders, adj, features, weight_bins, weights, embeddings, k=0): list_edges = get_candidate_edges(self.n) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, 
placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: features}) feed_dict.update({self.weight_bin: weight_bins}) feed_dict.update({self.weight: weights}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: embeddings}) hparams.sample = True prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) atom_list = [ 4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] # self.getatoms(atom_list) if not hparams.mask_weight: candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: i = 0 hde = 1 # while (i < 1000): candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, atom_list, hparams.edges, hde) # if len(candidate_edges) > 0: # break # i += 1 # candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde) with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): # with open(hparams.sample_file + 'temp.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def getatoms(self, node, label): label_new = np.reshape(label, (node, self.d)) print("Debug label original shape:", label_new) label_new = np.exp(label_new) s = label_new.shape[0] print("Debug label shape:", label_new.shape, s) label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1)) print("Debug label sum:", label_new_sum.shape) prob_label = label_new / label_new_sum pred_label = np.zeros(4) valency_arr = np.zeros(node) print("Debug prob label shape:", prob_label.shape, prob_label) # print("Debug label", label_new) for i in range(node): valency = np.random.choice(range(4), [1], p=prob_label[i]) pred_label[valency] += 1 valency_arr[i] = valency + 1 print("Debug pred_label", pred_label, valency_arr) return (pred_label, valency_arr) def sample_graph_neighborhood(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, ratio, hde, num=10, outdir=None): list_edges = get_candidate_edges(self.n) # eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim) eps = np.random.randn(self.n, self.z_dim, 1) train_mu = [] train_sigma = [] hparams.sample = False # approach 1 for i in range(len(adj)): feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[i]}) feed_dict.update({self.features: features[i]}) feed_dict.update({self.weight_bin: weight_bins[i]}) feed_dict.update({self.weight: weights[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) hparams.sample = False prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge ], feed_dict=feed_dict) with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt', 'a') as f: for z_i in enc_mu: f.write('[' + 
','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt', 'a') as f: for x in range(self.n): for z_i in enc_sigma[x]: f.write('[' + ','.join([str(el) for el in z_i]) + ']\n') f.write("\n") hparams.sample = True # for j in range(self.n): # for j in [1, 5, 15]: for j in [1]: z_encoded_neighborhood = copy.copy(z_encoded) feed_dict.update({self.eps: z_encoded_neighborhood}) prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([str(el) for el in prob[x]]) + ']\n') f.write("\n") with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([ str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2]) for el in w_edge[x] ]) + ']\n') f.write("\n") if not hparams.mask_weight: print("Non mask") candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: print("Mask") (atom_list, valency_arr) = self.getatoms(hparams.nodes, labels) candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, valency_arr, hparams.edges, hde) for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_1_node_" + str(j) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def sample_graph(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, hde, num=10, outdir=None): ''' Args : num - int 10 number of edges to be sampled outdir - string output dir ''' list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): list_edges.append((i, j, 1)) list_edges.append((i, j, 2)) list_edges.append((i, j, 3)) # list_edges.append((-1, -1, 0)) list_weight = [1, 2, 3] hparams.sample = True eps = np.random.randn(self.n, self.z_dim, 1) with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[0]}) feed_dict.update({self.features: features[0]}) feed_dict.update({self.weight_bin: weight_bins[0]}) feed_dict.update({self.weight: weights[0]}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) indicator = np.ones([self.n, 3]) p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) if not hparams.mask_weight: trial = 0 while trial < 5000: 
candidate_edges = [ list_edges[i] for i in np.random.choice(range( len(list_edges)), [hparams.edges], p=p, replace=False) ] with open(hparams.sample_file + 'test.txt', 'w') as f: for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') G = nx.read_edgelist(f, nodetype=int) if nx.is_connected(G): for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1 else: trial = 0 while trial < 5000: candidate_edges = self.get_masked_candidate( list_edges, prob, w_edge, hparams.edges, hde) # print("Debug candidate", candidate_edges) if len(candidate_edges) > 0: with open(hparams.sample_file + 'test.txt', 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') # try: G = nx.read_edgelist(f, nodetype=int) # except: # continue if nx.is_connected(G): for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1
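# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original code): the RL update assembled
# above (the 10-trajectory loop that runs before train()) scales the gradient
# of the RL decoder's likelihood for each sampled trajectory by an importance
# weight exp(compute_cost(G_j) / temperature) * (ll_j / ll_rl_j) and averages
# the weighted gradients before apply_gradients(). The helper below
# illustrates that accumulation with plain NumPy arrays; the argument names
# (grads_per_traj, costs, ll, ll_rl) are hypothetical stand-ins for the
# TensorFlow tensors used in the class.
import numpy as np

def accumulate_importance_weighted_grads(grads_per_traj, costs, ll, ll_rl,
                                         temperature):
    """Average per-trajectory gradients, each scaled by its importance weight."""
    n_traj = len(grads_per_traj)
    acc = [np.zeros_like(g) for g in grads_per_traj[0]]
    for j in range(n_traj):
        w = np.exp(costs[j] / temperature) * (ll[j] / ll_rl[j])
        for k, g in enumerate(grads_per_traj[j]):
            acc[k] += w * g / n_traj
    return acc
# ---------------------------------------------------------------------------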
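# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original code): the candidate-edge samplers
# above (get_masked_candidate and get_masked_candidate_with_atom_ratio_new)
# maintain an indicator[node][bin] mask while a molecule is grown edge by
# edge: bins 0/1/2 stand for single/double/triple bonds, and a bin is switched
# off once the node's remaining valence can no longer accommodate that bond
# order. A standalone illustration of that bookkeeping, mirroring the
# "node_list[u] - degree[u]" tests (valence and degree are indexed by node):
def update_valency_indicator(indicator, valence, degree, node):
    """Disable bond orders that would exceed the node's remaining valence."""
    remaining = valence[node] - degree[node]
    if remaining <= 0:
        indicator[node][0] = 0   # no more single bonds
    if remaining <= 1:
        indicator[node][1] = 0   # no more double bonds
    if remaining <= 2:
        indicator[node][2] = 0   # no more triple bonds
    return indicator
# ---------------------------------------------------------------------------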
class VAEGRL(VAEGConfig): def __init__(self, hparams, placeholders, num_nodes, num_features, log_fact_k, input_size, istest=False): self.features_dim = num_features self.input_dim = num_nodes self.dropout = placeholders['dropout'] self.k = hparams.random_walk self.lr = placeholders['lr'] self.decay = placeholders['decay'] self.n = num_nodes self.d = num_features self.z_dim = hparams.z_dim self.bin_dim = hparams.bin_dim self.mask_weight = hparams.mask_weight self.log_fact_k = log_fact_k self.neg_sample_size = hparams.neg_sample_size self.input_size = input_size self.combination = hparams.node_sample * hparams.bfs_sample self.temperature = hparams.temperature self.E = 20 self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj') self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features') self.features1 = tf.placeholder(dtype=tf.int32, shape=[self.n], name='features1') self.weight = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name="weight") self.weight_bin1 = tf.placeholder( dtype=tf.float32, shape=[self.n, self.n, hparams.bin_dim], name="weight_bin1") self.weight_bin = tf.placeholder( dtype=tf.float32, shape=[self.combination, None, hparams.bin_dim], name="weight_bin") self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input') self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps') #self.neg_index = tf.placeholder(dtype=tf.int32,shape=[None], name='neg_index') self.edges = tf.placeholder(dtype=tf.int32, shape=[self.combination, None, 2], name='edges') self.all_edges = tf.placeholder(dtype=tf.int32, shape=[self.combination, None, 2], name='all_edges') self.n_fill_edges = tf.placeholder(dtype=tf.int32) #self.known_edges = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='known_edges') #node_count = [len(edge_list) for edge_list in self.edges] print("Debug Input size", self.input_size) node_count_tf = tf.fill([1, self.input_size], tf.cast(self.n, tf.float32)) node_count_tf = tf.Print(node_count_tf, [node_count_tf], message="My node_count_tf") print("Debug size node_count", node_count_tf.get_shape()) #tf.convert_to_tensor(node_count, dtype=tf.int32) self.cell = VAEGCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim, tf.to_float(node_count_tf), self.all_edges) self.c_x, enc_mu, enc_sigma, self.debug_sigma, dec_out, prior_mu, prior_sigma, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call( self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample) self.prob = dec_out #print('Debug', dec_out.shape) self.z_encoded = z_encoded self.enc_mu = enc_mu self.enc_sigma = enc_sigma self.w_edge = w_edge self.label = label self.lambda_n = lambda_n self.lambda_e = lambda_e #adj, weight, features, z_dim, bin_dim, node_count, edges, enc_mu, enc_sigma self.rlcell = VAEGRLCell(self.adj, self.weight, self.features, self.z_dim, self.bin_dim, self.all_edges, enc_mu, enc_sigma) #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index self.rl_dec_out, self.rl_w_edge = self.rlcell.call( self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample) self.sess = tf.Session() # We are considering 10 trajectories only def likelihood(self, prob_dict, w_edge, edge_list): ''' negative loglikelihood of the edges ''' ll = 0 k = 0 with tf.variable_scope('NLL'): dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n]) w_edge_exp = tf.exp( tf.minimum(tf.reshape(w_edge, [self.n, self.n, self.bin_dim]), 
tf.fill([self.n, self.n, self.bin_dim], 10.0))) w_edge_pos = tf.multiply(self.weight_bin1, w_edge_exp) w_edge_total = tf.reduce_sum(w_edge_exp, axis=1) w_edge_score = tf.divide(w_edge_pos, w_edge_total) dec_mat = tf.exp( tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], 10.0))) dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:") print "Debug dec_mat", dec_mat.shape, dec_mat.dtype, dec_mat comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), self.adj) comp = tf.Print(comp, [comp], message="my comp values:") temp = tf.reduce_sum(tf.multiply(comp, dec_mat)) negscore = tf.fill([self.n, self.n], temp + 1e-9) negscore = tf.Print(negscore, [negscore], message="my negscore values:") posscore = tf.multiply(self.adj, dec_mat) posscore = tf.Print(posscore, [posscore], message="my posscore values:") #dec_out = tf.multiply(self.adj, dec_mat) softmax_out = tf.divide(posscore, tf.add(posscore, negscore)) #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9))),1) ll = 1.0 for i in range(len(edge_list)): (u, v, w) = edge_list[i] ll += softmax_out[u][v] * w_edge_score[u][v][w - 1] + 1e-10 ll = tf.Print(ll, [ll], message="My loss") return (ll) def get_trajectories(self, p_theta, w_theta, edges, weight, n_fill_edges, atom_list): indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) #print("Debug known edges", tf.shape(self.known_edges),self.known_edges.get_shape()) #N = tf.stack([tf.shape(self.known_edges)[0]])[0] #known_edges = tf.unstack(self.known_edges) # For the time being make the number of known edges a constant E #''' known_edges = [] for k in range(self.E): (u, v) = edges[k] edge_mask[u][v] = 0 edge_mask[v][u] = 0 degree[u] += weight[u][v] degree[v] += weight[v][u] known_edges.append((u, v, weight[u][v])) if (4 - degree[u]) == 0: indicator[u][0] = 0 if (4 - degree[u]) <= 1: indicator[u][1] = 0 if (4 - degree[u]) <= 2: indicator[u][2] = 0 if (4 - degree[v]) == 0: indicator[v][0] = 0 if (4 - degree[v]) <= 1: indicator[v][1] = 0 if (4 - degree[v]) <= 2: indicator[v][2] = 0 #''' trial = 0 candidate_edges = [] G = nx.Graph() while trial < 5: #candidate_edges = #candidate_edges = #self.get_masked_candidate_with_atom_ratio_new(p_theta, w_theta, node_list, self.n_fill_edges, 1) #get_weighted_edges(indicator, p_theta, edge_mask, w_theta, self.n_fill_edges, node_list, degree) candidate_edges = get_masked_candidate_new(p_theta, w_theta, n_fill_edges, atom_list, indicator, edge_mask, degree) candidate_edges.extend(known_edges) G = nx.Graph() G.add_nodes_from(range(self.n)) G.add_weighted_edges_from(candidate_edges) if nx.is_connected(G): print("Debug trial", trial) break trial += 1 print("Trial", trial) return candidate_edges, G def compute_loss(self, prob, w_edge, rl_dec_out, rl_w_edge, edges, weight, n_fill_edges, atom_list): self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr) self.grad = [] tvars = tf.trainable_variables() g_vars = [var for var in tvars if 'RL' in var.name] for j in range(1): trajectory, G = self.get_trajectories(rl_dec_out[0], rl_w_edge[0], edges, weight, n_fill_edges, atom_list) print("Debug trajectory", trajectory) #trajectory, G = get_trajectories(rl_dec_out, rl_w_edge, label, self.edges[0]) ll_rl = self.likelihood(self.rl_dec_out[0], self.rl_w_edge[0], trajectory) ll_rl = tf.Print(ll_rl, [ll_rl], message="my ll_rl values:") ll = 1 #self.likelihood(self.prob[0], self.w_edge[0], trajectory) importance_weight = tf.exp( 1 / self.temperature * compute_cost(G)) 
* (ll / ll_rl) importance_weight = tf.Print( importance_weight, [importance_weight], message="my importance_weight values:") print("Debug importance weight", importance_weight) self.cost = ll_rl * importance_weight ''' tensor = tf.constant([1], dtype=tf.float32) grad = self.train_op.compute_gradients(tf.log(ll_rl)) #grad = self.train_op.compute_gradients(tensor, var_list=g_vars) #grad = tf.Print(grad, [grad], message="my grad values:") print("Debug grad", len(grad), grad, ll_rl) for i in range(len(grad)): g = grad[i][0] * importance_weight if len(self.grad) > i: self.grad[i] = (self.grad[i][0] + g / 10, grad[i][1]) else: self.grad.append(grad[i]) ''' ''' print_vars("trainable_variables") print("Debug self grads", self.grad) self.apply_transform_op = self.train_op.apply_gradients(self.grad) ''' def initialize(self): logger.info("Initialization of parameters") # self.sess.run(tf.initialize_all_variables()) self.sess.run(tf.global_variables_initializer()) def restore(self, savedir): saver = tf.train.Saver(tf.global_variables()) ckpt = tf.train.get_checkpoint_state(savedir) if ckpt == None or ckpt.model_checkpoint_path == None: self.initialize() else: print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def copy_weight(self, copydir): self.initialize() print("Debug all", tf.global_variables()) var_old = [v for v in tf.global_variables() if "RL" not in v.name] print("Debug var_old", var_old) saver = tf.train.Saver(var_old) ckpt = tf.train.get_checkpoint_state(copydir) print_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path, tensor_name='', all_tensors='') print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def train(self, placeholders, hparams, adj, weight, weight_bin, weight_bin1, features, edges, all_edges, features1, atom_list): savedir = hparams.out_dir lr = hparams.learning_rate dr = hparams.dropout_rate decay = hparams.decay_rate f1 = open(hparams.out_dir + '/iteration.txt', 'r') iteration = int(f1.read().strip()) # training num_epochs = hparams.num_epochs create_dir(savedir) ckpt = tf.train.get_checkpoint_state(savedir) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) print("Load the model from %s" % ckpt.model_checkpoint_path) start_before_epoch = time.time() for epoch in range(num_epochs): start = time.time() for i in range(len(adj)): #self.count = i if len(edges[i]) == 0: continue # Learning rate decay #self.sess.run(tf.assign(self.lr, self.lr * (self.decay ** epoch))) feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) feed_dict.update({self.adj: adj[i]}) eps = np.random.randn(self.n, self.z_dim, 1) #tf.random_normal((self.n, 5, 1), 0.0, 1.0, dtype=tf.float32) feed_dict.update({self.features: features[i]}) feed_dict.update({self.features1: features1[i]}) feed_dict.update({self.weight_bin: weight_bin[i]}) feed_dict.update({self.weight_bin1: weight_bin1[i]}) feed_dict.update({self.weight: weight[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.n_fill_edges: len(edges[i][0]) - 20}) #neg_indices = np.random.choice(range(len(neg_edges[i])), hparams.neg_sample_size, replace=False) #combined_edges = [] #neg_edges_to_be_extended = [neg_edges[i][index] for index in neg_indices] #copy_edge = copy.deepcopy(edges[i]) #for j in range(len(edges[i])): # #print("Debug 
edge_list", edge) # copy_edge[j].extend(neg_edges_to_be_extended) #print("Debug edge_list_combined", combined_edges) #print("Debug feed edges", i, len(edges[i][0]), len(copy_edge[0])) feed_dict.update({self.edges: edges[i]}) feed_dict.update({self.all_edges: [all_edges[i]]}) #feed_dict.update({self.known_edges:copy_edge}) #input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n= self.sess.run([self.input_data ,self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge, self.lambda_e, self.lambda_n], feed_dict=feed_dict) prob, w_edge, rl_prob, rl_w_edge, lambda_e, lambda_n = self.sess.run( [ self.prob, self.w_edge, self.rl_dec_out, self.rl_w_edge, self.lambda_e, self.lambda_n ], feed_dict=feed_dict) print("Debug shapes", rl_prob[0].shape, rl_w_edge[0].shape) self.compute_loss(prob, w_edge, rl_prob, rl_w_edge, edges[i][0], weight[i], len(edges[i][0]) - 20, atom_list) #train_loss, _ = self.sess.run([self.cost, self.apply_transform_op]) train_loss = self.sess.run([self.cost], feed_dict=feed_dict) #input_, train_loss, _, probdict, cx, w_edge, lambda_e, lambda_n= self.sess.run([self.input_data ,self.cost, self.apply_transform_op, self.prob, self.c_x, self.w_edge, self.lambda_e, self.lambda_n], feed_dict=feed_dict) iteration += 1 #print("Lambda_e, lambda_n", lambda_e, lambda_n, i) if iteration % hparams.log_every == 0 and iteration > 0: #print(train_loss) print("{}/{}(epoch {}), train_loss = {:.6f}".format( iteration, num_epochs, epoch + 1, train_loss)) checkpoint_path = os.path.join(savedir, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) end = time.time() print("Time taken for a batch: ", end - start) end_after_epoch = time.time() print("Time taken to completed all epochs", -start_before_epoch + end_after_epoch) f1 = open(hparams.out_dir + '/iteration.txt', 'w') f1.write(str(iteration)) def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight): eps = np.random.randn(self.n, self.z_dim, 1) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: deg}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.weight_bin: weight_bin}) feed_dict.update({self.weight: weight}) prob, ll, kl, w_edge, embedding = self.sess.run( [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded], feed_dict=feed_dict) return embedding def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge, atom_count, num_edges, hde): rest = range(self.n) nodes = [] hn = [] on = [] nn = [] cn = [] for i in range(self.n): if atom_count[i] == 1: hn.append(i) if atom_count[i] == 2: on.append(i) if atom_count[i] == 3 or atom_count[i] == 5: nn.append(i) if atom_count[i] == 4: cn.append(i) nodes.extend(hn) nodes.extend(cn) nodes.extend(on) nodes.extend(nn) node_list = atom_count print("Debug nodelist", node_list) indicator = np.ones([self.n, self.bin_dim]) edge_mask = np.ones([self.n, self.n]) degree = np.zeros(self.n) for node in hn: indicator[node][1] = 0 indicator[node][2] = 0 for node in on: indicator[node][2] = 0 # two hydrogen atom cannot have an edge between them for n1 in hn: for n2 in hn: edge_mask[n1][n2] = 0 candidate_edges = [] # first generate edges joining with Hydrogen atoms sequentially index = 0 i = 0 hydro_sat = np.zeros(self.n) #first handle hydro try: for node in nodes: 
deg_req = node_list[node] d = degree[node] list_edges = get_candidate_neighbor_edges(node, self.n) if node in hn: for i1 in range(self.n): if hydro_sat[i1] == node_list[i1] - 1: edge_mask[i1][node] = 0 edge_mask[node][i1] = 0 while d < deg_req: p = normalise_h1(prob, w_edge, self.bin_dim, indicator, edge_mask, node) candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) (u, v, w) = candidate_edges[i] degree[u] += w degree[v] += w d += w if u in hn: hydro_sat[v] += 1 if v in hn: hydro_sat[u] += 1 edge_mask[u][v] = 0 edge_mask[v][u] = 0 if (node_list[u] - degree[u]) == 0: indicator[u][0] = 0 if (node_list[u] - degree[u]) <= 1: indicator[u][1] = 0 if (node_list[u] - degree[u]) <= 2: indicator[u][2] = 0 if (node_list[v] - degree[v]) == 0: indicator[v][0] = 0 if (node_list[v] - degree[v]) <= 1: indicator[v][1] = 0 if (node_list[v] - degree[v]) <= 2: indicator[v][2] = 0 i += 1 print("Debug candidate_edges", candidate_edges[i - 1]) # print("change state", el, degree[el], node_list[el], indicator[el]) #''' except: if len(candidate_edges) < 1: candidate_edges = [] candidate_edges_new = [] for (u, v, w) in candidate_edges: if u < v: candidate_edges_new.append( str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}") else: candidate_edges_new.append( str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}") print("Candidate_edges_new", candidate_edges_new) return candidate_edges_new def get_masked_candidate(self, list_edges, prob, w_edge, num_edges, hde, indicator=[], degree=[]): list_edges_original = copy.copy(list_edges) n = len(prob[0]) # sample 1000 times count = 0 structure_list = defaultdict(int) # while(count < 50): while (count < 1): applyrules = False list_edges = copy.copy(list_edges_original) if len(indicator) == 0: print("Debug indi new assign") indicator = np.ones([self.n, self.bin_dim]) reach = np.ones([n, n]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ] # if degree == None: if len(degree) == 0: print("Debug degree new assign") degree = np.zeros([self.n]) G = None saturation = 0 for i1 in range(num_edges - 1): (u, v, w) = candidate_edges[i1] for j in range(n): if reach[u][j] == 0: reach[v][j] = 0 reach[j][v] = 0 if reach[v][j] == 0: reach[u][j] = 0 reach[j][u] = 0 reach[u][v] = 0 reach[v][u] = 0 degree[u] += w degree[v] += w if degree[u] >= 4: indicator[u][0] = 0 if degree[u] >= 3: indicator[u][1] = 0 if degree[u] >= 2: indicator[u][2] = 0 if degree[v] >= 4: indicator[v][0] = 0 if degree[v] >= 3: indicator[v][1] = 0 if degree[v] >= 2: indicator[v][2] = 0 # there will ne bo bridge p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, candidate_edges, list_edges, indicator) try: candidate_edges.extend([ list_edges[k] for k in np.random.choice( range(len(list_edges)), [1], p=p, replace=False) ]) except: # candidate_edges = [] continue structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges) ])] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges): # sample 1000 times count = 0 structure_list = defaultdict(int) # while (count < 1000): while (count < 50): indicator = np.ones([self.n, self.bin_dim]) p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], 
list_edges, indicator) candidate_edges = [ list_edges[k] for k in np.random.choice( range(len(list_edges)), [num_edges], p=p, replace=False) ] structure_list[' '.join([ str(u) + '-' + str(v) + '-' + str(w) for (u, v, w) in sorted(candidate_edges, key=itemgetter(0)) ])] += 1 # structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1 count += 1 # return the element which has been sampled maximum time return max(structure_list.iteritems(), key=itemgetter(1))[0] def sample_graph_posterior_new(self, hparams, placeholders, adj, features, weight_bins, weights, embeddings, k=0): list_edges = get_candidate_edges(self.n) feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj}) feed_dict.update({self.features: features}) feed_dict.update({self.weight_bin: weight_bins}) feed_dict.update({self.weight: weights}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: embeddings}) hparams.sample = True prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) atom_list = [ 4, 4, 2, 4, 4, 3, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] # self.getatoms(atom_list) if not hparams.mask_weight: candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: i = 0 hde = 1 # while (i < 1000): candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, atom_list, hparams.edges, hde) # if len(candidate_edges) > 0: # break # i += 1 # candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge, hparams.edges, hde) with open(hparams.sample_file + 'temp.txt' + str(k), 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): # with open(hparams.sample_file + 'temp.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def getatoms(self, node, label): label_new = np.reshape(label, (node, self.d)) print("Debug label original shape:", label_new) label_new = np.exp(label_new) s = label_new.shape[0] print("Debug label shape:", label_new.shape, s) label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1)) print("Debug label sum:", label_new_sum.shape) prob_label = label_new / label_new_sum pred_label = np.zeros(4) valency_arr = np.zeros(node) print("Debug prob label shape:", prob_label.shape, prob_label) # print("Debug label", label_new) for i in range(node): valency = np.random.choice(range(4), [1], p=prob_label[i]) pred_label[valency] += 1 valency_arr[i] = valency + 1 print("Debug pred_label", pred_label, valency_arr) return (pred_label, valency_arr) def sample_graph_neighborhood(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, ratio, hde, num=10, outdir=None): list_edges = get_candidate_edges(self.n) # eps = load_embeddings(hparams.z_dir+'encoded_input0'+'.txt', hparams.z_dim) eps = np.random.randn(self.n, self.z_dim, 1) train_mu = [] train_sigma = [] hparams.sample = False # approach 1 for i in range(len(adj)): feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: 
adj[i]}) feed_dict.update({self.features: features[i]}) feed_dict.update({self.weight_bin: weight_bins[i]}) feed_dict.update({self.weight: weights[i]}) feed_dict.update( {self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) hparams.sample = False prob, ll, z_encoded, enc_mu, enc_sigma, elbo, w_edge = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge ], feed_dict=feed_dict) with open(hparams.z_dir + 'encoded_input' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_mu' + str(i) + '.txt', 'a') as f: for z_i in enc_mu: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") with open(hparams.z_dir + 'encoded_sigma' + str(i) + '.txt', 'a') as f: for x in range(self.n): for z_i in enc_sigma[x]: f.write('[' + ','.join([str(el) for el in z_i]) + ']\n') f.write("\n") hparams.sample = True # for j in range(self.n): # for j in [1, 5, 15]: for j in [1]: z_encoded_neighborhood = copy.copy(z_encoded) feed_dict.update({self.eps: z_encoded_neighborhood}) prob, ll, z_encoded_neighborhood, enc_mu, enc_sigma, elbo, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) # prob = np.triu(np.reshape(prob,(self.n,self.n)),1) with open(hparams.z_dir + 'sampled_z' + str(i) + '.txt', 'a') as f: for z_i in z_encoded: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') f.write("\n") prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) with open(hparams.z_dir + 'prob_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([str(el) for el in prob[x]]) + ']\n') f.write("\n") with open(hparams.z_dir + 'weight_mat' + str(i) + '.txt', 'a') as f: for x in range(self.n): f.write('[' + ','.join([ str(el[0]) + ' ' + str(el[1]) + ' ' + str(el[2]) for el in w_edge[x] ]) + ']\n') f.write("\n") if not hparams.mask_weight: print("Non mask") candidate_edges = self.get_unmasked_candidate( list_edges, prob, w_edge, hparams.edges) else: print("Mask") (atom_list, valency_arr) = self.getatoms(hparams.nodes, labels) candidate_edges = self.get_masked_candidate_with_atom_ratio_new( prob, w_edge, valency_arr, hparams.edges, hde) for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_1_node_" + str(j) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') def sample_graph(self, hparams, placeholders, adj, features, weights, weight_bins, s_num, node, hde, num=10, outdir=None): ''' Args : num - int 10 number of edges to be sampled outdir - string output dir ''' list_edges = [] for i in range(self.n): for j in range(i + 1, self.n): list_edges.append((i, j, 1)) list_edges.append((i, j, 2)) list_edges.append((i, j, 3)) # list_edges.append((-1, -1, 0)) list_weight = [1, 2, 3] hparams.sample = True eps = np.random.randn(self.n, self.z_dim, 1) with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f: for z_i in eps: f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n') feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k, self.n, self.d, hparams.decay_rate, placeholders) feed_dict.update({self.adj: adj[0]}) feed_dict.update({self.features: 
features[0]}) feed_dict.update({self.weight_bin: weight_bins[0]}) feed_dict.update({self.weight: weights[0]}) feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])}) feed_dict.update({self.eps: eps}) prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run( [ self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu, self.enc_sigma, self.cost, self.w_edge, self.label ], feed_dict=feed_dict) prob = np.reshape(prob, (self.n, self.n)) w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim)) indicator = np.ones([self.n, 3]) p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator) if not hparams.mask_weight: trial = 0 while trial < 5000: candidate_edges = [ list_edges[i] for i in np.random.choice(range( len(list_edges)), [hparams.edges], p=p, replace=False) ] with open(hparams.sample_file + 'test.txt', 'w') as f: for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') G = nx.read_edgelist(f, nodetype=int) if nx.is_connected(G): for (u, v, w) in candidate_edges: if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1 else: trial = 0 while trial < 5000: candidate_edges = self.get_masked_candidate( list_edges, prob, w_edge, hparams.edges, hde) # print("Debug candidate", candidate_edges) if len(candidate_edges) > 0: with open(hparams.sample_file + 'test.txt', 'w') as f: for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') f = open(hparams.sample_file + 'test.txt') # try: G = nx.read_edgelist(f, nodetype=int) # except: # continue if nx.is_connected(G): for uvw in candidate_edges.split(): [u, v, w] = uvw.split("-") u = int(u) v = int(v) w = int(w) if (u >= 0 and v >= 0): with open( hparams.sample_file + "approach_2_" + str(trial) + "_" + str(s_num) + '.txt', 'a') as f: f.write( str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n') trial += 1
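# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original code): get_trajectories() and
# sample_graph() above both follow a rejection-sampling pattern: draw a
# candidate weighted edge set, build a networkx graph, and keep the sample
# only if the graph is connected. A compact illustration; sample_edge_set is
# a hypothetical callable standing in for the masked / unmasked candidate
# generators used in the classes above.
import networkx as nx

def sample_connected_graph(sample_edge_set, num_nodes, max_trials=5):
    """Re-sample weighted edge lists until the resulting graph is connected."""
    edges, G = [], nx.Graph()
    for _ in range(max_trials):
        edges = sample_edge_set()              # [(u, v, weight), ...]
        G = nx.Graph()
        G.add_nodes_from(range(num_nodes))
        G.add_weighted_edges_from(edges)
        if nx.is_connected(G):
            break
    return edges, G                            # last attempt may be disconnected
# ---------------------------------------------------------------------------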
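# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original code): the likelihood() methods in
# these classes score an observed edge (u, v, w) by a softmax between its
# decoder score and the score mass on non-edges, multiplied by the probability
# of its bond order under the per-edge weight logits. The NumPy version below
# keeps that structure but normalises the bond-order logits over the last
# (bin) axis, as the later likelihood() does, and accumulates in log space;
# inputs are assumed already reshaped to (n, n) and (n, n, bins).
import numpy as np

def edge_log_likelihood(edge_scores, weight_logits, adj, edge_list):
    """Sum of log-scores over the observed weighted edges."""
    dec = np.exp(np.minimum(edge_scores, 10.0))           # clipped edge scores
    w_exp = np.exp(np.minimum(weight_logits, 10.0))       # clipped bond-order logits
    w_prob = w_exp / w_exp.sum(axis=2, keepdims=True)     # bond-order softmax
    neg_mass = ((1.0 - adj) * dec).sum()                  # score mass on non-edges
    ll = 0.0
    for (u, v, w) in edge_list:
        pos = dec[u, v]
        ll += np.log(pos / (pos + neg_mass) * w_prob[u, v, w - 1] + 1e-9)
    return ll
# ---------------------------------------------------------------------------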
class VAEGRL(VAEGConfig): def __init__(self, hparams, placeholders, num_nodes, num_features, log_fact_k, input_size, istest=False): self.features_dim = num_features self.input_dim = num_nodes self.dropout = placeholders['dropout'] self.k = hparams.random_walk self.lr = placeholders['lr'] self.decay = placeholders['decay'] self.n = num_nodes self.d = num_features self.z_dim = hparams.z_dim self.bin_dim = hparams.bin_dim self.mask_weight = hparams.mask_weight self.log_fact_k = log_fact_k self.neg_sample_size = hparams.neg_sample_size self.input_size = input_size self.combination = hparams.node_sample * hparams.bfs_sample self.temperature = hparams.temperature self.E = hparams.E self.no_traj = hparams.no_traj self.adj = tf.placeholder(dtype=tf.float32, shape=[self.n, self.n], name='adj') self.features = tf.placeholder(dtype=tf.float32, shape=[self.n, self.d], name='features') self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.k, self.n, self.d], name='input') self.eps = tf.placeholder(dtype=tf.float32, shape=[self.n, self.z_dim, 1], name='eps') #For every trajectory self.edges = tf.placeholder(dtype=tf.int32, shape=[self.no_traj, None, 2], name='edges') self.weight_bin = tf.placeholder(dtype=tf.float32, shape=[self.no_traj, self.n, self.n, hparams.bin_dim], name="weight_bin") self.neg_edges = tf.placeholder(dtype=tf.int32, shape=[self.no_traj, None, 2], name='neg_edges') self.all_edges = tf.placeholder(dtype=tf.int32, shape=[self.combination, None, 2], name='all_edges') # for the time being 5 trajectories are in action self.trajectories = tf.placeholder(dtype=tf.float32, shape=[self.no_traj, self.n, self.n], name="trajectories") self.properties = tf.placeholder(dtype=tf.float32, shape=[self.no_traj], name="properties") self.n_fill_edges = tf.placeholder(dtype=tf.int32) self.n_edges = tf.placeholder(dtype=tf.float32) self.penalty = tf.placeholder(shape=[self.no_traj],dtype=tf.float32) #self.known_edges = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='known_edges') #node_count = [len(edge_list) for edge_list in self.edges] #print("Debug Input size", self.input_size) node_count_tf = tf.fill([1, self.input_size],tf.cast(self.n, tf.float32)) #node_count_tf = tf.Print(node_count_tf, [node_count_tf], message="My node_count_tf") #print("Debug size node_count", node_count_tf.get_shape()) #tf.convert_to_tensor(node_count, dtype=tf.int32) self.cell = VAEGCell(self.adj, self.features, self.z_dim, self.bin_dim, node_count_tf, self.all_edges) self.c_x, dec_out, z_encoded, w_edge, label, lambda_n, lambda_e = self.cell.call(self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample) self.prob = dec_out #print('Debug', dec_out.shape) self.z_encoded = z_encoded #self.enc_mu = enc_mu #self.enc_sigma = enc_sigma self.w_edge = w_edge #self.label = label #self.lambda_n = lambda_n #self.lambda_e = lambda_e #adj, weight, features, z_dim, bin_dim, node_count, edges, enc_mu, enc_sigma self.rlcell = VAEGRLCell(self.adj, self.features, self.z_dim, self.bin_dim, self.all_edges) #self, adj, weight, features, z_dim, bin_dim, enc_mu, enc_sigma, edges, index self.rl_dec_out, self.rl_w_edge, self.lambda_e, self.label = self.rlcell.call(self.input_data, self.n, self.d, self.k, self.combination, self.eps, hparams.sample) print_vars("trainable_variables") total_cost = 0.0 #self.lr = tf.Print(self.lr, [self.lr], message="my lr-values:") #self.train_op = tf.train.GradientDescentOptimizer(learning_rate=self.lr) self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr, epsilon=1e-06) 
ll = [] #self.grad = [] self.grad_placeholder = [] ll_rl = [] self.apply_transform_op = [] tvars = tf.trainable_variables() g_vars = [var for var in tvars if 'RL' in var.name] print("Debug gvars", g_vars) V = [] ll = [] ll_rl = [] w_list = [] loss = 0.0 ll_loss = 0.0 for i in range(self.no_traj): #if self.properties[i] == 0: # continue ll_temp = self.likelihood(self.trajectories[i], self.edges[i], self.neg_edges[i], self.weight_bin[i], self.prob[0], self.w_edge[0], self.penalty[i]) #ll_temp = tf.Print(ll_temp, [ll_temp], message="my ll-values:") ll_poisson = self.likelihood_poisson(lambda_e, self.n_edges) label_pred = self.label_loss_predict(self.features, label) #label_pred = tf.Print(label_pred, [label_pred], message="my label-ll-values:") ll.append(ll_temp + ll_poisson + label_pred) ll_rl_temp = self.likelihood(self.trajectories[i], self.edges[i], self.neg_edges[i], self.weight_bin[i], self.rl_dec_out[0], self.rl_w_edge[0], self.penalty[i]) #ll_rl_temp = tf.Print(ll_rl_temp,[ll_rl_temp], message="my ll_rl-values:") ll_rl_poisson = self.likelihood_poisson(self.lambda_e, self.n_edges) label_pred_rl = self.label_loss_predict(self.features, self.label) #label_pred_rl = tf.Print(label_pred_rl, [label_pred_rl], message="my label-ll-rl-values:") ll_rl.append(ll_rl_temp + ll_rl_poisson + label_pred_rl) # w_list.append(self.temperature * tf.subtract(ll_rl[i], ll[i])+self.properties[i]) w_list.append(tf.subtract(ll_rl[i], ll[i]) + self.temperature * self.properties[i] + 1.0) ll_loss += (ll_rl[i] - ll[i]) loss += (ll_rl[i] - ll[i]) + self.temperature * self.properties[i] w_total = tf.add_n(w_list) w_total = tf.Print(w_total, [w_total], message="my wtotal-values:") self.ll_loss = ll_loss/ self.no_traj self.loss = loss/ self.no_traj temp_grad = [] temp_c_grad = [] grad_val = [] grad_c_val =[] grad_shape = [] grad_c_shape = [] grad_comparison = self.train_op.compute_gradients(self.loss) for x in range(len(g_vars)): if grad_comparison[x][0] is not None: g = grad_comparison[x] else: g = (tf.fill(tf.shape(g_vars[x]), tf.cast(0.0, tf.float32)), grad_comparison[x][1]) #if i == 0: grad_c_val.append(g[0]) grad_c_shape.append(g[0].get_shape().as_list()) for i in range(self.no_traj): grad = self.train_op.compute_gradients(ll_rl[i], var_list=g_vars) w = w_list[i] #w = tf.divide(w_list[i], w_total) w = tf.Print(w, [w], message="my Imp weight-values:") for x in range(len(g_vars)): if grad[x][0] is not None: g = grad[x] else: g = (tf.fill(tf.shape(g_vars[x]), tf.cast(0.0, tf.float32)), grad[x][1]) if i == 0: temp_grad.append((w * g[0] / (self.no_traj * 50), g[1])) grad_val.append(w * g[0]) grad_shape.append(g[0].get_shape().as_list()) else: temp_grad[x] = (tf.add(temp_grad[x][0], w * g[0])/(self.no_traj * 50), g[1]) grad_val[x] = tf.add(grad_val[x], w * g[0]) #grad_shape.append(g[0].get_shape().as_list()) print("Debug Grad length", len(temp_grad), len(g_vars)) self.grad = temp_grad self.apply_transform_op = self.train_op.apply_gradients(temp_grad) #self.grad = temp_grad self.sess = tf.Session() #self.error = error # We are considering 10 trajectories only def label_loss_predict(self, label, predicted_labels): loss = 0.0 #for i in range(self.combination): predicted_label = predicted_labels predicted_label_resized = tf.reshape(predicted_label, [self.n, self.d]) predicted_label_exp = tf.exp(tf.minimum(predicted_label_resized, tf.fill([self.n, self.d], 10.0))) predicted_label_pos = tf.reduce_sum(tf.multiply(label, predicted_label_exp), axis=1) predicted_label_total = tf.reduce_sum(predicted_label_exp, axis=1) 
predicted_label_prob = tf.divide(predicted_label_pos, predicted_label_total) ll = tf.reduce_sum(tf.log(tf.add( predicted_label_prob, tf.fill([self.n, ], 1e-9)))) return ll def likelihood_poisson(self, lambda_, x): #x_convert = tf.cast(tf.convert_to_tensor([x]), tf.float32) x = tf.Print(x, [x], message="My debug_x_tf") log_fact_tf = tf.convert_to_tensor([self.log_fact_k[x-1]], dtype=tf.float32) return tf.subtract(tf.subtract(tf.multiply(x, tf.log(lambda_ + 1e-09)), lambda_), log_fact_tf) def likelihood(self, adj, edges, neg_edges, weight_bin, prob_dict, w_edge, penalty): ''' negative loglikelihood of the edges ''' ll = 0 k = 0 with tf.variable_scope('NLL'): dec_mat_temp = tf.reshape(prob_dict, [self.n, self.n]) dec_mat = tf.exp(tf.minimum(dec_mat_temp, tf.fill([self.n, self.n], tf.cast(10.0, dtype=tf.float32)))) dec_mat = tf.Print(dec_mat, [dec_mat], message="my decscore values:") min_val = tf.reduce_mean(dec_mat) penalty = tf.exp(penalty) w_edge_resized = tf.reshape(w_edge, [self.n, self.n, self.bin_dim]) w_edge_exp = tf.exp(tf.minimum(w_edge_resized, tf.fill([self.n, self.n, self.bin_dim], 10.0))) w_edge_pos = tf.reduce_sum(tf.multiply(weight_bin, w_edge_exp), axis=2) #print "Debug w_edge posscore", w_edge_pos.shape, dec_mat.shape w_edge_total = tf.reduce_sum(w_edge_exp, axis=2) w_edge_score = tf.gather_nd(tf.divide(w_edge_pos, w_edge_total), edges) w_edge_score = tf.Print(w_edge_score, [w_edge_score], message="my w_edge_score values:") #print "Debug w_edge_score", w_edge_score.shape comp = tf.subtract(tf.ones([self.n, self.n], tf.float32), adj) comp = tf.Print(comp, [comp], message="my comp values:") negscore = tf.multiply(comp, dec_mat) negscore = tf.Print(negscore, [negscore], message="my negscore values:") negscore = tf.gather_nd(negscore, neg_edges) negscore_sum = tf.reduce_sum(negscore) posscore = tf.gather_nd(dec_mat, edges) #print "Debug posscore", posscore.shape posscore = tf.Print(posscore, [posscore], message="my posscore values:") pos_weight_score = tf.multiply(posscore, w_edge_score) st = tf.stack([tf.shape(pos_weight_score)[0]])[0] softmax_out = tf.divide(pos_weight_score, negscore_sum) penalty = tf.log(tf.divide(penalty, negscore_sum)) comp = tf.Print(comp, [comp], message="my comp values:") ll += tf.reduce_sum(tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9)))) + penalty ll = tf.Print(ll, [ll], message="My loss") return (ll) def get_trajectories_nevae(self, p_theta, w_theta, edges, weight, n_fill_edges, atom_list): indicator = np.ones([self.n, self.bin_dim]) list_edges = [] degree = np.zeros(self.n) for i in range(self.n): for j in range(i+1, self.n): # removing the possibility of hydrogen hydrogen bond and oxigen bond if (atom_list[i] > 1 or atom_list[j] > 1) and (atom_list[i]!=2 or atom_list[j]!=2): list_edges.append((i,j,1)) list_edges.append((i,j,2)) list_edges.append((i,j,3)) known_edges = [] for i in range(self.n): # the atom is hydrogen if atom_list[i] <= 1: indicator[i][1] = 0 if atom_list[i] <= 2: indicator[i][2] = 0 for k in range(self.E): (u, v) = edges[k] w = weight[u][v] degree[u] += w degree[v] += w if (atom_list[u] - degree[u]) == 0: indicator[u][0] = 0 if (atom_list[u] - degree[u]) <= 1: indicator[u][1] = 0 if (atom_list[u] - degree[u]) <= 2: indicator[u][2] = 0 if (atom_list[v] - degree[v]) == 0: indicator[v][0] = 0 if (atom_list[v] - degree[v]) <= 1: indicator[v][1] = 0 if (atom_list[v] - degree[v]) <= 2: indicator[v][2] = 0 if u < v: list_edges.remove((u, v, 1)) list_edges.remove((u, v, 2)) list_edges.remove((u, v, 3)) known_edges.append((u, v, w)) 
    def get_trajectories_nevae(self, p_theta, w_theta, edges, weight, n_fill_edges, atom_list):
        indicator = np.ones([self.n, self.bin_dim])
        list_edges = []
        degree = np.zeros(self.n)

        for i in range(self.n):
            for j in range(i + 1, self.n):
                # remove the possibility of a hydrogen-hydrogen bond and of an oxygen-oxygen bond
                if (atom_list[i] > 1 or atom_list[j] > 1) and (atom_list[i] != 2 or atom_list[j] != 2):
                    list_edges.append((i, j, 1))
                    list_edges.append((i, j, 2))
                    list_edges.append((i, j, 3))

        known_edges = []
        for i in range(self.n):
            # the atom is hydrogen
            if atom_list[i] <= 1:
                indicator[i][1] = 0
            if atom_list[i] <= 2:
                indicator[i][2] = 0

        for k in range(self.E):
            (u, v) = edges[k]
            w = weight[u][v]
            degree[u] += w
            degree[v] += w
            # mask out bond orders that would exceed the remaining valence
            if (atom_list[u] - degree[u]) == 0:
                indicator[u][0] = 0
            if (atom_list[u] - degree[u]) <= 1:
                indicator[u][1] = 0
            if (atom_list[u] - degree[u]) <= 2:
                indicator[u][2] = 0
            if (atom_list[v] - degree[v]) == 0:
                indicator[v][0] = 0
            if (atom_list[v] - degree[v]) <= 1:
                indicator[v][1] = 0
            if (atom_list[v] - degree[v]) <= 2:
                indicator[v][2] = 0
            if u < v:
                list_edges.remove((u, v, 1))
                list_edges.remove((u, v, 2))
                list_edges.remove((u, v, 3))
                known_edges.append((u, v, w))
            else:
                list_edges.remove((v, u, 1))
                list_edges.remove((v, u, 2))
                list_edges.remove((v, u, 3))
                known_edges.append((v, u, w))

        trial = 0
        adj = np.zeros((self.n, self.n))
        G_list = []
        adj_list = []
        G_best = ''
        for j in range(1000):
            prob = np.reshape(p_theta, [self.n, self.n])
            w_edge = np.reshape(w_theta, [self.n, self.n, 3])
            edges = self.get_masked_candidate_with_atom_ratio_new(prob, w_edge, atom_count=atom_list,
                                                                  num_edges=n_fill_edges, hde=1)
            G = nx.parse_edgelist(edges, nodetype=int)
            if nx.is_connected(G):
                print("Connected")
                for (u, v) in G.edges():
                    adj[int(u)][int(v)] = 1  #int(G[u][v]["weight"])
                    adj[int(v)][int(u)] = 1  #int(G[u][v]["weight"])
                adj_list.append(adj)
                G_list.append(G)

        #rest = range(self.n)
        candidate_edges_list = get_masked_candidate(self.n, list_edges, known_edges, p_theta, w_theta,
                                                    n_fill_edges, indicator, degree, atom_list)
        for candidate_edges in candidate_edges_list:
            adj = np.zeros((self.n, self.n))
            if len(candidate_edges) > 0:
                candidate_edges_weighted = []
                for (u, v, w) in candidate_edges:
                    if int(u) < int(v):
                        candidate_edges_weighted.append(str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}")
                    else:
                        candidate_edges_weighted.append(str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}")
                G = nx.parse_edgelist(candidate_edges_weighted, nodetype=int)
                for i in range(self.n):
                    if i not in G.nodes():
                        G.add_node(i)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        adj[int(u)][int(v)] = int(w)
                        adj[int(v)][int(u)] = int(w)
                    adj_list.append(adj)
                    G_list.append(G)
        return adj_list, G_list
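    # Illustrative note (not part of the original code): candidate edges are
    # serialized as edge-list lines such as "0 1 {'weight':2}", which
    # nx.parse_edgelist(lines, nodetype=int) turns back into an edge (0, 1)
    # carrying G[0][1]['weight'] == 2. A minimal, hypothetical round trip:
    #
    #   line = "0 1 {'weight':2}"
    #   G = nx.parse_edgelist([line], nodetype=int)
    #   assert G[0][1]['weight'] == 2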
{}".format(ckpt.model_checkpoint_path)) #print_tensors_in_checkpoint_file(ckpt, all_tensors=True, tensor_name='') saver_old.restore(self.sess, ckpt.model_checkpoint_path) var_new = [v for v in tf.global_variables() if "RL" in v.name] print("Debug var_new", var_new) for v in var_new: v_old_temp = [v_old for v_old in tf.global_variables() if v_old.name == v.name.replace("RL", "") ] if len(v_old_temp) == 0: continue v_old = v_old_temp[0] print("v_old", v_old.value(), v_old.name) #if v_old in var_old assign = tf.assign(v, v_old) self.sess.run(assign) #v = tf.Variable(v.name.replace("RL", "")) print("v_new", v, v.name) saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20) def train(self, placeholders, hparams, adj, weight, weight_bin, weight_bin1, features, edges, all_edges, features1, atom_list): savedir = hparams.out_dir lr = hparams.learning_rate dr = hparams.dropout_rate decay = hparams.decay_rate f1 = open(hparams.out_dir + '/iteration.txt', 'r') iter1 = int(f1.read().strip()) iteration = iter1 # training num_epochs = hparams.num_epochs create_dir(savedir) ckpt = tf.train.get_checkpoint_state(savedir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=100) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) print("Load the model from %s" % ckpt.model_checkpoint_path) start_before_epoch = time.time() importance_weight = 0.0 tvars = tf.trainable_variables() g_vars = [var for var in tvars if 'RL' in var.name] print("Debug g_vars", g_vars) grad_local = [] for x in range(len(g_vars)): a = np.zeros(shape=g_vars[x].get_shape().as_list(), dtype=float) #a.fill(0.0) print("Debug a", a, a.shape) grad_local.append(a) print("Debug gradlocal", grad_local, g_vars[0].get_shape().as_list()) all_edges_local = [] for i in range(self.n): for j in range(self.n): all_edges_local.append((i,j)) prev_loss = 10000 epoch = 0 #print "Debug props logp", mean_logp, std_logp, "SAS: ", mean_sas, std_sas, "Cycle :", mean_cycle, std_cycle while (epoch < num_epochs): #for epoch in range(num_epochs): i = 0 #print "Debug inside loop", epoch start = time.time() start1 = time.time() feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders) # we will sample 50 z values here count = 0 total_train_loss = 0.0 total_ll_loss = 0.0 while count < 30: eps = np.random.randn(self.n, self.z_dim, 1) feed_dict.update({self.input_data: np.zeros([self.k,self.n,self.d])}) feed_dict.update({self.eps: eps}) feed_dict.update({self.all_edges: [all_edges_local]}) list_adj = [] list_prop = [] list_edge = [] list_neg_edge = [] prob, w_edge, rl_prob, rl_w_edge, lambda_e, z_encoded, label = self.sess.run([self.prob, self.w_edge, self.rl_dec_out, self.rl_w_edge, self.lambda_e, self.z_encoded, self.label], feed_dict=feed_dict) features, atom_list = self.getatoms(self.n, label) if len(atom_list) == 0: print "getatom not satisfied bad Z" end2 = time.time() continue max_edges_possible = int(sum(atom_list)/2) n_edges = max_edges_possible + 1 while(n_edges > max_edges_possible or n_edges < (self.n - 1) ): n_edges = np.random.poisson(lambda_e) end1 = time.time() weights = [] weight_bins = [] properties = [] pos_edges = [] neg_edges = [] list_penalty = [] qed_list = [] t_list, G_list = self.get_trajectories_nevae(rl_prob, rl_w_edge, edges[i][0], weight[i], n_edges - self.E, atom_list) edge_len = [] for j in range(len(t_list)): t = t_list[j] G = G_list[j] qed = compute_cost_qed(G, hparams.out_dir+"temp.txt") qed_list.append(qed) properties.append(qed) edge_len.append(len(G.edges())) index_list = 
    def train(self, placeholders, hparams, adj, weight, weight_bin, weight_bin1, features, edges,
              all_edges, features1, atom_list):
        savedir = hparams.out_dir
        lr = hparams.learning_rate
        dr = hparams.dropout_rate
        decay = hparams.decay_rate

        f1 = open(hparams.out_dir + '/iteration.txt', 'r')
        iter1 = int(f1.read().strip())
        iteration = iter1
        f1.close()

        # training
        num_epochs = hparams.num_epochs
        create_dir(savedir)
        ckpt = tf.train.get_checkpoint_state(savedir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

        if ckpt:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Load the model from %s" % ckpt.model_checkpoint_path)

        start_before_epoch = time.time()
        importance_weight = 0.0
        tvars = tf.trainable_variables()
        g_vars = [var for var in tvars if 'RL' in var.name]
        print("Debug g_vars", g_vars)

        grad_local = []
        for x in range(len(g_vars)):
            a = np.zeros(shape=g_vars[x].get_shape().as_list(), dtype=float)
            #a.fill(0.0)
            print("Debug a", a, a.shape)
            grad_local.append(a)
        print("Debug gradlocal", grad_local, g_vars[0].get_shape().as_list())

        all_edges_local = []
        for i in range(self.n):
            for j in range(self.n):
                all_edges_local.append((i, j))

        prev_loss = 10000
        epoch = 0
        #print("Debug props logp", mean_logp, std_logp, "SAS: ", mean_sas, std_sas, "Cycle :", mean_cycle, std_cycle)
        while (epoch < num_epochs):
            #for epoch in range(num_epochs):
            i = 0
            #print("Debug inside loop", epoch)
            start = time.time()
            start1 = time.time()
            feed_dict = construct_feed_dict(lr, dr, self.k, self.n, self.d, decay, placeholders)

            # we will sample 50 z values here
            count = 0
            total_train_loss = 0.0
            total_ll_loss = 0.0
            while count < 30:
                eps = np.random.randn(self.n, self.z_dim, 1)
                feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
                feed_dict.update({self.eps: eps})
                feed_dict.update({self.all_edges: [all_edges_local]})

                list_adj = []
                list_prop = []
                list_edge = []
                list_neg_edge = []

                prob, w_edge, rl_prob, rl_w_edge, lambda_e, z_encoded, label = self.sess.run(
                    [self.prob, self.w_edge, self.rl_dec_out, self.rl_w_edge, self.lambda_e,
                     self.z_encoded, self.label], feed_dict=feed_dict)

                features, atom_list = self.getatoms(self.n, label)
                if len(atom_list) == 0:
                    print("getatom not satisfied, bad Z")
                    end2 = time.time()
                    continue

                # sample an edge count from the Poisson prior, rejecting values that
                # cannot give a connected graph within the available valences
                max_edges_possible = int(sum(atom_list) / 2)
                n_edges = max_edges_possible + 1
                while (n_edges > max_edges_possible or n_edges < (self.n - 1)):
                    n_edges = np.random.poisson(lambda_e)
                end1 = time.time()

                weights = []
                weight_bins = []
                properties = []
                pos_edges = []
                neg_edges = []
                list_penalty = []
                qed_list = []

                t_list, G_list = self.get_trajectories_nevae(rl_prob, rl_w_edge, edges[i][0], weight[i],
                                                             n_edges - self.E, atom_list)
                edge_len = []
                for j in range(len(t_list)):
                    t = t_list[j]
                    G = G_list[j]
                    qed = compute_cost_qed(G, hparams.out_dir + "temp.txt")
                    qed_list.append(qed)
                    properties.append(qed)
                    edge_len.append(len(G.edges()))

                index_list = np.argsort(properties)[:hparams.no_traj]
                if len(index_list) < hparams.no_traj or properties[index_list[0]] == 2.0:
                    continue
                max_edge = max(edge_len)

                properties_new = []
                candidate_edges = []
                for j in range(hparams.no_traj):
                    index = index_list[j]
                    t = t_list[index]
                    G = G_list[index]

                    rl_prob_reshape = np.reshape(rl_prob, [self.n, self.n])
                    minval = min(rl_prob[0])
                    penalty = 0.0
                    penalty_index = np.unravel_index(np.argmin(rl_prob_reshape, axis=None), rl_prob_reshape.shape)
                    penalty_edges = []
                    if len(G.edges()) < max_edge:
                        diff = max_edge - len(G.edges())
                        while diff > 0:
                            penalty += penalty
                            penalty_edges.append(penalty_index)
                            diff -= 1

                    weights.append(t)
                    weight_bins.append(get_weight_bins(self.n, self.bin_dim, G))
                    properties_new.append(properties[index])
                    candidate_edges.append(list(G.edges_iter(data='weight')))
                    #print("Debug penalty edges", penalty_edges)
                    list_penalty.append(penalty)
                    penalty_edges.extend(list(G.edges()))
                    pos_edges.append(penalty_edges)

                    # negative edges are sampled from the complement graph
                    G_comp = nx.complement(G)
                    comp_edges = list(G_comp.edges())
                    neg_indices = np.random.choice(range(len(comp_edges)), hparams.neg_sample_size, replace=False)
                    neg_edges_to_be_extended = [comp_edges[index] for index in neg_indices]
                    neg_edges.append(neg_edges_to_be_extended)
                    #print("Debug shapes pos_edge", pos_edge)

                feed_dict.update({self.trajectories: weights})
                feed_dict.update({self.properties: properties_new})
                feed_dict.update({self.neg_edges: neg_edges})
                feed_dict.update({self.edges: np.array(pos_edges)})
                feed_dict.update({self.n_edges: n_edges})
                feed_dict.update({self.features: features})
                feed_dict.update({self.penalty: list_penalty})
                feed_dict.update({self.weight_bin: weight_bins})

                _, grad, train_loss, ll_loss = self.sess.run(
                    [self.apply_transform_op, self.grad, self.loss, self.ll_loss], feed_dict=feed_dict)
                print("Time size of graph", len(tf.get_default_graph().get_operations()))

                properties_original = [1.0 - x for x in properties_new]
                total_train_loss += train_loss
                total_ll_loss += ll_loss
                print("LOSS ", count, train_loss, ll_loss, properties_original)
                print("candidate1", candidate_edges[0])
                print("candidate2", candidate_edges[1])
                print("candidate3", candidate_edges[2])
                end2 = time.time()
                count += 1
                iteration += 1
                prev_loss = train_loss

            epoch += 1
            if iteration % hparams.log_every == 0 and iteration > 0:
                #print(train_loss)
                print("{}/{}(epoch {}), train_loss = {}, ll_loss={}".format(
                    iteration, num_epochs, epoch + 1, total_train_loss, total_ll_loss))
                checkpoint_path = os.path.join(savedir, 'model.ckpt')
                saver.save(self.sess, checkpoint_path, global_step=iteration)
                logger.info("model saved to {}".format(checkpoint_path))
            end = time.time()
            print("Time taken for a batch: ", end - start, end2 - start1)

        end_after_epoch = time.time()
        print("Time taken to complete all epochs", -start_before_epoch + end_after_epoch)

        f1 = open(hparams.out_dir + '/iteration.txt', 'w')
        f1.write(str(iteration))
        f1.close()

    def getembeddings(self, hparams, placeholders, adj, deg, weight_bin, weight):
        eps = np.random.randn(self.n, self.z_dim, 1)
        feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate, self.k,
                                        self.n, self.d, hparams.decay_rate, placeholders)
        feed_dict.update({self.adj: adj})
        feed_dict.update({self.features: deg})
        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})
        feed_dict.update({self.weight_bin: weight_bin})
        feed_dict.update({self.weight: weight})
        prob, ll, kl, w_edge, embedding = self.sess.run(
            [self.prob, self.ll, self.kl, self.w_edge, self.z_encoded], feed_dict=feed_dict)
        return embedding
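    # Illustrative sketch (not part of the original code): the edge-count
    # rejection loop used in train(), isolated as a hypothetical helper. It
    # keeps resampling from Poisson(lam) until the count is feasible, i.e. at
    # least n - 1 (connectivity) and at most sum(valences) / 2.
    @staticmethod
    def _sketch_sample_edge_count(lam, n, max_edges_possible):
        n_edges = max_edges_possible + 1
        while n_edges > max_edges_possible or n_edges < (n - 1):
            n_edges = np.random.poisson(lam)
        return n_edges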
    def get_masked_candidate_with_atom_ratio_new(self, prob, w_edge, atom_count, num_edges, hde):
        rest = range(self.n)
        nodes = []
        hn = []
        on = []
        nn = []
        cn = []
        # bucket the nodes by predicted valence: 1 -> H, 2 -> O, 3 or 5 -> N, 4 -> C
        for i in range(self.n):
            if atom_count[i] == 1:
                hn.append(i)
            if atom_count[i] == 2:
                on.append(i)
            if atom_count[i] == 3 or atom_count[i] == 5:
                nn.append(i)
            if atom_count[i] == 4:
                cn.append(i)
        nodes.extend(hn)
        nodes.extend(cn)
        nodes.extend(on)
        nodes.extend(nn)

        node_list = atom_count
        print("Debug nodelist", node_list)
        indicator = np.ones([self.n, self.bin_dim])
        edge_mask = np.ones([self.n, self.n])
        degree = np.zeros(self.n)
        for node in hn:
            indicator[node][1] = 0
            indicator[node][2] = 0
        for node in on:
            indicator[node][2] = 0

        # two hydrogen atoms cannot have an edge between them
        for n1 in hn:
            for n2 in hn:
                edge_mask[n1][n2] = 0

        candidate_edges = []
        # first generate edges joining with Hydrogen atoms sequentially
        index = 0
        i = 0
        hydro_sat = np.zeros(self.n)
        #first handle hydro
        try:
            for node in nodes:
                deg_req = node_list[node]
                d = degree[node]
                list_edges = get_candidate_neighbor_edges(node, self.n)
                if node in hn:
                    for i1 in range(self.n):
                        if hydro_sat[i1] == node_list[i1] - 1:
                            edge_mask[i1][node] = 0
                            edge_mask[node][i1] = 0
                while d < deg_req:
                    p = normalise_h1(prob, w_edge, self.bin_dim, indicator, edge_mask, node)
                    candidate_edges.extend([list_edges[k] for k in
                                            np.random.choice(range(len(list_edges)), [1], p=p, replace=False)])
                    (u, v, w) = candidate_edges[i]
                    degree[u] += w
                    degree[v] += w
                    d += w
                    if u in hn:
                        hydro_sat[v] += 1
                    if v in hn:
                        hydro_sat[u] += 1
                    edge_mask[u][v] = 0
                    edge_mask[v][u] = 0

                    # update the bond-order mask from the remaining valences
                    if (node_list[u] - degree[u]) == 0:
                        indicator[u][0] = 0
                    if (node_list[u] - degree[u]) <= 1:
                        indicator[u][1] = 0
                    if (node_list[u] - degree[u]) <= 2:
                        indicator[u][2] = 0
                    if (node_list[v] - degree[v]) == 0:
                        indicator[v][0] = 0
                    if (node_list[v] - degree[v]) <= 1:
                        indicator[v][1] = 0
                    if (node_list[v] - degree[v]) <= 2:
                        indicator[v][2] = 0
                    i += 1
                    #print("Debug candidate_edges", candidate_edges[i - 1])
                    #print("change state", el, degree[el], node_list[el], indicator[el])
        #'''
        except Exception:
            if len(candidate_edges) < 1:
                candidate_edges = []

        candidate_edges_new = []
        for (u, v, w) in candidate_edges:
            if u < v:
                candidate_edges_new.append(str(u) + ' ' + str(v) + ' ' + "{'weight':" + str(w) + "}")
            else:
                candidate_edges_new.append(str(v) + ' ' + str(u) + ' ' + "{'weight':" + str(w) + "}")
        print("Candidate_edges_new", candidate_edges_new)
        return candidate_edges_new

    def get_unmasked_candidate(self, list_edges, prob, w_edge, num_edges):
        # sample repeatedly and keep a count per sampled structure
        count = 0
        structure_list = defaultdict(int)
        #while (count < 1000):
        while (count < 50):
            indicator = np.ones([self.n, self.bin_dim])
            p, list_edges, w = normalise(prob, w_edge, self.n, self.bin_dim, [], list_edges, indicator)
            candidate_edges = [list_edges[k] for k in
                               np.random.choice(range(len(list_edges)), [num_edges], p=p, replace=False)]
            structure_list[' '.join([str(u) + '-' + str(v) + '-' + str(w)
                                     for (u, v, w) in sorted(candidate_edges, key=itemgetter(0))])] += 1
            #structure_list[sorted(candidate_edges, key=itemgetter(1))] += 1
            count += 1
        # return the structure that has been sampled the maximum number of times
        return max(structure_list.items(), key=itemgetter(1))[0]
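    # Illustrative note (not part of the original code): the indicator mask
    # encodes which bond orders a node can still accept. For example, a carbon
    # (valence 4) that already has degree 3 has one free valence, so
    # indicator[node] becomes [1, 0, 0]: a single bond is still allowed, but
    # double (index 1) and triple (index 2) bonds are masked out.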
    def getatoms(self, node, label):
        label_new = np.reshape(label, (node, self.d))
        #print("Debug label original shape:", label_new)
        temp = np.zeros((node, self.d))
        temp.fill(50)
        #print(temp, label_new.shape)
        minval = np.minimum(label_new, temp)
        label_new = np.exp(minval)
        #print("Debug label exp shape:", label_new)
        s = label_new.shape[0]
        #print("Debug label shape:", label_new.shape, s)
        label_new_sum = np.reshape(np.sum(label_new, axis=1), (s, 1))
        #print("Debug label sum:", label_new_sum.shape, label_new_sum)
        # softmax over the label logits gives a per-node distribution over atom types
        prob_label = label_new / label_new_sum

        count = 500
        while (count > 0):
            pred_label = []  #np.zeros(4)
            valency_arr = np.zeros(node)
            h_c = 0
            o_c = 0
            n_c = 0
            c_c = 0
            for i in range(node):
                valency = np.random.choice(range(4), [1], p=prob_label[i])
                temp = np.zeros(4)
                temp[valency] += 1
                pred_label.append(temp)
                valency_arr[i] = valency + 1
                if valency == 0:
                    h_c += 1
                if valency == 1:
                    o_c += 1
                if valency == 2:
                    n_c += 1
                if valency == 3:
                    c_c += 1
            if sum(valency_arr) >= 2 * (self.n - 1):
                break
            count -= 1
        if sum(valency_arr) < 2 * (self.n - 1):
            valency_arr = []
        return (pred_label, valency_arr)
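    # Illustrative note (not part of the original code): the acceptance test
    # sum(valency_arr) >= 2 * (n - 1) checks that the sampled atoms provide at
    # least 2 * (n - 1) bond endpoints, the minimum needed for the n - 1 edges
    # of a spanning tree, so a connected molecule is at least possible. For
    # example, with n = 4 atoms a sample must supply total valence >= 6.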