def get_trainer(trial, dataloader):
    # Sample the depth and the width of each hidden layer.
    n_layers = trial.suggest_categorical('n_layer', [2, 3, 4])
    hidden_dims = []
    for i in range(n_layers):
        hidden_dim = int(
            trial.suggest_loguniform('hidden_dim_{}'.format(i), 4, 256))
        hidden_dims.append(hidden_dim)
    # 39-dimensional input node features.
    model = GAE(39, hidden_dims)
    lr = trial.suggest_loguniform('lr', 1e-6, 1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    trainer = Trainer(model, optim, dataloader)
    return trainer
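# A minimal sketch of how get_trainer could be driven by an Optuna study.
# The objective below, the trainer.run() entry point, and the n_trials
# budget are illustrative assumptions, not part of the original code.
import optuna

def objective(trial):
    trainer = get_trainer(trial, dataloader)
    # `trainer.run` returning a final validation loss is an assumed
    # interface; substitute the real training entry point here.
    return trainer.run(n_epochs=10)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
print(study.best_params)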
def main():
    # Create the output directory if it does not exist yet.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    model = GAE(args.in_dim, args.hidden_dims)
    model.to(device)

    print('Loading data')
    with open(args.data_file, 'rb') as f:
        graphs = dill.load(f)
    print('Loaded {} molecules'.format(len(graphs)))

    # Hold out 10,000 molecules for validation.
    train_graphs, val_graphs = train_test_split(graphs, test_size=10000)
    train_dataset = MolDataset(train_graphs)
    val_dataset = MolDataset(val_graphs)
    del train_graphs, val_graphs
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, collate_fn=collate)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size,
                            shuffle=False, collate_fn=collate)

    trainer = Trainer(model, args)
    train_losses, val_losses = [], []
    print('Training Start')
    for epoch in tqdm(range(args.n_epochs)):
        # Training pass
        train_loss = 0
        model.train()
        for bg in tqdm(train_loader):
            bg.set_e_initializer(dgl.init.zero_initializer)
            bg.set_n_initializer(dgl.init.zero_initializer)
            train_loss += trainer.iteration(bg)
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        trainer.save(epoch, args.save_dir)

        # Validation pass
        val_loss = 0
        model.eval()
        for bg in val_loader:
            bg.set_e_initializer(dgl.init.zero_initializer)
            bg.set_n_initializer(dgl.init.zero_initializer)
            val_loss += trainer.iteration(bg, train=False)
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        print('Epoch: {:02d} | Train Loss: {:.4f} | Validation Loss: {:.4f}'
              .format(epoch, train_loss, val_loss))
    plot(train_losses, val_losses)
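# `plot` is not defined in this file; a minimal sketch, assuming it saves
# the train/validation curves into args.save_dir. The 'zinc250k.png' file
# name echoes the path built in the original directory-creation call and
# is an assumption.
import matplotlib.pyplot as plt

def plot(train_losses, val_losses):
    plt.plot(train_losses, label='train')
    plt.plot(val_losses, label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(args.save_dir, 'zinc250k.png'))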
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    # TODO: train/test split

    # Load and preprocess the dataset.
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = GAE(in_feats, [32, 16])
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    # BCELoss is assumed to be a weighted binary cross-entropy over the
    # reconstructed adjacency logits (see the sketch after this function).
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features
    n_epochs = 500
    losses = []
    print('Training Start')
    for epoch in tqdm(range(n_epochs)):
        # Reset the input features each epoch; the forward pass overwrites 'h'.
        g.ndata['h'] = features
        # Symmetric normalization: D^{-1/2}, with isolated nodes zeroed out.
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        adj = g.adjacency_matrix().to_dense()
        # The adjacency matrix is sparse, so positive (edge) entries are
        # up-weighted by (#non-edges / #edges).
        pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0]
                                         - adj.sum()) / adj.sum()])
        adj_logits = model(g)
        loss = loss_function(adj_logits, adj, pos_weight=pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss.item()))
    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
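# BCELoss is imported from elsewhere; a minimal sketch of a compatible
# implementation, assuming adj_logits are raw (pre-sigmoid) scores. It
# simply delegates to torch's built-in weighted BCE-with-logits, which
# accepts the same pos_weight argument used above.
import torch.nn.functional as F

def BCELoss(adj_logits, adj, pos_weight=None):
    # pos_weight up-weights the positive (edge) entries so the loss is not
    # dominated by the many zero entries of a sparse adjacency matrix.
    return F.binary_cross_entropy_with_logits(adj_logits, adj,
                                              pos_weight=pos_weight)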
def build_model(self):
    self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
    self.action = tf.placeholder(tf.float32, [None, self.action_size])
    self.advantage = tf.placeholder(tf.float32, [None])
    # Mean and log standard deviation of the old action distribution
    self.old_action_dist_mu = tf.placeholder(tf.float32, [None, self.action_size])
    self.old_action_dist_logstd = tf.placeholder(tf.float32, [None, self.action_size])
    # Neural network producing the parameters of the action distribution
    self.action_dist_mu, action_dist_logstd = self.build_policy(self.obs)
    # Trainable variables of the policy (the network weights)
    tr_vrbs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Policy')
    for i in tr_vrbs:
        print(i.op.name)
    # Broadcast action_dist_logstd from [1, action_size] to [batch_size, action_size]
    self.action_dist_logstd = tf.tile(action_dist_logstd,
                                      (tf.shape(action_dist_logstd)[0], 1))
    # Log-probability of the taken action under the new and the old policy
    self.log_policy = LOG_POLICY(self.action_dist_mu, self.action_dist_logstd, self.action)
    self.log_old_policy = LOG_POLICY(self.old_action_dist_mu, self.old_action_dist_logstd, self.action)
    policy_ratio = tf.exp(self.log_policy - self.log_old_policy)
    # Number of observations in the batch
    batch_size = tf.cast(tf.shape(self.obs)[0], tf.float32)
    '''
    Equation (14) in the paper.
    Contribution of a single s_n:
        expectation over a ~ q of [(new policy / q(a|s)) * advantage_old]
    '''
    surr_single_state = -tf.reduce_mean(policy_ratio * self.advantage)
    # KL divergence and Shannon entropy, averaged over the batch of inputs
    kl = GAUSS_KL(self.old_action_dist_mu, self.old_action_dist_logstd,
                  self.action_dist_mu, self.action_dist_logstd) / batch_size
    ent = GAUSS_ENTROPY(self.action_dist_mu, self.action_dist_logstd) / batch_size
    # 'Loss' quantities to constrain or maximize
    self.losses = [surr_single_state, kl, ent]
    # Maximize the surrogate function over the policy parameter 'theta',
    # represented by the neural network weights
    self.pg = FLAT_GRAD(surr_single_state, tr_vrbs)
    # KL divergence with the first argument (old policy) held fixed
    kl_first_fixed = GAUSS_KL_FIRST_FIX(self.action_dist_mu,
                                        self.action_dist_logstd) / batch_size
    # Gradient of the KL divergence w.r.t. theta (the policy weights)
    first_kl_grads = tf.gradients(kl_first_fixed, tr_vrbs)
    # Tangent vector y, fed in flat and reshaped per variable
    self.flat_tangent = tf.placeholder(tf.float32, [None])
    tangent = list()
    start = 0
    for vrbs in tr_vrbs:
        variable_size = np.prod(vrbs.get_shape().as_list())
        param = tf.reshape(self.flat_tangent[start:(start + variable_size)],
                           vrbs.get_shape())
        tangent.append(param)
        start += variable_size
    '''
    Gradient of the KL divergence contracted with the tangent vector.
    gradient_w_tangent : list of KL_grad * y terms, one per variable
    '''
    gradient_w_tangent = [tf.reduce_sum(kl_g * t)
                          for (kl_g, t) in zip(first_kl_grads, tangent)]
    '''
    Forming the full Hessian A of the KL and computing A*y directly is
    expensive (numerically solving an (n*n)*(n*1) product), so we first
    contract the gradient [dKL/dx1, dKL/dx2, ...] with the target vector y
    and differentiate again. 'self.FVP' then returns (second derivative of
    KL) * y for each variable, i.e. the Fisher Vector Product y -> JMJy.
    '''
    self.FVP = FLAT_GRAD(gradient_w_tangent, tr_vrbs)
    # Read out the current parameter values
    self.get_value = GetValue(self.sess, tr_vrbs, name='Policy')
    # Write new parameter values
    self.set_value = SetValue(self.sess, tr_vrbs, name='Policy')
    # Estimator of the advantage function
    self.gae = GAE(self.sess, self.observation_size, self.args.gamma,
                   self.args.lamda, self.args.vf_constraint)
    # Initialization of the barrier function compensator
    self.bar_comp = BARRIER(self.args, self.sess, self.observation_size,
                            self.action_size)
    # Variable initialization
    self.sess.run(tf.global_variables_initializer())
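# A minimal sketch of how self.pg and self.FVP are typically combined: a
# conjugate-gradient solver approximately solves Ax = g, evaluating A @ p
# through the Fisher Vector Product instead of ever forming A. The fvp()
# callable (a session run of self.FVP with p fed to self.flat_tangent) and
# the iteration counts below are illustrative assumptions.
import numpy as np

def conjugate_gradient(fvp, g, cg_iters=10, residual_tol=1e-10):
    x = np.zeros_like(g)
    r = g.copy()          # residual g - A @ x, with x = 0
    p = r.copy()          # search direction
    r_dot_r = r.dot(r)
    for _ in range(cg_iters):
        Ap = fvp(p)       # Fisher Vector Product: A @ p
        alpha = r_dot_r / p.dot(Ap)
        x += alpha * p
        r -= alpha * Ap
        new_r_dot_r = r.dot(r)
        p = r + (new_r_dot_r / r_dot_r) * p
        r_dot_r = new_r_dot_r
        if r_dot_r < residual_tol:
            break
    return x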
def build_model(self):
    self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
    self.action = tf.placeholder(tf.float32, [None, self.action_size])
    self.advantage = tf.placeholder(tf.float32, [None])
    # Mean of old action distribution
    self.old_action_dist_mu = tf.placeholder(tf.float32, [None, self.action_size])
    self.old_action_dist_logstd = tf.placeholder(tf.float32, [None, self.action_size])
    '''
    Each action has a Gaussian distribution with a mean and a standard
    deviation. With continuous state and action spaces, the network maps
    the input features to the mean of a Gaussian distribution for each
    action; a separate set of parameters specifies the log standard
    deviation of each action. The policy is therefore defined by the
    normal distribution (mean = NeuralNet(states), stddev = exp(logstd)).
    '''
    self.action_dist_mu, action_dist_logstd = self.build_policy(self.obs)
    # Broadcast the log std from [1, action_size] to [batch_size, action_size].
    # tf.tile(A, reps) constructs a tensor by repeating A as given by 'reps';
    # use tf.shape instead of tf.get_shape() because the placeholder has a
    # 'None' dimension.
    self.action_dist_logstd = tf.tile(action_dist_logstd,
                                      (tf.shape(action_dist_logstd)[0], 1))
    # Log-probability of taking 'self.action'
    # under the new distribution
    self.log_policy = LOG_POLICY(self.action_dist_mu, self.action_dist_logstd, self.action)
    # and under the old distribution
    self.log_old_policy = LOG_POLICY(self.old_action_dist_mu, self.old_action_dist_logstd, self.action)
    '''
    Equation (14) in the paper.
    Contribution of a single s_n:
        expectation over a ~ q of [(new policy / q(a|s)) * advantage_old],
    where the sampling distribution q is normally the old policy.
    '''
    batch_size = tf.shape(self.obs)[0]
    # Exponentiate the difference of log-probabilities to get the ratio.
    policy_ratio = tf.exp(self.log_policy - self.log_old_policy)
    surr_single_state = -tf.reduce_mean(policy_ratio * self.advantage)
    # tf.shape returns int32; cast to float32 for the divisions below.
    batch_size = tf.cast(batch_size, tf.float32)
    # KL divergence and Shannon entropy, averaged over the batch of inputs.
    kl = GAUSS_KL(self.old_action_dist_mu, self.old_action_dist_logstd,
                  self.action_dist_mu, self.action_dist_logstd) / batch_size
    ent = GAUSS_ENTROPY(self.action_dist_mu, self.action_dist_logstd) / batch_size
    self.losses = [surr_single_state, kl, ent]
    tr_vrbs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Policy')
    for i in tr_vrbs:
        print(i.op.name)
    '''
    Compute a search direction using a linear approximation to the
    objective and a quadratic approximation to the constraint: the
    direction is found by approximately solving 'Ax = g', where A is the
    Fisher information matrix from the quadratic approximation to the KL
    divergence constraint.
    '''
    # Maximize the surrogate function over the policy parameter 'theta'
    self.pg = FLAT_GRAD(surr_single_state, tr_vrbs)
    # KL divergence with the first argument held fixed: the first argument
    # would be the old policy parameters, so keep it constant.
    kl_first_fixed = GAUSS_KL_FIRST_FIX(self.action_dist_mu,
                                        self.action_dist_logstd) / batch_size
    # Gradient of the KL divergence
    first_kl_grads = tf.gradients(kl_first_fixed, tr_vrbs)
    # Vector we are going to multiply with, fed in flat and reshaped per variable
    self.flat_tangent = tf.placeholder(tf.float32, [None])
    tangent = list()
    start = 0
    for vrbs in tr_vrbs:
        variable_size = np.prod(vrbs.get_shape().as_list())
        param = tf.reshape(self.flat_tangent[start:(start + variable_size)],
                           vrbs.get_shape())
        tangent.append(param)
        start += variable_size
    '''
    Gradient of the KL divergence contracted with the tangent vector.
    gradient_w_tangent : list of KL_grad * y terms, one per variable
    '''
    gradient_w_tangent = [tf.reduce_sum(kl_g * t)
                          for (kl_g, t) in zip(first_kl_grads, tangent)]
    '''
    Forming the full Hessian A of the KL and computing A*y directly is
    expensive (numerically solving an (n*n)*(n*1) product), so we first
    contract the gradient [dKL/dx1, dKL/dx2, ...] with the target vector y
    and differentiate again. 'self.FVP' then returns (second derivative of
    KL) * y for each variable, i.e. the Fisher Vector Product y -> JMJy.
    '''
    self.FVP = FLAT_GRAD(gradient_w_tangent, tr_vrbs)
    # Read out the current parameter values
    self.get_value = GetValue(self.sess, tr_vrbs, name='Policy')
    # Write new parameter values
    self.set_value = SetValue(self.sess, tr_vrbs, name='Policy')
    # Generalized Advantage Estimation
    self.gae = GAE(self.sess, self.observation_size, self.args.gamma,
                   self.args.lamda, self.args.vf_constraint)
    self.sess.run(tf.global_variables_initializer())
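# LOG_POLICY is imported from a utilities module; a minimal sketch of the
# diagonal-Gaussian log-likelihood it is assumed to compute, which is what
# makes exp(log_policy - log_old_policy) the probability ratio above.
def LOG_POLICY(mu, logstd, action):
    # log N(action | mu, exp(logstd)^2), summed over action dimensions
    var = tf.exp(2 * logstd)
    log_prob = -tf.square(action - mu) / (2 * var) \
               - 0.5 * tf.log(2 * np.pi) - logstd
    return tf.reduce_sum(log_prob, axis=1)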
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(timezone('Asia/Hong_Kong'))
        .strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purposes
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    _logger.info('Finished generating dataset')

    model = GAE(args.n, args.d, args.x_dim, args.seed,
                args.num_encoder_layers, args.num_decoder_layers,
                args.hidden_size, args.latent_dim, args.l1_graph_penalty,
                args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    _logger.info('Finished training model')

    # Save the raw recovered graph, the ground truth and the observational
    # data after training
    np.save('{}/true_graph.npy'.format(output_dir), dataset.W)
    np.save('{}/observational_data.npy'.format(output_dir), dataset.X)
    np.save('{}/final_raw_recovered_graph.npy'.format(output_dir), W_est)

    # Plot raw recovered graph
    plot_recovered_graph(
        W_est, dataset.W,
        save_name='{}/raw_recovered_graph.png'.format(output_dir))

    _logger.info('Filter by constant threshold')
    W_est = W_est / np.max(np.abs(W_est))          # Normalize
    W_est[np.abs(W_est) < args.graph_thres] = 0    # Threshold

    # Plot thresholded recovered graph
    plot_recovered_graph(
        W_est, dataset.W,
        save_name='{}/thresholded_recovered_graph.png'.format(output_dir))
    results_thresholded = count_accuracy(dataset.W, W_est)
    _logger.info('Results after thresholding by {}: {}'.format(
        args.graph_thres, results_thresholded))
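# set_seed is imported from a utils module; a minimal sketch of what such
# a reproducibility helper typically does. The tf.set_random_seed call
# assumes a TF1-style graph, matching the rest of this codebase.
import random
import numpy as np
import tensorflow as tf

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)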