def main():
    """Train a GCN over an n x m gridworld, derive a per-cell potential
    (gcn_phi), then run ACPhi twice — alpha=0 vs alpha=1 — and plot the
    regret/value comparison.

    Relies on module-level `args` and on helpers defined elsewhere in the
    project (GCN, shortest_dist, normalize, sparse_mx_to_torch_sparse_tensor,
    update_graph, Params, ACPhi, PlotAnalysis).
    """
    # Grid dimensions (n x m) and a target cell index (nt, mt) from CLI args.
    n = args.n
    m = args.m
    nt = args.nt
    mt = args.mt
    # Per-(cell, action) reward: -0.1 step cost everywhere, with two entries
    # adjacent to (nt-1, mt-1) zeroed — presumably the two moves that enter
    # the goal cell; confirm against ACPhi's action encoding.
    reward = np.ones((n,m,4))*-0.1
    reward[nt-2,mt-1,2],reward[nt-1,mt-2,1] = 0,0
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")
    # One GCN input feature per grid cell (n*m).
    gcn_model = GCN(nfeat=n*m, nhid=args.hidden)
    gcn_model.to(device)
    optimizer = optim.Adam(gcn_model.parameters(),lr=args.lr, weight_decay=args.weight_decay)
    # Build graph adjacency and node features from shortest-path distances.
    adj,features,_,_ = shortest_dist(n,m,[n,m])
    features = normalize(sp.csr_matrix(features))
    features = torch.FloatTensor(np.array(features.todense()))
    # Symmetrize the adjacency matrix (keep the larger of a_ij / a_ji),
    # add self-loops, row-normalize, and convert to a torch sparse tensor.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = normalize(sp.csr_matrix(adj) + sp.eye(adj.shape[0]))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    # Train the GCN for args.gcn_epi episodes via the project's update_graph.
    for episodes in range(args.gcn_epi):
        update_graph(n,m,args,gcn_model,optimizer)
        print("{} episode done :".format(episodes+1))
    if args.cuda and torch.cuda.is_available():
        features = features.cuda()
        adj = adj.cuda()
    # Forward pass; exp() suggests the model emits log-probabilities
    # (e.g. log_softmax output) — column 1 reshaped to the grid becomes
    # the per-cell potential.
    output = gcn_model(features,adj).cpu()
    gcn_phi = torch.exp(output).detach().numpy()
    gcn_phi = gcn_phi[:,1].reshape(n,m)
    # Two actor-critic configurations differing only in alpha; based on the
    # result names, alpha=0 appears to be the GCN-shaped run — verify in ACPhi.
    param1 = Params(n,m,nt,mt,gamma = args.gamma,qstep = args.qstep,pstep = args.pstep,alpha = 0,noepi = args.noepi)
    param2 = Params(n,m,nt,mt,gamma = args.gamma,qstep = args.qstep,pstep = args.pstep,alpha = 1,noepi = args.noepi)
    regcn,valgcn = ACPhi(param1,reward,gcn_phi)
    reg,val = ACPhi(param2,reward,gcn_phi)
    PlotAnalysis(param1.noepi,reg,val,regcn,valgcn,gcn_phi)
def gcn():
    """Run GCN edge-weight regression experiments.

    For each dataset (or all six when args.dataset == 'all'), repeats
    args.exp_number times: clean + split the data, build train/val/test
    graphs, train a GCN with MSE loss, and record test MSE / PCC / MAE.
    Prints per-dataset means and standard deviations, then terminates the
    process via exit().

    Relies on module-level `args` and project helpers (preprocess_dataset,
    GCN, sparse_mx_to_torch_sparse_tensor, pearson_correlation).
    NOTE(review): tensors are moved with hard-coded .cuda() while the model
    uses args.device — this function requires CUDA regardless of args.
    """
    print('Training GCN mode!')
    # initialize results arrays (one slot per experiment repetition)
    total_mse = np.zeros(args.exp_number)
    total_pcc = np.zeros(args.exp_number)
    total_mae = np.zeros(args.exp_number)
    # Per-dataset aggregated results (mean and std over repetitions).
    mse_datasets = {}
    std_datasets = {}
    pcc_datasets = {}
    pcc_std_datasets = {}
    mae_datasets = {}
    mae_std_datasets = {}
    t_total = time.time()
    if args.dataset == 'all':
        datasets = [
            'airport', 'collaboration', 'congress', 'forum', 'geom', 'astro'
        ]
    else:
        datasets = [args.dataset]
    for dataset in datasets:
        for exp_number in range(args.exp_number):
            print("%s: experiment number %d" % (dataset, exp_number + 1))
            data = preprocess_dataset.clean_data(dataset)
            # Edge weights are L2-normalized for every dataset except usair.
            if dataset != 'usair':
                data['weights'] = preprocessing.normalize([data['weights']])[0]
            # random split of data: 80/20 train/test, then ~8% of train as val
            data_train, data_test = train_test_split(data, test_size=0.2)
            data_train, data_val = train_test_split(data_train, test_size=0.08)
            data = data.reset_index()
            data_train = data_train.reset_index()
            data_val = data_val.reset_index()
            data_test = data_test.reset_index()
            # Each split gets its own graph built against the full data.
            G = preprocess_dataset.create_graph_gcn(dataset, data, data_train)
            val_G = preprocess_dataset.create_graph_gcn(
                dataset, data, data_val)
            test_G = preprocess_dataset.create_graph_gcn(
                dataset, data, data_test)
            nodes_len = len(G.nodes)
            # Map node ids to contiguous row indices for the model.
            node_ids_to_index = {}
            for i, node_id in enumerate(G.nodes):
                node_ids_to_index[node_id] = i
            train_A = nx.adjacency_matrix(G)
            val_A = nx.adjacency_matrix(val_G)
            test_A = nx.adjacency_matrix(test_G)
            # identity if same for all — one-hot node features (identity matrix)
            I = identity(G.number_of_nodes(), dtype='int8', format='csr')
            features = torch.FloatTensor(np.array(I.todense())).cuda()
            train_labels = torch.FloatTensor(
                data_train['weights'].values).cuda()
            val_labels = torch.FloatTensor(data_val['weights'].values).cuda()
            test_labels = torch.FloatTensor(data_test['weights'].values).cuda()
            train_A = sparse_mx_to_torch_sparse_tensor(train_A).cuda()
            val_A = sparse_mx_to_torch_sparse_tensor(val_A).cuda()
            test_A = sparse_mx_to_torch_sparse_tensor(test_A).cuda()
            model = GCN(nfeat=nodes_len,
                        nhid=args.nhid,
                        nclass=args.nclass,
                        dropout=args.dropout)
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
            model.train()
            model = model.to(args.device)
            # train
            for epoch in range(args.epochs):
                t = time.time()
                model.train()
                optimizer.zero_grad()
                # The model also takes per-edge endpoint columns 'A' and 'B'
                # plus the id->index map — presumably to score edges; see the
                # project's GCN.forward for the exact contract.
                output = model(features, train_A,
                               torch.tensor(data_train['A'].values).cuda(),
                               torch.tensor(data_train['B'].values).cuda(),
                               node_ids_to_index)
                loss_train = F.mse_loss(output, train_labels)
                loss_train.backward()
                optimizer.step()
                # validation (no grad-guard here; only .eval() is toggled)
                model.eval()
                output = model(features, val_A,
                               torch.tensor(data_val['A'].values).cuda(),
                               torch.tensor(data_val['B'].values).cuda(),
                               node_ids_to_index)
                loss_val = F.mse_loss(output, val_labels)
                if args.verbose:
                    print('Epoch: {:04d}'.format(epoch + 1),
                          'loss_train: {:.4f}'.format(loss_train.item()),
                          'loss_val: {:.4f}'.format(loss_val.item()),
                          'time: {:.4f}s'.format(time.time() - t))
            # test
            model.eval()
            with torch.no_grad():
                output = model(features, test_A,
                               torch.tensor(data_test['A'].values).cuda(),
                               torch.tensor(data_test['B'].values).cuda(),
                               node_ids_to_index)
                # NOTE(review): test loss flattens `output` but train/val do
                # not — confirm the output shape; an extra trailing dim would
                # make train/val losses broadcast differently from test.
                loss_test = F.mse_loss(torch.flatten(output), test_labels)
                pcc_test = pearson_correlation(test_labels, output)
                mae_test = F.l1_loss(output, test_labels)
                print("Test set results:",
                      "loss= {:.10f}".format(loss_test.item()),
                      "pcc= {:.10f}".format(pcc_test),
                      "mae= {:.10f}".format(mae_test.item()))
                total_mse[exp_number] = loss_test
                total_pcc[exp_number] = pcc_test
                total_mae[exp_number] = mae_test
        # results — aggregate this dataset's repetitions, then reset the
        # accumulators for the next dataset. NaN PCCs are excluded.
        mse_datasets[dataset] = np.mean(total_mse)
        std_datasets[dataset] = np.std(total_mse)
        total_mse = np.zeros(args.exp_number)
        pcc_datasets[dataset] = np.mean(total_pcc[~np.isnan(total_pcc)])
        pcc_std_datasets[dataset] = np.std(total_pcc[~np.isnan(total_pcc)])
        total_pcc = np.zeros(args.exp_number)
        mae_datasets[dataset] = np.mean(total_mae)
        mae_std_datasets[dataset] = np.std(total_mae)
        total_mae = np.zeros(args.exp_number)
    for dataset in datasets:
        print("MSE %s: {:,f}".format(mse_datasets[dataset]) % dataset)
        print("MSE_STD %s: {:,f}".format(std_datasets[dataset]) % dataset)
        print("PCC %s: {:,f}".format(pcc_datasets[dataset]) % dataset)
        print("PCC_STD %s: {:,f}".format(pcc_std_datasets[dataset]) % dataset)
        print("MAE %s: {:,f}".format(mae_datasets[dataset]) % dataset)
        print("MAE_STD %s: {:,f}".format(mae_std_datasets[dataset]) % dataset)
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    # NOTE(review): exit() kills the whole interpreter; sys.exit() (or just
    # returning) is the conventional choice if this is ever imported.
    exit()
# NOTE(review): orphaned fragment. The first block of statements references
# `output`, `laplacian`, `epoch`, `loss_train`, `optimizer` and `t` from an
# enclosing training loop that is NOT visible in this file chunk — the
# indentation below is reconstructed and must be re-anchored inside that loop
# before this code can run.
# Laplacian smoothness regularizer on the class-1 softmax scores:
# soft_out^T @ L @ soft_out (a 1x1 matrix).
soft_out = torch.unsqueeze(
    torch.nn.functional.softmax(output, dim=1)[:, 1], 1)
loss_reg = torch.mm(torch.mm(soft_out.T, laplacian), soft_out)
# NOTE(review): this log line prints loss_train BEFORE the regularizer is
# added below — the logged value excludes args.gcn_lambda * loss_reg.
# Confirm that is intentional.
print('Epoch: {:04d}'.format(epoch + 1),
      'loss_train: {:.4f}'.format(loss_train.item()),
      'time: {:.4f}s'.format(time.time() - t))
loss_train += args.gcn_lambda * loss_reg.squeeze()
loss_train.backward()
optimizer.step()
# --- Script-level setup: train a GCN on a fixed 15x15 grid graph. This
# mirrors main() at the top of the file but hard-codes n = m = 15 and
# 40 training episodes.
torch.set_num_threads(1)
device = torch.device("cuda:0" if args.cuda else "cpu")
n, m = 15, 15
gcn_model = GCN(nfeat=n * m, nhid=args.hidden)
gcn_model.to(device)
optimizer = optim.Adam(gcn_model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
adj, features, _, _ = shortest_dist(n, m, [n, m])
features = normalize(sp.csr_matrix(features))
features = torch.FloatTensor(np.array(features.todense()))
# Symmetrize (keep the larger of a_ij / a_ji), add self-loops,
# row-normalize, convert to a torch sparse tensor.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = normalize(sp.csr_matrix(adj) + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)
for episodes in range(40):
    update_graph(n, m, args, gcn_model, optimizer)
    print("{} episode done :".format(episodes + 1))
def main():
    """Run an A2C/PPO/ACKTR agent while optionally training a GCN on the
    per-episode graph of hidden states (used later by
    rollouts.compute_returns for reward shaping, weighted by args.gcn_alpha).

    Relies on module-level `args` and `num_updates`, plus project pieces
    (make_vec_envs, Policy, algo, RolloutStorage, RunningMeanStd,
    update_linear_schedule, Logger).
    """
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")
    run_id = "alpha{}".format(args.gcn_alpha)
    # Optional result logging to {args.folder}/{run_id}.
    if args.use_logger:
        from utils import Logger
        folder = "{}/{}".format(args.folder, run_id)
        logger = Logger(algo_name=args.algo,
                        environment_name=args.env_name,
                        folder=folder,
                        seed=args.seed)
        logger.save_args(args)
        print("---------------------------------------")
        print('Saving to', logger.save_folder)
        print("---------------------------------------")
    else:
        print("---------------------------------------")
        print('NOTE : NOT SAVING RESULTS')
        print("---------------------------------------")
    all_rewards = []
    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, args.add_timestep, device,
                         False)
    actor_critic = Policy(envs.observation_space.shape,
                          envs.action_space,
                          args.env_name,
                          base_kwargs={'recurrent': args.recurrent_policy})
    actor_critic.to(device)
    # Pick the RL algorithm; ACKTR reuses the A2C class with acktr=True.
    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               acktr=True)
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size,
                              actor_critic.base.output_size)
    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)
    ############################
    # GCN Model and optimizer
    from pygcn.train import update_graph
    from pygcn.models import GCN
    gcn_model = GCN(nfeat=actor_critic.base.output_size, nhid=args.gcn_hidden)
    gcn_model.to(device)
    gcn_optimizer = optim.Adam(gcn_model.parameters(),
                               lr=args.gcn_lr,
                               weight_decay=args.gcn_weight_decay)
    gcn_loss = nn.NLLLoss()
    # Per-worker buffers: hidden states, the episode's state graph, the next
    # node index to assign, and (node, reward) pairs at rewarding steps.
    gcn_states = [[] for _ in range(args.num_processes)]
    Gs = [nx.Graph() for _ in range(args.num_processes)]
    node_ptrs = [0 for _ in range(args.num_processes)]
    rew_states = [[] for _ in range(args.num_processes)]
    ############################
    episode_rewards = deque(maxlen=100)
    avg_fwdloss = deque(maxlen=100)
    # NOTE(review): rew_rms is never used in this function as shown.
    rew_rms = RunningMeanStd(shape=())
    # Reward-delay buffers: rewards accumulate per worker and are released
    # every args.reward_freq steps or at episode end.
    delay_rew = torch.zeros([args.num_processes, 1])
    delay_step = torch.zeros([args.num_processes])
    start = time.time()
    # `num_updates` is a module-level value not visible in this chunk —
    # presumably total_steps // (num_steps * num_processes); confirm.
    for j in range(num_updates):
        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            update_linear_schedule(
                agent.optimizer, j, num_updates,
                agent.optimizer.lr if args.algo == "acktr" else args.lr)
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob,\
                    recurrent_hidden_states, hidden_states = actor_critic.act(
                        rollouts.obs[step],
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])
            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)
            delay_rew += reward
            delay_step += 1
            for idx, (info, hid,
                      eps_done) in enumerate(zip(infos, hidden_states, done)):
                # Release the accumulated (delayed) reward at episode end or
                # every args.reward_freq steps; otherwise the agent sees 0.
                if eps_done or delay_step[idx] == args.reward_freq:
                    reward[idx] = delay_rew[idx]
                    delay_rew[idx] = delay_step[idx] = 0
                else:
                    reward[idx] = 0
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])
                # gcn_alpha < 1.0 means GCN shaping is active: record the
                # hidden state as a new graph node chained to its predecessor,
                # and train the GCN on the finished episode's graph.
                if args.gcn_alpha < 1.0:
                    gcn_states[idx].append(hid)
                    node_ptrs[idx] += 1
                    if not eps_done:
                        Gs[idx].add_edge(node_ptrs[idx] - 1, node_ptrs[idx])
                    if reward[idx] != 0. or eps_done:
                        rew_states[idx].append(
                            [node_ptrs[idx] - 1, reward[idx]])
                    if eps_done:
                        # Degenerate single-node episode -> 1x1 identity adj.
                        adj = nx.adjacency_matrix(Gs[idx]) if len(Gs[idx].nodes)\
                            else sp.csr_matrix(np.eye(1,dtype='int64'))
                        update_graph(gcn_model, gcn_optimizer,
                                     torch.stack(gcn_states[idx]), adj,
                                     rew_states[idx], gcn_loss, args, envs)
                        # Reset this worker's episode buffers.
                        gcn_states[idx] = []
                        Gs[idx] = nx.Graph()
                        node_ptrs[idx] = 0
                        rew_states[idx] = []
            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks,
                            hidden_states)
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()
        # Returns are computed with the GCN model and gcn_alpha mixed in —
        # see this project's RolloutStorage.compute_returns for the formula.
        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau, gcn_model, args.gcn_alpha)
        agent.update(rollouts)
        rollouts.after_update()
        ####################### Saving and book-keeping #######################
        # Checkpoint 5 times over the run plus once at the final update.
        if (j % int(num_updates / 5.) == 0
                or j == num_updates - 1) and args.save_dir != "":
            print('Saving model')
            print()
            save_dir = "{}/{}/{}".format(args.save_dir, args.folder, run_id)
            save_path = os.path.join(save_dir, args.algo,
                                     'seed' + str(args.seed)) + '_iter' + str(j)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            # A really ugly way to save a model to CPU
            save_model = actor_critic
            save_gcn = gcn_model
            if args.cuda:
                save_model = copy.deepcopy(actor_critic).cpu()
                save_gcn = copy.deepcopy(gcn_model).cpu()
            save_model = [
                save_gcn, save_model,
                hasattr(envs.venv, 'ob_rms') and envs.venv.ob_rms or None
            ]
            torch.save(save_model,
                       os.path.join(save_path, args.env_name + "ac.pt"))
        total_num_steps = (j + 1) * args.num_processes * args.num_steps
        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            # NOTE(review): this format string was line-wrapped in the
            # original source; reconstructed here as a single literal.
            print("Updates {}, num timesteps {}, FPS {} \n Last {} training "
                  "episodes: mean/median reward {:.2f}/{:.2f}, min/max reward "
                  "{:.2f}/{:.2f}, success rate {:.2f}, avg fwdloss {:.2f}\n"
                  .format(
                      j, total_num_steps,
                      int(total_num_steps / (end - start)),
                      len(episode_rewards), np.mean(episode_rewards),
                      np.median(episode_rewards), np.min(episode_rewards),
                      np.max(episode_rewards),
                      np.count_nonzero(np.greater(episode_rewards, 0)) /
                      len(episode_rewards),
                      np.mean(avg_fwdloss),
                  ))
            all_rewards.append(np.mean(episode_rewards))
            if args.use_logger:
                logger.save_task_results(all_rewards)
        ####################### Saving and book-keeping #######################
    envs.close()