def main():
    """Train a DQN agent (Qnet) on a single gym environment.

    Command-line driven: builds the env from ``env_dict``, anneals epsilon
    linearly, stores transitions in a replay buffer, periodically trains,
    syncs the target network, evaluates, and checkpoints parameters.

    NOTE(review): the original source had its indentation collapsed onto one
    physical line; the statement nesting below (especially which checks sit
    inside ``memory.size() > 2000``) was reconstructed from context — confirm
    against the original file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_name", type=str, default='h8-10x10')
    parser.add_argument("--training_step", type=int, default=10000000)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--lr", type=float, default=0.0005)
    parser.add_argument("--buf_size", type=int, default=100000)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--target_update_interval", type=int, default=10000)
    parser.add_argument("--train_interval", type=int, default=1)
    parser.add_argument("--model_save_interval", type=int, default=100000)
    parser.add_argument("--eval_interval", type=int, default=50000)
    parser.add_argument("--init_epsilon", type=float, default=1.0)
    parser.add_argument("--final_epsilon", type=float, default=0.1)
    parser.add_argument("--decay_step", type=int, default=9000000)
    # --po: partial observability flag, forwarded to obs2state via partial().
    parser.add_argument("--po", type=str2bool, default=False)
    # Rewards are divided by this before being stored in the replay buffer.
    parser.add_argument("--rew_norm", type=float, default=100.0)
    parser.add_argument("--gpu", type=int, default=2)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--logdir", type=str, default='./logs/')
    parser.add_argument("--modeldir", type=str, default='./models/')
    parser.add_argument("--exp_flag", type=str, default='')
    args = parser.parse_args()

    # Pin the visible GPU before torch initializes CUDA.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    set_seed(args.seed)
    # exp_name builds a run directory name from args, excluding the listed keys.
    dir_name = exp_name(args, [
        'training_step', 'gpu', 'gamma', 'model_save_interval', 'decay_step',
        'eval_interval', 'logdir', 'modeldir', 'exp_flag', 'seed', 'rew_norm'
    ], seed=args.seed, exp_flag=args.exp_flag)
    model_dir = os.path.join(args.modeldir, dir_name)
    # Create directory to save params.
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logger = te.Logger(os.path.join(args.logdir, dir_name))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    obs_to_state = partial(obs2state, args.po)
    # Number of steps between each 0.01 epsilon decrement; with the defaults,
    # epsilon goes init -> final over decay_step steps in 0.01-sized drops.
    decay_interval = args.decay_step // (
        (args.init_epsilon - args.final_epsilon) / 0.01)
    register(id=env_dict[args.env_name][0],
             entry_point=env_dict[args.env_name][1])
    env_name = env_dict[args.env_name][0]
    env = gym.make(env_name)
    env.seed(args.seed)
    obs = env.reset()
    s = obs_to_state(obs)
    # Online and target Q-networks; target starts as an exact copy.
    q = Qnet(input_size=len(s)).to(device)
    q_target = Qnet(input_size=len(s)).to(device)
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer(buffer_size=args.buf_size)
    optimizer = optim.Adam(q.parameters(), lr=args.lr)
    n_epi = 0
    epi_score = 0.0
    epi_len = 0
    for tstep in range(args.training_step):
        epsilon = max(args.final_epsilon,
                      args.init_epsilon - 0.01 * (tstep // decay_interval))  # Linear annealing
        a = q.sample_action(torch.from_numpy(s).float().to(device), epsilon)
        obs_prime, r, done, info = env.step(a)
        s_prime = obs_to_state(obs_prime)
        # done_mask is 0 at episode end so the bootstrap term is zeroed in train().
        done_mask = 0.0 if done else 1.0
        memory.put((s, a, r / args.rew_norm, s_prime, done_mask))
        s = s_prime
        epi_score += r
        epi_len += 1
        if done:
            obs = env.reset()
            s = obs_to_state(obs)
            n_epi += 1
            logger.log_scalar('epi_r', epi_score, step=n_epi)
            logger.log_scalar('epi_len', epi_len, step=n_epi)
            if n_epi % 20 == 0 and n_epi != 0:
                print(
                    "n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%, tstep : {}, epi_len : {}"
                    .format(n_epi, epi_score, memory.size(), epsilon * 100,
                            tstep, epi_len))
            epi_score = 0
            epi_len = 0
        # Only start training once the buffer has a minimal number of samples.
        if memory.size() > 2000:
            if tstep % args.train_interval == 0:
                # t_train_s = time.time()
                q_loss = train(q, q_target, gamma=args.gamma,
                               batch_size=args.batch_size, memory=memory,
                               optimizer=optimizer, device=device)
                # t_train_e = time.time()
                # print('train_time:', t_train_e-t_train_s)
                logger.log_scalar('q_loss', q_loss, step=tstep)
        # Periodic hard copy of online weights into the target network.
        if tstep % args.target_update_interval == 0 and tstep != 0:
            q_target.load_state_dict(q.state_dict())
        if tstep % args.eval_interval == 0:
            eval_score = evaluate(env_name, q, obs_to_state=obs_to_state,
                                  device=device)
            logger.log_scalar('eval_score', eval_score, step=tstep)
            print('tstep : {}, eval_score : {}'.format(tstep, eval_score))
        if tstep % args.model_save_interval == 0 and tstep != 0:
            torch.save(q.state_dict(),
                       os.path.join(model_dir, 'param_' + str(tstep) + '.pkl'))
    # Final checkpoint after the training loop completes.
    torch.save(q.state_dict(), os.path.join(model_dir, 'param_final.pkl'))
    env.close()
def main():
    """Train a GCN-autoencoder DQN agent (GcnAEQnet) with parallel env workers.

    Spawns ``nworker`` Environment processes connected via Pipes, batches their
    graph observations with torch_geometric's ``Batch``, and runs the usual
    DQN loop (replay buffer, epsilon annealing, target sync, eval, checkpoint)
    with an auxiliary autoencoder loss weighted by ``ae_coef``.

    NOTE(review): the original source had its indentation collapsed onto one
    physical line; the statement nesting below (especially which checks sit
    inside the buffer-size gate) was reconstructed from context — confirm
    against the original file. Also note the worker processes are never
    joined/terminated on exit — presumably relying on process teardown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_name", type=str, default='h8-10x10')
    parser.add_argument("--training_step", type=int, default=200000)
    parser.add_argument("--nworker", type=int, default=64)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--lr", type=float, default=0.0005)
    parser.add_argument("--buf_size", type=int, default=100000)
    parser.add_argument("--batch_size", type=int, default=8192)
    parser.add_argument("--start_training_buffer_size", type=int, default=50000)
    parser.add_argument("--target_update_interval", type=int, default=10000)
    parser.add_argument("--train_interval", type=int, default=1)
    parser.add_argument("--model_save_interval", type=int, default=100000)
    parser.add_argument("--eval_interval", type=int, default=50000)
    parser.add_argument("--init_epsilon", type=float, default=1.0)
    parser.add_argument("--final_epsilon", type=float, default=0.1)
    parser.add_argument("--decay_step", type=int, default=180000)
    # --po: partial observability flag, forwarded to obs2state via partial().
    parser.add_argument("--po", type=str2bool, default=True)
    # Rewards are divided by this before being stored in the replay buffer.
    parser.add_argument("--rew_norm", type=float, default=100)
    # Weight of the autoencoder reconstruction term in the training loss.
    parser.add_argument("--ae_coef", type=float, default=1)
    parser.add_argument("--use_global", type=str2bool, default=True)
    parser.add_argument("--gpu", type=int, default=2)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--logdir", type=str, default='./logs/')
    parser.add_argument("--modeldir", type=str, default='./models/')
    parser.add_argument("--exp_flag", type=str, default='ae')
    args = parser.parse_args()

    # Pin the visible GPU before torch initializes CUDA.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    set_seed(args.seed)
    # exp_name builds a run directory name from args, excluding the listed keys.
    dir_name = exp_name(args, [
        'training_step', 'start_training_buffer_size', 'gpu', 'gamma',
        'model_save_interval', 'decay_step', 'eval_interval', 'logdir',
        'modeldir', 'exp_flag', 'seed', 'target_update_interval', 'po',
        'rew_norm', 'ae_coef'
    ], seed=args.seed, exp_flag=args.exp_flag)
    model_dir = os.path.join(args.modeldir, dir_name)
    # Create directory to save params.
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logger = te.Logger(os.path.join(args.logdir, dir_name))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    obs_to_state = partial(obs2state, args.po)
    # Number of steps between each 0.01 epsilon decrement.
    decay_interval = args.decay_step // (
        (args.init_epsilon - args.final_epsilon) / 0.01)
    register(id=env_dict[args.env_name][0],
             entry_point=env_dict[args.env_name][1])
    env_name = env_dict[args.env_name][0]
    # env = gym.make(env_name)
    # env.seed(args.seed)
    # Spawn one Environment worker process per pipe; each gets a distinct seed.
    workers = []
    parent_conns = []
    child_conns = []
    for idx in range(args.nworker):
        parent_conn, child_conn = Pipe()
        worker = Environment(env_name, idx, child_conn, seed=args.seed + idx,
                             obs2state=obs_to_state)
        worker.start()
        workers.append(worker)
        parent_conns.append(parent_conn)
        child_conns.append(child_conn)
    # obs = env.reset()
    # s = obs_to_state(obs)
    # s is [batched graphs, other-feature tensor]; graph_lst/other_lst keep the
    # per-worker pieces so individual transitions can be stored in the buffer.
    s, graph_lst, other_lst = get_init_state(args.nworker, device)
    q = GcnAEQnet(num_entity=5, input_node_feature_size=2,
                  latent_node_feature_size=4).to(device)
    q_target = GcnAEQnet(num_entity=5, input_node_feature_size=2,
                         latent_node_feature_size=4).to(device)
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer(buffer_size=args.buf_size)
    optimizer = optim.Adam(q.parameters(), lr=args.lr)
    # n_epi = 0
    # epi_score = 0.0
    # epi_len = 0
    for tstep in range(args.training_step):
        # t_total_s = time.time()
        epsilon = max(args.final_epsilon,
                      args.init_epsilon - 0.01 * (tstep // decay_interval))  # Linear annealing
        # One action per worker, sampled epsilon-greedily from the batched state.
        # t_sample_a_s = time.time()
        a = q.sample_action(s[0].to(device),
                            torch.from_numpy(np.array(s[1])).float().to(device),
                            epsilon)
        # t_sample_a_e = time.time()
        # print('sample_a_time:', t_sample_a_e - t_sample_a_s)
        # Send each worker its action...
        # t_step_s = time.time()
        for parent_conn, action in zip(parent_conns, a):
            parent_conn.send(action)
        # obs_prime, r, done, info = env.step(a)
        # t_step_e = time.time()
        # print('env_step_time:', t_step_e-t_step_s)
        # s_prime = obs_to_state(obs_prime)
        # done_mask = 0.0 if done else 1.0
        # ...then collect each worker's (state', reward, done) in the same order.
        graph_prime_lst = []
        other_prime_lst = []
        idx = 0
        for parent_conn in parent_conns:
            s_prime_sp, r_sp, done_sp, _ = parent_conn.recv()
            graph_prime_lst.append(s_prime_sp[0])
            other_prime_lst.append(s_prime_sp[1])  # [[][][]]
            # done_mask is 0 at episode end so the bootstrap term is zeroed.
            done_mask_sp = 0.0 if done_sp else 1.0
            memory.put(([graph_lst[idx].to(device), other_lst[idx]], a[idx],
                        r_sp / args.rew_norm,
                        [s_prime_sp[0].to(device), s_prime_sp[1]],
                        done_mask_sp))
            idx += 1
        graph_lst = copy.deepcopy(graph_prime_lst)
        other_lst = copy.deepcopy(other_prime_lst)
        # Re-batch the successor states for the next action-selection pass.
        s = [Batch.from_data_list(graph_prime_lst, follow_batch=[]),
             torch.tensor(other_prime_lst, dtype=torch.float)]
        # epi_score += r
        # epi_len += 1
        # if done:
        #     obs = env.reset()
        #     s = obs_to_state(obs)
        #     n_epi += 1
        #     logger.log_scalar('epi_r', epi_score, step=n_epi)
        #     logger.log_scalar('epi_len', epi_len, step=n_epi)
        #     if n_epi % 20 == 0 and n_epi != 0:
        #         print("n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%, tstep : {}, epi_len : {}".format(
        #             n_epi, epi_score, memory.size(), epsilon * 100, tstep, epi_len))
        #     epi_score = 0
        #     epi_len = 0
        # Each loop iteration advances nworker environment steps in total.
        real_step = tstep * args.nworker
        if memory.size() > args.start_training_buffer_size:
            if real_step % args.train_interval == 0:
                # t_train_s = time.time()
                q_loss, ae_loss = train(q, q_target, gamma=args.gamma,
                                        batch_size=args.batch_size,
                                        memory=memory, optimizer=optimizer,
                                        device=device, coef=args.ae_coef,
                                        use_global=args.use_global)
                # t_train_e = time.time()
                # print('train_time:', t_train_e - t_train_s)
                logger.log_scalar('q_loss', q_loss, step=real_step)
                logger.log_scalar('ae_loss', ae_loss, step=real_step)
            # Periodic hard copy of online weights into the target network.
            if real_step % args.target_update_interval == 0 and real_step != 0:
                q_target.load_state_dict(q.state_dict())
        if real_step % args.eval_interval == 0:
            # t_eval_s = time.time()
            eval_score = evaluate(env_name, q, obs_to_state=obs_to_state,
                                  device=device)
            # t_eval_e = time.time()
            # print('eval_time:', t_eval_e - t_eval_s)
            logger.log_scalar('eval_score', eval_score, step=real_step)
            print('tstep : {}, eval_score : {}'.format(real_step, eval_score))
        if real_step % args.model_save_interval == 0 and real_step != 0:
            torch.save(q.state_dict(),
                       os.path.join(model_dir,
                                    'param_' + str(real_step) + '.pkl'))
        # t_total_e = time.time()
        # print('total_time:', t_total_e-t_total_s)
    # Final checkpoint after the training loop completes.
    torch.save(q.state_dict(), os.path.join(model_dir, 'param_final.pkl'))
# Hyperparameters for the GCN edge-predictor experiment.
missing_entity = 3        # number of entities removed/masked from the graph
num_epoch = 50
num_node_feature = 2
num_ecc_hidden_size = 64  # hidden width of the ECC (edge-conditioned conv) layers
num_ecc_out_size = 2
num_fc_hidden_size = 64   # hidden width of the fully-connected edge network
num_edge_feature = 1
batch_size = 100
test_size = 1000
lr = 0.001
dropout = True
seed = 0
# NOTE(review): the path below lacks a '_' separator before
# 'num_ecc_hidden_size=' (it concatenates directly after the previous value).
# Left unchanged because existing log directories depend on this exact string.
logger = te.Logger('./logs/gcn_predictor/missing_entity=' +
                   str(missing_entity) + '_num_fc_hidden_size=' +
                   str(num_fc_hidden_size) + 'num_ecc_hidden_size=' +
                   str(num_ecc_hidden_size) + '_lr=' + str(lr) + '_dropout=' +
                   str(dropout) + '/seed=' + str(seed))
mylogger = L('./mylogs/gcn_predictor/missing_entity=' + str(missing_entity) +
             '_num_fc_hidden_size=' + str(num_fc_hidden_size) +
             'num_ecc_hidden_size=' + str(num_ecc_hidden_size) + '_lr=' +
             str(lr) + '_dropout=' + str(dropout))
set_seed(seed)


class EdgeNet(nn.Module):
    """Two-layer MLP mapping an input feature vector to ``output_size`` values.

    Used as the edge network of an edge-conditioned convolution — presumably;
    only ``__init__`` is visible in this chunk (``forward`` is defined
    elsewhere). Hidden width comes from the module-level
    ``num_fc_hidden_size``.
    """

    def __init__(self, input_size, output_size):
        super(EdgeNet, self).__init__()
        # fc1: linear + in-place ReLU; fc2: final linear projection (no activation).
        self.fc1 = nn.Sequential(nn.Linear(input_size, num_fc_hidden_size),
                                 nn.ReLU(True))
        self.fc2 = nn.Sequential(nn.Linear(num_fc_hidden_size, output_size))
def __init__(self, log_path: PathLike):
    """Open a tensorboard_easy log writer at *log_path*.

    The import is deferred to construction time so the module can be
    loaded without tensorboard_easy installed.
    """
    import tensorboard_easy as tbe
    self.logger = tbe.Logger(log_path)
def __init__(self, log_path):
    """Wrap a tensorboard_easy ``Logger`` that writes to *log_path*."""
    backend = tensorboard_easy.Logger(log_path)
    self.logger = backend