Example no. 1
import argparse
import os
from functools import partial

import gym
import torch
import torch.optim as optim
from gym.envs.registration import register
import tensorboard_easy as te  # assumed alias for the `te.Logger` call below

# Project-local helpers (not shown here): Qnet, ReplayBuffer, train, evaluate,
# obs2state, str2bool, set_seed, exp_name, env_dict

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_name", type=str, default='h8-10x10')
    parser.add_argument("--training_step", type=int, default=10000000)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--lr", type=float, default=0.0005)
    parser.add_argument("--buf_size", type=int, default=100000)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--target_update_interval", type=int, default=10000)
    parser.add_argument("--train_interval", type=int, default=1)
    parser.add_argument("--model_save_interval", type=int, default=100000)
    parser.add_argument("--eval_interval", type=int, default=50000)
    parser.add_argument("--init_epsilon", type=float, default=1.0)
    parser.add_argument("--final_epsilon", type=float, default=0.1)
    parser.add_argument("--decay_step", type=int, default=9000000)
    parser.add_argument("--po", type=str2bool, default=False)
    parser.add_argument("--rew_norm", type=float, default=100.0)
    parser.add_argument("--gpu", type=int, default=2)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--logdir", type=str, default='./logs/')
    parser.add_argument("--modeldir", type=str, default='./models/')
    parser.add_argument("--exp_flag", type=str, default='')
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    set_seed(args.seed)
    dir_name = exp_name(args, [
        'training_step', 'gpu', 'gamma', 'model_save_interval', 'decay_step',
        'eval_interval', 'logdir', 'modeldir', 'exp_flag', 'seed', 'rew_norm'
    ],
                        seed=args.seed,
                        exp_flag=args.exp_flag)
    model_dir = os.path.join(args.modeldir, dir_name)
    # create directory to save params
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logger = te.Logger(os.path.join(args.logdir, dir_name))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    obs_to_state = partial(obs2state, args.po)
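    # Number of environment steps between successive 0.01 drops in epsilon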
    decay_interval = args.decay_step // (
        (args.init_epsilon - args.final_epsilon) / 0.01)

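    # Register the custom Gym environment selected by --env_name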
    register(id=env_dict[args.env_name][0],
             entry_point=env_dict[args.env_name][1])
    env_name = env_dict[args.env_name][0]
    env = gym.make(env_name)
    env.seed(args.seed)

    obs = env.reset()
    s = obs_to_state(obs)

    q = Qnet(input_size=len(s)).to(device)
    q_target = Qnet(input_size=len(s)).to(device)
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer(buffer_size=args.buf_size)
    optimizer = optim.Adam(q.parameters(), lr=args.lr)

    n_epi = 0
    epi_score = 0.0
    epi_len = 0
    for tstep in range(args.training_step):
        epsilon = max(args.final_epsilon, args.init_epsilon - 0.01 *
                      (tstep // decay_interval))  # Linear annealing

        a = q.sample_action(torch.from_numpy(s).float().to(device), epsilon)
        obs_prime, r, done, info = env.step(a)
        s_prime = obs_to_state(obs_prime)
        done_mask = 0.0 if done else 1.0
        memory.put((s, a, r / args.rew_norm, s_prime, done_mask))
        s = s_prime
        epi_score += r
        epi_len += 1
        if done:
            obs = env.reset()
            s = obs_to_state(obs)
            n_epi += 1
            logger.log_scalar('epi_r', epi_score, step=n_epi)
            logger.log_scalar('epi_len', epi_len, step=n_epi)
            if n_epi % 20 == 0 and n_epi != 0:
                print(
                    "n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%, tstep : {}, epi_len : {}"
                    .format(n_epi, epi_score, memory.size(), epsilon * 100,
                            tstep, epi_len))
            epi_score = 0
            epi_len = 0

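        # Begin training only once the replay buffer holds a warm-up of 2,000 transitions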
        if memory.size() > 2000 and tstep % args.train_interval == 0:
            q_loss = train(q,
                           q_target,
                           gamma=args.gamma,
                           batch_size=args.batch_size,
                           memory=memory,
                           optimizer=optimizer,
                           device=device)
            logger.log_scalar('q_loss', q_loss, step=tstep)

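        # Periodically hard-sync the target network with the online network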
        if tstep % args.target_update_interval == 0 and tstep != 0:
            q_target.load_state_dict(q.state_dict())

        if tstep % args.eval_interval == 0:
            eval_score = evaluate(env_name,
                                  q,
                                  obs_to_state=obs_to_state,
                                  device=device)
            logger.log_scalar('eval_score', eval_score, step=tstep)
            print('tstep : {}, eval_score : {}'.format(tstep, eval_score))

        if tstep % args.model_save_interval == 0 and tstep != 0:
            torch.save(q.state_dict(),
                       os.path.join(model_dir, 'param_' + str(tstep) + '.pkl'))
    torch.save(q.state_dict(), os.path.join(model_dir, 'param_final.pkl'))
    env.close()
Example no. 2
import argparse
import copy
import os
from functools import partial
from multiprocessing import Pipe

import numpy as np
import torch
import torch.optim as optim
from gym.envs.registration import register
from torch_geometric.data import Batch
import tensorboard_easy as te  # assumed alias for the `te.Logger` call below

# Project-local helpers (not shown here): GcnAEQnet, Environment, ReplayBuffer,
# train, evaluate, get_init_state, obs2state, str2bool, set_seed, exp_name,
# env_dict

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_name", type=str, default='h8-10x10')
    parser.add_argument("--training_step", type=int, default=200000)
    parser.add_argument("--nworker", type=int, default=64)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--lr", type=float, default=0.0005)
    parser.add_argument("--buf_size", type=int, default=100000)
    parser.add_argument("--batch_size", type=int, default=8192)
    parser.add_argument("--start_training_buffer_size", type=int, default=50000)
    parser.add_argument("--target_update_interval", type=int, default=10000)
    parser.add_argument("--train_interval", type=int, default=1)
    parser.add_argument("--model_save_interval", type=int, default=100000)
    parser.add_argument("--eval_interval", type=int, default=50000)
    parser.add_argument("--init_epsilon", type=float, default=1.0)
    parser.add_argument("--final_epsilon", type=float, default=0.1)
    parser.add_argument("--decay_step", type=int, default=180000)
    parser.add_argument("--po", type=str2bool, default=True)
    parser.add_argument("--rew_norm", type=float, default=100)
    parser.add_argument("--ae_coef", type=float, default=1)
    parser.add_argument("--use_global", type=str2bool, default=True)
    parser.add_argument("--gpu", type=int, default=2)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--logdir", type=str, default='./logs/')
    parser.add_argument("--modeldir", type=str, default='./models/')
    parser.add_argument("--exp_flag", type=str, default='ae')
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    set_seed(args.seed)
    dir_name = exp_name(args, [
        'training_step', 'start_training_buffer_size', 'gpu', 'gamma',
        'model_save_interval', 'decay_step', 'eval_interval', 'logdir',
        'modeldir', 'exp_flag', 'seed', 'target_update_interval', 'po',
        'rew_norm', 'ae_coef'
    ],
                        seed=args.seed,
                        exp_flag=args.exp_flag)
    model_dir = os.path.join(args.modeldir, dir_name)
    # create directory to save params
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    logger = te.Logger(os.path.join(args.logdir, dir_name))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    obs_to_state = partial(obs2state, args.po)
    decay_interval = args.decay_step // ((args.init_epsilon - args.final_epsilon) / 0.01)

    register(id=env_dict[args.env_name][0],
             entry_point=env_dict[args.env_name][1])
    env_name = env_dict[args.env_name][0]
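    # Spawn nworker environment processes, each driven over its own Pipe with a distinct seed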
    workers = []
    parent_conns = []
    child_conns = []
    for idx in range(args.nworker):
        parent_conn, child_conn = Pipe()
        worker = Environment(env_name, idx, child_conn, seed=args.seed + idx, obs2state=obs_to_state)
        worker.start()
        workers.append(worker)
        parent_conns.append(parent_conn)
        child_conns.append(child_conn)
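    # Initial state for all workers: a batched graph plus per-worker auxiliary features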
    s, graph_lst, other_lst = get_init_state(args.nworker, device)
    q = GcnAEQnet(num_entity=5, input_node_feature_size=2, latent_node_feature_size=4).to(device)
    q_target = GcnAEQnet(num_entity=5, input_node_feature_size=2, latent_node_feature_size=4).to(device)
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer(buffer_size=args.buf_size)
    optimizer = optim.Adam(q.parameters(), lr=args.lr)

    for tstep in range(args.training_step):
        epsilon = max(args.final_epsilon, args.init_epsilon - 0.01 *
                      (tstep // decay_interval))  # Linear annealing
        a = q.sample_action(s[0].to(device),
                            torch.from_numpy(np.array(s[1])).float().to(device),
                            epsilon)
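        # Scatter one action to each worker over its pipe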
        for parent_conn, action in zip(parent_conns, a):
            parent_conn.send(action)
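        # Gather next states and rewards from every worker and store the transitions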
        graph_prime_lst = []
        other_prime_lst = []
        for idx, parent_conn in enumerate(parent_conns):
            s_prime_sp, r_sp, done_sp, _ = parent_conn.recv()
            graph_prime_lst.append(s_prime_sp[0])
            other_prime_lst.append(s_prime_sp[1])
            done_mask_sp = 0.0 if done_sp else 1.0
            memory.put(([graph_lst[idx].to(device), other_lst[idx]], a[idx],
                        r_sp / args.rew_norm,
                        [s_prime_sp[0].to(device), s_prime_sp[1]],
                        done_mask_sp))
        graph_lst = copy.deepcopy(graph_prime_lst)
        other_lst = copy.deepcopy(other_prime_lst)
        s = [Batch.from_data_list(graph_prime_lst, follow_batch=[]),
             torch.tensor(other_prime_lst, dtype=torch.float)]
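        # Each loop iteration steps all workers at once, so the global step is tstep * nworker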
        real_step = tstep * args.nworker
        if (memory.size() > args.start_training_buffer_size
                and real_step % args.train_interval == 0):
            q_loss, ae_loss = train(q,
                                    q_target,
                                    gamma=args.gamma,
                                    batch_size=args.batch_size,
                                    memory=memory,
                                    optimizer=optimizer,
                                    device=device,
                                    coef=args.ae_coef,
                                    use_global=args.use_global)
            logger.log_scalar('q_loss', q_loss, step=real_step)
            logger.log_scalar('ae_loss', ae_loss, step=real_step)

        if real_step % args.target_update_interval == 0 and real_step != 0:
            q_target.load_state_dict(q.state_dict())

        if real_step % args.eval_interval == 0:
            eval_score = evaluate(env_name, q, obs_to_state=obs_to_state, device=device)
            logger.log_scalar('eval_score', eval_score, step=real_step)
            print('tstep : {}, eval_score : {}'.format(real_step, eval_score))

        if real_step % args.model_save_interval == 0 and real_step != 0:
            torch.save(q.state_dict(), os.path.join(model_dir, 'param_' + str(real_step) + '.pkl'))
    torch.save(q.state_dict(), os.path.join(model_dir, 'param_final.pkl'))
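    # Shut down the worker processes (Environment is assumed to subclass multiprocessing.Process)
    for worker in workers:
        worker.terminate()
        worker.join()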
Example no. 3
import torch.nn as nn
import tensorboard_easy as te  # assumed alias for the `te.Logger` call below

# Project-local helpers (not shown here): L (a file logger class) and set_seed

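# Hyperparameters for the GCN predictor experiment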
missing_entity = 3
num_epoch = 50
num_node_feature = 2
num_ecc_hidden_size = 64
num_ecc_out_size = 2
num_fc_hidden_size = 64
num_edge_feature = 1
batch_size = 100
test_size = 1000
lr = 0.001
dropout = True
seed = 0
log_suffix = (f'missing_entity={missing_entity}'
              f'_num_fc_hidden_size={num_fc_hidden_size}'
              f'_num_ecc_hidden_size={num_ecc_hidden_size}'
              f'_lr={lr}'
              f'_dropout={dropout}')
logger = te.Logger(f'./logs/gcn_predictor/{log_suffix}/seed={seed}')
mylogger = L(f'./mylogs/gcn_predictor/{log_suffix}')
set_seed(seed)


class EdgeNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(EdgeNet, self).__init__()
        self.fc1 = nn.Sequential(nn.Linear(input_size, num_fc_hidden_size),
                                 nn.ReLU(True))
        self.fc2 = nn.Sequential(nn.Linear(num_fc_hidden_size, output_size))

    def forward(self, x):
        # Forward pass assumed to chain the two layers: fc1 -> fc2
        return self.fc2(self.fc1(x))
Example no. 4
    def __init__(self, log_path: PathLike):
        import tensorboard_easy
        self.logger = tensorboard_easy.Logger(log_path)
Example no. 5
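    # Assumes `import tensorboard_easy` at module scope; Example no. 4 imports it lazily instead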
    def __init__(self, log_path):
        self.logger = tensorboard_easy.Logger(log_path)