def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n
    global_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    writer = SummaryWriter('logs')

    workers = [
        Worker(global_model, global_optimizer, global_ep, global_ep_r,
               res_queue, i) for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
            writer.add_scalar('log/score', float(ep_r), ep)
            writer.add_scalar('log/loss', float(loss), ep)
        else:
            break
    [w.join() for w in workers]
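
Note: none of the examples on this page define SharedAdam itself. It is an Adam optimizer whose per-parameter state buffers are moved into shared memory so that every worker process updates the same moment estimates (Example #5's comments attribute it to MorvanZhou's pytorch-A3C). The following is only a minimal sketch of that common pattern, not code from any of the examples; the hyperparameter defaults are assumptions, and recent PyTorch releases track the step counter as a tensor.

import torch


class SharedAdam(torch.optim.Adam):
    """Adam whose per-parameter state lives in shared memory, so several
    processes can step one optimizer against a shared model."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps,
                         weight_decay=weight_decay)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                # Pre-create the state Adam would otherwise build lazily
                # (older variants of this class use a plain int for 'step').
                state['step'] = torch.zeros(())
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)
                # Move the state into shared memory so workers see one copy.
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()
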
Example #2
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n

    ### Define the shared (global) model; it gets passed to each worker by reference(?)
    global_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    ### Define each Worker; global_model and everything needed to train it are passed by reference(?)
    workers = [Worker(global_model, global_optimizer, global_ep, global_ep_r, res_queue, i) for i in range(mp.cpu_count())]
    ### From here on, most of the work moves to the Workers
    [w.start() for w in workers] ### Worker.start() invokes Worker.run(); that is how multiprocessing works
    res = []

    while True:
        ### Results from every Worker accumulate in res_queue
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
        else:
            break
    [w.join() for w in workers]
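
As the comments in Example #2 describe, the Worker class (not shown in any snippet) is an mp.Process: its run() builds its own environment and a local copy of the model, pushes gradients from that local copy onto the shared global model, steps the shared optimizer, and reports episode results through res_queue, ending with a None sentinel so the loop in main() can exit. The sketch below is only an illustration of that shape; it reuses Model and env_name from the snippet above, and MAX_EP plus the rollout helper are assumptions.

import gym
import torch.multiprocessing as mp

MAX_EP = 3000  # assumed episode budget, not taken from the examples


class Worker(mp.Process):
    def __init__(self, global_model, optimizer, global_ep, global_ep_r,
                 res_queue, worker_id):
        super().__init__()
        self.gnet, self.opt = global_model, optimizer
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.name = 'w%02d' % worker_id

    def run(self):
        env = gym.make(env_name)
        lnet = Model(env.observation_space.shape[0], env.action_space.n)
        while self.g_ep.value < MAX_EP:
            lnet.load_state_dict(self.gnet.state_dict())   # pull the latest weights
            loss, ep_r = run_one_episode(env, lnet)        # hypothetical rollout helper
            self.opt.zero_grad()
            loss.backward()
            # Copy local gradients onto the shared model, then step the shared Adam.
            for lp, gp in zip(lnet.parameters(), self.gnet.parameters()):
                gp._grad = lp.grad
            self.opt.step()
            with self.g_ep.get_lock():
                self.g_ep.value += 1
            self.res_queue.put([self.g_ep.value, ep_r, loss.item()])
        self.res_queue.put(None)  # sentinel that ends the while loop in main()
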
Example #3
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n
    env.close()

    global_model = Model(num_inputs, num_actions)
    global_average_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_average_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    n = mp.cpu_count()
    workers = [
        Worker(global_model, global_average_model, global_optimizer, global_ep,
               global_ep_r, res_queue, i) for i in range(n)
    ]
    [w.start() for w in workers]
    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
        else:
            break
    [w.join() for w in workers]
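
Example #3 carries a second shared network, global_average_model, alongside the learner. In ACER-style training this extra copy is usually maintained as an exponential moving average of the global weights and used to regularise policy updates. The snippet does not show that update, so the following is only a sketch of how such an average model is typically refreshed; the decay value and function name are assumptions.

import torch


def update_average_model(global_model, global_average_model, decay=0.99):
    """Polyak/EMA update: avg <- decay * avg + (1 - decay) * current."""
    with torch.no_grad():
        for avg_p, p in zip(global_average_model.parameters(),
                            global_model.parameters()):
            avg_p.mul_(decay).add_(p, alpha=1 - decay)
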
Example #4
def main():
    if PRINTFLAG == False:
        time_now = int(time.time())
        time_local = time.localtime(time_now)
        dt = time.strftime("%Y-%m-%d_%H-%M-%S", time_local)
        resFileDir = "../../data/RL_model/"+dt
        modelFileDir = resFileDir + "/model"
        if os.path.exists(modelFileDir) == False:
            os.makedirs(modelFileDir)

    gnet = Net()        # global network
    gnet.share_memory()         # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0001)      # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()
    # parallel training
    print("cpu_count = %d" % mp.cpu_count())
    if platform.system() == "Linux":
        worker_count = 20
    else:
        worker_count = 1
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(worker_count)]
    [w.start() for w in workers]
    res = []                    # record episode reward to plot
    max_r = [-1,-1000]
    if PRINTFLAG == False:
        file = open(resFileDir+"/res.txt", 'w')
    epoch = 0
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            if r > max_r[1]:
                max_r[0] = epoch
                max_r[1] = r
                if PRINTFLAG==False:
                    torch.save(gnet.state_dict(), modelFileDir + "/" + str(epoch) + ".pkl")
                print("max r = %f, epoch = %d" % (max_r[0], max_r[1]))
            elif epoch % 100==0:
                if PRINTFLAG==False:
                    torch.save(gnet.state_dict(), modelFileDir + "/" + str(epoch) + ".pkl")
            if PRINTFLAG == False:
                file.write(str(r) + "\n")
                file.flush()
            epoch += 1
        else:
            break
    if PRINTFLAG == False:
        file.close()
    [w.join() for w in workers]
    if PRINTFLAG == False:
        plt.plot(res)
        plt.ylabel('Moving average ep reward')
        plt.xlabel('Step')
        plt.savefig(resFileDir+"/res.pdf")
Example #5
    def __init__(self, obs_dim, act_dim, lr, agents, obs_type="RGB", width=None, height=None, channel=None, lr_scheduler=False, influencer_num=1):
        self.agents = agents
        self.agent_num = len(agents)
        self.influencer_num = influencer_num
        self.lr_scheduler = lr_scheduler
        self.action = act_dim
        self.obs_type = obs_type
        if obs_type == "RGB":
            self.width = width
            self.height = height
            self.channel = channel
        self.obs_dim = obs_dim
        for i in range(self.agent_num):
            # the optimizer and scheduler are kept on the agent (network) itself
            # SharedAdam is the optimizer implemented in MorvanZhou's A3C; it seems to be used to update two networks at once, details unclear
            self.agents[i].optimizer = SharedAdam(self.agents[i].parameters(), lr=lr, betas=(0.92, 0.99))
            if lr_scheduler:
                self.agents[i].lr_scheduler = torch.optim.lr_scheduler.StepLR(
                    self.agents[i].optimizer, step_size=10000, gamma=0.9, last_epoch=-1)
    def __init__(self, env_id, input_shape, n_actions, icm, n_threads=8):
        names = [str(i) for i in range(1, n_threads + 1)]

        global_actor_critic = ActorCritic(input_shape, n_actions)
        global_actor_critic.share_memory()
        global_optim = SharedAdam(global_actor_critic.parameters())

        if not icm:
            global_icm = None
            global_icm_optim = None
        else:
            global_icm = ICM(input_shape, n_actions)
            global_icm.share_memory()
            global_icm_optim = SharedAdam(global_icm.parameters())

        self.ps = [
            mp.Process(target=worker,
                       args=(name, input_shape, n_actions, global_actor_critic,
                             global_icm, global_optim, global_icm_optim,
                             env_id, n_threads, icm)) for name in names
        ]

        [p.start() for p in self.ps]
        [p.join() for p in self.ps]
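
The constructor above starts and joins its worker processes itself, so simply instantiating the class runs the whole (optionally ICM-augmented) training session. A hypothetical call, since the owning class's name and environment are not visible in the fragment:

# Hypothetical usage; "ParallelTrainer" is a placeholder for whatever class
# owns the __init__ above, and CartPole's shapes are assumptions.
ParallelTrainer(env_id='CartPole-v1', input_shape=[4], n_actions=2,
                icm=True, n_threads=8)
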
def main():
    gamma = 0.9
    max_episodes = 2000
    update_global_iter = 10
    env_name = 'MountainCar-v0'

    env = gym.make(env_name)
    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.n

    global_net = A3C(s_dim, a_dim)
    global_net.share_memory()
    global_opt = SharedAdam(global_net.parameters(), lr=0.001)

    global_episode = mp.Value('i', 0)
    global_rewards = mp.Value('d', 0.)
    result_queue = mp.Queue()

    num_cpu = mp.cpu_count()
    print('cpu count:', num_cpu)
    workers = [
        Worker(global_net, global_opt, global_episode, global_rewards,
               result_queue, gamma, max_episodes, update_global_iter, env_name,
               i) for i in range(num_cpu)
    ]
    [w.start() for w in workers]

    results = []
    while True:
        result = result_queue.get()
        if result == 'done':
            break
        else:
            results.append(result)

    [w.join() for w in workers]

    print('done')

    plt.plot(results)
    plt.ylabel('Moving average episode reward')
    plt.xlabel('Step')
    plt.title('A3C')
    plt.savefig('result.png')
    plt.show()
Example #8
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n
    print('state size:', num_inputs)
    print('action size:', num_actions)

    online_net = QNet(num_inputs, num_actions)
    target_net = QNet(num_inputs, num_actions)
    target_net.load_state_dict(online_net.state_dict())
    online_net.share_memory()
    target_net.share_memory()

    optimizer = SharedAdam(online_net.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    writer = SummaryWriter('logs')

    online_net.to(device)
    target_net.to(device)
    online_net.train()
    target_net.train()

    workers = [
        Worker(online_net, target_net, optimizer, global_ep, global_ep_r,
               res_queue, i) for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
            writer.add_scalar('log/score', float(ep_r), ep)
            writer.add_scalar('log/loss', float(loss), ep)
        else:
            break
    [w.join() for w in workers]
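
Example #8 hands both the online and the target QNet to every worker. The target copy is normally refreshed from the online weights at a fixed step interval; that refresh is not shown here, so the following is only a sketch with an assumed interval.

def maybe_sync_target(step, online_net, target_net, update_target_interval=100):
    """Copy the online weights into the shared target network every N steps
    (the interval value is an assumption, not taken from the snippet)."""
    if step % update_target_interval == 0:
        target_net.load_state_dict(online_net.state_dict())
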
Example #9
def trainA3C(file_name="A3C",
             env=GridworldEnv(1),
             update_global_iter=10,
             gamma=0.999,
             is_plot=False,
             num_episodes=500,
             max_num_steps_per_episode=1000,
             learning_rate=0.0001):
    """
    A3C training routine. Retuns rewards and durations logs.
    Plot environment screen
    """
    ns = env.observation_space.shape[0]  ## Line to fix for arbitrary environment
    na = env.action_space.n

    gnet = Net(ns, na)  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=learning_rate)  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i,
               update_global_iter, num_episodes, max_num_steps_per_episode,
               gamma, env, ns, na) for i in range(mp.cpu_count())
    ]

    [w.start() for w in workers]
    episode_rewards = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            episode_rewards.append(r)
        else:
            break
    [w.join() for w in workers]

    #Store results
    np.save(file_name + '-a3c-rewards', episode_rewards)

    return episode_rewards
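
trainA3C both returns the reward log and saves it to '<file_name>-a3c-rewards.npy', so a typical call looks like the sketch below (GridworldEnv comes from the same project as the snippet):

rewards = trainA3C(file_name="A3C", env=GridworldEnv(1),
                   num_episodes=500, learning_rate=1e-4)
print('episodes run:', len(rewards))
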
    else:
        runs = 1

    for i in range(runs):
        starttime = datetime.now()

        # load global network
        if handleArguments().load_model:
            model = Net(len(actions))
            model = torch.load("./VIZDOOM/doom_save_model/a2c_sync_doom.pt")
            model.eval()
        else:
            model = Net(len(actions))

        # global optimizer
        opt = SharedAdam(model.parameters(), lr=0.001, betas=(0.92, 0.999))

        # record episode-reward and duration-episode to plot
        res = []
        durations = []
        action = []
        global_ep, global_ep_r, global_time_done = mp.Value('i', 0), mp.Value('d', 0.), mp.Value('d', 0.)
        res_queue, time_queue, action_queue = mp.Queue(), mp.Queue(), mp.Queue()
        loop = 0

        while global_ep.value < MAX_EP:
            loop += 1
            print("loop: ", loop)
            # parallel training
Example #11
    def run(self):
        print('Starting Process {}'.format(self.name))
        # change_opt(self.opt, 'step')
        # print_opt_state(self.opt, 'step')

        for i in range(steps):
            change_opt(self.opt, 'exp_avg')
            self.opt.step()
            # print_opt_state(optimizer, 'step')
            # print_opt_state(optimizer, 'exp_avg')


if __name__ == '__main__':

    gnet = Actor(state_size, action_size, random_seed)
    optimizer = SharedAdam(gnet.parameters())

    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    print('Number of workers is {} '.format(mp.cpu_count()))

    workers = [
        Worker(gnet, optimizer, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    [w.join() for w in workers]
Example #12
    max_episode = 10000
    capacity = 200
    train_freq = 20
    n_step = 4
    stack_num = 3
    pc_weight = 1.
    rp_weight = 1.
    vr_weight = 1.
    batch_size = 32
    observation_dim = (3, 84, 84)
    entropy_weight = 1e-4

    env = gym.make(env_id)
    action_dim = env.action_space.n
    global_net = unreal(observation_dim, action_dim, gamma, entropy_weight)
    optimizer = SharedAdam(global_net.parameters(), lr=1e-4)
    global_episode_counter, global_reward, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()
    workers = [
        worker(global_net=global_net,
               optimizer=optimizer,
               global_episode_counter=global_episode_counter,
               global_reward=global_reward,
               res_queue=res_queue,
               name=str(i),
               max_episode=max_episode,
               gamma=gamma,
               env_id=env_id,
               capacity=capacity,
               train_freq=train_freq,
               n_step=n_step,
for i in range(50):
    ot = 1222
    j = 0
    while True:
        j += 1
        job, pre = job.expand(enet)
        a = v_wrap(enet.choose_action(v_wrap(s)))
        s_, r, done = job.step(a.numpy())
        if abs(pre - ot) == 0:
            r = 100 + r
        else:
            r = 100 / abs(pre - ot) + r
        opt = SharedAdam(enet.parameters(), lr=0.000001)
        _,q_t = tnet.forward(v_wrap(s))
        loss = enet.loss_func(v_wrap(s),a,v_wrap(r)+0.1*q_t)
        if j % 2 ==0:
            tnet.load_state_dict(enet.state_dict())
        opt.zero_grad()
        loss.backward()
        opt.step()
        s = s_
        job,time4 = job.expand(enet)
        timee.append(time4)
        if done :
#            timee.append(job.total_time)
            print(job.total_time)
            job = job_shop_env("la11")
#            job.reset()
Example #14
                                  buffer_s, buffer_a, buffer_r)
                    buffer_s, buffer_a, buffer_r = [], [], []

                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break
                s = s1
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Network()  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters())  # global optimizer

    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    #workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, 0)]

    [w.start() for w in workers]

    results = []
Example #15
                    pickle.dump(self.train_logs,
                                fHandle,
                                protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    configure_env_params()
    args.logfile_latest = args.logfile + '_' + args.env + '_latest_DDPG' + '.pkl'
    args.logfile = args.logfile + '_' + args.env + '_DDPG_' + time.strftime(
        "%Y%m%d-%H%M%S") + '.pkl'



    global_ddpg = DDPG(obs_dim=obs_dim, act_dim=act_dim, env=env, memory_size=args.rmsize,\
                        batch_size=args.bsize, tau=args.tau)
    optimizer_global_actor = SharedAdam(global_ddpg.actor.parameters(),
                                        lr=5e-5)
    optimizer_global_critic = SharedAdam(global_ddpg.critic.parameters(),
                                         lr=5e-5)  #, weight_decay=1e-02)

    # optimizer_global_actor.share_memory()
    # optimizer_global_critic.share_memory()
    global_ddpg.share_memory()

    if not args.multithread:
        worker = Worker(str(1), optimizer_global_actor,
                        optimizer_global_critic)
        worker.work(global_ddpg)
    else:
        processes = []
        for i in range(args.n_workers):
            worker = Worker(str(i), optimizer_global_actor,
                    buffer_s, buffer_a, buffer_r = [], [], []

                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break
                s = s_
                real_state = real_state_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=1e-4,
                     betas=(0.92, 0.999))  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # non-parallel training (single worker)
    #workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(1)]
    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
Example #17
def main(cfg):
    ckpt = None
    if cfg.ckpt:
        if not os.path.exists(cfg.ckpt):
            print('Invalid ckpt path:', cfg.ckpt)
            exit(1)
        ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)

        print(cfg.ckpt, 'loaded')
        loaded_cfg = ckpt['cfg'].__dict__
        pprint(loaded_cfg)

        del loaded_cfg['num_episodes']
        del loaded_cfg['num_workers']
        del loaded_cfg['test_set']
        del loaded_cfg['pre_ckpt']

        cfg.__dict__.update(loaded_cfg)
        cfg.model = cfg.model.upper()

        print()
        print('Merged Config')
        pprint(cfg.__dict__)
    else:
        os.makedirs(os.path.join(cfg.log_dir, 'ckpt'))

    prepro_dir = os.path.join(cfg.prepro_dir, 'task%s' % (cfg.task_id))
    with open(os.path.join(prepro_dir, 'vocab.pk'), 'rb') as f:
        vocab = pickle.load(f)

    with open(os.path.join(prepro_dir, 'stats.pk'), 'rb') as f:
        stats = pickle.load(f)
        stats['max_ques_len'] = stats['max_sent_len']

    shared_model = create_a3c_model(cfg, vocab, stats)

    if cfg.pre_ckpt is not None:
        pretrain_param = torch.load(cfg.pre_ckpt,
                                    map_location=lambda storage, loc: storage)
        pretrain_param = pretrain_param['model']
        missing_keys = []
        unexpected_keys = []
        error_msgs = []
        new_pretrain_param = pretrain_param.copy()
        pretrain_param = new_pretrain_param.copy()

        metadata = getattr(pretrain_param, '_metadata', None)
        if metadata is not None:
            pretrain_param._metadata = metadata

        def load(module, prefix=''):
            local_metadata = {} if metadata is None else metadata.get(
                prefix[:-1], {})
            module._load_from_state_dict(pretrain_param, prefix,
                                         local_metadata, True, missing_keys,
                                         unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        load(shared_model, prefix='')
        print("Weights of {} not initialized from pretrained model: {}".format(
            shared_model.__class__.__name__, missing_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            shared_model.__class__.__name__, unexpected_keys))

    if ckpt is not None:
        shared_model.load_state_dict(ckpt['model'])
    shared_model.share_memory()

    params = filter(lambda p: p.requires_grad, shared_model.parameters())
    optim = SharedAdam(params, lr=cfg.lr)

    if ckpt is not None:
        optim.load_state_dict(ckpt['optim'])
    optim.share_memory()

    set_seed(cfg.seed)

    done = mp.Value('i', False)
    if ckpt is not None:
        gstep = mp.Value('i', ckpt['step'])
    else:
        gstep = mp.Value('i', 0)
    queue = mp.Queue()

    train_env = create_env(cfg, 'train', vocab, stats, shuffle=True)
    valid_shuffle = cfg.num_valid_episodes != 0
    valid_env = create_env(cfg, 'valid', vocab, stats, shuffle=valid_shuffle)

    procs = []
    if cfg.debug:
        p = TrainWorker(cfg, len(procs), done, shared_model, optim, vocab,
                        stats, train_env, queue, gstep)
        # p = ValidWorker(cfg, len(procs), done, shared_model, optim, vocab, stats, valid_env, gstep)
        p.run()
        return

    p = ValidWorker(cfg, len(procs), done, shared_model, optim, vocab, stats,
                    valid_env, gstep)
    p.start()
    procs.append(p)

    for _ in range(cfg.num_workers - 1):
        p = TrainWorker(cfg, len(procs), done, shared_model, optim, vocab,
                        stats, train_env, queue, gstep)
        p.start()
        procs.append(p)

    p = TensorboardWorker(cfg, len(procs), queue, done, gstep)
    p.start()
    procs.append(p)

    for p in procs:
        p.join()
    print('All processes are finished:', cfg.log_dir)
Example #18
                                  buffer_s, buffer_a, buffer_r, GAMMA)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break

                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]

Example #19
                    # sync
                    a_loss, c_loss = push_and_pull(self.opt, self.lnet, self.gnet, done, s_, buffer_s, buffer_a, buffer_r, GAMMA, self.g_ep.value)
                    buffer_s, buffer_a, buffer_r = [], [], []

                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name, a_loss.data.numpy()[0], c_loss.data.numpy()[0])
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)        # global network
    gnet.share_memory()         # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=5e-3)      # global optimizer
    global_ep, global_ep_r, res_queue, q_lock = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue(), mp.Lock()

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(mp.cpu_count())]
    # workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(1)]

    [w.start() for w in workers]
    res = []                    # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]
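
Several of the worker fragments above call push_and_pull and record without showing them. In the pytorch-A3C pattern these examples follow, push_and_pull turns the episode buffers into discounted value targets, pushes the local network's gradients onto the global one, steps the shared optimizer, and pulls the fresh global weights back into the local net; record bumps the shared episode counter, keeps a moving-average reward, and reports it through res_queue. A condensed sketch follows; the loss_func method on the local net and the exact tensor handling are assumptions (Example #19 additionally expects the actor and critic losses to be returned separately).

import numpy as np
import torch


def push_and_pull(opt, lnet, gnet, done, s_, buffer_s, buffer_a, buffer_r, gamma):
    # Bootstrap the value of the final state unless the episode terminated.
    v_s_ = 0. if done else lnet.forward(
        torch.as_tensor(s_[None, :], dtype=torch.float32))[-1].item()
    targets = []
    for r in buffer_r[::-1]:            # discounted returns, newest to oldest
        v_s_ = r + gamma * v_s_
        targets.append(v_s_)
    targets.reverse()

    loss = lnet.loss_func(              # assumed actor-critic loss on the local net
        torch.as_tensor(np.vstack(buffer_s), dtype=torch.float32),
        torch.as_tensor(np.array(buffer_a)),
        torch.as_tensor(np.array(targets)[:, None], dtype=torch.float32))
    opt.zero_grad()
    loss.backward()
    for lp, gp in zip(lnet.parameters(), gnet.parameters()):
        gp._grad = lp.grad              # push local gradients to the shared net
    opt.step()
    lnet.load_state_dict(gnet.state_dict())   # pull the updated weights back
    return loss


def record(global_ep, global_ep_r, ep_r, res_queue, name):
    with global_ep.get_lock():
        global_ep.value += 1
    with global_ep_r.get_lock():
        # exponential moving average of the episode reward
        global_ep_r.value = ep_r if global_ep_r.value == 0. \
            else global_ep_r.value * 0.99 + ep_r * 0.01
    res_queue.put(global_ep_r.value)
    print(name, 'Ep:', global_ep.value, '| Ep_r: %.0f' % global_ep_r.value)
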
Example #20

if __name__=="__main__":
    #torch.backends.cudnn.benchmark = True
    params_file = sys.argv[1]
    timestamp = sys.argv[2]
    param = json.load(open(params_file))
    param['model_dir']= param['model_dir']+'/'+param['question_type']+'_'+timestamp
    #train_model = TrainModel(param)
#    train_model.train()
    read_data = ReadBatchData(param)
    gnet = NPI(param, read_data.none_argtype_index, read_data.num_argtypes, \
                     read_data.num_progs, read_data.max_arguments, \
                     read_data.rel_index, read_data.type_index, \
                     read_data.wikidata_rel_embed, read_data.wikidata_type_embed, \
                     read_data.vocab_init_embed, read_data.program_to_argtype, \
                     read_data.program_to_targettype) # local network
    gnet.share_memory()         # share the global parameters in multiprocessing
    learning_rate = param['learning_rate']
    opt = SharedAdam(gnet.parameters(), lr=learning_rate)      # global optimizer
    opt.zero_grad()


    # parallel training
#    workers = [TrainModel(param, gnet, opt, i) for i in range(mp.cpu_count())]
    workers = [TrainModel(param, gnet, opt, i) for i in range(4)]
    [w.start() for w in workers]
    [w.join() for w in workers]


        runs = 3
    else:
        runs = 1

    for i in range(runs):
        starttime = datetime.now()

        # load global network
        if handleArguments().load_model:
            model = Net(len(actions))
            model = torch.load("./VIZDOOM/doom_save_model/a2c_doom.pt")
            model.eval()
        else:
            model = Net(len(actions))

        opt = SharedAdam(model.parameters(), lr=0.001,
                         betas=(0.92, 0.999))  # global optimizer

        # Global variables for episodes
        durations = []
        scores = []
        global_ep, global_ep_r, global_time_done = 0, 0., 0.
        name = 'w00'
        total_step = 1
        stop_processes = False

        while global_ep < MAX_EP and stop_processes is False:
            game.new_episode()
            state = game_state(game)
            buffer_s, buffer_a, buffer_r = [], [], []
            ep_r = 0.
            while True:
Example #22
    gnet = {'actor': Actor(state_size, action_size, random_seed).to(device), \
            'critic': Critic(state_size, action_size, random_seed).to(device) }

    opt = {}  # stores both shared optimizers for critic and actor networks
    LR_ACTOR = 1e-4
    LR_CRITIC = 1e-3

    print('Networks present are: ')
    for key, value in gnet.items():  # Alternatively, if gnet is a class, use gnet.__dict__
        if isinstance(value, nn.Module):
            value.share_memory()
            print('Sharing in memory {}: '.format(key))
            if key == 'actor' or key == 'critic':
                opt[key + '_optimizer'] = SharedAdam(
                    value.parameters(),
                    lr=LR_ACTOR if key == 'actor' else LR_CRITIC)

    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training

    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]

    try:
        mp.set_start_method('spawn', force=True)
Example #23
                    buffer_s, buffer_a, buffer_r = [], [], []

                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break
                s = s_
                total_step += 1

        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0002)  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
Example #24
def train():
    with open('data/config.json') as json_file:
        config = json.load(json_file)

    # This will train on CPU with no error if the 2 lines below are commented. However, we need to set start mode to
    # spawn to train on CUDA
    if config["use_gpu"] and torch.cuda.is_available():
        mp.set_start_method('spawn')

    log_queue = setup_main_logging(config)

    check_point_folder = os.path.join(config["check_point_folder"],
                                      config["env"])
    if not os.path.exists(check_point_folder):
        os.makedirs(check_point_folder)

    env = create_env(config["env"], config["seed"])

    state_size = config["state_size"]

    with open(os.path.join("data", f"{config['env']}_action_mappings.npz"),
              'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])

    with open(
            os.path.join("data", f"{config['env']}_action_line_mappings.npz"),
            'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])

    action_mappings_tensors = []
    action_line_mappings_tensors = []
    for gpu_id in config["gpu_ids"]:
        action_mappings_copy = np.copy(action_mappings)
        action_mappings_tensor = cuda(
            gpu_id, torch.tensor(action_mappings_copy, requires_grad=False))
        action_mappings_tensors.append(action_mappings_tensor)

        action_line_mappings_copy = np.copy(action_line_mappings)
        action_line_mappings_tensor = cuda(
            gpu_id, torch.tensor(action_line_mappings_copy,
                                 requires_grad=False))
        action_line_mappings_tensors.append(action_line_mappings_tensor)

    global_net = Net(state_size,
                     torch.tensor(action_mappings, requires_grad=False),
                     torch.tensor(action_line_mappings, requires_grad=False))

    if os.path.exists(config["load_model"]):
        global_net.load_state_dict(torch.load(config["load_model"]))

    global_net.share_memory()
    opt = SharedAdam(global_net.parameters(),
                     lr=config["learning_rate"])  # global optimizer

    global_step = mp.Value('i', 0)
    global_ep = mp.Value('i', 0)
    global_ep_r = mp.Value('d', 0.)
    res_queue = mp.Queue()
    g_num_candidate_acts = mp.Value('i', config["starting_num_candidate_acts"])

    agents = [
        Agent(global_net=global_net,
              opt=opt,
              global_ep=global_ep,
              global_step=global_step,
              global_ep_r=global_ep_r,
              res_queue=res_queue,
              global_num_candidate_acts=g_num_candidate_acts,
              rank=i,
              config=config,
              log_queue=log_queue,
              action_mappings=action_mappings_tensors[i %
                                                      len(config["gpu_ids"])],
              action_line_mappings=action_line_mappings_tensors[i % len(
                  config["gpu_ids"])]) for i in range(config["num_workers"])
    ]

    [agent.start() for agent in agents]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in agents]
    torch.save(global_net.state_dict(), "model.pth")
Example #25
    #     torch.multiprocessing.set_start_method("spawn")
    #     torch.cuda.init()
    #     torch.cuda.device(0)

    # Initialize Global Net and Optimizer
    if TEST_MODEL:
        test()
        sys.exit()

    if LOAD_MODEL:
        gnet = torch.load(MODEL_NAME + ".pt")
    else:
        gnet = Net(N_S, N_A)

    gnet.share_memory()
    opt = SharedAdam(gnet.parameters(), lr=LEARNING_RATE)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # Parallel training
    agent_port = 4100
    monitor_port = 4200
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i, agent_port + i,
               monitor_port + i) for i in range(NUM_WORKERS)
    ]
    [w.start() for w in workers]

    # Plot moving average of rewards
    res = []
Example #26
                state = next_state
                total_step += 1

        return


'''
Main part
'''
if __name__ == '__main__':

    Global_A2C = A2C(StateSize, ActionSize)  # Create Global A2C agent
    Global_A2C.share_memory()  # Load the Global A2C agent to shared memory

    Optimizer = SharedAdam(Global_A2C.parameters(), lr=0.0002)

    # Create Local A2C agents (processes)
    Workers = [
        Each_local_worker(Global_A2C, Optimizer, i)
        for i in range(mp.cpu_count())
    ]

    print(' >> Train Begin ...')
    # Let them begin in each assigned thread
    [thread.start() for thread in Workers]

    # Let them wait until the others end to prevent zombie process
    [thread.join() for thread in Workers]

    # Test the model
    os.environ['OMP_NUM_THREADS'] = '1'

    args = parser.parse_args()
    env = gym.make("FetchPickAndPlace-v1")
    shared_model = Actor()
    if args.use_cuda:
        shared_model.cuda()
    torch.cuda.manual_seed_all(12)

    shared_model.share_memory()

    if os.path.isfile(args.save_path1):
        print('Loading A3C parametets ...')
        shared_model.load_state_dict(torch.load(args.save_path1))

    optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

    print("No of available cores : {}".format(mp.cpu_count()))

    processes = []

    counter = mp.Value('i', 0)
    lock = mp.Lock()
    print(counter)
    p = mp.Process(target=test,
                   args=(args.num_processes, args, shared_model, counter))

    p.start()
    processes.append(p)
Example #28
    args.save_dir = '{}/'.format(args.env.lower())  # keep the directory structure simple
    if args.render:
        args.processes = 1
        args.test = True  # render mode -> test mode w one process
    if args.test: args.lr = 0  # don't train in render mode
    args.num_actions = gym.make(args.env).action_space.n  # get the action space of this game
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)  # make dir to save models etc.

    torch.manual_seed(args.seed)
    shared_model = NNPolicy(channels=1,
                            memsize=args.hidden,
                            num_actions=args.num_actions).share_memory()
    shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)

    info = {
        k: torch.DoubleTensor([0]).share_memory_()
        for k in ['run_epr', 'run_loss', 'episodes', 'frames']
    }
    info['frames'] += shared_model.try_load(args.save_dir) * 1e6
    if int(info['frames'].item()) == 0:
        printlog(args, '', end='', mode='w')  # clear log file

    processes = []
    for rank in range(args.processes):
        p = mp.Process(target=train,
                       args=(shared_model, shared_optimizer, rank, args, info))
        p.start()
        processes.append(p)
                            save_checkpoint(self.gnet, self.opt, self.g_ep)

                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)
        env.close()


if __name__ == "__main__":
    #import pdb;pdb.set_trace()
    # torch.autograd.set_detect_anomaly(True)
    mp.set_start_method("spawn")
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters())  # , lr=1e-4,
    #  betas=(0.92, 0.999))
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    #LOAD MODEL FROM CHECKPOINT
    load_checkpoint(gnet, opt, global_ep)

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(NUM_WORKERS)
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
                    push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                                  buffer_s, buffer_a, buffer_r, GAMMA)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters(), lr=0.0002)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    workers = [
        worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)