def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n

    global_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    writer = SummaryWriter('logs')

    workers = [Worker(global_model, global_optimizer, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
            writer.add_scalar('log/score', float(ep_r), ep)
            writer.add_scalar('log/loss', float(loss), ep)
        else:
            break
    [w.join() for w in workers]
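# All of the examples in this collection construct a `SharedAdam` optimizer but none of them
# shows its definition. The sketch below is the common pattern (popularized by MorvanZhou's
# pytorch-A3C repo): an Adam subclass that allocates its state eagerly and moves it into
# shared memory so every worker process updates the same moment buffers. The class name and
# defaults are assumptions here, not taken from any single snippet above, and the exact state
# layout expected by torch.optim.Adam varies between PyTorch versions.
import torch


class SharedAdam(torch.optim.Adam):
    """Adam whose state tensors live in shared memory, so forked/spawned workers
    can apply gradients to the same global parameters."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps,
                                         weight_decay=weight_decay)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)  # newer torch expects a tensor step counter
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)
                # Move the moment estimates into shared memory so every process
                # accumulates into the same buffers.
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()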
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n

    ### Define the shared model; each worker receives a reference to it
    global_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    ### Define each Worker; global_model and the pieces needed to train it are passed by reference
    workers = [Worker(global_model, global_optimizer, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]

    ### From here on, most of the work happens inside the Workers
    [w.start() for w in workers]  ### Worker.start() invokes Worker.run(); that is how multiprocessing behaves

    res = []
    while True:
        ### Results from every Worker accumulate in res_queue
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
        else:
            break
    [w.join() for w in workers]
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n
    env.close()

    global_model = Model(num_inputs, num_actions)
    global_average_model = Model(num_inputs, num_actions)
    global_model.share_memory()
    global_average_model.share_memory()
    global_optimizer = SharedAdam(global_model.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    n = mp.cpu_count()
    workers = [Worker(global_model, global_average_model, global_optimizer,
                      global_ep, global_ep_r, res_queue, i)
               for i in range(n)]
    [w.start() for w in workers]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
        else:
            break
    [w.join() for w in workers]
def main():
    if PRINTFLAG == False:
        time_now = int(time.time())
        time_local = time.localtime(time_now)
        dt = time.strftime("%Y-%m-%d_%H-%M-%S", time_local)
        resFileDir = "../../data/RL_model/" + dt
        modelFileDir = resFileDir + "/model"
        if os.path.exists(modelFileDir) == False:
            os.makedirs(modelFileDir)

    gnet = Net()         # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0001)  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    print("cpu_count = %d" % mp.cpu_count())
    if platform.system() == "Linux":
        worker_count = 20
    else:
        worker_count = 1
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(worker_count)]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    max_r = [-1, -1000]
    if PRINTFLAG == False:
        file = open(resFileDir + "/res.txt", 'w')
    epoch = 0
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            if r > max_r[1]:
                max_r[0] = epoch
                max_r[1] = r
                if PRINTFLAG == False:
                    torch.save(gnet.state_dict(), modelFileDir + "/" + str(epoch) + ".pkl")
                print("max r = %f, epoch = %d" % (max_r[1], max_r[0]))
            elif epoch % 100 == 0:
                if PRINTFLAG == False:
                    torch.save(gnet.state_dict(), modelFileDir + "/" + str(epoch) + ".pkl")
            if PRINTFLAG == False:
                file.write(str(r) + "\n")
                file.flush()
            epoch += 1
        else:
            break
    if PRINTFLAG == False:
        file.close()
    [w.join() for w in workers]

    if PRINTFLAG == False:
        plt.plot(res)
        plt.ylabel('Moving average ep reward')
        plt.xlabel('Step')
        plt.savefig(resFileDir + "/res.pdf")
def __init__(self, obs_dim, act_dim, lr, agents, obs_type="RGB", width=None, height=None,
             channel=None, lr_scheduler=False, influencer_num=1):
    self.agents = agents
    self.agent_num = len(agents)
    self.influencer_num = influencer_num
    self.lr_scheduler = lr_scheduler
    self.action = act_dim
    self.obs_type = obs_type
    if obs_type == "RGB":
        self.width = width
        self.height = height
        self.channel = channel
    self.obs_dim = obs_dim
    for i in range(self.agent_num):
        # The optimizer and scheduler live on the agent (network) itself.
        # SharedAdam is the optimizer implemented in MorvanZhou's A3C; it seems to be used so
        # that two networks can be updated at the same time, the details are not entirely clear.
        self.agents[i].optimizer = SharedAdam(self.agents[i].parameters(), lr=lr, betas=(0.92, 0.99))
        if lr_scheduler:
            self.agents[i].lr_scheduler = torch.optim.lr_scheduler.StepLR(
                self.agents[i].optimizer, step_size=10000, gamma=0.9, last_epoch=-1)
def __init__(self, env_id, input_shape, n_actions, icm, n_threads=8):
    names = [str(i) for i in range(1, n_threads + 1)]

    global_actor_critic = ActorCritic(input_shape, n_actions)
    global_actor_critic.share_memory()
    global_optim = SharedAdam(global_actor_critic.parameters())

    if not icm:
        global_icm = None
        global_icm_optim = None
    else:
        global_icm = ICM(input_shape, n_actions)
        global_icm.share_memory()
        global_icm_optim = SharedAdam(global_icm.parameters())

    self.ps = [mp.Process(target=worker,
                          args=(name, input_shape, n_actions, global_actor_critic,
                                global_icm, global_optim, global_icm_optim,
                                env_id, n_threads, icm))
               for name in names]
    [p.start() for p in self.ps]
    [p.join() for p in self.ps]
def main():
    gamma = 0.9
    max_episodes = 2000
    update_global_iter = 10
    env_name = 'MountainCar-v0'

    env = gym.make(env_name)
    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.n

    global_net = A3C(s_dim, a_dim)
    global_net.share_memory()
    global_opt = SharedAdam(global_net.parameters(), lr=0.001)
    global_episode = mp.Value('i', 0)
    global_rewards = mp.Value('d', 0.)
    result_queue = mp.Queue()

    num_cpu = mp.cpu_count()
    print('cpu count:', num_cpu)
    workers = [Worker(global_net, global_opt, global_episode, global_rewards, result_queue,
                      gamma, max_episodes, update_global_iter, env_name, i)
               for i in range(num_cpu)]
    [w.start() for w in workers]

    results = []
    while True:
        result = result_queue.get()
        if result == 'done':
            break
        else:
            results.append(result)
    [w.join() for w in workers]
    print('done')

    plt.plot(results)
    plt.ylabel('Moving average episode reward')
    plt.xlabel('Step')
    plt.title('A3C')
    plt.savefig('result.png')
    plt.show()
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.n
    print('state size:', num_inputs)
    print('action size:', num_actions)

    online_net = QNet(num_inputs, num_actions)
    target_net = QNet(num_inputs, num_actions)
    target_net.load_state_dict(online_net.state_dict())
    online_net.share_memory()
    target_net.share_memory()

    optimizer = SharedAdam(online_net.parameters(), lr=lr)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    writer = SummaryWriter('logs')

    online_net.to(device)
    target_net.to(device)
    online_net.train()
    target_net.train()

    workers = [Worker(online_net, target_net, optimizer, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
            [ep, ep_r, loss] = r
            writer.add_scalar('log/score', float(ep_r), ep)
            writer.add_scalar('log/loss', float(loss), ep)
        else:
            break
    [w.join() for w in workers]
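# The snippet above moves the shared networks to `device` after calling share_memory().
# If `device` is a GPU, PyTorch only supports passing CUDA tensors between processes under
# the 'spawn' (or 'forkserver') start method, which some of the later examples set explicitly.
# A minimal guard, as a sketch; `main` here refers to the function defined in the example above.
import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    if torch.cuda.is_available():
        # The default 'fork' start method on Linux cannot share CUDA tensors.
        mp.set_start_method('spawn', force=True)
    main()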
def trainA3C(file_name="A3C", env=GridworldEnv(1), update_global_iter=10, gamma=0.999,
             is_plot=False, num_episodes=500, max_num_steps_per_episode=1000,
             learning_rate=0.0001):
    """
    A3C training routine. Returns the episode-reward log.
    """
    ns = env.observation_space.shape[0]  ## Line to fix for arbitrary environment
    na = env.action_space.n

    gnet = Net(ns, na)   # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=learning_rate)  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i, update_global_iter,
                      num_episodes, max_num_steps_per_episode, gamma, env, ns, na)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    episode_rewards = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            episode_rewards.append(r)
        else:
            break
    [w.join() for w in workers]

    # Store results
    np.save(file_name + '-a3c-rewards', episode_rewards)
    return episode_rewards
else:
    runs = 1

for i in range(runs):
    starttime = datetime.now()

    # load global network
    if handleArguments().load_model:
        model = Net(len(actions))
        model = torch.load("./VIZDOOM/doom_save_model/a2c_sync_doom.pt")
        model.eval()
    else:
        model = Net(len(actions))

    # global optimizer
    opt = SharedAdam(model.parameters(), lr=0.001, betas=(0.92, 0.999))

    # record episode-reward and duration-episode to plot
    res = []
    durations = []
    action = []

    global_ep, global_ep_r, global_time_done = mp.Value('i', 0), mp.Value('d', 0.), mp.Value('d', 0.)
    res_queue, time_queue, action_queue = mp.Queue(), mp.Queue(), mp.Queue()

    loop = 0
    while global_ep.value < MAX_EP:
        loop += 1
        print("loop: ", loop)
        # parallel training
    def run(self):
        print('Starting Process {}'.format(self.name))
        # change_opt(self.opt, 'step')
        # print_opt_state(self.opt, 'step')
        for i in range(steps):
            change_opt(self.opt, 'exp_avg')
            self.opt.step()
            # print_opt_state(optimizer, 'step')
            # print_opt_state(optimizer, 'exp_avg')


if __name__ == '__main__':
    gnet = Actor(state_size, action_size, random_seed)
    optimizer = SharedAdam(gnet.parameters())
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    print('Number of workers is {} '.format(mp.cpu_count()))
    workers = [Worker(gnet, optimizer, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    [w.join() for w in workers]
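# The helpers change_opt and print_opt_state used in run() above are not shown anywhere in this
# collection. As a rough, hypothetical illustration of what an optimizer-state inspector could
# look like (the real helpers may differ), the sketch below walks the Adam state dict and prints
# one entry, e.g. 'step' or 'exp_avg', for every parameter:
import torch


def print_opt_state(opt, key):
    # Summarize one shared-state entry per parameter of a (Shared)Adam optimizer.
    for group in opt.param_groups:
        for p in group['params']:
            state = opt.state[p]
            if key in state:
                value = state[key]
                summary = value.sum().item() if torch.is_tensor(value) else value
                print('{}: {}'.format(key, summary))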
max_episode = 10000
capacity = 200
train_freq = 20
n_step = 4
stack_num = 3
pc_weight = 1.
rp_weight = 1.
vr_weight = 1.
batch_size = 32
observation_dim = (3, 84, 84)
entropy_weight = 1e-4

env = gym.make(env_id)
action_dim = env.action_space.n

global_net = unreal(observation_dim, action_dim, gamma, entropy_weight)
optimizer = SharedAdam(global_net.parameters(), lr=1e-4)
global_episode_counter, global_reward, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

workers = [worker(global_net=global_net,
                  optimizer=optimizer,
                  global_episode_counter=global_episode_counter,
                  global_reward=global_reward,
                  res_queue=res_queue,
                  name=str(i),
                  max_episode=max_episode,
                  gamma=gamma,
                  env_id=env_id,
                  capacity=capacity,
                  train_freq=train_freq,
                  n_step=n_step,
for i in range(50):
    ot = 1222
    j = 0
    while True:
        j += 1
        job, pre = job.expand(enet)
        a = v_wrap(enet.choose_action(v_wrap(s)))
        a.numpy()
        s_, r, done = job.step(a.numpy())
        if abs(pre - ot) == 0:
            r = 100 + r
        else:
            r = 100 / abs(pre - ot) + r
        opt = SharedAdam(enet.parameters(), lr=0.000001)
        _, q_t = tnet.forward(v_wrap(s))
        loss = enet.loss_func(v_wrap(s), a, v_wrap(r) + 0.1 * q_t)
        if j % 2 == 0:
            tnet.load_state_dict(enet.state_dict())
        opt.zero_grad()
        loss.backward()
        opt.step()
        s = s_
        job, time4 = job.expand(enet)
        timee.append(time4)
        if done:
            # timee.append(job.total_time)
            print(job.total_time)
            job = job_shop_env("la11")
            # job.reset()
                                  buffer_s, buffer_a, buffer_r)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s1
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Network()     # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters())  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    # workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, 0)]
    [w.start() for w in workers]
    results = []
        pickle.dump(self.train_logs, fHandle, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    configure_env_params()
    args.logfile_latest = args.logfile + '_' + args.env + '_latest_DDPG' + '.pkl'
    args.logfile = args.logfile + '_' + args.env + '_DDPG_' + time.strftime("%Y%m%d-%H%M%S") + '.pkl'

    global_ddpg = DDPG(obs_dim=obs_dim, act_dim=act_dim, env=env, memory_size=args.rmsize,
                       batch_size=args.bsize, tau=args.tau)

    optimizer_global_actor = SharedAdam(global_ddpg.actor.parameters(), lr=5e-5)
    optimizer_global_critic = SharedAdam(global_ddpg.critic.parameters(), lr=5e-5)  # , weight_decay=1e-02)
    # optimizer_global_actor.share_memory()
    # optimizer_global_critic.share_memory()
    global_ddpg.share_memory()

    if not args.multithread:
        worker = Worker(str(1), optimizer_global_actor, optimizer_global_critic)
        worker.work(global_ddpg)
    else:
        processes = []
        for i in range(args.n_workers):
            worker = Worker(str(i), optimizer_global_actor,
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s_
                real_state = real_state_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()   # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # unparallel training
    # workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(1)]
    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
def main(cfg):
    ckpt = None
    if cfg.ckpt:
        if not os.path.exists(cfg.ckpt):
            print('Invalid ckpt path:', cfg.ckpt)
            exit(1)
        ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)
        print(cfg.ckpt, 'loaded')
        loaded_cfg = ckpt['cfg'].__dict__
        pprint(loaded_cfg)

        del loaded_cfg['num_episodes']
        del loaded_cfg['num_workers']
        del loaded_cfg['test_set']
        del loaded_cfg['pre_ckpt']

        cfg.__dict__.update(loaded_cfg)
        cfg.model = cfg.model.upper()
        print()
        print('Merged Config')
        pprint(cfg.__dict__)
    else:
        os.makedirs(os.path.join(cfg.log_dir, 'ckpt'))

    prepro_dir = os.path.join(cfg.prepro_dir, 'task%s' % (cfg.task_id))
    with open(os.path.join(prepro_dir, 'vocab.pk'), 'rb') as f:
        vocab = pickle.load(f)
    with open(os.path.join(prepro_dir, 'stats.pk'), 'rb') as f:
        stats = pickle.load(f)
        stats['max_ques_len'] = stats['max_sent_len']

    shared_model = create_a3c_model(cfg, vocab, stats)

    if cfg.pre_ckpt is not None:
        pretrain_param = torch.load(cfg.pre_ckpt, map_location=lambda storage, loc: storage)
        pretrain_param = pretrain_param['model']

        missing_keys = []
        unexpected_keys = []
        error_msgs = []
        new_pretrain_param = pretrain_param.copy()
        pretrain_param = new_pretrain_param.copy()
        metadata = getattr(pretrain_param, '_metadata', None)
        if metadata is not None:
            pretrain_param._metadata = metadata

        def load(module, prefix=''):
            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
            module._load_from_state_dict(pretrain_param, prefix, local_metadata, True,
                                         missing_keys, unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        load(shared_model, prefix='')
        print("Weights of {} not initialized from pretrained model: {}".format(
            shared_model.__class__.__name__, missing_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            shared_model.__class__.__name__, unexpected_keys))

    if ckpt is not None:
        shared_model.load_state_dict(ckpt['model'])
    shared_model.share_memory()

    params = filter(lambda p: p.requires_grad, shared_model.parameters())
    optim = SharedAdam(params, lr=cfg.lr)
    if ckpt is not None:
        optim.load_state_dict(ckpt['optim'])
    optim.share_memory()

    set_seed(cfg.seed)

    done = mp.Value('i', False)
    if ckpt is not None:
        gstep = mp.Value('i', ckpt['step'])
    else:
        gstep = mp.Value('i', 0)
    queue = mp.Queue()

    train_env = create_env(cfg, 'train', vocab, stats, shuffle=True)
    valid_shuffle = False if cfg.num_valid_episodes == 0 else True
    valid_env = create_env(cfg, 'valid', vocab, stats, shuffle=valid_shuffle)

    procs = []
    if cfg.debug:
        p = TrainWorker(cfg, len(procs), done, shared_model, optim, vocab, stats, train_env, queue, gstep)
        # p = ValidWorker(cfg, len(procs), done, shared_model, optim, vocab, stats, valid_env, gstep)
        p.run()
        return

    p = ValidWorker(cfg, len(procs), done, shared_model, optim, vocab, stats, valid_env, gstep)
    p.start()
    procs.append(p)

    for _ in range(cfg.num_workers - 1):
        p = TrainWorker(cfg, len(procs), done, shared_model, optim, vocab, stats, train_env, queue, gstep)
        p.start()
        procs.append(p)

    p = TensorboardWorker(cfg, len(procs), queue, done, gstep)
    p.start()
    procs.append(p)

    for p in procs:
        p.join()
    print('All processes are finished:', cfg.log_dir)
                                  buffer_s, buffer_a, buffer_r, GAMMA)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]
                    # sync
                    a_loss, c_loss = push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                                                   buffer_s, buffer_a, buffer_r, GAMMA,
                                                   self.g_ep.value)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name,
                               a_loss.data.numpy()[0], c_loss.data.numpy()[0])
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()   # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=5e-3)  # global optimizer
    global_ep, global_ep_r, res_queue, q_lock = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue(), mp.Lock()

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(mp.cpu_count())]
    # workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(1)]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]
if __name__=="__main__": #torch.backends.cudnn.benchmark = True params_file = sys.argv[1] timestamp = sys.argv[2] param = json.load(open(params_file)) param['model_dir']= param['model_dir']+'/'+param['question_type']+'_'+timestamp #train_model = TrainModel(param) # train_model.train() read_data = ReadBatchData(param) gnet = NPI(param, read_data.none_argtype_index, read_data.num_argtypes, \ read_data.num_progs, read_data.max_arguments, \ read_data.rel_index, read_data.type_index, \ read_data.wikidata_rel_embed, read_data.wikidata_type_embed, \ read_data.vocab_init_embed, read_data.program_to_argtype, \ read_data.program_to_targettype) # local network gnet.share_memory() # share the global parameters in multiprocessing learning_rate = param['learning_rate'] opt = SharedAdam(gnet.parameters(), lr=learning_rate) # global optimizer opt.zero_grad() # parallel training # workers = [TrainModel(param, gnet, opt, i) for i in range(mp.cpu_count())] workers = [TrainModel(param, gnet, opt, i) for i in range(4)] [w.start() for w in workers] [w.join() for w in workers]
    runs = 3
else:
    runs = 1

for i in range(runs):
    starttime = datetime.now()

    # load global network
    if handleArguments().load_model:
        model = Net(len(actions))
        model = torch.load("./VIZDOOM/doom_save_model/a2c_doom.pt")
        model.eval()
    else:
        model = Net(len(actions))

    opt = SharedAdam(model.parameters(), lr=0.001, betas=(0.92, 0.999))  # global optimizer

    # Global variables for episodes
    durations = []
    scores = []
    global_ep, global_ep_r, global_time_done = 0, 0., 0.
    name = 'w00'
    total_step = 1
    stop_processes = False

    while global_ep < MAX_EP and stop_processes is False:
        game.new_episode()
        state = game_state(game)
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        while True:
gnet = {'actor': Actor(state_size, action_size, random_seed).to(device),
        'critic': Critic(state_size, action_size, random_seed).to(device)}
opt = {}  # stores both shared optimizers for critic and actor networks
LR_ACTOR = 1e-4
LR_CRITIC = 1e-3

print('Networks present are: ')
for key, value in gnet.items():  # Alternatively, if gnet is a class, use gnet.__dict__
    if isinstance(value, nn.Module):
        value.share_memory()
        print('Sharing in memory {}: '.format(key))
    if key == 'actor' or key == 'critic':
        opt[key + '_optimizer'] = SharedAdam(value.parameters(),
                                             lr=LR_ACTOR if key == 'actor' else LR_CRITIC)

global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

# parallel training
workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
           for i in range(mp.cpu_count())]

try:
    mp.set_start_method('spawn', force=True)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()   # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0002)  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
def train():
    with open('data/config.json') as json_file:
        config = json.load(json_file)

    # This will train on CPU with no error if the 2 lines below are commented out. However,
    # the start method must be set to spawn to train on CUDA.
    if config["use_gpu"] and torch.cuda.is_available():
        mp.set_start_method('spawn')

    log_queue = setup_main_logging(config)

    check_point_folder = os.path.join(config["check_point_folder"], config["env"])
    if not os.path.exists(check_point_folder):
        os.makedirs(check_point_folder)

    env = create_env(config["env"], config["seed"])
    state_size = config["state_size"]

    with open(os.path.join("data", f"{config['env']}_action_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])

    with open(os.path.join("data", f"{config['env']}_action_line_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])

    action_mappings_tensors = []
    action_line_mappings_tensors = []
    for gpu_id in config["gpu_ids"]:
        action_mappings_copy = np.copy(action_mappings)
        action_mappings_tensor = cuda(gpu_id, torch.tensor(action_mappings_copy, requires_grad=False))
        action_mappings_tensors.append(action_mappings_tensor)

        action_line_mappings_copy = np.copy(action_line_mappings)
        action_line_mappings_tensor = cuda(gpu_id, torch.tensor(action_line_mappings_copy, requires_grad=False))
        action_line_mappings_tensors.append(action_line_mappings_tensor)

    global_net = Net(state_size,
                     torch.tensor(action_mappings, requires_grad=False),
                     torch.tensor(action_line_mappings, requires_grad=False))
    if os.path.exists(config["load_model"]):
        global_net.load_state_dict(torch.load(config["load_model"]))
    global_net.share_memory()

    opt = SharedAdam(global_net.parameters(), lr=config["learning_rate"])  # global optimizer

    global_step, global_ep, global_ep_r, res_queue, g_num_candidate_acts = (
        mp.Value('i', 0), mp.Value('i', 0), mp.Value('d', 0.), mp.Queue(),
        mp.Value('i', config["starting_num_candidate_acts"]))

    agents = [Agent(global_net=global_net,
                    opt=opt,
                    global_ep=global_ep,
                    global_step=global_step,
                    global_ep_r=global_ep_r,
                    res_queue=res_queue,
                    global_num_candidate_acts=g_num_candidate_acts,
                    rank=i,
                    config=config,
                    log_queue=log_queue,
                    action_mappings=action_mappings_tensors[i % len(config["gpu_ids"])],
                    action_line_mappings=action_line_mappings_tensors[i % len(config["gpu_ids"])])
              for i in range(config["num_workers"])]

    [agent.start() for agent in agents]

    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in agents]

    torch.save(global_net.state_dict(), "model.pth")
# torch.multiprocessing.set_start_method("spawn")
# torch.cuda.init()
# torch.cuda.device(0)

# Initialize Global Net and Optimizer
if TEST_MODEL:
    test()
    sys.exit()
if LOAD_MODEL:
    gnet = torch.load(MODEL_NAME + ".pt")
else:
    gnet = Net(N_S, N_A)
gnet.share_memory()
opt = SharedAdam(gnet.parameters(), lr=LEARNING_RATE)
global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

# Parallel training
agent_port = 4100
monitor_port = 4200
workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i, agent_port + i, monitor_port + i)
           for i in range(NUM_WORKERS)]
[w.start() for w in workers]

# Plot moving average of rewards
res = []
            state = next_state
            total_step += 1
        return


'''
    Main part
'''
if __name__ == '__main__':
    Global_A2C = A2C(StateSize, ActionSize)  # Create the global A2C agent
    Global_A2C.share_memory()                # Put the global A2C agent in shared memory
    Optimizer = SharedAdam(Global_A2C.parameters(), lr=0.0002)

    # Create the local A2C agents (worker processes)
    Workers = [Each_local_worker(Global_A2C, Optimizer, i)
               for i in range(mp.cpu_count())]

    print(' >> Train Begin ...')
    # Start each worker in its own process
    [thread.start() for thread in Workers]
    # Wait until all workers finish, to prevent zombie processes
    [thread.join() for thread in Workers]

    # Test the model
os.environ['OMP_NUM_THREADS'] = '1'
args = parser.parse_args()

env = gym.make("FetchPickAndPlace-v1")
shared_model = Actor()
if args.use_cuda:
    shared_model.cuda()
    torch.cuda.manual_seed_all(12)
shared_model.share_memory()

if os.path.isfile(args.save_path1):
    print('Loading A3C parameters ...')
    shared_model.load_state_dict(torch.load(args.save_path1))

optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)
optimizer.share_memory()
print("No of available cores : {}".format(mp.cpu_count()))

processes = []
counter = mp.Value('i', 0)
lock = mp.Lock()
print(counter)

p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter))
p.start()
processes.append(p)
args.save_dir = '{}/'.format(args.env.lower())  # keep the directory structure simple
if args.render:
    args.processes = 1
    args.test = True  # render mode -> test mode with one process
if args.test:
    args.lr = 0  # don't train in render mode
args.num_actions = gym.make(args.env).action_space.n  # get the action space of this game
os.makedirs(args.save_dir) if not os.path.exists(args.save_dir) else None  # make dir to save models etc.

torch.manual_seed(args.seed)
shared_model = NNPolicy(channels=1, memsize=args.hidden, num_actions=args.num_actions).share_memory()
shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr)

info = {k: torch.DoubleTensor([0]).share_memory_()
        for k in ['run_epr', 'run_loss', 'episodes', 'frames']}
info['frames'] += shared_model.try_load(args.save_dir) * 1e6
if int(info['frames'].item()) == 0:
    printlog(args, '', end='', mode='w')  # clear log file

processes = []
for rank in range(args.processes):
    p = mp.Process(target=train, args=(shared_model, shared_optimizer, rank, args, info))
    p.start()
    processes.append(p)
                        save_checkpoint(self.gnet, self.opt, self.g_ep)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)
        env.close()


if __name__ == "__main__":
    # import pdb; pdb.set_trace()
    # torch.autograd.set_detect_anomaly(True)
    mp.set_start_method("spawn")

    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters())  # , lr=1e-4, betas=(0.92, 0.999))
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # LOAD MODEL FROM CHECKPOINT
    load_checkpoint(gnet, opt, global_ep)

    # parallel training
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(NUM_WORKERS)]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
                    push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                                  buffer_s, buffer_a, buffer_r, GAMMA)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    opt = SharedAdam(gnet.parameters(), lr=0.0002)
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    workers = [worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]

    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
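# Several Worker snippets above call push_and_pull() and record() without showing them.
# The sketch below follows the Morvan-style helpers that match the simple
# push_and_pull(opt, lnet, gnet, done, s_, bs, ba, br, gamma) signature used here; it assumes
# a discrete action space, a 1-D numpy state, and a local net whose forward() returns
# (logits, values) and which exposes loss_func(states, actions, value_targets). Variants in
# the other examples also return the actor/critic losses or take extra arguments.
import numpy as np
import torch


def v_wrap(np_array, dtype=np.float32):
    # Convert a numpy array to a torch tensor of the given dtype.
    if np_array.dtype != dtype:
        np_array = np_array.astype(dtype)
    return torch.from_numpy(np_array)


def push_and_pull(opt, lnet, gnet, done, s_, bs, ba, br, gamma):
    # Bootstrap the value of the last state, build discounted return targets,
    # push the local gradients into the shared optimizer, then pull the updated
    # global weights back into the local network.
    v_s_ = 0. if done else lnet.forward(v_wrap(s_[None, :]))[-1][0].item()
    buffer_v_target = []
    for r in br[::-1]:
        v_s_ = r + gamma * v_s_
        buffer_v_target.append(v_s_)
    buffer_v_target.reverse()

    loss = lnet.loss_func(
        v_wrap(np.vstack(bs)),
        v_wrap(np.array(ba), dtype=np.int64),   # assumes discrete actions
        v_wrap(np.array(buffer_v_target)[:, None]))

    opt.zero_grad()
    loss.backward()
    for lp, gp in zip(lnet.parameters(), gnet.parameters()):
        gp._grad = lp.grad                      # hand local grads to the global net
    opt.step()
    lnet.load_state_dict(gnet.state_dict())     # pull the fresh global weights


def record(global_ep, global_ep_r, ep_r, res_queue, name):
    # Bump the shared episode counter, update the running reward, and report.
    with global_ep.get_lock():
        global_ep.value += 1
    with global_ep_r.get_lock():
        if global_ep_r.value == 0.:
            global_ep_r.value = ep_r
        else:
            global_ep_r.value = global_ep_r.value * 0.99 + ep_r * 0.01
    res_queue.put(global_ep_r.value)
    print(name, 'Ep:', global_ep.value, '| Ep_r: %.0f' % global_ep_r.value)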