def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    """Worker-process entry point: train one A3C agent on a House3D RoomNav task.

    Args:
        rank: index of this worker; selects the GPU and (by default) the house.
        params: configuration namespace (width, height, gpu_ids_train, house_id,
            difficulty, hardness, semantic_mode, max_steps, weight_dir,
            log_file, ...).
        shared_model: A3C_LSTM_GA model shared across processes.
        shared_optimizer: optimizer shared across processes (forwarded to
            training()).
        count: multiprocessing counter of global update steps.
        lock: lock guarding `count`.

    Loops forever; every 1000 training() calls it re-syncs from the shared
    model and runs testing().
    """
    # Make sure output directories exist before logging/checkpointing.
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Training Agent: {}'.format(rank))

    # Round-robin GPU assignment over the configured training devices.
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    # Seed with a random offset per process so workers explore differently.
    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    # house_id == -1 means "derive from rank"; wrap into the 50-house pool.
    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env, hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps, discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0
    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)
        # Periodically evaluate the current shared weights.
        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
            start_time = time.time()
            best_rate, save_model_index = testing(
                lock, n_update, gpu_id, Agent, task, best_rate, params,
                save_model_index, start_time, logging, house_id)
def _initialize(self):
    """Create the renderer and House3D environment, then store the
    RoomNav task wrapper in self._env."""
    height, width = self.screen_size
    render_api = objrender.RenderAPI(w=width, h=height, device=0)
    base_env = Environment(render_api, self.scene, self.configuration)
    base_env.reset()
    # Wrap the raw environment in a discrete-action RoomNav task
    # (RGB input only: depth and segmentation channels disabled).
    self._env = RoomNavTask(
        base_env,
        discrete_action=True,
        depth_signal=False,
        segment_input=False,
        reward_type=None,
        hardness=self.hardness,
    )
def start_experiment(env_name, env_type):
    """Build and return the experiment environment for `env_type`.

    Side effect: stores the environment (or, for House3D, its underlying
    house object) in the module-level ENVIRONMENT global.
    """
    global ENVIRONMENT
    if env_type == "MiniGrid":
        # Importing gym_minigrid registers the MiniGrid envs with gym.
        import gym_minigrid
        env = gym.make(env_name)
        ENVIRONMENT = env
    elif env_type == "House3D":
        from House3D.common import load_config
        from House3D.house import House
        from House3D.core import Environment, MultiHouseEnv
        from House3D.roomnav import objrender, RoomNavTask
        from House3D.objrender import RenderMode

        render_api = objrender.RenderAPI(w=400, h=300, device=0)
        house_cfg = load_config('/home/mfe/code/dc2g/House3D/House3D/config.json')
        house = '00a42e8f3cb11489501cfeba86d6a297'
        # Alternative house ids kept for reference:
        # houses = ['00065ecbdd7300d35ef4328ffe871505',
        #           'cf57359cd8603c3d9149445fb4040d90', '31966fdc9f9c87862989fae8ae906295', 'ff32675f2527275171555259b4a1b3c3',
        #           '7995c2a93311717a3a9c48d789563590', '8b8c1994f3286bfc444a7527ffacde86', '775941abe94306edc1b5820e3a992d75',
        #           '32e53679b33adfcc5a5660b8c758cc96', '4383029c98c14177640267bd34ad2f3c', '0884337c703e7c25949d3a237101f060',
        #           '492c5839f8a534a673c92912aedc7b63', 'a7e248efcdb6040c92ac0cdc3b2351a6', '2364b7dcc432c6d6dcc59dba617b5f4b',
        #           'e3ae3f7b32cf99b29d3c8681ec3be321', 'f10ce4008da194626f38f937fb9c1a03', 'e6f24af5f87558d31db17b86fe269cf2',
        #           '1dba3a1039c6ec1a3c141a1cb0ad0757', 'b814705bc93d428507a516b866efda28', '26e33980e4b4345587d6278460746ec4',
        #           '5f3f959c7b3e6f091898caa8e828f110', 'b5bd72478fce2a2dbd1beb1baca48abd', '9be4c7bee6c0ba81936ab0e757ab3d61']
        # env = MultiHouseEnv(render_api, houses[:3], house_cfg)  # use 3 houses
        house_env = Environment(render_api, house, house_cfg)
        env = RoomNavTask(house_env, hardness=0.6, discrete_action=True)
        # For House3D the global holds the house itself, not the task wrapper.
        ENVIRONMENT = env.house
    elif env_type == "AirSim":
        # Importing gym_airsim registers the AirSim envs with gym.
        import gym_airsim
        env = gym.make(env_name)
        ENVIRONMENT = env
    return env
api = objrender.RenderAPI( w=400, h=300, device=0) cfg = load_config('config.json') houses = ['00065ecbdd7300d35ef4328ffe871505', 'cf57359cd8603c3d9149445fb4040d90', '31966fdc9f9c87862989fae8ae906295', '7995c2a93311717a3a9c48d789563590', '8b8c1994f3286bfc444a7527ffacde86', '32e53679b33adfcc5a5660b8c758cc96', '492c5839f8a534a673c92912aedc7b63', 'e3ae3f7b32cf99b29d3c8681ec3be321', '1dba3a1039c6ec1a3c141a1cb0ad0757', '5f3f959c7b3e6f091898caa8e828f110'] env = Environment(api, np.random.choice(houses, 1)[0], cfg) task = RoomNavTask(env, hardness=0.6, discrete_action=True) succ = deque(maxlen=500) for i in range(EPISODE): step, total_rew, good = 0, 0, 0 task.reset() while True: act = task._action_space.sample() obs, rew, done, info = task.step(act) total_rew += rew step += 1 if done or step > 150:
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    """Worker-process entry point: A3C episode training for one agent/house.

    Plays up to params.max_episode episodes. Every params.num_steps steps (or
    at episode end) the collected rollout is pushed into shared_model via
    Agent.training().
    """
    ptitle('Training Agent: {}'.format(rank))
    # Round-robin GPU assignment over the configured training devices.
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    # Fall back to a per-process Adam over the shared parameters when no
    # shared optimizer is supplied.
    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(), lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        #optimizer.share_memory()
    else:
        optimizer = shared_optimizer
    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)
    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)
    # house_id == -1 means "derive from rank"; wrap into the 20-house pool.
    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20
    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env, hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps, discrete_action=True)
    for episode in range(params.max_episode):
        next_observation = task.reset()
        # Encode this episode's target room as an instruction-index tensor.
        target = task.info['target_room']
        target = get_instruction_idx(target)
        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            # Re-sync local weights from the shared model and reset the LSTM
            # state at the start of every episode.
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target
        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0
        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])
            # Clip rewards to [-1, 1] to stabilise training.
            rew = np.clip(reward, -1.0, 1.0)
            Agent.put_reward(rew, entropy, value, log_prob)
            # Push an update every num_steps steps and at episode end.
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)
            if done:
                break
def run_sim(rank, params, state_Queue, action_done, actions, reward_Queue, lock):
    """Simulation worker: renders a House3D RoomNav task and exchanges
    (state, action, reward) with a central learner through joinable queues.

    Per-step protocol: put (rank, [observation, target]) on state_Queue and
    join; read the chosen action from actions[rank]; execute it; put
    [rank, done, reward] on reward_Queue and join. The sentinel action 99
    aborts the episode (used to trigger a test phase).
    """
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    # house_id == -1 means "derive from rank"; wrap into the 50-house pool.
    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50
    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env, hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps, discrete_action=True)
    while True:
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)
        # with torch.cuda.device(gpu_id):
        #     target = Variable(torch.LongTensor(target)).cuda()
        total_reward, num_steps, good = 0, 0, 0
        done = False
        test = False
        while not done:
            num_steps += 1
            observation = next_observation
            state = rank, [observation, target]
            state_Queue.put(state)
            # Block until the learner has consumed this state.
            state_Queue.join()
            # action_done.get()  # action done
            action = actions[rank]
            if action == 99:
                # NOTE(review): `test` is set but never read again in this
                # view — presumably consumed elsewhere in the original file.
                test = True
                break  # call for test
            next_observation, reward, done, info = task.step(action)
            # Clip, then sparsify: only the terminal -1/10 rewards survive;
            # every intermediate reward becomes 0.
            reward = np.clip(reward, -1.0, 10.0)
            if reward != -1.0 and reward != 10.0:  # make sparse reward
                reward = 0.0
            total_reward += reward
            rew = [rank, done, reward]
            # print("send - rank: {:d}, reward: {:3.2f}".format(rank, reward))
            reward_Queue.put(rew)
            reward_Queue.join()
            if done:
                break
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    """Evaluation worker: measures the success rate of the (shared) policy.

    In evaluation mode (evaluation=True) it loops params.max_episode times,
    each round syncing from the live shared_model, playing params.n_eval
    episodes, and saving a checkpoint whenever the success rate reaches a new
    best. With evaluation=False it performs a single pass of params.n_test
    episodes, treating `shared_model` as an already-loaded state_dict.

    Args:
        rank: worker index; selects the test GPU and (by default) the house.
        params: configuration namespace (gpu_ids_test, width, height,
            weight_dir, log_file, hardness, semantic_mode, max_steps, ...).
        shared_model: shared A3C_LSTM_GA model, or a state_dict when
            evaluation is False.
        count: multiprocessing counter of global training updates.
        lock: guards `count` and the best-rate/checkpoint update.
        best_acc: shared best-accuracy value; currently unused here (kept
            for interface compatibility with callers).
        evaluation: selects live-evaluation vs. loaded-model test mode.
    """
    # Ensure output directories exist before logging/checkpointing.
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    # house_id == -1 means "derive from rank"; wrap into the 20-house pool.
    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20
    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env, hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps, discrete_action=True)

    start_time = time.time()
    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # single pass for a loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        # FIX: renamed `eval` -> `episode_stats`; the original shadowed the
        # `eval` builtin. Each entry is (steps, total_reward, success_flag).
        episode_stats = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            # shared_model is a plain state_dict in loaded-model test mode.
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)
            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                # Fresh LSTM state for every episode.
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False
            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)
                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew
                if rew == 10:  # success reward
                    good = 1
                step += 1
                if done:
                    break
            episode_stats.append((step, total_rew, good))

        if len(episode_stats) > 0:
            succ = [e for e in episode_stats if e[2] > 0]
            succ_rate = (len(succ) / len(episode_stats)) * 100
            if evaluation is True:  # evaluation mode: checkpoint on new best
                with lock:
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(),
                                params.weight_dir + 'model' + str(n_update)
                                + '.ckpt')
                        save_model_index += 1
            avg_reward = sum([e[1] for e in episode_stats]) / len(episode_stats)
            avg_length = sum([e[0] for e in episode_stats]) / len(episode_stats)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(episode_stats)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length, unseen_succ, unseen_length):
    """Offline test worker: evaluates a saved model over seen and unseen houses.

    Houses are partitioned by id: 0-19 are "seen" (training) houses, 20-69
    "unseen" (evaluation) houses. Each worker strides through house ids by
    params.n_process until reaching 70, accumulating per-house success counts
    and episode lengths into the shared seen_/unseen_ counters.

    Args:
        rank: worker index; selects the GPU and the first house id.
        params: configuration namespace.
        loaded_model: path of the checkpoint to load.
        lock: guards the shared counters.
        seen_succ, seen_length, unseen_succ, unseen_length: shared
            multiprocessing counters updated under `lock`.
    """
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        # Map checkpoint tensors directly onto this worker's GPU.
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()
    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()
    while True:
        house_id = rank + (n_test * params.n_process)
        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                # NOTE(review): this passes house_id minus the stride offset
                # (i.e. the worker rank), not house_id - 20 as the logging
                # below assumes — verify get_eval_house_id's expected index.
                house = get_eval_house_id(
                    house_id - (n_test * params.n_process))
        env = Environment(api, house, cfg)
        task = RoomNavTask(env, hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps, discrete_action=True)

        # FIX: renamed `eval` -> `episode_stats`; the original shadowed the
        # `eval` builtin. Each entry is (steps, total_reward, success_flag).
        episode_stats = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)
            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                # Fresh LSTM state for every episode.
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False
            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)
                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew
                if rew == 10:  # success reward
                    good = 1
                step += 1
                if done:
                    break
            episode_stats.append((step, total_rew, good))

        if len(episode_stats) > 0:
            succ = [e for e in episode_stats if e[2] > 0]
            succ_rate = (len(succ) / len(episode_stats)) * 100
            avg_reward = sum([e[1] for e in episode_stats]) / len(episode_stats)
            avg_length = sum([e[0] for e in episode_stats]) / len(episode_stats)
            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(episode_stats)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)

            # Fold this house's results into the shared counters.
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in episode_stats])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in episode_stats])
        n_test += 1
def main():
    """Train an A3C-LSTM-GA agent on random House3D RoomNav episodes,
    logging success rate and reward to tensorboard.

    Relies on module-level globals: api, houses, cfg, targets, actions,
    EPISODE, args, writer, cuda(), train().
    """
    # make environment and task
    env = Environment(api, np.random.choice(houses, 1)[0], cfg)
    task = RoomNavTask(env, hardness=0.6, discrete_action=True)

    # make gated attention network
    net = cuda(A3C_LSTM_GA(len(targets)))

    succ = deque(maxlen=500)   # rolling success window
    traj = []                  # transition buffer for n-step training
    total_step = 0

    # main loop for interacting with the environment
    for i in range(EPISODE):
        # initialize task
        step, total_rew, good = 0, 0, 0
        obs = task.reset()
        target = task.info['target_room']
        # FIX: the original built the instruction vector with
        # `[1 if targets[i] == 1 else 0 for i in range(len(targets))]`, which
        # (a) compares a room name against the integer 1, yielding an
        # all-zero vector, and (b) shadows the episode index `i`, corrupting
        # the episode number printed below. Encode the target room as a
        # proper one-hot over `targets` instead (assumes `targets` lists the
        # room names — consistent with A3C_LSTM_GA(len(targets)) above).
        target = [1 if t == target else 0 for t in targets]
        target = cuda(Variable(torch.FloatTensor(target)))

        # initialize hidden state
        hx = cuda(Variable(torch.zeros(1, 256)))
        cx = cuda(Variable(torch.zeros(1, 256)))

        while True:
            obs = cuda(Variable(torch.FloatTensor(obs)))
            # Network expects CHW with a leading batch dimension.
            value, prob, act, hx, cx = net.action(
                obs.permute(2, 0, 1)[None], target, hx, cx)
            next_obs, rew, done, info = task.step(actions[act])
            total_rew += rew

            # append data
            traj.append([act, rew, done, prob, value])

            # train once the buffer holds a full n-step rollout
            if len(traj) == args.max_steps:
                train(traj, total_step)
                traj = []

            step += 1
            total_step += 1
            obs = next_obs

            if done or step > 150:
                if done:
                    good = 1
                succ.append(good)
                # print logs
                print("\n+++++++++++++ status ++++++++++++")
                print("Episode {:4d}, Reward: {:2.3f}".format(
                    i + 1, total_rew))
                print("Target {}".format(task.info['target_room']))
                print("Success rate {:3.2f}%".format(
                    np.sum(succ) / len(succ) * 100))
                break

        # tensorboard summary writer
        writer.add_scalar("Success rate", np.sum(succ) / len(succ), i + 1)
        writer.add_scalar("Reward", total_rew, i + 1)