Example #1
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)
    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0

    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)

        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())

            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id,
                                                  Agent, task, best_rate,
                                                  params, save_model_index,
                                                  start_time, logging,
                                                  house_id)
Example #2
def _initialize(self):
    h, w = self.screen_size
    api = objrender.RenderAPI(w=w, h=h, device=0)
    env = Environment(api, self.scene, self.configuration)
    env.reset()
    env = RoomNavTask(env,
                      discrete_action=True,
                      depth_signal=False,
                      segment_input=False,
                      reward_type=None,
                      hardness=self.hardness)
    self._env = env
Example #3
def start_experiment(env_name, env_type):
    global ENVIRONMENT
    if env_type == "MiniGrid":
        import gym_minigrid
        env = gym.make(env_name)
        ENVIRONMENT = env
    elif env_type == "House3D":
        from House3D.common import load_config

        from House3D.house import House
        from House3D.core import Environment, MultiHouseEnv
        from House3D.roomnav import objrender, RoomNavTask
        from House3D.objrender import RenderMode

        api = objrender.RenderAPI(w=400, h=300, device=0)
        cfg = load_config('/home/mfe/code/dc2g/House3D/House3D/config.json')

        house = '00a42e8f3cb11489501cfeba86d6a297'
        # houses = ['00065ecbdd7300d35ef4328ffe871505',
        # 'cf57359cd8603c3d9149445fb4040d90', '31966fdc9f9c87862989fae8ae906295', 'ff32675f2527275171555259b4a1b3c3',
        # '7995c2a93311717a3a9c48d789563590', '8b8c1994f3286bfc444a7527ffacde86', '775941abe94306edc1b5820e3a992d75',
        # '32e53679b33adfcc5a5660b8c758cc96', '4383029c98c14177640267bd34ad2f3c', '0884337c703e7c25949d3a237101f060',
        # '492c5839f8a534a673c92912aedc7b63', 'a7e248efcdb6040c92ac0cdc3b2351a6', '2364b7dcc432c6d6dcc59dba617b5f4b',
        # 'e3ae3f7b32cf99b29d3c8681ec3be321', 'f10ce4008da194626f38f937fb9c1a03', 'e6f24af5f87558d31db17b86fe269cf2',
        # '1dba3a1039c6ec1a3c141a1cb0ad0757', 'b814705bc93d428507a516b866efda28', '26e33980e4b4345587d6278460746ec4',
        # '5f3f959c7b3e6f091898caa8e828f110', 'b5bd72478fce2a2dbd1beb1baca48abd', '9be4c7bee6c0ba81936ab0e757ab3d61']
        #env = MultiHouseEnv(api, houses[:3], cfg)  # use 3 houses
        house_env = Environment(api, house, cfg)
        env = RoomNavTask(house_env, hardness=0.6, discrete_action=True)
        ENVIRONMENT = env.house
    elif env_type == "AirSim":
        import gym_airsim
        env = gym.make(env_name)
        ENVIRONMENT = env

    return env
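A minimal usage sketch for start_experiment above; the environment id is an illustrative gym-minigrid name chosen for this sketch, not one taken from the original source:

# Hypothetical call; any registered gym-minigrid id would work here.
env = start_experiment("MiniGrid-Empty-8x8-v0", "MiniGrid")
obs = env.reset()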
Example #4
api = objrender.RenderAPI(
    w=400, h=300, device=0)
cfg = load_config('config.json')

houses = ['00065ecbdd7300d35ef4328ffe871505',
    'cf57359cd8603c3d9149445fb4040d90', '31966fdc9f9c87862989fae8ae906295',
    '7995c2a93311717a3a9c48d789563590', '8b8c1994f3286bfc444a7527ffacde86',
    '32e53679b33adfcc5a5660b8c758cc96',
    '492c5839f8a534a673c92912aedc7b63',
    'e3ae3f7b32cf99b29d3c8681ec3be321',
    '1dba3a1039c6ec1a3c141a1cb0ad0757',
    '5f3f959c7b3e6f091898caa8e828f110']

env = Environment(api, np.random.choice(houses, 1)[0], cfg)
task = RoomNavTask(env, hardness=0.6, discrete_action=True)

succ = deque(maxlen=500)
for i in range(EPISODE):
  step, total_rew, good = 0, 0, 0
  task.reset()

  while True:
    act = task._action_space.sample()
    obs, rew, done, info = task.step(act)
    
    total_rew += rew

    step += 1

    if done or step > 150:
      if done:
        good = 1
      succ.append(good)
      print("Episode {:4d}: reward {:2.3f}, success rate {:3.2f}%".format(
          i + 1, total_rew, 100 * np.sum(succ) / len(succ)))
      break
Example #5
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        #optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)

            Agent.put_reward(rew, entropy, value, log_prob)
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)

            if done:
                break
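Examples #5 through #8 call a get_instruction_idx helper that is not shown here. A plausible sketch, assuming it maps the target room name to a list of word indices for the instruction encoder; the vocabulary below is an assumption, and the original project defines its own:

# Hypothetical helper consistent with the usage above
# (torch.LongTensor(get_instruction_idx(target_room))); the word list is
# illustrative only.
ROOM_VOCAB = ['bedroom', 'kitchen', 'bathroom', 'dining_room', 'living_room']

def get_instruction_idx(target_room):
    return [ROOM_VOCAB.index(target_room)]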
Example #6
def run_sim(rank, params, state_Queue, action_done, actions, reward_Queue,
            lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    while True:
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        # with torch.cuda.device(gpu_id):
        #     target = Variable(torch.LongTensor(target)).cuda()

        total_reward, num_steps, good = 0, 0, 0
        done = False
        test = False

        while not done:
            num_steps += 1
            observation = next_observation
            state = rank, [observation, target]
            state_Queue.put(state)

            state_Queue.join()

            # action_done.get()   # action done
            action = actions[rank]
            if action == 99:
                test = True
                break  # call for test

            next_observation, reward, done, info = task.step(action)

            reward = np.clip(reward, -1.0, 10.0)
            if reward != -1.0 and reward != 10.0:  # make sparse reward
                reward = 0.0
            total_reward += reward

            rew = [rank, done, reward]
            # print("send - rank: {:d}, reward: {:3.2f}".format(rank, reward))
            reward_Queue.put(rew)

            reward_Queue.join()

            if done:
                break
Example #7
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    #time.sleep(rank*30)

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  #reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    #if best_acc.value >= best_rate:
                    #    best_rate = best_acc.value
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(), params.weight_dir +
                                'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    #if best_rate > best_acc.value:
                    #    best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
Example #8
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length,
             unseen_succ, unseen_length):

    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()

    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()

    while True:
        house_id = rank + (n_test * params.n_process)

        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id -
                                          (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  #reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])
            n_test += 1
Example #9
def main():
    ## make environment and task
    env = Environment(api, np.random.choice(houses, 1)[0], cfg)
    task = RoomNavTask(env, hardness=0.6, discrete_action=True)

    ## make gated attention network
    net = cuda(A3C_LSTM_GA(len(targets)))

    succ = deque(maxlen=500)
    traj = []
    total_step = 0

    ## main loop for interact with environment
    for i in range(EPISODE):
        ## initialize task
        step, total_rew, good = 0, 0, 0
        obs = task.reset()
        target = task.info['target_room']

        target = [1 if t == target else 0 for t in targets]  # one-hot encode the target room
        target = cuda(Variable(torch.FloatTensor(target)))

        ## initialize hidden state
        hx = cuda(Variable(torch.zeros(1, 256)))
        cx = cuda(Variable(torch.zeros(1, 256)))

        while True:
            obs = cuda(Variable(torch.FloatTensor(obs)))
            value, prob, act, hx, cx = net.action(
                obs.permute(2, 0, 1)[None], target, hx, cx)
            next_obs, rew, done, info = task.step(actions[act])
            total_rew += rew

            ## append data
            traj.append([act, rew, done, prob, value])

            ## train!
            if len(traj) == args.max_steps:
                train(traj, total_step)
                traj = []

            step += 1
            total_step += 1

            obs = next_obs

            if done or step > 150:
                if done:
                    good = 1
                succ.append(good)

                ## print logs
                print("\n+++++++++++++ status ++++++++++++")
                print("Episode {:4d}, Reward: {:2.3f}".format(
                    i + 1, total_rew))
                print("Target {}".format(task.info['target_room']))
                print("Success rate {:3.2f}%".format(
                    np.sum(succ) / len(succ) * 100))

                ## tensorboard summary writer
                writer.add_scalar("Success rate",
                                  np.sum(succ) / len(succ), i + 1)
                writer.add_scalar("Reward", total_rew, i + 1)
                break