Example #1
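A training worker for A3C room navigation in House3D: it sets up logging and the weight directory, picks a GPU by rank, builds a RoomNavTask for its assigned house, and then trains forever, syncing from the shared model and running a testing pass every 1,000 iterations.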
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)
    house_id = params.house_id

    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0

    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)

        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())

            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id,
                                                  Agent, task, best_rate,
                                                  params, save_model_index,
                                                  start_time, logging,
                                                  house_id)
Example #2
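The entry point: it builds the shared A3C_LSTM_GA model and a SharedAdam optimizer in shared memory, then for each rank spawns one test process and two training workers, finally joining all processes.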
def main():
    params = Params()

    mp.set_start_method('spawn')
    count = mp.Value('i', 0)
    best_acc = mp.Value('d', 0.0)
    lock = mp.Lock()

    shared_model = A3C_LSTM_GA()
    shared_model = shared_model.share_memory()

    shared_optimizer = SharedAdam(shared_model.parameters(),
                                  lr=params.lr,
                                  amsgrad=params.amsgrad,
                                  weight_decay=params.weight_decay)
    shared_optimizer.share_memory()
    #run_sim(0, params, shared_model, None,  count, lock)
    #test(params, shared_model, count, lock, best_acc)

    processes = []

    train_process = 0
    test_process = 0

    for rank in range(params.n_process):

        p = mp.Process(target=test,
                       args=(
                           test_process,
                           params,
                           shared_model,
                           count,
                           lock,
                           best_acc,
                       ))
        p.start()
        processes.append(p)
        test_process += 1

        for i in range(2):
            p = mp.Process(target=run_sim,
                           args=(
                               train_process,
                               params,
                               shared_model,
                               shared_optimizer,
                               count,
                               lock,
                           ))
            p.start()
            processes.append(p)
        train_process += 1  # advance once per rank; both workers spawned above share this id

    for p in processes:
        p.join()
Example #3
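A thin wrapper that constructs the model on the GPU, optionally restores a checkpoint from args.load, moves it to shared memory, and hands it to the test routine.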
    def __init__(self):
        args = parser.parse_args()
        args.input_size = len(word_to_idx)

        shared_model = A3C_LSTM_GA(args)
        shared_model = shared_model.cuda()

        # Load the model
        if (args.load != "0"):
            shared_model.load_state_dict(
                torch.load(args.load,
                           map_location=lambda storage, loc: storage))

        shared_model.share_memory()
        self.test_model = test(args, shared_model)
Example #4
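A test-agent constructor: it seeds PyTorch, moves the model to the GPU, hard-codes the instruction vocabulary, optionally loads a checkpoint, and puts the model in eval mode.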
    def __init__(self, args, share_model):
        torch.manual_seed(args.seed + 0)

        self.model = A3C_LSTM_GA(args)
        self.model = self.model.cuda()
        self.word_to_idx = {
            'object': 4, 'cylinder': 5, 'blue': 7, 'Go': 0, 'cube': 8, 
            'green': 9, 'ball': 6, 'red': 3, 'the': 2, 'yellow': 10, 'to': 1,
            'go': 11, 'any': 12, 'then': 13, 'sphere': 6
        }

        if (args.load != "0"):
            print("Loading model ... "+ args.load)
            self.model.load_state_dict(
                torch.load(args.load, map_location=lambda storage, loc: storage))
        self.model.eval()
        
        self.start = True
        self.episode_length = 0
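Note that the vocabulary above maps synonyms to the same index ('ball' and 'sphere' are both 6). As a minimal sketch of how such a mapping is typically used (hypothetical code, not from this repository; the same split-and-index pattern appears in Example #8 below):

import torch

word_to_idx = {
    'Go': 0, 'to': 1, 'the': 2, 'red': 3, 'object': 4, 'cylinder': 5,
    'ball': 6, 'sphere': 6, 'blue': 7, 'cube': 8, 'green': 9,
    'yellow': 10, 'go': 11, 'any': 12, 'then': 13,
}

instruction = "Go to the red cube"
# Look up each word, then shape the indices as a batch of one.
idx = [word_to_idx[w] for w in instruction.split(" ")]   # [0, 1, 2, 3, 8]
instruction_idx = torch.LongTensor(idx).view(1, -1)      # shape (1, 5)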
Example #5
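An agent plus rollout storage: besides the model and Adam optimizer, it pre-allocates the per-step rollout tensors (rewards, value predictions, returns, action log-probs, masks, entropies) on the chosen GPU.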
    def __init__(self, params, gpu_id=0):
        self.params = params
        self.device = "cuda:" + str(gpu_id)
        self.model = A3C_LSTM_GA().to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=params.lr,
                                    amsgrad=params.amsgrad,
                                    weight_decay=params.weight_decay)
        self.hx = torch.zeros(self.params.n_process, 256).to(self.device)
        self.cx = torch.zeros(self.params.n_process, 256).to(self.device)
        self.eps_len = 0
        self.values = []
        self.log_probs = []
        self.rewards = []
        # self.entropies = []
        self.done = False
        self.info = None
        self.reward = 0
        self.gpu_id = gpu_id
        self.target = None
        self.n_update = 0

        self.num_steps = params.num_steps
        self.step = 0

        # note: this tensor supersedes the empty self.rewards list assigned above
        self.rewards = torch.zeros(self.num_steps, params.n_process,
                                   1).to(self.device)
        self.value_preds = torch.zeros(self.num_steps + 1, params.n_process,
                                       1).to(self.device)
        self.returns = torch.zeros(self.num_steps + 1, params.n_process,
                                   1).to(self.device)
        self.action_log_probs = torch.zeros(self.num_steps, params.n_process,
                                            1).to(self.device)
        self.masks = torch.ones(self.num_steps + 1, params.n_process,
                                1).to(self.device)

        self.entropies = torch.zeros(self.num_steps, params.n_process,
                                     1).to(self.device)
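Example #6

This snippet is truncated at the top: it picks up at the tail of an evaluation-mode if/elif chain in main() (only the zero-shot branch survives), then creates the grounding environment, configures logging, loads the shared A3C_LSTM_GA model, and launches the test process.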
        args.use_train_instructions = 0
        args.num_processes = 0
        log_filename = "test-ZSL.log"
    else:
        assert False, "Invalid evaluation type"

    env = grounding_env.GroundingEnv(args)
    args.input_size = len(env.word_to_idx)

    # Setup logging
    if not os.path.exists(args.dump_location):
        os.makedirs(args.dump_location)
    logging.basicConfig(filename=args.dump_location + log_filename,
                        level=logging.INFO)

    shared_model = A3C_LSTM_GA(args)

    # Load the model
    if (args.load != "0"):
        shared_model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))

    shared_model.share_memory()

    processes = []

    # Start the test thread
    p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
    p.start()
    processes.append(p)
Example #7
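A per-worker training loop: each episode resets the RoomNavTask, encodes the target room as instruction indices, syncs the local model from the shared one, and steps with action_train, running an A3C update every num_steps steps or when the episode ends.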
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        #optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)

            Agent.put_reward(rew, entropy, value, log_prob)
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)

            if done:
                break
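Example #8

The evaluation loop for the language-grounding environment: at each episode start it reloads the shared weights (unless evaluating a fixed model), picks greedy actions from the policy, and every test_freq episodes logs average reward, accuracy, and episode length, checkpointing the model whenever the average reward reaches a new best.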
def test(rank, args, shared_model):
    torch.manual_seed(args.seed + rank)

    env = grounding_env.GroundingEnv(args)
    env.game_init()

    model = A3C_LSTM_GA(args)

    if (args.load != "0"):
        print("Loading model ... " + args.load)
        model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))

    model.eval()

    (image, instruction), _, _, _ = env.reset()

    # Print instruction while evaluating and visualizing
    if args.evaluate != 0 and args.visualize == 1:
        print("Instruction: {} ".format(instruction))

    # Getting indices of the words in the instruction
    instruction_idx = []
    for word in instruction.split(" "):
        instruction_idx.append(env.word_to_idx[word])
    instruction_idx = np.array(instruction_idx)

    image = torch.from_numpy(image).float() / 255.0
    instruction_idx = torch.from_numpy(instruction_idx).view(1, -1)

    reward_sum = 0
    done = True

    start_time = time.time()

    episode_length = 0
    rewards_list = []
    accuracy_list = []
    episode_length_list = []
    num_episode = 0
    best_reward = 0.0
    test_freq = 50
    while True:
        episode_length += 1
        if done:
            if (args.evaluate == 0):
                model.load_state_dict(shared_model.state_dict())

            # volatile=True is the legacy (pre-0.4) PyTorch inference mode
            cx = Variable(torch.zeros(1, 256), volatile=True)
            hx = Variable(torch.zeros(1, 256), volatile=True)
        else:
            cx = Variable(cx.data, volatile=True)
            hx = Variable(hx.data, volatile=True)

        tx = Variable(torch.from_numpy(np.array([episode_length])).long(),
                      volatile=True)

        value, logit, (hx, cx) = model((Variable(image.unsqueeze(0),
                                                 volatile=True),
                                        Variable(instruction_idx,
                                                 volatile=True), (tx, hx, cx)))
        prob = F.softmax(logit)
        action = prob.max(1)[1].data.numpy()

        (image, _), reward, done, _ = env.step(action[0])

        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        if done:
            num_episode += 1
            rewards_list.append(reward_sum)
            # Print reward while evaluating and visualizing
            if args.evaluate != 0 and args.visualize == 1:
                print("Total reward: {}".format(reward_sum))

            episode_length_list.append(episode_length)
            if reward == CORRECT_OBJECT_REWARD:
                accuracy = 1
            else:
                accuracy = 0
            accuracy_list.append(accuracy)
            if (len(rewards_list) >= test_freq):
                print(" ".join([
                    "Time {},".format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time))),
                    "Avg Reward {},".format(np.mean(rewards_list)),
                    "Avg Accuracy {},".format(np.mean(accuracy_list)),
                    "Avg Ep length {},".format(np.mean(episode_length_list)),
                    "Best Reward {}".format(best_reward)
                ]))
                logging.info(" ".join([
                    "Time {},".format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time))),
                    "Avg Reward {},".format(np.mean(rewards_list)),
                    "Avg Accuracy {},".format(np.mean(accuracy_list)),
                    "Avg Ep length {},".format(np.mean(episode_length_list)),
                    "Best Reward {}".format(best_reward)
                ]))
                if np.mean(rewards_list) >= best_reward and args.evaluate == 0:
                    torch.save(model.state_dict(),
                               args.dump_location + "model_best")
                    best_reward = np.mean(rewards_list)

                rewards_list = []
                accuracy_list = []
                episode_length_list = []
            reward_sum = 0
            episode_length = 0
            (image, instruction), _, _, _ = env.reset()
            # Print instruction while evaluating and visualizing
            if args.evaluate != 0 and args.visualize == 1:
                print("Instruction: {} ".format(instruction))

            # Getting indices of the words in the instruction
            instruction_idx = []
            for word in instruction.split(" "):
                instruction_idx.append(env.word_to_idx[word])
            instruction_idx = np.array(instruction_idx)
            instruction_idx = torch.from_numpy(instruction_idx).view(1, -1)
Example #9
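A test worker for House3D navigation: each evaluation round it syncs the shared model, plays n_try episodes on its assigned house, and saves a checkpoint whenever the success rate reaches a new best, printing and logging the running statistics.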
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')
    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()

    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    #time.sleep(rank*30)

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  #reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    #if best_acc.value >= best_rate:
                    #    best_rate = best_acc.value
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(), params.weight_dir +
                                'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    #if best_rate > best_acc.value:
                    #    best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
Example #10
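An offline test runner for a saved checkpoint: each worker sweeps house ids (stopping at 70), treating ids below 20 as seen houses and the rest as unseen, plays n_test episodes per house, and accumulates success counts and episode lengths in shared counters.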
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length,
             unseen_succ, unseen_length):

    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()

    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()

    while True:
        house_id = rank + (n_test * params.n_process)

        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id -
                                          (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  #reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target
            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n", "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])
            n_test += 1