Exemple #1
0
    def validate(self, episodes, verbose=True):
        """Run `episodes` evaluation episodes per environment with the
        current training model and return a list of log dicts (one per env).

        Advances self.val_seed so successive validations use fresh seeds.
        """
        if verbose:
            logger.info("Validating the model")

        # Any one of the training envs works for constructing the agent
        # wrapper; the model itself is swapped in just below.
        multi_env = getattr(self.args, 'multi_env', None)
        base_env = self.env[0] if multi_env else self.env
        agent = utils.load_agent(base_env,
                                 model_name=self.args.model,
                                 argmax=True)

        # Evaluate the live training model rather than a saved checkpoint.
        agent.model = self.acmodel
        agent.model.eval()

        env_names = multi_env if multi_env else [self.args.env]
        logs = []
        for env_name in env_names:
            logs.append(
                batch_evaluate(agent,
                               env_name,
                               self.val_seed,
                               episodes,
                               pixel=self.use_pixel))
            # Shift the seed so the next env / next validation round
            # evaluates on different episodes.
            self.val_seed += episodes
        agent.model.train()

        return logs
Exemple #2
0
    def validate(self, episodes, verbose=True):
        """Evaluate the current model for `episodes` episodes per environment.

        Returns a list of log dicts from batch_evaluate, one per environment
        (a single-element list unless `args.multi_env` is set).
        """
        # Seed needs to be reset for each validation, to ensure consistency
        utils.seed(self.args.val_seed)

        if verbose:
            logger.info("Validating the model")
        # With multiple training envs, any one of them works for building
        # the agent wrapper; the model is replaced below anyway.
        if getattr(self.args, 'multi_env', None):
            agent = utils.load_agent(self.env[0],
                                     model_name=self.args.model,
                                     argmax=True)
        else:
            agent = utils.load_agent(self.env,
                                     model_name=self.args.model,
                                     argmax=True)

        # Setting the agent model to the current model
        agent.model = self.acmodel

        # Inference mode for the duration of the evaluation; restored after.
        agent.model.eval()
        logs = []

        for env_name in ([self.args.env]
                         if not getattr(self.args, 'multi_env', None) else
                         self.args.multi_env):
            logs += [
                batch_evaluate(agent, env_name, self.args.val_seed, episodes)
            ]
        agent.model.train()

        return logs
Exemple #3
0
def main(args, seed, episodes):
    """Evaluate an agent on args.env and return the evaluation logs."""
    # Seed every randomness source before constructing anything stochastic.
    utils.seed(seed)

    # Build the environment and the agent to evaluate.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, None, None, args.argmax,
                             args.env)

    # A demo-backed agent cannot replay more episodes than it has demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Dispatch on the agent type: demo replay, sequential bot evaluation,
    # or fast batched model evaluation.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent,
                          args.env,
                          seed,
                          episodes,
                          return_obss_actions=True)
def main_test(args, seed, episodes):
    """Evaluate an agent on the held-out test variant of args.env."""
    # Seed every randomness source first.
    utils.seed(seed)

    # The test environment is registered under the "<env>_Test-v0" id.
    env_name = args.env + "_Test-v0"
    env = gym.make(env_name)
    env.seed(seed)
    agent = utils.load_agent(env,
                             args.model,
                             argmax=args.argmax,
                             env_name=env_name)

    # A demo-backed agent cannot replay more episodes than it has demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Dispatch on the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, env_name, seed, episodes)
Exemple #5
0
def evaluate_agent(il_learn, eval_seed, num_eval_demos):
    """
    Evaluate the agent on some number of episodes and return the seeds for the
    episodes the agent performed the worst on.
    """
    logger.info("Evaluating agent on {}".format(il_learn.args.env))

    agent = utils.load_agent(il_learn.env, il_learn.args.model)

    # Evaluate in inference mode, then restore training mode.
    agent.model.eval()
    logs = batch_evaluate(agent,
                          il_learn.args.env,
                          episodes=num_eval_demos,
                          seed=eval_seed,
                          seed_shift=0)
    agent.model.train()

    returns = logs['return_per_episode']
    success_rate = np.mean([1 if r > 0 else 0 for r in returns])
    logger.info("success rate: {:.2f}".format(success_rate))

    # An episode with non-positive return counts as a failure; collect its
    # seed so the caller can revisit those episodes.
    fail_seeds = [
        logs["seed_per_episode"][idx] for idx, ret in enumerate(returns)
        if ret <= 0
    ]

    return success_rate, fail_seeds
Exemple #6
0
def evaluate_agent(il_learn,
                   eval_seed,
                   num_eval_demos,
                   return_obss_actions=False):
    """
    Evaluate the agent on some number of episodes and return the seeds for the
    episodes the agent performed the worst on.

    When `return_obss_actions` is True, also return the observations and
    actions of the failing episodes.
    """
    logger.info("Evaluating agent on {} using {} demos".format(
        il_learn.args.env, num_eval_demos))

    agent = utils.load_agent(il_learn.env, il_learn.args.model)

    # Evaluate in inference mode, then restore training mode.
    agent.model.eval()
    logs = batch_evaluate(agent,
                          il_learn.args.env,
                          episodes=num_eval_demos,
                          seed=eval_seed,
                          seed_shift=0,
                          return_obss_actions=return_obss_actions)
    agent.model.train()

    returns = logs['return_per_episode']
    success_rate = np.mean([1 if r > 0 else 0 for r in returns])
    logger.info("success rate: {:.2f}".format(success_rate))

    # An episode with non-positive return counts as a failure; gather its
    # seed (and optionally its trajectory) for later inspection.
    fail_seeds = []
    fail_obss = []
    fail_actions = []
    for idx, ret in enumerate(returns):
        if ret > 0:
            continue
        fail_seeds.append(logs["seed_per_episode"][idx])
        if return_obss_actions:
            fail_obss.append(logs["observations_per_episode"][idx])
            fail_actions.append(logs["actions_per_episode"][idx])

    logger.info("{} fails".format(len(fail_seeds)))

    if return_obss_actions:
        return success_rate, fail_seeds, fail_obss, fail_actions
    return success_rate, fail_seeds
def main(args, seed, episodes):
    """Evaluate an agent on fixed seeds/missions read from a Mechanical Turk
    CSV file and return the evaluation logs."""
    # Seed every randomness source first.
    utils.seed(seed)

    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)
    # A demo-backed agent cannot replay more episodes than it has demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # A projection requires a projection file.
    if args.proj is not None:
        assert args.proj_file is not None

    proj_sentences = None
    if args.proj_file is not None:
        with open(args.proj_file, newline="") as reader:
            proj_sentences = reader.readlines()

    # Read (seed, original command, human-rewritten command) per row.
    seeds = []
    orig_missions = []
    missions = []
    with open(args.turk_file, newline="") as reader:
        csv_reader = csv.reader(reader)
        header = next(csv_reader)
        i_seed = header.index("Input.seed")
        i_orig_dir = header.index("Input.cmd")
        i_mission = header.index("Answer.command")
        for row in csv_reader:
            seeds.append(int(row[i_seed]))
            orig_missions.append(row[i_orig_dir])
            missions.append(row[i_mission])

    # Human mode evaluates on the rewritten missions (with projection);
    # otherwise only the original missions are used.
    if args.human:
        return evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions,
                                    missions, args.proj, proj_sentences)
    return evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions)
Exemple #8
0
def main(args, seed, episodes):
    """Evaluate an agent on args.env and return raw evaluation logs."""
    # Seed every randomness source first.
    utils.seed(seed)

    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)

    # A demo-backed agent cannot replay more episodes than it has demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Model agents support fast batched rollouts unless contiguous episodes
    # were explicitly requested; everything else uses sequential evaluation.
    if isinstance(agent, utils.ModelAgent) and not args.contiguous_episodes:
        return batch_evaluate(agent, args.env, seed, episodes)
    return evaluate(agent, env, episodes, False)
Exemple #9
0
# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)

# The current observation is shared with the key callback below.
global obs
obs = env.reset()
print("Mission: {}".format(obs["mission"]))

# Define agent
agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                         args.argmax, args.env)

# Run the agent

done = True

action = None


def keyDownCb(keyName):
    # Key-press callback: ignores keys outside action_map, otherwise asks
    # the agent for an action on the current observation.
    # NOTE(review): the body appears truncated here — it ends right after
    # computing the agent's action; verify against the full source.
    global obs
    # Avoiding processing of observation by agent for wrong key clicks
    if keyName not in action_map and keyName != "RETURN":
        return

    agent_action = agent.act(obs)['action']
Exemple #10
0
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the expert agent and save successful episodes as demos.

    Parameters:
        n_episodes: number of demos to collect before stopping.
        valid: passed to utils.get_demos_path (selects the demos file).
        seed: seed for all randomness sources and the environment.
        shift: number of initial env resets to skip before collecting.

    Demos are periodically written to disk (every args.save_interval demos)
    and saved once more at the end.
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    # Burn through `shift` episodes so collection starts later in the
    # env's episode stream.
    for i in range(shift):
        env.reset()

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    # Used to compute the demos-per-second rate between log intervals.
    checkpoint_time = time.time()

    while True:
        # Run the expert for one episode

        done = False
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length;
            # images are blosc-compressed to keep the demos file small.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))

            if len(demos) >= n_episodes:
                break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            # Best-effort collection: log and skip the episode unless the
            # user asked to crash on exceptions.
            if args.on_exception == 'crash':
                raise
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos), demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
    # NOTE(review): this fragment appears to belong to a different function
    # whose definition is not visible here — `pairs_dict`, `test_mode` and
    # `model_path` are not defined in the surrounding code; verify against
    # the full source before editing.
    if "_n" in args.env:
        env = gym.make(args.env,
                       pairs_dict=pairs_dict,
                       test_instr_mode=test_mode,
                       num_dists=args.num_dists)
    else:
        env = gym.make(args.env)

    # Wrap the env in a Monitor that records episodes under demo_path.
    demo_path = os.path.join(model_path, test_mode)
    env = Monitor(env, demo_path, _check_log_this, force=True)
    env.seed(args.seed)

    # Define agent
    agent = utils.load_agent(env=env,
                             model_name=args.model,
                             argmax=args.argmax,
                             env_name=args.env,
                             instr_arch=args.instr_arch)
    utils.seed(args.seed)

    print('\n')
    print(f'=== EVALUATING MODE: {test_mode} ===')

    # Run the agent
    done = False
    action = None
    obs = env.reset()

    step = 0
    episode_num = 0
Exemple #12
0
def main(args, seed, episodes):
    """Evaluate an agent on every environment in args.env.

    Parameters:
        args: parsed arguments; args.env is a list of environment names.
        seed: seed for all randomness sources and the environments.
        episodes: number of evaluation episodes per environment.

    Returns:
        dict mapping env name -> logs; the per-episode lists are replaced by
        synthesized summary statistics (return/success only when a model was
        evaluated).
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task.
    results = {}

    for env_name in args.env:

        start_time = time.time()

        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env,
                                 args.model,
                                 args.demos,
                                 args.demos_origin,
                                 args.argmax,
                                 env_name,
                                 model_path=args.model_path)

        # BUGFIX: this check previously ran before `agent` was defined,
        # raising NameError. A demo-backed agent cannot replay more episodes
        # than it has demos.
        if args.model is None and args.episodes > len(agent.demos):
            # Set the number of episodes to be the number of demos
            episodes = len(agent.demos)

        # Evaluate
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, episodes)

        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        ellapsed_time = int(end_time - start_time)
        duration = datetime.timedelta(seconds=ellapsed_time)

        # Return/success stats only exist when a model was evaluated.
        if args.model is not None:
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])

        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        if args.model is not None:
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        # Episodes sorted by decreasing frame count (slowest first).
        indexes = sorted(range(len(logs["num_frames_per_episode"])),
                         key=lambda k: -logs["num_frames_per_episode"][k])

        n = args.worst_episodes_to_show
        if n > 0:
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if args.model is not None:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        logs["num_frames_per_episode"][i]))
                else:
                    print("- episode {}: F={}".format(
                        i, logs["num_frames_per_episode"][i]))

        # Store results for this env.
        # BUGFIX: return/success stats were stored unconditionally, raising
        # NameError when args.model is None (they are only computed above
        # when a model is evaluated).
        if args.model is not None:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results
    args.seed = 0 if args.model is not None else 1

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)
# Skip the first args.shift episodes before rendering.
for _ in range(args.shift):
    env.reset()

# Define agent

agent = utils.load_agent(args, env)

# Run the agent

done = True
import cv2
import numpy as np
episode = 0
step = 0
# Render loop: each iteration grabs an RGB frame, upscales it to 512x512
# and builds a per-step filename under rendered_image/.
# NOTE(review): the loop body appears truncated here — no imwrite/step logic
# is visible; verify against the full source.
while True:
    time.sleep(args.pause)
    image = env.render("rgb_array")
    image = cv2.resize(image, dsize=(512, 512), interpolation=cv2.INTER_CUBIC)
    #image = np.transpose(image, (2, 0, 1))
    file_name = 'rendered_image/episodes_' + str(episode) + '_step_' + str(
        step) + '.png'
Exemple #14
0
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the expert agent and save successful episodes as demos.

    Pixel-aware variant: when args.pixels is set, the env is wrapped with
    RGBImgPartialObsWrapper and per-step directions are recorded as None.

    Parameters:
        n_episodes: number of demos to collect before stopping.
        valid: passed to utils.get_demos_path (selects the demos file).
        seed: base seed; each attempt is seeded with seed + len(demos).
        shift: unused in this variant — TODO confirm against callers.
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    # Used to compute the demos-per-second rate between log intervals.
    checkpoint_time = time.time()

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        # After a failure/crash, reset without reseeding so a new (solvable)
        # mission is drawn; otherwise seed deterministically per demo index.
        if just_crashed:
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                # Pixel observations carry no discrete direction.
                if use_pixels:
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length;
            # images are blosc-compressed to keep the demos file small.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            # Best-effort collection: log and skip the episode unless the
            # user asked to crash on exceptions.
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])