Exemple #1
0
def main(args, seed, episodes):
    """Evaluate the agent described by ``args`` on the environment ``args.env``.

    Args:
        args: Parsed options. This function reads ``env``, ``model``,
            ``argmax``, ``episodes`` and ``contiguous_episodes``.
        seed: Seed passed to ``utils.seed``, to the environment and, for
            batch evaluation, to ``batch_evaluate``.
        episodes: Requested number of evaluation episodes. It is replaced by
            the number of agent demos when no model is given and
            ``args.episodes`` exceeds that number.

    Returns:
        The logs produced by whichever evaluation routine was selected.
    """
    # Seed every randomness source before anything is created.
    utils.seed(seed)

    env_id = args.env
    environment = gym.make(env_id)
    environment.seed(seed)

    # The two ``None`` positional arguments are forwarded as-is; presumably
    # they are the demos name / demos origin slots -- confirm against
    # ``utils.load_agent``.
    agent = utils.load_agent(environment, args.model, None, None, args.argmax,
                             env_id)

    # Without a model, never ask for more episodes than there are demos.
    no_model = args.model is None
    if no_model and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Pick the evaluation routine from the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)

    if isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        return evaluate(agent, environment, episodes, False)

    return batch_evaluate(agent,
                          env_id,
                          seed,
                          episodes,
                          return_obss_actions=True)
def main_test(args, seed, episodes):
    """Evaluate the agent on the test variant of ``args.env``.

    The environment id is ``args.env`` with the suffix ``"_Test-v0"``.

    Args:
        args: Parsed options. This function reads ``env``, ``model``,
            ``argmax`` and ``episodes``.
        seed: Seed passed to ``utils.seed``, to the environment and, for
            batch evaluation, to ``batch_evaluate``.
        episodes: Requested number of evaluation episodes. It is replaced by
            the number of agent demos when no model is given and
            ``args.episodes`` exceeds that number.

    Returns:
        The logs produced by whichever evaluation routine was selected.
    """
    # Seed every randomness source before anything is created.
    utils.seed(seed)

    # Build the test environment.
    test_env_id = args.env + "_Test-v0"
    test_env = gym.make(test_env_id)
    test_env.seed(seed)

    agent = utils.load_agent(test_env,
                             args.model,
                             argmax=args.argmax,
                             env_name=test_env_id)

    # Without a model, never ask for more episodes than there are demos.
    no_model = args.model is None
    if no_model and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Pick the evaluation routine from the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)

    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, test_env, episodes, False)

    return batch_evaluate(agent, test_env_id, seed, episodes)
Exemple #3
0
def main(args, seed, episodes):
    """Evaluate the configured agent on every environment in ``args.env``.

    For each environment the agent is loaded, evaluated, a one-line summary
    is printed, and optionally the episodes with the most frames are listed.

    Args:
        args: Parsed options. This function reads ``env`` (an iterable of
            environment ids), ``model``, ``demos``, ``demos_origin``,
            ``argmax``, ``model_path``, ``episodes``,
            ``contiguous_episodes`` and ``worst_episodes_to_show``.
        seed: Seed passed to ``utils.seed``, to each environment and, for
            batch evaluation, to ``batch_evaluate``.
        episodes: Requested number of evaluation episodes per environment.

    Returns:
        dict: Maps each environment id to its logs.
        ``num_frames_per_episode`` is replaced by its ``utils.synthesize``
        summary. When a model is given, ``return_per_episode`` is replaced
        by its summary and ``success_per_episode`` is added. Without a
        model those two entries are left as the evaluation routine
        produced them.
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task.
    results = {}
    has_model = args.model is not None

    for env_name in args.env:

        start_time = time.time()

        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env,
                                 args.model,
                                 args.demos,
                                 args.demos_origin,
                                 args.argmax,
                                 env_name,
                                 model_path=args.model_path)

        # The demo count can only be read once the agent exists, so this
        # check has to come after load_agent. The clamp is kept in a
        # per-environment variable so that a small demo set for one
        # environment does not reduce the episode count of the next ones.
        env_episodes = episodes
        if not has_model and args.episodes > len(agent.demos):
            # Set the number of episodes to be the number of demos
            env_episodes = len(agent.demos)

        # Evaluate
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, env_episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, env_episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, env_episodes)

        end_time = time.time()

        # Print logs
        frames_per_episode = logs["num_frames_per_episode"]
        num_frames = sum(frames_per_episode)
        fps = num_frames / (end_time - start_time)
        elapsed_seconds = int(end_time - start_time)
        duration = datetime.timedelta(seconds=elapsed_seconds)

        num_frames_per_episode = utils.synthesize(frames_per_episode)

        if has_model:
            returns = logs["return_per_episode"]
            return_per_episode = utils.synthesize(returns)
            # An episode counts as a success when its return is positive.
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in returns])
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        n = args.worst_episodes_to_show
        if n > 0:
            # "Worst" means the episodes that used the most frames.
            indexes = sorted(range(len(frames_per_episode)),
                             key=lambda k: -frames_per_episode[k])
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if has_model:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        frames_per_episode[i]))
                else:
                    print("- episode {}: F={}".format(
                        i, frames_per_episode[i]))

        # Store results for this env. The return/success summaries only
        # exist when a model was evaluated; storing them unconditionally
        # would reference unassigned locals.
        if has_model:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results