Example 1
import utils
from torch_ac.utils.penv import ParallelEnv  # assumed import path, as in torch-ac


def make_envs(procs, env_name, seed, extrap_min, extrap_max):
    envs = []
    for i in range(procs):
        env = utils.make_env(env_name, seed + 100000 * i,
                             {"extrapolate_min": extrap_min,
                              "extrapolate_max": extrap_max})
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")
    return env
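
A minimal usage sketch of make_envs; the environment name and the extrapolation bounds below are illustrative placeholders:

# Illustrative call: 16 seeded environment copies wrapped in one ParallelEnv.
env = make_envs(procs=16,
                env_name="MiniGrid-Empty-8x8-v0",
                seed=1,
                extrap_min=0.0,
                extrap_max=1.0)
obss = env.reset()  # one observation per sub-environment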
Example 2
import argparse
import time

import numpy
import torch
from matplotlib.pyplot import imsave  # assumed provider of imsave

import utils
from torch_ac.utils.penv import ParallelEnv  # assumed import path, as in torch-ac


def main():

    # Parse arguments

    parser = argparse.ArgumentParser()
    parser.add_argument("--env",
                        required=True,
                        help="name of the environment (REQUIRED)")
    parser.add_argument("--model",
                        required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--episodes",
                        type=int,
                        default=100,
                        help="number of episodes of evaluation (default: 100)")
    parser.add_argument("--seed",
                        type=int,
                        default=0,
                        help="random seed (default: 0)")
    parser.add_argument("--procs",
                        type=int,
                        default=1,
                        help="number of processes (default: 16)")
    parser.add_argument("--argmax",
                        action="store_true",
                        default=False,
                        help="action with highest probability is selected")
    parser.add_argument("--worst-episodes-to-show",
                        type=int,
                        default=10,
                        help="how many worst episodes to show")
    parser.add_argument("--memory",
                        action="store_true",
                        default=False,
                        help="add a LSTM to the model")
    parser.add_argument("--text",
                        action="store_true",
                        default=False,
                        help="add a GRU to the model")
    parser.add_argument("--visualize", default=False, help="print stuff")
    parser.add_argument("--save_path",
                        default="test_image",
                        help="save path for agent visualizations")
    args = parser.parse_args()

    # Set seed for all randomness sources

    utils.seed(args.seed)

    # Set device

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environments

    envs = []
    for i in range(args.procs):
        # Offset the seed per process so each environment gets a distinct,
        # reproducible random stream.
        env = utils.make_env(args.env, args.seed + 10000 * i)
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")

    # Load agent

    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space,
                        env.action_space,
                        model_dir,
                        device=device,
                        argmax=args.argmax,
                        num_envs=args.procs,
                        use_memory=args.memory,
                        use_text=args.text)
    print("Agent loaded\n")

    # Initialize logs

    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent

    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)

    img_sum = []
    img_count = 0

    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, _ = env.step(actions)

        # Let the agent update its internal state (e.g. reset recurrent
        # memories for finished episodes).
        agent.analyze_feedbacks(rewards, dones)

        log_episode_return += torch.tensor(rewards,
                                           device=device,
                                           dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

        state = env.get_environment_state()
        img = state.grid.render(32,
                                state.agent_pos,
                                state.agent_dir,
                                highlight_mask=None)
        # Frames are accumulated per process in the loop below and averaged
        # into a single image when an episode ends.

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(
                    log_episode_num_frames[i].item())

                if args.visualize:
                    if len(img_sum) > 0:
                        # Average the accumulated frames and cast back to
                        # uint8 for saving.
                        img_sum = (img_sum / img_count).astype(numpy.uint8)
                        filepath = args.save_path + '_image_' + str(
                            log_done_counter - 1) + '.jpg'
                        imsave(filepath, img_sum)
                        img_sum = []
                        img_count = 0
            else:
                img_count += 1
                if img_count == 1:
                    # Accumulate in float so repeated addition of uint8
                    # frames cannot overflow.
                    img_sum = img.astype(float)
                else:
                    img_sum += img

        # Zero the per-episode accumulators for environments that just
        # finished an episode.
        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs

    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    duration = int(end_time - start_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print(
        "F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
        .format(num_frames, fps, duration, *return_per_episode.values(),
                *num_frames_per_episode.values()))

    # Print worst episodes

    n = args.worst_episodes_to_show
    if n > 0:
        print("\n{} worst episodes:".format(n))

        indexes = sorted(range(len(logs["return_per_episode"])),
                         key=lambda k: logs["return_per_episode"][k])
        for i in indexes[:n]:
            print("- episode {}: R={}, F={}".format(
                i, logs["return_per_episode"][i],
                logs["num_frames_per_episode"][i]))
Example 3
# Set seed for all randomness sources

utils.seed(args.seed)

# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environments

envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    argmax=args.argmax,
                    num_envs=args.procs,
                    use_memory=args.memory,
                    use_text=args.text)
print("Agent loaded\n")

# Initialize logs
Example 4
utils.seed(args.seed)

# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environments

envs = []
for i in range(args.procs):
    # FoodEnv is a project-specific environment, constructed directly
    # instead of via utils.make_env.
    env = FoodEnv()
    env.seed(args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir, device,
                    args.argmax, args.procs)
print("Agent loaded\n")

# Initialize logs

logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent
Example 5
utils.seed(args.seed)

# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environments

envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(
    envs
)  # TODO: Is this just to run the model on many environments at once, to reduce the variance of the results?
print("Environments loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    device=device,
                    argmax=args.argmax,
                    num_envs=args.procs,
                    use_memory=args.memory,
                    use_text=args.text)
print("Agent loaded\n")