def enjoy(environment, model, seed=0, argmax=False, pause=0.1):
    utils.seed(seed)

    # Generate environment

    environment.seed(seed)

    # Define agent

    model_dir = utils.get_model_dir(model)
    agent = utils.Agent(model_dir, environment.observation_space, argmax)

    # Run the agent

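    # Start with done=True so the environment is reset on the first loop iteration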
    done = True

    while True:
        if done:
            obs = environment.reset()
            print("Instr:", obs["mission"])

        time.sleep(pause)
        renderer = environment.render("human")
        renderer.window.update_imagination_display([[1, 2, 3], 2, 3], None,
                                                   None)

        action = agent.get_action(obs)
        obs, reward, done, _ = environment.step(action)
        agent.analyze_feedback(reward, done)

        if renderer.window is None:
            break
Example 2
                    default=False,
                    help="action with highest probability is selected")
args = parser.parse_args()

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)

# Define agent

agent = utils.Agent(args.model, env.observation_space, args.deterministic)

# Initialize logs

logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run the agent

start_time = time.time()

for _ in range(args.episodes):
    obs = env.reset()
    done = False

    num_frames = 0
    returnn = 0
Example 3
import nengo_ssp as ssp
X, Y, _ = ssp.HexagonalBasis(10, 10)
d = len(X.v)
env = SSPWrapper(env, d, X, Y)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    model_name=args.algo,
                    device=device,
                    argmax=args.argmax,
                    use_memory=args.memory,
                    use_text=args.text,
                    input_type=args.input,
                    feature_learn=args.feature_learn)
print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif
    frames = []

plt.imshow(env.render('human'))

for episode in range(args.episodes):
Example 4
utils.seed(args.seed)

# Generate environment

envs = []
for i in range(args.procs):
    env = gym.make(args.env)
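    # Offset the seed so each parallel process gets a different environment seed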
    env.seed(args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)

# Define agent

save_dir = utils.get_save_dir(args.model)
agent = utils.Agent(save_dir, env.observation_space, args.argmax, args.procs)
print("CUDA available: {}\n".format(torch.cuda.is_available()))

# Initialize logs

logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run the agent

start_time = time.time()

obss = env.reset()

log_done_counter = 0
log_episode_return = torch.zeros(args.procs, device=agent.device)
log_episode_num_frames = torch.zeros(args.procs, device=agent.device)
Example 5
            for model_idx, model_name in enumerate(model_names):

                for env_idx, env_name in enumerate(envs):
                    results = np.zeros((nr_levels, ))
                    env = gym.make(env_name)

                    # Make the episodes comparable between all agents.
                    env.seed(0)

                    if fully_observable_environment:
                        env = gym_minigrid.wrappers.FullyObsWrapper(env)

                    # Define agent
                    model_dir = utils.get_model_dir(model_name)
                    agent = utils.Agent(env_name,
                                        env.observation_space,
                                        model_dir,
                                        argmax=False)

                    lvl_cnt = 0

                    obs = env.reset()
                    while True:

                        action = agent.get_action(obs)
                        obs, reward, done, _ = env.step(action)
                        agent.analyze_feedback(reward, done)

                        if done:
                            results[lvl_cnt] = reward > 0
                            lvl_cnt += 1
                            pbar.update()
Example 6
# Generate environment

env = gym.make(args.env)
env.seed(args.seed)
if args.fullObs:
    env = gym_minigrid.wrappers.FullyObsWrapper(env)
elif args.POfullObs:
    env = gym_minigrid.wrappers.PartialObsFullGridWrapper(env)
for _ in range(args.shift):
    env.reset()

# Define agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(args.env, env.observation_space, model_dir, args.argmax)

# Run the agent

done = True

while True:
    if done:
        obs = env.reset()

    time.sleep(args.pause)
    renderer = env.render()

    action = agent.get_action(obs)
    obs, reward, done, _ = env.step(action)
    agent.analyze_feedback(reward, done)
Example 7
# Load environment

env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.env, args.n_columns, args.transfer)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    args.seed,
                    args.n_columns,
                    device=device,
                    argmax=args.argmax,
                    use_memory=args.memory,
                    use_text=args.text)
print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')
Example 8
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environments

envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir, device, args.argmax, args.procs)
print("Agent loaded\n")

# Initialize logs

logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent

start_time = time.time()

obss = env.reset()

log_done_counter = 0
log_episode_return = torch.zeros(args.procs, device=device)
log_episode_num_frames = torch.zeros(args.procs, device=device)
Example 9
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment

env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    device=device,
                    argmax=args.argmax,
                    use_text=args.text)
print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
Example 10
def visualiseAndSave(envStr,
                     model_name,
                     seed,
                     numEpisodes,
                     txt_logger,
                     gifName="test",
                     save=False,
                     dir=None,
                     agentType=ppo,
                     CNNCLASS=None):

    if agentType != ppo and agentType != dqn:
        raise Exception("agentType must be ppo or dqn")

    utils.seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    env = utils.make_env(envStr, seed)

    model_dir = utils.get_model_dir(model_name, dir)

    if agentType == ppo:
        agent = utils.Agent(env.observation_space,
                            env.action_space,
                            model_dir,
                            device=device,
                            argmax=True,
                            use_memory=False,
                            use_text=False)
    else:
        if hasattr(env, 'my_shape'):
            model = CNNCLASS(env.my_shape, env.action_space.n)
        else:
            model = CNNCLASS(env.observation_space['image'].shape,
                             env.action_space.n)

        loaded_dict = torch.load(model_dir + "/status.pt")
        model.load_state_dict(loaded_dict["model_state"])

        print("For Test load state frames:", loaded_dict['num_frames'],
              "updates:", loaded_dict['update'])

        model.to(device)
        model.eval()
        if USE_CUDA:
            print("USE CUDA")
            model = model.cuda()

    if save:
        from array2gif import write_gif
        frames = []

    mycumulativereward = 0
    mycumulativeperf = 0
    mycumulativeperffull = 0

    mycumulativeButtons = 0
    mycumulativePhones = 0
    mycumulativeDirts = 0
    mycumulativeMesses = 0

    runsNum = 0
    for episode in range(numEpisodes):
        obs = env.reset()
        myreward = 0
        myperf = 0
        myperffull = 0

        myButtons = 0
        myPhones = 0
        myDirts = 0
        myMesses = 0

        while True:
            if save:
                frames.append(numpy.moveaxis(env.render("rgb_array"), 2, 0))

            if agentType == ppo:
                action = agent.get_action(obs)
            else:
                action = model.act(obs['image'], 0,
                                   True)  # epsilon == 0 so no exploration

            obs, reward, done, info = env.step(action)

            myreward += reward
            myperf += info['performance']
            myperffull += info['performance_full']

            myButtons += info['button_presses']
            myPhones += info['phones_cleaned']
            myDirts += info['dirt_cleaned']
            myMesses += info['messes_cleaned']

            if agentType == ppo:
                agent.analyze_feedback(reward, done)

            if done:
                runsNum += 1
                mycumulativereward += myreward
                mycumulativeperf += myperf
                mycumulativeperffull += myperffull

                mycumulativeButtons += myButtons
                mycumulativePhones += myPhones
                mycumulativeDirts += myDirts
                mycumulativeMesses += myMesses

                averageReward = mycumulativereward / runsNum
                averagePerformance = mycumulativeperf / runsNum
                averagePerformanceFull = mycumulativeperffull / runsNum

                averageButtons = mycumulativeButtons / runsNum
                averageDirts = mycumulativeDirts / runsNum
                averagePhones = mycumulativePhones / runsNum
                averageMesses = mycumulativeMesses / runsNum
                break

    if save:
        saveMeAs = model_dir + "/" + model_name + gifName + ".gif"
        txt_logger.info(("Saving gif to ", saveMeAs, "... "))
        write_gif(numpy.array(frames), saveMeAs, fps=1 / 0.3)
        txt_logger.info("Done.")

    return averageReward, averagePerformance, averagePerformanceFull, averageButtons, averageDirts, averagePhones, averageMesses
Example 11
                    help="pause duration between two consequent actions of the agent")
args = parser.parse_args()

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)

# Define agent

run_dir = utils.get_run_dir(args.model)
agent = utils.Agent(run_dir, env.observation_space, args.deterministic)

# Run the agent

done = True

while True:
    if done:
        obs = env.reset()
        print("Instr:", obs["mission"])

    time.sleep(args.pause)
    renderer = env.render("human")

    action = agent.get_action(obs)
    obs, reward, done, _ = env.step(action)
Example 12
# Load environment

env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
autoencoder = torch.load(args.autoencoder_path)
agent = utils.Agent(
    env.observation_space,
    env.action_space,
    model_dir,
    autoencoder=autoencoder,
    device=device,
    argmax=args.argmax,
    use_memory=args.memory,
    use_text=args.text,
)
print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif

    frames = []
    obs_array = []
    predicted_obs = []
    predicted_uncertainty = []
Example 13
def start(model, seed, episodes, size):
    env_name = "MiniGrid-DoorKey-" + str(size) + "x" + str(size) + "-v0"
    utils.seed(seed)
    procs = 10
    argmax = False
    all_data = np.zeros(shape=(size, 8))
    print("Evaluating storage/" + model)
    for _wall in range(2, size - 2):

        # Generate environment
        envs = []

        for i in range(procs):
            env = gym.make(env_name)
            env.setWallID(_wall)
            envs.append(env)
        env = ParallelEnv(envs)

        # Define agent

        save_dir = utils.get_save_dir(model)
        agent = utils.Agent(save_dir, env.observation_space, argmax, procs)
        # print("CUDA available: {}\n".format(torch.cuda.is_available()))

        # Initialize logs

        logs = {"num_frames_per_episode": [], "return_per_episode": []}

        # Run the agent

        start_time = time.time()

        obss = env.reset()

        log_done_counter = 0
        log_episode_return = torch.zeros(procs, device=agent.device)
        log_episode_num_frames = torch.zeros(procs, device=agent.device)

        while log_done_counter < episodes:
            actions = agent.get_actions(obss)
            obss, rewards, dones, _ = env.step(actions)
            agent.analyze_feedbacks(rewards, dones)

            log_episode_return += torch.tensor(rewards,
                                               device=agent.device,
                                               dtype=torch.float)
            log_episode_num_frames += torch.ones(procs, device=agent.device)

            for i, done in enumerate(dones):
                if done:
                    log_done_counter += 1
                    logs["return_per_episode"].append(
                        log_episode_return[i].item())
                    logs["num_frames_per_episode"].append(
                        log_episode_num_frames[i].item())

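            # Zero the running episode statistics for environments whose episode just ended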
            mask = 1 - torch.tensor(
                dones, device=agent.device, dtype=torch.float)
            log_episode_return *= mask
            log_episode_num_frames *= mask

        end_time = time.time()

        # Print logs

        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        duration = int(end_time - start_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        print(
            "Wall {:3d} | F {:6.0f} | FPS {:4.0f} | D {:3d} | R:x̄σmM {:.2f} {:.2f} {:.2f} {:.2f} | F:x̄σmM {:6.1f} {:6.1f} {:6.1f} {:6.1f}"
            .format(_wall, num_frames, fps, duration,
                    *return_per_episode.values(),
                    *num_frames_per_episode.values()))

        all_data[_wall, 0] = return_per_episode["mean"]
        all_data[_wall, 1] = return_per_episode["std"]
        all_data[_wall, 2] = return_per_episode["min"]
        all_data[_wall, 3] = return_per_episode["max"]

        all_data[_wall, 4] = num_frames_per_episode["mean"]
        all_data[_wall, 5] = num_frames_per_episode["std"]
        all_data[_wall, 6] = num_frames_per_episode["min"]
        all_data[_wall, 7] = num_frames_per_episode["max"]

    return all_data
Example 14
    return env

env = make_envs(args.procs, args.env, args.seed, args.extrap_min, args.extrap_min+1)

# Load agent
model_dirs = utils.get_models_for_exp(args.exp_id)
agents = defaultdict(list)
for model_dir in model_dirs:
    root = utils.get_model_dir(model_dir, args.exp_id)
    use_nac, use_text, use_memory = utils.get_args_for_model(model_dir)
    for idx, seed in enumerate(os.listdir(root)):
        exp_path = os.path.join(root, seed)
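        # When eval_one_model_per_seed is set, only the first seed directory of each model is evaluated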
        if args.eval_one_model_per_seed and idx != 0:
            continue
        agents[exp_path].append(utils.Agent(env.observation_space, env.action_space, exp_path,
                            device=device, argmax=args.argmax, num_envs=args.procs,
                            use_memory=use_memory, use_text=use_text, use_nac=use_nac))
obs_space, preprocess_obss = utils.get_obss_preprocessor(env.envs[0].observation_space)
print("Agents loaded\n")


all_logs = defaultdict(list)
start_time = time.time()
for exp_path, agent_list in agents.items():
    for agent in agent_list:
        for offset in range(args.extrap_min, args.extrap_max):
            logs = {"offset": offset, "num_frames_per_episode": [], "return_per_episode": []}
            env = make_envs(args.procs, args.env, args.seed, offset, offset+1)
            obss = env.reset()

            log_done_counter = 0
Example 15
        env = ltl_wrappers.LTLEnv(env, ltl_sampler="Default")

        agent = RandomAgent(env.action_space)

    elif (args["command"] == "viz"):
        # If the config is available (from training), load it here instead of asking the user of this script to provide all of the training-time configs
        config = vars(utils.load_config(args["model_path"]))
        args.update(config)

        env = gym.make(args["env_id"])
        env = safety_wrappers.Play(env)
        env = ltl_wrappers.LTLEnv(env,
                                  ltl_sampler=args["ltl_sampler"],
                                  progression_mode=args["progression_mode"])

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        agent = utils.Agent(env,
                            env.observation_space,
                            env.action_space,
                            args["model_path"],
                            args["ignoreLTL"],
                            args["progression_mode"],
                            args["gnn"],
                            device=device,
                            dumb_ac=args["dumb_ac"])
    else:
        print("Incorrect command: ", args["command"])
        exit(1)

    run_policy(agent, env, max_ep_len=30000, num_episodes=1000)
Example 16
# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment

env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir, device, args.argmax, use_rim = args.use_rim)
print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
    done2 = False
    while True:
Example 17
def main():
    # Parse arguments

    parser = argparse.ArgumentParser()
    # Start from an empty namespace; the real parsing happens after the
    # arguments have been registered below.
    args = argparse.Namespace()

    parser.add_argument("--env",
                        required=True,
                        help="name of the environment to be run (REQUIRED)")
    parser.add_argument("--model",
                        required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--seed",
                        type=int,
                        default=0,
                        help="random seed (default: 0)")
    parser.add_argument(
        "--shift",
        type=int,
        default=0,
        help=
        "number of times the environment is reset at the beginning (default: 0)"
    )
    parser.add_argument(
        "--argmax",
        action="store_true",
        default=False,
        help="select the action with highest probability (default: False)")
    parser.add_argument(
        "--pause",
        type=float,
        default=0.1,
        help=
        "pause duration between two consequent actions of the agent (default: 0.1)"
    )
    parser.add_argument("--gif",
                        type=str,
                        default=None,
                        help="store output as gif with the given filename")
    parser.add_argument("--episodes",
                        type=int,
                        default=1000000,
                        help="number of episodes to visualize")
    parser.add_argument("--memory",
                        action="store_true",
                        default=False,
                        help="add a LSTM to the model")
    parser.add_argument("--text",
                        action="store_true",
                        default=False,
                        help="add a GRU to the model")

    if len(sys.argv) > 1:
        args = parser.parse_args()
    else:
        args.env = 'MiniGrid-DoorKey-5x5-v0'
        args.env = 'MiniGrid-KeyCorridorGBLA-v0'
        args.model = 'KeyCorridor1'
        args.episodes = 10
        args.seed = 0
        args.shift = 0
        args.argmax = False
        args.memory = False
        args.text = False
        args.gif = 'storage/' + args.model + '/' + args.model
        args.pause = 0.1

    if args.env == 'MiniGrid-KeyCorridorGBLA-v0':
        env_descriptor = [[0, 0, 0], [0, 13, 0], [0, 0, 0]]
        task_descriptor = TaskDescriptor(envD=env_descriptor,
                                         rmDesc=None,
                                         rmOrder=None,
                                         rmSize=4,
                                         observ=True,
                                         seed=None,
                                         time_steps=None)
        env = gym.make('MiniGrid-KeyCorridorGBLA-v0', taskD=task_descriptor)
        goal = GetGoalDescriptor(env)

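        # Drill three levels down the goal refinement hierarchy to pick a specific sub-goal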
        goal = goal.refinement[0].refinement[0].refinement[0]

        env = gym_minigrid.wrappers.FullyObsWrapper(env)
        env = gym_minigrid.wrappers.ImgObsWrapper(env)
        env = GoalRL.GoalEnvWrapper(env, goal=goal, verbose=0)
        args.env = env
    else:
        pass

    # Set seed for all randomness sources

    utils.seed(args.seed)

    # Set device

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environment

    if type(args.env) == str:
        env = utils.make_env(args.env, args.seed)
    else:
        env = args.env
    for _ in range(args.shift):
        env.reset()
    print("Environment loaded\n")

    # Load agent

    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space,
                        env.action_space,
                        model_dir,
                        device=device,
                        argmax=args.argmax,
                        use_memory=args.memory,
                        use_text=args.text)
    print("Agent loaded\n")

    # Run the agent

    if args.gif:
        from array2gif import write_gif
        frames = []

    # Create a window to view the environment
    env.render('human')

    for episode in range(args.episodes):
        obs = env.reset()

        while True:
            env.render('human')
            if args.gif:
                frames.append(numpy.moveaxis(env.render("rgb_array"), 2, 0))

            action = agent.get_action(obs)
            obs, reward, done, _ = env.step(action)
            agent.analyze_feedback(reward, done)

            if done or env.window.closed:
                break

        if env.window.closed:
            break

    if args.gif:
        print("Saving gif... ", end="")
        write_gif(numpy.array(frames), args.gif + ".gif", fps=1 / args.pause)
        print("Done.")
Example 18
def main():
    # Parse arguments

    parser = argparse.ArgumentParser()
    # Start from an empty namespace; the real parsing happens after the
    # arguments have been registered below.
    args = argparse.Namespace()

    ## General parameters
    parser.add_argument("--algo", required=True,
                        help="algorithm to use: a2c | ppo (REQUIRED)")
    parser.add_argument("--env", required=True,
                        help="name of the environment to train on (REQUIRED)")
    parser.add_argument("--model", default=None,
                        help="name of the model (default: {ENV}_{ALGO}_{TIME})")
    parser.add_argument("--seed", type=int, default=1,
                        help="random seed (default: 1)")
    parser.add_argument("--log-interval", type=int, default=1,
                        help="number of updates between two logs (default: 1)")
    parser.add_argument("--save-interval", type=int, default=10,
                        help="number of updates between two saves (default: 10, 0 means no saving)")
    parser.add_argument("--procs", type=int, default=16,
                        help="number of processes (default: 16)")
    parser.add_argument("--frames", type=int, default=10**7,
                        help="number of frames of training (default: 1e7)")

    ## Parameters for main algorithm
    parser.add_argument("--epochs", type=int, default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument("--batch-size", type=int, default=256,
                        help="batch size for PPO (default: 256)")
    parser.add_argument("--frames-per-proc", type=int, default=None,
                        help="number of frames per process before update (default: 5 for A2C and 128 for PPO)")
    parser.add_argument("--discount", type=float, default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate (default: 0.001)")
    parser.add_argument("--gae-lambda", type=float, default=0.95,
                        help="lambda coefficient in GAE formula (default: 0.95, 1 means no gae)")
    parser.add_argument("--entropy-coef", type=float, default=0.01,
                        help="entropy term coefficient (default: 0.01)")
    parser.add_argument("--value-loss-coef", type=float, default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm", type=float, default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--optim-eps", type=float, default=1e-8,
                        help="Adam and RMSprop optimizer epsilon (default: 1e-8)")
    parser.add_argument("--optim-alpha", type=float, default=0.99,
                        help="RMSprop optimizer alpha (default: 0.99)")
    parser.add_argument("--clip-eps", type=float, default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--recurrence", type=int, default=1,
                        help="number of time-steps gradient is backpropagated (default: 1). If > 1, a LSTM is added to the model to have memory.")
    parser.add_argument("--text", action="store_true", default=False,
                        help="add a GRU to the model to handle text input")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="select the action with highest probability (default: False)")

    if len(sys.argv) > 1:
        args = parser.parse_args()
    else:
        args.env = 'MiniGrid-DoorKey-5x5-v0'
        args.env = 'MiniGrid-KeyCorridorGBLA-v0'
        args.algo = 'ppo'
        args.seed = 1234
        args.model = 'KeyCorridor2'
        args.frames = 2e5
        args.procs = 16
        args.text = False
        args.frames_per_proc = None
        args.discount = 0.99
        args.lr = 0.001
        args.gae_lambda = 0.95
        args.entropy_coef = 0.01
        args.value_loss_coef = 0.5
        args.max_grad_norm = 0.5
        args.recurrence = 1
        args.optim_eps = 1e-8
        args.optim_alpha = 0.99
        args.clip_eps = 0.2
        args.epochs = 4
        args.batch_size = 256
        args.log_interval = 1
        args.save_interval = 10

        args.argmax = False

    if args.env == 'MiniGrid-KeyCorridorGBLA-v0':
        env_descriptor = [[0,0,0],[0,13,0],[0,0,0]]
        task_descriptor = TaskDescriptor(envD=env_descriptor,
                                         rmDesc=None,
                                         rmOrder=None,
                                         rmSize=4,
                                         observ=True,
                                         seed=None,
                                         time_steps=None)
        env = gym.make('MiniGrid-KeyCorridorGBLA-v0', taskD=task_descriptor)
        goal = GetGoalDescriptor(env)

        goal = goal.refinement[0].refinement[0].refinement[0]

        env = gym_minigrid.wrappers.FullyObsWrapper(env)
        env = gym_minigrid.wrappers.ImgObsWrapper(env)
        env = GoalRL.GoalEnvWrapper(env,goal=goal, verbose=0)

#        env = Monitor(env, 'storage/{}/{}.monitor.csv'.format(rank, goal.goalId))  # wrap the environment in the monitor object
        args.env = env
    else:
        pass


    args.mem = args.recurrence > 1

    # Set run dir

    date = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{date}"

    model_name = args.model or default_model_name
    model_dir = utils.get_model_dir(model_name)

    # Load loggers and Tensorboard writer

    txt_logger = utils.get_txt_logger(model_dir)

    # Log command and all script arguments

    txt_logger.info("{}\n".format(" ".join(sys.argv)))
    txt_logger.info("{}\n".format(args))

    # Set seed for all randomness sources

    utils.seed(args.seed)

    # Set device

    # Load environments

    envs = []
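    # args.env may be a pre-built environment object (GBLA branch above); give each process its own copy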
    for i in range(args.procs):
        if type(args.env) == str:
            envs.append(utils.make_env(args.env, args.seed + 10000 * i))
        else:
            envs.append(deepcopy(args.env))
    txt_logger.info("Environments loaded\n")

    # Load training status


    # Load observations preprocessor

    #obs_space, preprocess_obss = utils.get_obss_preprocessor(envs[0].observation_space)

    # Load model

    agent = utils.Agent(env, model_dir, logger=txt_logger,
                        argmax=args.argmax, use_memory=args.mem, use_text=args.text)

    # Load algo
    if args.algo == 'a2c':
        agent.init_training_algo(algo_type=args.algo,
                num_cpu=args.procs,
                frames_per_proc=args.frames_per_proc,
                discount=args.discount,
                lr=args.lr,
                gae_lambda=args.gae_lambda,
                entropy_coef=args.entropy_coef,
                value_loss_coef=args.value_loss_coef,
                max_grad_norm=args.max_grad_norm,
                recurrence=args.recurrence,
                optim_eps=args.optim_eps,

                optim_alpha=args.optim_alpha)   # args for A2C
    elif args.algo == 'ppo':
        agent.init_training_algo(algo_type=args.algo,
                num_cpu=args.procs,
                frames_per_proc=args.frames_per_proc,
                discount=args.discount,
                lr=args.lr,
                gae_lambda=args.gae_lambda,
                entropy_coef=args.entropy_coef,
                value_loss_coef=args.value_loss_coef,
                max_grad_norm=args.max_grad_norm,
                recurrence=args.recurrence,
                optim_eps=args.optim_eps,

                clip_eps=args.clip_eps,         # args for PPO2
                epochs=args.epochs,
                batch_size=args.batch_size)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))


    agent.learn(total_timesteps=args.frames,
                log_interval=args.log_interval,
                save_interval=args.save_interval)

    print('training completed!')
Example 19
def run_eval():
    envs = []
    for i in range(1):
        env = utils.make_env(args.env, args.seed + 10000 * i)
        env.is_teaching = False
        env.end_pos = args.eval_goal
        envs.append(env)
    env = ParallelEnv(envs)

    # Load agent

    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir, device, args.argmax, args.procs)

    # Initialize logs

    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent

    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)
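    # Record the agent position reported in each step's info dict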
    positions = []
    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, infos = env.step(actions)
        positions.extend([info["agent_pos"] for info in infos])
        agent.analyze_feedbacks(rewards, dones)

        log_episode_return += torch.tensor(rewards, device=device, dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(log_episode_num_frames[i].item())

        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs

    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames/(end_time - start_time)
    duration = int(end_time - start_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print("Eval: F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
          .format(num_frames, fps, duration,
                  *return_per_episode.values(),
                  *num_frames_per_episode.values()))
    return return_per_episode
Example 20
# Load environments

envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space,
                    env.action_space,
                    model_dir,
                    argmax=args.argmax,
                    num_envs=args.procs,
                    use_memory=args.memory,
                    use_text=args.text)
print("Agent loaded\n")

# Initialize logs

logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent

start_time = time.time()

obss = env.reset()
Example 21
# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment

env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    args.ipo_model, device, args.argmax)

print("Agent loaded\n")

# Run the agent

if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
Example 22
def main():

    # Parse arguments

    parser = argparse.ArgumentParser()
    parser.add_argument("--env",
                        required=True,
                        help="name of the environment (REQUIRED)")
    parser.add_argument("--model",
                        required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--episodes",
                        type=int,
                        default=100,
                        help="number of episodes of evaluation (default: 100)")
    parser.add_argument("--seed",
                        type=int,
                        default=0,
                        help="random seed (default: 0)")
    parser.add_argument("--procs",
                        type=int,
                        default=1,
                        help="number of processes (default: 16)")
    parser.add_argument("--argmax",
                        action="store_true",
                        default=False,
                        help="action with highest probability is selected")
    parser.add_argument("--worst-episodes-to-show",
                        type=int,
                        default=10,
                        help="how many worst episodes to show")
    parser.add_argument("--memory",
                        action="store_true",
                        default=False,
                        help="add a LSTM to the model")
    parser.add_argument("--text",
                        action="store_true",
                        default=False,
                        help="add a GRU to the model")
    parser.add_argument("--visualize", default=False, help="print stuff")
    parser.add_argument("--save_path",
                        default="test_image",
                        help="save path for agent visualizations")
    args = parser.parse_args()

    # Set seed for all randomness sources

    utils.seed(args.seed)

    # Set device

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environments

    envs = []
    for i in range(args.procs):
        env = utils.make_env(args.env, args.seed + 10000 * i)
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")

    # Load agent

    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space,
                        env.action_space,
                        model_dir,
                        device=device,
                        argmax=args.argmax,
                        num_envs=args.procs,
                        use_memory=args.memory,
                        use_text=args.text)
    print("Agent loaded\n")

    # Initialize logs

    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent

    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)

    img_sum = []
    obss_sum = None
    encoding_sum = None
    img_count = 0

    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, _ = env.step(actions)

        agent.analyze_feedbacks(rewards, dones)

        log_episode_return += torch.tensor(rewards,
                                           device=device,
                                           dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

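        # Render the full grid of the current state; frames are accumulated below and
        # their per-episode average is saved as an image when --visualize is set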
        state = env.get_environment_state()
        img = state.grid.render(32,
                                state.agent_pos,
                                state.agent_dir,
                                highlight_mask=None)
        encoding = state.grid.encode()
        #        img_count += 1
        #        if img_count == 1:
        #            img_sum = img
        ##            obss_sum = obss[0]['image']
        ##            encoding_sum = encoding
        #        else:
        #            img_sum += img
        ##            obss_sum += obss[0]['image']
        ##            encoding_sum += encoding

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(
                    log_episode_num_frames[i].item())

                if args.visualize:
                    if len(img_sum) > 0:
                        img_sum = img_sum / img_count
                        #                        img_sum = img_sum.astype(numpy.uint8)
                        filepath = args.save_path + '_image_' + str(
                            log_done_counter - 1) + '.jpg'
                        imsave(filepath, img_sum)
                        img_sum = []
                        img_count = 0
            else:
                img_count += 1
                if img_count == 1:
                    img_sum = img  #.astype(float)
                else:
                    img_sum += img

        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs

    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    duration = int(end_time - start_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print(
        "F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
        .format(num_frames, fps, duration, *return_per_episode.values(),
                *num_frames_per_episode.values()))

    # Print worst episodes

    n = args.worst_episodes_to_show
    if n > 0:
        print("\n{} worst episodes:".format(n))

        indexes = sorted(range(len(logs["return_per_episode"])),
                         key=lambda k: logs["return_per_episode"][k])
        for i in indexes[:n]:
            print("- episode {}: R={}, F={}".format(
                i, logs["return_per_episode"][i],
                logs["num_frames_per_episode"][i]))
Example 23
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new",
                            reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128],
                            normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        with open(name, "rb") as file:
            values_from_save = pickle.load(file)

        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))

        return trained_agent
    elif type_opps == "our_mlp":
        #TODO DO ANYTHING BUT THIS.  THIS IS VERY DIRTY AND SAD :(
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # TODO: we're loading identical policy weights into different
            # variables, this is to work-around design choice of Agent's
            # having state stored inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env

                attacked_agent = constant_agent_sampler(act_dim=8,
                                                        magnitude=100)

                single_env = Gymify(
                    MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')

                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        #TODO DO NOT EVEN READ THE ABOVE CODE :'(

        denv = SubprocVecEnv([functools.partial(make_env, 0)])

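        # A 1-timestep learn() call builds the policy model and restores the weights given by load_path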
        model = ppo2.learn(network="mlp",
                           env=denv,
                           total_timesteps=1,
                           seed=0,
                           nminibatches=4,
                           log_interval=1,
                           save_interval=1,
                           load_path=name)

        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)

        return trained_agent
    raise Exception('Agent type unrecognized')