Example #1
def main():
    ########################################################################
    """
    Initialize variables.
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #device = 'cpu'
    print("The device is " + str(device))
    starting_floor = 0
    total_floors = 9
    max_steps = 1000
    override_threshold = 1000

    ########################################################################
    """
    Retrieve the learnt policy.
    """

    policy_model = unpickle_object("policy_model")  #read the object from disk

    ########################################################################
    """
    Let the agent play a game using the policy.
    """

    env = create_env(starting_floor=starting_floor, total_floors=total_floors)
    imitation_play(env,
                   policy_model,
                   device,
                   max_steps=max_steps,
                   override_threshold=override_threshold)
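The policy is loaded with the project's unpickle_object helper, which is not shown in this excerpt. A minimal pickle-based sketch of what such a helper could look like (the function body and the ".pkl" suffix are assumptions, not the project's actual code):

import pickle

def unpickle_object(name):
    #hypothetical stand-in: read a pickled object from disk by name
    with open(name + ".pkl", "rb") as f:
        return pickle.load(f)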
Example #2
            state = next_state
            total_reward += reward
            steps += 1

            if steps >= max_steps_in_demo_episode or done:
                break

    #====
    torch.manual_seed(hyper_params['seed'])
    torch.cuda.manual_seed_all(hyper_params['seed'])
    np.random.seed(hyper_params['seed'])
    random.seed(hyper_params['seed'])

    assert "NoFrameskip" in hyper_params[
        'env'], "Require environment with no frameskip"
    env = create_env(0, 1)  #starting_floor=0, total_floors=1
    env.seed(hyper_params['seed'])
    #env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    #env = EpisodicLifeEnv(env)
    #env = FireResetEnv(env)
    env = WarpFrame(env)
    env = PyTorchFrame(env)
    env = ClipRewardEnv(env)
    env = FrameStack(env, 3)

    replay_buffer = ReplayBuffer(hyper_params['replay_buffer_size'])

    agent = DQNAgent(env.observation_space,
                     env.action_space,
                     replay_buffer,
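The wrappers above follow the familiar Atari-style preprocessing chain: frame skipping, warped grayscale frames, channel-first tensors for PyTorch, clipped rewards, and a stack of 3 frames. A quick sanity check of the resulting observation shape, assuming the wrappers behave like the OpenAI baselines versions they are modeled on:

obs = env.reset()
print(np.asarray(obs).shape)  #expected to be roughly (3, 84, 84) under those assumptions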
Example #3
#batch update parameters
max_epochs = 5e6  #number of batches to create
batch_size = 500  #number of steps in one batch
mini_batch_size = 250  #how many steps are used to update the loss
ppo_epochs = 2  #number of mini-batches sampled per PPO update || mini_batch_size * ppo_epochs = batch_size recommended
epochs_before_printing = 5  #print a progress update every this many epochs

#demo information
show_demo = False  #whether or not to make a video of the current progress
max_steps_in_demo_episode = 200  #number of steps to show in demo episode

#environment parameters
starting_floor = 0
total_floors = 1
worker_id = 1
env = create_env(starting_floor, total_floors, worker_id)
policy_actions = unpickle_object('action_map')  #map from policy actions to env actions
override_threshold = 2000  #score used to determine if agent is stuck

#deep learning setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ActorCritic(len(policy_actions)).to(device)
#model = unpickle_object('policy_model')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
########################################################################
"""
Train the model.
"""
experiment = Experiment(api_key="47QJ41M89a6zNXZgS9sY6NQfI",
                        project_name="unity",
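The comment on ppo_epochs recommends mini_batch_size * ppo_epochs = batch_size, which the values above satisfy (250 * 2 = 500). A hypothetical sketch of the mini-batch sampling this implies, not the author's actual update loop:

assert mini_batch_size * ppo_epochs == batch_size  #250 * 2 == 500
for _ in range(ppo_epochs):
    idx = np.random.choice(batch_size, mini_batch_size, replace=False)
    #the transitions selected by idx would feed one PPO gradient step here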
Example #4
import ipdb
import matplotlib.pyplot as plt
from collections import deque
import keyboard
import time
import datetime
import random
import numpy as np
from recorder import create_env, record_game, save_file_as_csv
########################################################################
"""
https://discourse.aicrowd.com/t/actions-meaning-in-retro-mode/931
6 - rotate counter-clockwise
12 - rotate clockwise
18 - move forward
3 - jump
22 - jump and move forward - may want to include this
"""

action_map = {'w': 18, 'q': 6, 'e': 12, 'a': 3, 's': 21}
max_steps_to_record = 10e100  #effectively no limit on recording length
starting_floor = 0
total_floors = 10

########################################################################
env = create_env(starting_floor=starting_floor, total_floors=total_floors)
state_hist, action_hist, reward_hist, key_hist = record_game(
    env, max_steps_to_record, action_map)
save_file_as_csv(state_hist, action_hist, reward_hist, key_hist)
########################################################################
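record_game itself is imported from recorder.py and is not shown here. A hypothetical sketch of the key-to-action loop such a helper might implement, using the keyboard package and the action_map above (all names, the idle_action default, and the old gym 4-tuple step API are assumptions):

def record_game_sketch(env, max_steps, action_map, idle_action=0):
    #hypothetical stand-in for recorder.record_game -- not the project's code
    state_hist, action_hist, reward_hist, key_hist = [], [], [], []
    obs = env.reset()
    for _ in range(int(max_steps)):
        #take the first currently pressed key that has a mapping
        key = next((k for k in action_map if keyboard.is_pressed(k)), None)
        action = action_map.get(key, idle_action)
        next_obs, reward, done, _ = env.step(action)
        state_hist.append(obs)
        action_hist.append(action)
        reward_hist.append(reward)
        key_hist.append(key)
        obs = next_obs
        if done:
            break
    return state_hist, action_hist, reward_hist, key_hist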