def main():
    ########################################################################
    """ Initialize variables. """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #device = 'cpu'
    print("The device is " + str(device))
    starting_floor = 0
    total_floors = 9
    max_steps = 1000
    override_threshold = 1000
    ########################################################################
    """ Retrieve the learnt policy. """
    policy_model = unpickle_object("policy_model")  #read the object from disk
    ########################################################################
    """ Let the agent play a game using the policy. """
    env = create_env(starting_floor=starting_floor, total_floors=total_floors)
    imitation_play(env,
                   policy_model,
                   device,
                   max_steps=max_steps,
                   override_threshold=override_threshold)
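########################################################################
# unpickle_object is a small repo helper that is not shown in this excerpt; a
# minimal sketch of the assumed behaviour follows (the ".pkl" suffix and plain
# pickle protocol are assumptions, not confirmed by the source).
import pickle


def unpickle_object(name):
    """Load a previously pickled object, e.g. the trained policy model, from disk."""
    with open(name + ".pkl", "rb") as handle:
        return pickle.load(handle)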
            state = next_state
            total_reward += reward
            steps += 1
            if steps >= max_steps_in_demo_episode or done:
                break


#====
torch.manual_seed(hyper_params['seed'])
torch.cuda.manual_seed_all(hyper_params['seed'])
np.random.seed(hyper_params['seed'])
random.seed(hyper_params['seed'])

assert "NoFrameskip" in hyper_params['env'], "Require environment with no frameskip"

env = create_env(0, 1)
env.seed(hyper_params['seed'])
#env = NoopResetEnv(env, noop_max=30)
env = MaxAndSkipEnv(env, skip=4)
#env = EpisodicLifeEnv(env)
#env = FireResetEnv(env)
env = WarpFrame(env)
env = PyTorchFrame(env)
env = ClipRewardEnv(env)
env = FrameStack(env, 3)

replay_buffer = ReplayBuffer(hyper_params['replay_buffer_size'])
agent = DQNAgent(env.observation_space, env.action_space, replay_buffer,
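#====
# PyTorchFrame is a repo-specific wrapper that is not reproduced in this
# excerpt; a minimal sketch, assuming it only transposes the WarpFrame output
# from HWC to the CHW layout PyTorch convolutions expect.
import gym
import numpy as np


class PyTorchFrame(gym.ObservationWrapper):
    """Move the channel axis to the front so frames can be fed straight to a CNN."""

    def __init__(self, env):
        super(PyTorchFrame, self).__init__(env)
        shape = env.observation_space.shape
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=(shape[-1], shape[0], shape[1]),
                                                dtype=np.uint8)

    def observation(self, observation):
        return np.rollaxis(np.array(observation), 2)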
#batch update parameters
max_epochs = 5e6  #number of batches to create
batch_size = 500  #number of steps in one batch
mini_batch_size = 250  #how many steps are used to update the loss
ppo_epochs = 2  #number of mini-batches to sample in the ppo update || mini_batch_size * ppo_epochs = batch_size recommended
epochs_before_printing = 5  #number of epochs between printed progress updates

#demo information
show_demo = False  #whether or not to make a video of the current progress
max_steps_in_demo_episode = 200  #number of steps to show in the demo episode

#environment parameters
starting_floor = 0
total_floors = 1
worker_id = 1
env = create_env(starting_floor, total_floors, worker_id)
policy_actions = unpickle_object('action_map')  #map going from policy actions to env actions
override_threshold = 2000  #score used to determine if the agent is stuck

#deep learning setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ActorCritic(len(policy_actions)).to(device)
#model = unpickle_object('policy_model')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
########################################################################
""" Train the model. """
experiment = Experiment(api_key="47QJ41M89a6zNXZgS9sY6NQfI",
                        project_name="unity",
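#====
# How the batch hyperparameters above relate: each update collects batch_size
# environment steps, then the ppo update samples ppo_epochs random mini-batches
# of mini_batch_size steps each, which is why mini_batch_size * ppo_epochs =
# batch_size is recommended (every collected step is then reused roughly once).
# A minimal illustrative sketch of that sampling, not the repo's exact helper:
import numpy as np


def ppo_minibatch_indices(batch_size, mini_batch_size, ppo_epochs):
    """Yield one random mini-batch of step indices per ppo epoch."""
    for _ in range(ppo_epochs):
        yield np.random.randint(0, batch_size, mini_batch_size)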
import ipdb
import matplotlib.pyplot as plt
from collections import deque
import keyboard
import time
import datetime
import random
import numpy as np

from recorder import create_env, record_game, save_file_as_csv

########################################################################
"""
https://discourse.aicrowd.com/t/actions-meaning-in-retro-mode/931
6 - rotate counter-clockwise
12 - rotate clockwise
18 - move forward
3 - jump
22 - jump and move forward - may want to include this
"""
action_map = {'w': 18, 'q': 6, 'e': 12, 'a': 3, 's': 21}
max_steps_to_record = 10e100
starting_floor = 0
total_floors = 10
########################################################################
env = create_env(starting_floor=starting_floor, total_floors=total_floors)
state_hist, action_hist, reward_hist, key_hist = record_game(
    env, max_steps_to_record, action_map)
save_file_as_csv(state_hist, action_hist, reward_hist, key_hist)
########################################################################
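########################################################################
# record_game comes from recorder.py and is not reproduced above; a minimal
# sketch of the assumed key-polling loop (the default 'w'/move-forward action
# and the returned history format are assumptions for illustration only):
def record_game_sketch(env, max_steps, action_map):
    """Poll the keyboard, step the environment, and log states/actions/rewards/keys."""
    state_hist, action_hist, reward_hist, key_hist = [], [], [], []
    state = env.reset()
    steps, done = 0, False
    while steps < max_steps and not done:
        #use the first pressed key we recognise, defaulting to moving forward
        key = next((k for k in action_map if keyboard.is_pressed(k)), 'w')
        action = action_map[key]
        next_state, reward, done, _ = env.step(action)
        state_hist.append(state)
        action_hist.append(action)
        reward_hist.append(reward)
        key_hist.append(key)
        state = next_state
        steps += 1
    return state_hist, action_hist, reward_hist, key_hist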