def create_and_fill_memory(stack_size=4, pretrain_length=64):
    # Arguments:
    #   stack_size: how many frames we stack together to form a single state
    #   pretrain_length: how many episodes of random play to record into the memory before training starts
    # Returns:
    #   memory: the memory object that's going to be used for experience replay
    # Implements:
    #   Creates and fills a memory object which we're going to use to train our agent.

    # Create an empty deque, instantiate a memory object and create the environment
    stacked_frames = deque([np.zeros((84, 84), dtype=np.uint8) for _ in range(stack_size)], maxlen=stack_size)
    memory = Memory()
    game, possible_actions = environment.create_environment()

    for _ in range(pretrain_length):
        # Start a new episode of the game after the previous one ends
        game.new_episode()
        # Fill the whole deque with the first frame; it is then updated iteratively with consecutive frames as the game is played
        stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, True, stack_size)

        while not game.is_episode_finished():
            # Until we finish the game (kill the monster): stack the current frame into the state,
            # choose a random action, collect its reward and add the transition to the memory
            state = game.get_state()
            stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
            action = random.choice(possible_actions)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            # Build the next state from the frame observed after the action;
            # at a terminal state there is no new frame, so reuse the current stack.
            if done:
                next_stacked_frames = stacked_frames
            else:
                next_stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, False)
            memory.add(np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done)

    return memory
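# Usage sketch: pre-fill the replay buffer with random-play transitions before any gradient
# updates, so the first sampled minibatches aren't dominated by a handful of near-identical
# frames. Assumes the module-level imports this snippet relies on (numpy as np, collections.deque,
# random, the environment and stack_frames helpers, and the Memory class).
if __name__ == "__main__":
    replay_memory = create_and_fill_memory(stack_size=4, pretrain_length=64)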
def __init__(self, master_agent: MasterAgent):
    # Clone the MasterAgent's network architecture and copy over its current weights
    self.model = tf.keras.models.clone_model(master_agent.model)
    self.model.set_weights(master_agent.model.get_weights())
    self.environment, self.possible_actions = environment.create_environment()
    self.episode_info = {'states': [], 'rewards': []}
    self.frame_skip = 4
def __init__(self, master: Master, id):
    self.master = master
    self.model = tf.keras.models.clone_model(master.model)
    self.environment, self.possible_actions = environment.create_environment()
    self.environment.new_episode()
    self.actions = [0, 1, 2]
    self.game_start = True
    self.state = []
    self.minion_id = id
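# Note: tf.keras.models.clone_model copies only the architecture, so this minion's network
# starts with freshly initialized weights. If it is meant to begin from the master's current
# parameters (as the worker in the previous snippet does), an explicit sync is needed.
# Minimal sketch using the attributes set in __init__ above; the method name is hypothetical.
def sync_with_master(self):
    # Pull the master's current weights into the minion's local copy of the network.
    self.model.set_weights(self.master.model.get_weights())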
def main(args = None, output = None):
    if output is None:
        output = sys.stdout

    parser = optparse.OptionParser()

    # Closure Template options that we support
    parser.add_option(
        "--outputPathFormat", dest = "output_format",
        help = "A format string that specifies how to build the path to each output file. "
               "You can include literal characters as well as the following $variables: "
               "${INPUT_FILE_NAME}, ${INPUT_FILE_NAME_NO_EXT}, and ${INPUT_DIRECTORY}.",
        metavar = "OUTPUT_FORMAT")

    # jinja2js specific options
    parser.add_option(
        "--directories", dest = "directories", default = [], action = "append",
        help = "List of directories to look for template files in.",
        metavar = "DIRECTORIES")
    parser.add_option(
        "--packages", dest = "packages", default = [], action = "append",
        help = "List of packages to look for template files in.",
        metavar = "PACKAGE")
    parser.add_option(
        "--codeStyle", choices = ["stringbuilder", "concat"],
        dest = "codeStyle", default = "concat", type = "choice",
        help = "The code style to use when generating JS code: either the `stringbuilder` or the `concat` style.")

    options, files = parser.parse_args(args)

    outputPathFormat = options.output_format
    if not outputPathFormat:
        parser.print_help(output)
        return 1

    env = environment.create_environment(
        packages = options.packages,
        directories = options.directories,
        writer = writerclasses[options.codeStyle])

    for filename in files:
        name = os.path.basename(filename)
        with open(filename) as template_file:
            node = env._parse(template_file.read(), name, filename)
        # Keep the generated JS in its own variable so we don't rebind `output` (the help/error stream).
        source = jscompiler.generate(node, env, name, filename)

        output_filename = get_output_filename(options.output_format, filename)
        with open(output_filename, "w") as output_file:
            output_file.write(source)

    return 0
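# Usage sketch (the template path below is made up for illustration): compile each template
# listed on the command line into a .js file placed next to it. ${INPUT_DIRECTORY} and
# ${INPUT_FILE_NAME_NO_EXT} in --outputPathFormat are expanded per input file when building
# the output path.
if __name__ == "__main__":
    sys.exit(main([
        "--outputPathFormat", "${INPUT_DIRECTORY}/${INPUT_FILE_NAME_NO_EXT}.js",
        "--codeStyle", "concat",
        "templates/greeting.jinja2",
    ]))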
def run():
    """Runs the RL algorithm with the selected agent in the selected environment."""
    print('Running...\n')
    # print_all_available_environments()

    # Create and render the gym environment
    env = create_environment(GYM_ENVS['classical_control'][1])
    env.render(mode='human')
    print_environment_info(env)

    # Create the agent
    # agent = Agent(env.action_space)
    agent = QLearningAgent(env.action_space, env.observation_space)
    print(f'Q-table size: {agent.Q.shape}')

    for episode in range(EPISODES):
        env.render(mode='human')
        print(f'Episode {episode + 1}')

        # Initialize the observed state and the reward signal
        observation = env.reset()
        reward = 0
        done = False

        while not done:
            action = agent.act(observation)
            new_observation, reward, done, _ = env.step(action)
            agent.update_Q_table(observation, new_observation, action, reward)
            observation = new_observation

            # action = agent.act(observation, reward, done)
            # observation, reward, done, _ = env.step(action)

            # Adjust the render rate
            sleep(1. / 20.)

    env.close()
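# For reference, the update that QLearningAgent.update_Q_table is expected to perform is the
# standard tabular Q-learning rule (a generic sketch, not necessarily this project's exact code;
# alpha and gamma are assumed hyperparameters, and Q is indexed by discretized states):
#
#   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
def q_learning_update(Q, state, new_state, action, reward, alpha=0.1, gamma=0.99):
    td_target = reward + gamma * Q[new_state].max()            # bootstrap from the best next action
    Q[state, action] += alpha * (td_target - Q[state, action])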
def play_game(agent, epsilon):
    # Arguments:
    #   agent: an agent instance
    #   epsilon: exploration rate
    # Returns:
    #   -
    # Implements:
    #   playing DOOM!

    # Create an environment for the agent to play in
    game, possible_actions = environment.create_environment()
    game.new_episode()
    stacked_frames = deque([np.zeros((84, 84), dtype=np.uint8) for _ in range(4)], maxlen=4)

    # Get the current state
    state = game.get_state()
    # Right after creating an environment we only have one frame to go with, so copy it 4 times
    stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, True)
    # Take an action
    action = agent.get_action(np.asarray(stacked_frames).reshape(1, 84, 84, 4), epsilon)
    # Get the reward
    reward = game.make_action(action)
    # Determine whether we reached the terminal state
    done = game.is_episode_finished()
    # Build the next state from the frame observed after the action; at a terminal state
    # there is no new frame, so reuse the current stack
    if done:
        next_stacked_frames = stacked_frames
    else:
        next_stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, False)
    # Add the transition to the agent's replay buffer
    agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))

    # Until we reach the terminal state:
    while not game.is_episode_finished():
        state = game.get_state()
        stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
        action = agent.get_action(np.asarray(stacked_frames).reshape(1, 84, 84, 4), epsilon)
        reward = game.make_action(action)
        done = game.is_episode_finished()
        if done:
            next_stacked_frames = stacked_frames
        else:
            next_stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, False)
        agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))
# Create the environment and a DDQN agent, then play one step of the game by hand.
game, possible_actions = environment.create_environment()
game.new_episode()

agent = DDQN(input_shape=[100, 120, 4],
             learning_rate=0.01,
             actions=possible_actions,
             batch_size=4,
             gamma=0.0001)
epsilon = 0.01

stacked_frames = deque([np.zeros((100, 120), dtype=np.uint8) for _ in range(4)], maxlen=4)

# Get the current state
state = game.get_state()
# Right after creating an environment we only have one frame to go with, so copy it 4 times
stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, True)
# Take an action
action = agent.get_action(np.asarray(stacked_frames).reshape(1, 100, 120, 4), epsilon)
# Get the reward
reward = game.make_action(action)
# Determine whether we reached the terminal state
done = game.is_episode_finished()
# Get the next state and store the transition in the agent's replay buffer
if done:
    next_stacked_frames = stacked_frames
else:
    next_stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, False)
agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))
import random

import torch
import torch.nn as nn
import torch.optim as optim
from itertools import count

from policy import DQN
from torchagent.agents import DQNAgent
from torchagent.memory import SequentialMemory
from torchagent.policy import DecayingEpsilonGreedyPolicy

from environment import create_environment

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

env = create_environment()

model = DQN(80, 88, 1, env.action_space.n).to(device)
loss = nn.MSELoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.00025)
memory = SequentialMemory(1000000)

# Define an agent that uses the experience buffer and value-function model that we created before.
# The agent will try to optimize the value-function to estimate the value of each possible state/action pair.
#
# The agent uses a decaying epsilon-greedy policy to determine the action to execute in the environment.
# We're moving from 100% random to a 0.1% chance of random actions over 1M steps.
# This means we'll be totally random at the start and slowly move towards a deterministic policy.
#
# Note on the gamma parameter: this controls the discount of future rewards when estimating
# the value-function. A reward received from the current action accounts for 100% of the value of an action,
# while a reward that arrives k steps later is discounted by gamma**k, so rewards far in the
# future contribute progressively less to the estimate.
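# A small worked example of what gamma does (plain Python, independent of torchagent; 0.99 is
# an illustrative value): a reward that arrives k steps in the future is weighted by gamma**k,
# so with gamma = 0.99 a reward 100 steps away is worth about 0.37 of the same reward received
# immediately.
def discounted_return(rewards, gamma=0.99):
    # Sum of gamma**k * r_k over a reward sequence: the quantity the value-function estimates.
    return sum((gamma ** k) * r for k, r in enumerate(rewards))

# discounted_return([0, 0, 1.0]) == 0.99 ** 2 == 0.9801, versus 1.0 if the reward came right away.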
def __init__(self, *args, **kwargs):
    # Build a jinja2js environment from whitespace-separated configuration strings.
    self.env = environment.create_environment(
        packages = kwargs.get("packages", "").split(),
        autoescape = kwargs.get("autoescape", "").split(),
        writer = kwargs.get("writer", "pwt.jinja2js.jscompiler.StringBuilder"))
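# Usage sketch (the enclosing class name and package names are made up for illustration):
# the keyword arguments are whitespace-separated strings that get split into the lists
# environment.create_environment expects; writer falls back to the StringBuilder compiler above.
#
#     integration = Jinja2JSIntegration(packages = "myapp.templates shared.templates")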