Example 1
def create_and_fill_memory(stack_size=4, pretrain_length=64):
    # Arguments:
    #   stack_size: The stack size (how many frames we stack together to form a single state)
    #   pretrain_length: How many transitions we fill the memory with before training starts
    # Returns:
    #   memory: The memory object that will be used for experience replay
    # Implements:
    #   Creates and fills a memory object that we will use to train our agent.

    # Create an empty deque, instantiate a memory object and create the environment
    stacked_frames = deque([np.zeros((84, 84), dtype=int) for _ in range(stack_size)], maxlen=stack_size)
    memory = Memory()
    game, possible_actions = environment.create_environment()

    for i in range(pretrain_length):
        # Start a new episode of the game after it ends
        game.new_episode()
        # Fill the deque with the first frame; it is then updated iteratively with consecutive frames as the game is played
        stacked_frames = stack_frames.stack_frames(stacked_frames, game.get_state().screen_buffer, True, stack_size)
        while not game.is_episode_finished():
            # Until the episode ends (the monster is killed), stack the current frame into the state,
            # choose a random action, get its reward, and add the whole transition to the memory
            state = game.get_state()
            stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
            action = random.choice(possible_actions)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            next_stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
            memory.add(np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done)

    return memory
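
A minimal usage sketch for the function above (not part of the original example); the sample() call is hypothetical and only indicates how a pre-filled replay buffer is typically consumed during training:

filled_memory = create_and_fill_memory(stack_size=4, pretrain_length=64)
# minibatch = filled_memory.sample(64)  # hypothetical Memory API, shown for illustration only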
Example 2
 def __init__(self, master_agent: MasterAgent):
     self.model = tf.keras.models.clone_model(master_agent.model)  # clone the MasterAgent's network architecture
     self.model.set_weights(master_agent.model.get_weights())
     self.environment, self.possible_actions = environment.create_environment()
     self.episode_info = {'states': [], 'rewards': []}
     self.frame_skip = 4
Example 3
 def __init__(self, master: Master, id):
     self.master = master
     self.model = tf.keras.models.clone_model(master.model)
     self.environment, self.possible_actions = environment.create_environment()
     self.environment.new_episode()
     self.actions = [0, 1, 2]
     self.game_start = True
     self.state = []
     self.minion_id = id
Example 4
def main(args = None, output = None):
    if output is None:
        output = sys.stdout

    parser = optparse.OptionParser()
    # closure template options that we support
    parser.add_option(
        "--outputPathFormat", dest = "output_format",
        help = "A format string that specifies how to build the path to each output file. You can include literal characters as well as the following $variables: ${INPUT_FILE_NAME}, ${INPUT_FILE_NAME_NO_EXT}, and ${INPUT_DIRECTORY}.",
        metavar = "OUTPUT_FORMAT")

    # jinja2js specific options
    parser.add_option(
        "--directories", dest = "directories",
        default = [], action = "append",
        help = "List of directories to look for template files.",
        metavar = "DIRECTORIES")
    parser.add_option(
        "--packages", dest = "packages",
        default = [], action = "append",
        help = "List of packages to look for template files.",
        metavar = "PACKAGE")

    parser.add_option(
        "--codeStyle", choices = ["stringbuilder", "concat"],
        dest = "codeStyle", default = "concat", type = "choice",
        help = "The code style to use when generating JS code. Either `stringbuilder` or `concat` styles.")

    options, files = parser.parse_args(args)

    outputPathFormat = options.output_format
    if not outputPathFormat:
        parser.print_help(output)
        return 1

    env = environment.create_environment(
        packages = options.packages,
        directories = options.directories,
        writer = writerclasses[options.codeStyle])

    for filename in files:
        name = os.path.basename(filename)
        node = env._parse(open(filename).read(), name, filename)

        compiled = jscompiler.generate(node, env, name, filename)

        output_filename = get_output_filename(options.output_format, filename)

        open(output_filename, "w").write(compiled)

    return 0
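
The --outputPathFormat help text above documents ${INPUT_FILE_NAME}-style variables, but get_output_filename itself is not shown in this example. A rough sketch of how such a helper could expand those variables (the project's real implementation may differ):

import os

def get_output_filename_sketch(output_format, filename):
    # Expand the documented $variables against the input template path
    basename = os.path.basename(filename)
    name_no_ext = os.path.splitext(basename)[0]
    return (output_format
            .replace("${INPUT_FILE_NAME}", basename)
            .replace("${INPUT_FILE_NAME_NO_EXT}", name_no_ext)
            .replace("${INPUT_DIRECTORY}", os.path.dirname(filename)))

# e.g. get_output_filename_sketch("out/${INPUT_FILE_NAME_NO_EXT}.js", "templates/page.jinja") -> "out/page.js"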
Example 5
def run():
    """Runs RL algorithm on selected agent in selected environment."""
    print('Running...\n')

    # print_all_available_environments()

    # Create and render gym environment
    env = create_environment(GYM_ENVS['classical_control'][1])
    env.render(mode='human')

    print_environment_info(env)

    # Create agent
    # agent = Agent(env.action_space)
    agent = QLearningAgent(env.action_space, env.observation_space)

    print(f'Q-table size: {agent.Q.shape}')

    for episode in range(EPISODES):
        env.render(mode='human')
        print(f'Episode {episode + 1}')

        # Initialize observed state and reward signal
        observation = env.reset()
        reward = 0

        done = False

        while not done:
            action = agent.act(observation)
            new_observation, reward, done, _ = env.step(action)
            agent.update_Q_table(observation, new_observation, action, reward)

            observation = new_observation

        # action = agent.act(observation, reward, done)
        # observation, reward, done, _ = env.step(action)

        # Adjust render rate
        sleep(1. / 20.)

    env.close()
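
agent.update_Q_table is not shown above; for reference, the standard tabular Q-learning update it would typically perform looks like the sketch below (alpha and gamma are illustrative values, and Q is assumed to be a NumPy table indexed by state and action, consistent with the agent.Q.shape print above):

def q_learning_update(Q, state, new_state, action, reward, alpha=0.1, gamma=0.99):
    # Move Q[state, action] towards the bootstrapped target r + gamma * max_a' Q(new_state, a')
    td_target = reward + gamma * Q[new_state].max()
    Q[state, action] += alpha * (td_target - Q[state, action])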
Example 6
def play_game(agent, epsilon):
    # Arguments:
    #   agent: an agent instance
    #   epsilon: exploration rate
    # Returns:
    #   -
    # Implements:
    #   playing DOOM!

    # create an environment for agent to play in
    game, possible_actions = environment.create_environment()
    game.new_episode()

    stacked_frames = deque([np.zeros((84, 84), dtype=int) for _ in range(4)], maxlen=4)
    # get current state
    state = game.get_state()
    # Since right after creating the environment we only have one frame to work with, copy it 4 times
    stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, True)
    # take action
    action = agent.get_action(np.asarray(stacked_frames).reshape(1, 84, 84, 4), epsilon)
    # get reward
    reward = game.make_action(action)
    # determine if we're in the terminal state or not
    done = game.is_episode_finished()
    # get the next state
    next_stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
    # add it to agent's replay buffer
    agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))

    # until we reach the terminal state:
    while not game.is_episode_finished():
        state = game.get_state()
        stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
        action = agent.get_action(np.asarray(stacked_frames).reshape(1, 84, 84, 4), epsilon)
        reward = game.make_action(action)
        done = game.is_episode_finished()
        next_stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
        agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))
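
stack_frames.stack_frames is used throughout these examples but never shown. A rough sketch of the usual frame-stacking convention it appears to follow (any resizing or normalisation of screen_buffer to 84x84 would be an additional step and is only an assumption here):

from collections import deque

def stack_frames_sketch(stacked_frames, frame, is_new_episode, stack_size=4):
    if is_new_episode:
        # At the start of an episode there is only one frame, so repeat it stack_size times
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # Otherwise append the newest frame; the deque drops the oldest one automatically
        stacked_frames.append(frame)
    return stacked_frames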
Example 7
#     done = game.is_episode_finished()
#     # get the next state
#     next_stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
#     # add it to agent's replay buffer
#     agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))
#
#     # until we reach the terminal state:
#     while not game.is_episode_finished():
#         state = game.get_state()
#         stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
#         action = agent.get_action(np.asarray(stacked_frames).reshape(1, 84, 84, 4), epsilon)
#         reward = game.make_action(action)
#         done = game.is_episode_finished()
#         next_stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, False)
#         agent.add_experience((np.asarray(stacked_frames).T, action, reward, np.asarray(next_stacked_frames).T, done))
game, possible_actions = environment.create_environment()
game.new_episode()
agent = DDQN(input_shape=[100, 120, 4], learning_rate=0.01, actions=possible_actions, batch_size=4, gamma=0.0001)
epsilon=0.01
stacked_frames = deque([np.zeros((100, 120), dtype=int) for _ in range(4)], maxlen=4)
# get current state
state = game.get_state()
# Since right after creating the environment we only have one frame to work with, copy it 4 times
stacked_frames = stack_frames.stack_frames(stacked_frames, state.screen_buffer, True)
# take action
action = agent.get_action(np.asarray(stacked_frames).reshape(1, 100, 120, 4), epsilon)
# get reward
reward = game.make_action(action)
# determine if we're in the terminal state or not
done = game.is_episode_finished()
# get the next state
Example 8
import random
import torch
import torch.nn as nn
import torch.optim as optim
from itertools import count

from policy import DQN
from torchagent.agents import DQNAgent
from torchagent.memory import SequentialMemory
from torchagent.policy import DecayingEpsilonGreedyPolicy

from environment import create_environment

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

env = create_environment()
model = DQN(80, 88, 1, env.action_space.n).to(device)

loss = nn.MSELoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.00025)
memory = SequentialMemory(1000000)

# Define an agent that uses the experience buffer and value-function model that we created before.
# The agent will try to optimize the value-function to estimate the value of each possible state/action pair.
#
# The agent uses a decaying epsilon-greedy policy to determine the action to execute in the environment.
# We're moving from 100% random to 0.1% chance of random actions in 1M steps.
# This means we'll act completely at random at the start and slowly move towards a deterministic policy.
#
# Note on the gamma parameter: this controls the discount of future rewards when it comes to estimating
# the value-function. A reward received from the current action accounts for 100% of the value of an action
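
Two small illustrations of the ideas described in the comments above (not part of the original example; the real torchagent DecayingEpsilonGreedyPolicy may use a different schedule):

# Linear decay from fully random (epsilon = 1.0) to a 0.1% chance of random actions over 1M steps
def epsilon_at(step, start=1.0, end=0.001, decay_steps=1_000_000):
    fraction = min(step / decay_steps, 1.0)
    return start + fraction * (end - start)

# Gamma-discounted return: the immediate reward counts in full, a reward k steps ahead is scaled by gamma**k
def discounted_return(rewards, gamma=0.99):
    return sum(r * gamma ** k for k, r in enumerate(rewards))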
Example 9
 def __init__(self, *args, **kwargs):
     self.env = environment.create_environment(
         packages = kwargs.get("packages", "").split(),
         autoescape = kwargs.get("autoescape", "").split(),
         writer = kwargs.get("writer", "pwt.jinja2js.jscompiler.StringBuilder"))