Example #1
def make_env():
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80, height=80, grayscale=True)
    return e
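
# For reference, a minimal usage sketch of make_env() above, assuming the
# old-style Gym API (reset() returns an observation; step() returns a 4-tuple)
# that all of these examples target:
env = make_env()
obs = env.reset()                                  # preprocessed 1x80x80 grayscale frame
obs, reward, done, info = env.step(env.action_space.sample())
env.close()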
Example #2
def make_env(arg_env_spec, arg_env_spec_id):
    env_spec = gym.spec(arg_env_spec)
    env_spec.id = arg_env_spec_id
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
Example #3
def make_env():
    env_spec = gym.spec('MsPacman-v0')
    env_spec.id = 'MsPacman-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(env),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
Example #4
def make_env():
    e = wrappers.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(
        gym.make(run.get("defaults", "env")))),
                                 width=im_width,
                                 height=im_height,
                                 grayscale=grayscale)
    if args.monitor:
        e = gym.wrappers.Monitor(e, args.monitor)
    return e
Example #5
def _thunk():
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()
    env.seed(seed + rank)
    env = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)))
    if logger.get_dir():
        env = bench.Monitor(env, os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
    gym.logger.setLevel(logging.WARN)
    return ScaleRewardEnv(env)
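
# The _thunk pattern defers environment construction so it can run inside
# worker processes. A hedged sketch of how such factories are usually consumed
# (make_thunk, the env id, and the worker count are illustrative, not from
# this snippet):
import gym

def make_thunk(rank, seed=0):
    def _thunk():
        env = gym.make('CartPole-v0')   # placeholder environment id
        env.seed(seed + rank)           # give each worker a distinct seed
        return env
    return _thunk

# e.g. handed to a vectorized-env runner such as baselines' SubprocVecEnv
thunks = [make_thunk(rank) for rank in range(4)]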
Example #6
def make_env(scenario, grayscale, input_shape):

    width, height = input_shape
    env_spec = gym.spec('ppaquette/' + scenario)
    env_spec.id = scenario  # e.g. 'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=width,
                        height=height,
                        grayscale=grayscale)
    return e
Example #7
def simpleSSBMEnv(act_every=3, **kwargs):
  env = SSBMEnv(**kwargs)

  # TODO: make this a wrapper
  env.action_space = spaces.Discrete(len(ssbm.simpleControllerStates))
  env.realController = lambda action: ssbm.simpleControllerStates[action].realController()

  from .box_wrapper import BoxWrapper
  env = BoxWrapper(env)

  from gym.wrappers import SkipWrapper
  return SkipWrapper(3)(env)
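
# A hedged sketch of the wrapper the TODO above asks for, assuming
# SSBMEnv.step() accepts the real controller state that realController()
# produces (an assumption; SimpleActionWrapper is an illustrative name):
import gym
from gym import spaces

class SimpleActionWrapper(gym.ActionWrapper):
  def __init__(self, env):
    super().__init__(env)
    self.action_space = spaces.Discrete(len(ssbm.simpleControllerStates))

  def action(self, a):
    # translate the discrete index into a real controller state
    return ssbm.simpleControllerStates[a].realController()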
Example #8
def wrap_env(env):
    # Turns standard env into env trained from pixels
    env = RenderEnv(env)
    # Applys an action for k frames
    env = SkipWrapper(4)(env)
    # Reduces frame to 84x84 per DeepMind Atari
    env = WarpFrame(env)
    # Stack frames to maintain Markov property
    env = FrameStack(env, 4)
    # Maybe clip rewards but probably not nessary
    # env = ClipRewardEnv(env)
    return env
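
# The comments above describe the standard DeepMind Atari pipeline. A minimal
# sketch of the frame-stacking idea, assuming channel-last frames (this is a
# simplification, not the baselines FrameStack implementation):
from collections import deque
import numpy as np

class SimpleFrameStack:
    def __init__(self, env, k):
        self.env, self.k = env, k
        self.frames = deque(maxlen=k)

    def reset(self):
        obs = self.env.reset()
        for _ in range(self.k):
            self.frames.append(obs)    # prime the buffer with the first frame
        return np.concatenate(self.frames, axis=-1)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.frames.append(obs)
        return np.concatenate(self.frames, axis=-1), reward, done, info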
Example #9
    def _make_env():
        env_spec = gym.spec(env_name)
        env_spec.id = env_name.split('/')[1]
        env = env_spec.make()
        env = SetResolution('160x120')(env)
        env = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                              width=80,
                              height=80)

        scale = 1.0
        if 'DoomBasic' in env_name:
            scale = 400.0

        return ScaleRewardEnv(env, scale)
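
# ScaleRewardEnv is project code that is not shown on this page. A plausible
# sketch as a gym.RewardWrapper; whether it divides or multiplies by scale is
# an assumption (dividing by 400.0 would normalize DoomBasic's large rewards):
import gym

class ScaleRewardEnv(gym.RewardWrapper):
    def __init__(self, env, scale=1.0):
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        return reward / self.scale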
Example #10
def run(env, n_epochs, discount_factor,
        plot_stats=False, api_key=None,
        network=None, batch_size=32, buffer_len=10000, initial_epsilon=0.25,
        load=False, gpu_option=0.4, initial_lr=1e-4):
    env_name = env
    make_env = lambda: PreprocessImage(
        SkipWrapper(4)(ToDiscrete("minimal")(gym.make(env_name))),
        width=80, height=80, grayscale=True)
    env = make_env()

    n_actions = env.action_space.n
    state_shape = env.observation_space.shape
    special = {
        "batch_size": batch_size,
        "buffer_len": buffer_len,
        "lr": initial_lr
    }

    network = network or conv_network
    agent = DqnAgent(
        state_shape, n_actions, network,
        gamma=discount_factor,
        special=special)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_option)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver = tf.train.Saver()
        model_dir = "./logs_" + "".join(
            "_" if c in string.punctuation else c for c in env_name
        )  # str.replace(string.punctuation, "_") would treat the whole punctuation set as one literal substring and never match
        if not load:
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, "{}/model.ckpt".format(model_dir))

        stats = q_learning(sess, agent, env, n_epochs, initial_epsilon=initial_epsilon)
        create_if_need(model_dir)
        saver.save(sess, "{}/model.ckpt".format(model_dir))

        if plot_stats:
            save_stats(stats)

        if api_key is not None:
            env = gym.wrappers.Monitor(env, "{}/monitor".format(model_dir), force=True)
            sessions = [generate_session(sess, agent, env, 0.0, int(1e10)) for _ in range(300)]
            env.close()
            gym.upload("{}/monitor".format(model_dir), api_key=api_key)
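
# A hypothetical invocation of run() above (the environment id and the
# hyperparameter values are illustrative):
run('ppaquette/DoomBasic-v0', n_epochs=1000, discount_factor=0.99,
    batch_size=32, initial_epsilon=0.25)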
Example #11
def make_image_env(env_name,
                   n_games=1,
                   episode_limit=None,
                   n_frames=1,
                   autoreload_envs=False,
                   width=64,
                   height=64,
                   grayscale=True,
                   crop=None):
    env = gym.make(env_name) if episode_limit is None else gym.make(
        env_name).env
    if "ppaquette" in env_name:
        env = SkipWrapper(4)(ToDiscrete("minimal")(env))
    env = PreprocessImage(env,
                          width=width,
                          height=height,
                          grayscale=grayscale,
                          crop=crop)
    env = FrameBuffer(env, n_frames=n_frames) if n_frames > 1 else env
    if episode_limit is not None:
        env = TimeLimit(env, max_episode_steps=episode_limit)
    return EnvPool(env, n_games, autoreload_envs) if n_games > 0 else env
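
# Hypothetical invocations showing the two return modes of make_image_env()
# (environment ids follow the other examples on this page):
pool = make_image_env('ppaquette/DoomBasic-v0', n_games=4, n_frames=4)  # EnvPool of 4 games
env = make_image_env('MsPacman-v0', n_games=0, episode_limit=1000)      # single wrapped env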
Example #12
def test_skip():
    every_two_frame = SkipWrapper(2)
    env = gym.make("FrozenLake-v0")
    env = every_two_frame(env)
    obs = env.reset()
    env.render()
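
# For intuition, a conceptual re-implementation of the wrapper SkipWrapper(k)
# returns (a sketch, not the gym source): repeat each chosen action for k
# frames and accumulate the reward along the way.
class Skip:
    def __init__(self, env, k):
        self.env, self.k = env, k

    def reset(self):
        return self.env.reset()

    def step(self, action):
        total_reward = 0.0
        for _ in range(self.k):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info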
Example #13
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
        # Convert the input images to a numpy array, then to a torch tensor, and wrap it in a Variable (which holds both the tensor and its gradient).
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy() 


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
    width=80, height=80, grayscale=True)
# gym.make("ppaquette/DoomCorridor-v0") imports the DoomCorridor-v0 environment we are playing.
# PreprocessImage (from image_preprocessing.py) resizes each frame to a square 80x80 grayscale image before it is fed to the neural network.
doom_env = gym.wrappers.Monitor(doom_env, "videos", force = True)
# Records videos of the gameplay into the "videos" folder so they can be reviewed after the game ends.
number_actions = doom_env.action_space.n      # number of actions (7) for Doom: move left/right/forward, turn left/right, run, and shoot

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T = 1.0)        #Temperature = 1.0
ai = AI(brain = cnn, body = softmax_body)

# Setting up Experience Replay using Eligibility trace (step size = 10)
n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10)
memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000)
# The memory holds the last 10000 steps performed by the AI; it learns from them every 10 steps.
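
# The comment above describes learning every 10 steps. A hedged sketch of the
# n-step return such a setup bootstraps from (gamma and the argument names are
# illustrative; experience_replay's actual API may differ):
def n_step_return(rewards, bootstrap_value, gamma=0.99):
    # R = r_0 + gamma*r_1 + ... + gamma^(n-1)*r_{n-1} + gamma^n * V(s_n)
    ret = bootstrap_value
    for r in reversed(rewards):
        ret = r + gamma * ret
    return ret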
Example #14
    def __init__(self,
                 name,
                 sess,
                 env='ppaquette/DoomDefendCenter-v0',
                 eps=100,
                 eps_save=0,
                 time=400,
                 learning=1e-6,
                 gamma=.96,
                 tau=.94,
                 seed=None,
                 render=False,
                 rec=False):
        self.name = 'agent_' + str(name)
        self.eps = eps
        self.eps_ran = 0
        self.time = time
        self.render = render
        self.learning = learning
        self.gamma = gamma
        self.tau = tau
        self.sess = sess
        self.eps_save = eps_save

        self.env = gym.make(env)
        self.env = ToDiscrete("minimal")(self.env)
        self.env = SkipWrapper(2)(self.env)
        self.env = PreprocessImage(self.env,
                                   height=80,
                                   width=80,
                                   grayscale=True)
        if rec:
            self.env = gym.wrappers.Monitor(self.env, 'videos', force=True)
        if seed is not None:
            self.env.seed(seed)
            # rd.seed(seed)
            # tf.set_random_seed(seed)

        self.output = self.env.action_space.n
        self.input = np.concatenate(
            ([None], list(self.env.observation_space.shape)))

        with tf.variable_scope('global', reuse=True):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            'global')

        if self.eps_save > 0:
            vars_save = {v.op.name: v for v in global_vars}
            self.saver = tf.train.Saver(vars_save)
            self.check_save = self.save
        else:
            self.check_save = lambda: None

        with tf.variable_scope(self.name):
            self.ac = AC(self.input, self.output, self.sess)
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           self.name)
            with tf.variable_scope('loss_critic'):
                self.target_critic = tf.placeholder(tf.float32, [None],
                                                    name='target_critic')
                self.batch = tf.placeholder(tf.int32, name='batch')
                self.loss_critic = tf.reduce_sum(
                    tf.square(self.target_critic -
                              tf.reshape(self.ac.value_critic, [self.batch]),
                              name='square_loss_critic') * .5,
                    name='reduce_sum_critic')

            with tf.variable_scope('loss_actor'):
                self.td_error = tf.placeholder(tf.float32, [None],
                                               name='td_error')
                self.action = tf.placeholder(tf.int32, [None], name='action')
                self.loss_actor = tf.reduce_sum(
                    self.td_error *
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.ac.action_logits, labels=self.action))
                entropy = -tf.reduce_sum(
                    tf.nn.softmax(self.ac.action_logits) *
                    tf.nn.log_softmax(self.ac.action_logits))
                self.loss_actor -= entropy * .02

            with tf.variable_scope('loss'):
                self.loss_total = self.loss_actor + .5 * self.loss_critic

        gradients = tf.gradients(self.loss_total, local_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, 10.)

        with tf.variable_scope('global', reuse=tf.AUTO_REUSE):
            opt = tf.train.AdagradOptimizer(self.learning)
            self.train_op = [
                opt.apply_gradients(zip(gradients, global_vars)),
                self.global_step.assign_add(self.batch)
            ]

        self.reset_model = [
            l.assign(g) for l, g in zip(local_vars, global_vars)
        ]
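
# The actor loss above subtracts an entropy bonus to keep the policy
# exploratory. The same quantity in isolation, as a numpy sketch of what the
# TensorFlow expression computes:
import numpy as np

def policy_entropy(logits):
    z = logits - logits.max(-1, keepdims=True)                 # stabilized softmax
    log_softmax = z - np.log(np.exp(z).sum(-1, keepdims=True))
    return -(np.exp(log_softmax) * log_softmax).sum()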
Example #15

class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Warrior environment
warrior_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/WarriorCorridor-v0"))),
                                                  width=80,
                                                  height=80,
                                                  grayscale=True)
warrior_env = gym.wrappers.Monitor(warrior_env, "videos", force=True)
number_actions = warrior_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=warrior_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
Example #16

class ai:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()


env = gym.make('ppaquette/DoomBasic-v0')
game_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(env)),
                                               width=80,
                                               height=80,
                                               grayscale=True)
game_env = gym.wrappers.Monitor(game_env, "videos", force=True)
number_actions = game_env.action_space.n

# Building the AI
cnn = CNN(number_actions)
softmaxbody = SoftmaxBody(temp=1.0)
ai = ai(brain=cnn, body=softmaxbody)

# Setting up experience replay
n_steps = experience_replay.NStepProgress(env=game_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
Example #17
class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(1)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
Example #18

            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    pressed_keys.append(event.key)
                elif event.key == 27:  # escape key quits
                    running = False
            elif event.type == pygame.KEYUP:
                if event.key in relevant_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        clock.tick(fps)
    pygame.quit()


if __name__ == '__main__':
    env = gym.make('Breakout-v0')
    wrapper = SkipWrapper(SPEED)  # 0 = don't skip
    env = wrapper(env)
    env = PreproWrapper(env,
                        prepro=lambda x: downsample(x),
                        shape=(105, 80, 3))

    record_game(env, RECORD_FILE, zoom=4)
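
# downsample() is defined elsewhere in that script. A common choice consistent
# with the (105, 80, 3) shape above is 2x subsampling of a 210x160 Atari frame
# (an assumption about the original implementation):
def downsample(frame):
    # 210x160x3 -> 105x80x3 by keeping every second row and column
    return frame[::2, ::2, :]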
Example #19
# Make ai
class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        brain_out = self.brain.forward(input)
        actions = self.body.forward(brain_out)
        return actions.data.numpy()


# Training the AI with deep convolutional Q-learning
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendLine-v0"))),
                                               height=80,
                                               width=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Eligibility trace ... n-step Q-learning
Example #20
        n_colors = 1 if self.grayscale else 3
        self.observation_space = Box(0.0, 1.0, [n_colors, height, width])

    def _observation(self, img):
        img = self.crop(img)                   # crop the region of interest
        img = imresize(img, self.img_size)     # resize (scipy.misc.imresize, deprecated)
        if self.grayscale:
            img = img.mean(-1, keepdims=True)  # average the color channels
        img = np.transpose(img, (2, 0, 1))     # HWC -> CHW for the CNN
        img = img.astype('float32') / 255.     # scale pixels to [0, 1]
        return img
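
# The same pipeline as a standalone function, sketched with cv2.resize in
# place of the deprecated scipy.misc.imresize and with the crop step omitted
# (a substitution for illustration, not this project's code):
import cv2
import numpy as np

def preprocess(img, height=80, width=80, grayscale=True):
    img = cv2.resize(img, (width, height))
    if grayscale:
        img = img.mean(-1, keepdims=True)    # average the color channels
    img = np.transpose(img, (2, 0, 1))       # HWC -> CHW
    return img.astype('float32') / 255.      # scale pixels to [0, 1]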

# Getting the Doom environment

doom_env = PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
print("Environment created!")
number_actions = doom_env.action_space.n

# Building an AI

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Replay

n_steps = NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = ReplayMemory(n_steps=n_steps, capacity=10000)
Example #21

        # Putting data through the brain and taking the output
        output = self.brain(input_data)

        #Putting output into body and taking the actions
        actions = self.body(output)

        #Returning actions in right format
        return actions.data.numpy()


# Part 2 - Implementing Deep Convolutional Q-Learning

# Getting the Doom environment
# gym.make imports the environment
# image_preprocessing resizes incoming frames to 80x80 grayscale
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete('minimal')(gym.make('ppaquette/DoomCorridor-v0'))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
# Saves videos of the AI playing Doom into the "videos" folder
doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True)

# Getting the number of actions from the Doom environment
number_actions = doom_env.action_space.n

#Building an AI
#Creating an object of our CNN class
cnn = CNN(number_actions)
# Creating an object of our SoftmaxBody class and setting the temperature
softmax_body = SoftmaxBody(T=1.0)
# Creating an object of our AI class with the brain and body
Example #22
    def __call__(self, inputs):
        # Converting the images to the correct format:
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        # - convert the images to a numpy array
        # - cast the pixels to float32
        # - wrap the numpy-backed torch tensor in a Variable
        output = self.brain(input)  #passing tensor to brain
        actions = self.body(output)  #passing output to body
        return actions.data.numpy()  #converting action to numpy


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(
        gym.make("ppaquette/DoomCorridor-v0"))),
    width=80,
    height=80,
    grayscale=True)  # importing the environment and preprocessing it
# the frame dimensions must match the neural network's input size
doom_env = gym.wrappers.Monitor(doom_env, "videos",
                                force=True)  #saving to videos
number_actions = doom_env.action_space.n  # number of actions possible in the environment

# Building an AI
cnn = CNN(number_actions)  # instantiating the CNN brain
softmax_body = SoftmaxBody(T=1.0)  # setting the softmax temperature
ai = AI(brain=cnn, body=softmax_body)  # assembling the AI from brain and body

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai,