Example #1
def wrap_doom_84x84_v2(env, frame_skip=4, action_delay=None, preproc_mask=False, preproc_mask_big=False, preproc_mask_out=False, zclip=False, depth=False, frame_flicker=False, frame_repeat=None, history_len=4):
  env = ToDiscrete("minimal")(env)
  if frame_skip > 1:
    env = SkipFrameEnv(env, skip=frame_skip)
  if action_delay is not None and action_delay > 0:
    env = DelayedActionEnv(env, action_delay)
  if preproc_mask:
    env = PreprocDoomRGBDMask(env)
  elif preproc_mask_big:
    env = PreprocDoomRGBDMaskBig(env)
  elif preproc_mask_out:
    env = PreprocDoomRGBDMaskOut(env)
  if zclip:
    env = ProcessDoomRGBDHardZClip(env)
  elif depth:
    env = ProcessDoomRGBDDepth(env)
  elif frame_flicker:
    env = ProcessDoomRGBDFlicker(env, 0.5)
  elif frame_repeat is not None and frame_repeat > 1:
    env = ProcessDoomRGBDRepeat(env, frame_repeat)
  else:
    env = ProcessDoomRGBD(env)
  env = Transpose3DEnv(env)
  if history_len > 1:
    env = StackFrameEnv(env, history_len)
  return env
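
# A minimal usage sketch of the wrapper chain above, assuming the ppaquette Doom
# package is installed (importing it registers the ppaquette/Doom* envs); the
# environment name and flag choices are illustrative.
import gym
import ppaquette_gym_doom  # noqa: F401 (side effect: env registration)

env = gym.make("ppaquette/DoomBasic-v0")
env = wrap_doom_84x84_v2(env, frame_skip=4, depth=True, history_len=4)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())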
Example #2
def make_env():
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80, height=80, grayscale=True)
    return e
Example #3
def wrap_doom_84x84(env, frame_skip=4, history_len=4):
  env = ToDiscrete("minimal")(env)
  env = SkipFrameEnv(env, skip=frame_skip)
  env = ProcessFrame84x84Doom(env)
  env = Transpose3DEnv(env)
  if history_len > 1:
    env = StackFrameEnv(env, history_len)
  return env
Example #4
def make_env(arg_env_spec, arg_env_spec_id):
    env_spec = gym.spec(arg_env_spec)
    env_spec.id = arg_env_spec_id
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
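
# Reassigning env_spec.id strips the 'ppaquette/' prefix so downstream tools
# (e.g. gym's Monitor) see a plain id. A hedged usage sketch:
env = make_env('ppaquette/DoomBasic-v0', 'DoomBasic-v0')
print(env.observation_space.shape)  # (1, 80, 80): grayscale, channels-first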
Example #5
def make_env():
    e = wrappers.PreprocessImage(
        SkipWrapper(4)(ToDiscrete("minimal")(gym.make(run.get("defaults", "env")))),
        width=im_width,
        height=im_height,
        grayscale=grayscale)
    if args.monitor:
        e = gym.wrappers.Monitor(e, args.monitor)
    return e
Example #6
def _thunk():
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()
    env.seed(seed + rank)
    env = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)))
    if logger.get_dir():
        env = bench.Monitor(env, os.path.join(logger.get_dir(),
                                              "{}.monitor.json".format(rank)))
    gym.logger.setLevel(logging.WARN)
    return ScaleRewardEnv(env)
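Example #7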
    def __init__(self,
                 env_name,
                 record_video=True,
                 video_schedule=None,
                 log_dir=None,
                 record_log=True,
                 force_reset=False,
                 screen_width=84,
                 screen_height=84):
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log(
                    "Warning: skipping Gym environment monitoring since snapshot_dir not configured."
                )
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)
        if 'Doom' in env_name:
            from ppaquette_gym_doom.wrappers.action_space import ToDiscrete
            wrapper = ToDiscrete('minimal')
            env = wrapper(env)

        self.env = env
        self.env_id = env.spec.id

        monitor_manager.logger.setLevel(logging.WARNING)

        assert not (not record_log and record_video)  # recording video requires record_log

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env,
                                            log_dir,
                                            video_callable=video_schedule,
                                            force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        self._action_space = convert_gym_space(env.action_space)
        self._horizon = env.spec.timestep_limit
        self._log_dir = log_dir
        self._force_reset = force_reset
        self.screen_width = screen_width
        self.screen_height = screen_height
        # Override the converted observation space with the preprocessed screen space
        self._observation_space = Box(low=0,
                                      high=1,
                                      shape=(screen_width, screen_height, 1))
Example #8
def make_env(scenario, grayscale, input_shape):

    width, height = input_shape
    env_spec = gym.spec('ppaquette/' + scenario)
    env_spec.id = scenario  # e.g. 'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=width,
                        height=height,
                        grayscale=grayscale)
    return e
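
# A usage sketch under the same assumptions; the scenario name is illustrative:
env = make_env('DoomBasic-v0', grayscale=True, input_shape=(80, 80))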
Example #9
def wrap_doom_1x1_v2(env, frame_skip=4, history_len=4, zclip=False):
  env = ToDiscrete("minimal")(env)
  if frame_skip > 1:
    env = SkipFrameEnv(env, skip=frame_skip)
  if zclip:
    raise NotImplementedError
  else:
    env = ProcessDoomRGBD1x1(env)
  if history_len > 1:
    env = StackFrameEnv(env, history_len)
  return env
Example #10
    def _make_env():
        env_spec = gym.spec(env_name)
        env_spec.id = env_name.split('/')[1]
        env = env_spec.make()
        env = SetResolution('160x120')(env)
        env = PreprocessImage((SkipWrapper(4)(ToDiscrete("minimal")(env))),
                              width=80,
                              height=80)

        scale = 1.0
        if 'DoomBasic' in env_name:
            scale = 400.0

        return ScaleRewardEnv(env, scale)
Example #11
def run(env, n_epochs, discount_factor,
        plot_stats=False, api_key=None,
        network=None, batch_size=32, buffer_len=10000, initial_epsilon=0.25,
        load=False, gpu_option=0.4, initial_lr=1e-4):
    env_name = env
    make_env = lambda: PreprocessImage(
        SkipWrapper(4)(ToDiscrete("minimal")(gym.make(env_name))),
        width=80, height=80, grayscale=True)
    env = make_env()

    n_actions = env.action_space.n
    state_shape = env.observation_space.shape
    special = {
        "batch_size": batch_size,
        "buffer_len": buffer_len,
        "lr": initial_lr
    }

    network = network or conv_network
    agent = DqnAgent(
        state_shape, n_actions, network,
        gamma=discount_factor,
        special=special)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_option)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver = tf.train.Saver()
        # str.replace would treat string.punctuation as one literal substring;
        # translate() replaces each punctuation character individually.
        model_dir = "./logs_" + env_name.translate(
            str.maketrans(string.punctuation, "_" * len(string.punctuation)))
        if not load:
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, "{}/model.ckpt".format(model_dir))

        stats = q_learning(sess, agent, env, n_epochs, initial_epsilon=initial_epsilon)
        create_if_need(model_dir)
        saver.save(sess, "{}/model.ckpt".format(model_dir))

        if plot_stats:
            save_stats(stats)

        if api_key is not None:
            env = gym.wrappers.Monitor(env, "{}/monitor".format(model_dir), force=True)
            sessions = [generate_session(sess, agent, env, 0.0, int(1e10)) for _ in range(300)]
            env.close()
            gym.upload("{}/monitor".format(model_dir), api_key=api_key)
Example #12
def wrap_doom_20x20_v2(env, frame_skip=4, action_delay=None, zclip=False, depth=False, frame_flicker=False, history_len=4):
  env = ToDiscrete("minimal")(env)
  if frame_skip > 1:
    env = SkipFrameEnv(env, skip=frame_skip)
  if action_delay is not None and action_delay >= 1:
    raise NotImplementedError
  if zclip:
    raise NotImplementedError
  elif depth:
    raise NotImplementedError
  elif frame_flicker:
    raise NotImplementedError
  else:
    env = ProcessDoomRGBD20(env)
  env = Transpose3DEnv(env)
  if history_len > 1:
    env = StackFrameEnv(env, history_len)
  return env
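Example #13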
def make_image_env(env_name,
                   n_games=1,
                   episode_limit=None,
                   n_frames=1,
                   autoreload_envs=False,
                   width=64,
                   height=64,
                   grayscale=True,
                   crop=None):
    env = gym.make(env_name) if episode_limit is None else gym.make(
        env_name).env
    if "ppaquette" in env_name:
        env = SkipWrapper(4)(ToDiscrete("minimal")(env))
    env = PreprocessImage(env,
                          width=width,
                          height=height,
                          grayscale=grayscale,
                          crop=crop)
    env = FrameBuffer(env, n_frames=n_frames) if n_frames > 1 else env
    if episode_limit is not None:
        env = TimeLimit(env, max_episode_steps=episode_limit)
    return EnvPool(env, n_games, autoreload_envs) if n_games > 0 else env
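
# A hedged usage sketch; EnvPool is assumed (from its signature) to step
# n_games copies of the wrapped env, and FrameBuffer to stack n_frames:
pool = make_image_env("ppaquette/DoomBasic-v0",
                      n_games=4,
                      episode_limit=1000,
                      n_frames=4,
                      width=80, height=80, grayscale=True)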
Example #14
# Making the AI
class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        brain_out = self.brain.forward(input)
        actions = self.body.forward(brain_out)
        return actions.data.numpy()


# Training with deep convolutional Q-learning
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendLine-v0"))),
                                               height=80,
                                               width=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up experience replay
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Eligibility trace ...  n-step Q-learning
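# A sketch of the n-step target computation the comment above alludes to,
# assuming each element of a sampled batch is a series of transitions with
# .state, .action, .reward and .done fields (as produced by NStepProgress);
# the function name and field names are assumptions, not the library's API.
def eligibility_trace(batch, gamma=0.99):
    inputs, targets = [], []
    for series in batch:
        # Evaluate the first and last states of the series in one forward pass.
        state_pair = Variable(torch.from_numpy(
            np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(state_pair)
        # Bootstrap from the last state's best Q-value unless the episode ended.
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        target = output[0].data
        target[series[0].action] = cumul_reward  # n-step return as the Q-target
        inputs.append(series[0].state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype=np.float32)), torch.stack(targets)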
Example #15
class AI:
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()


env = gym.make('ppaquette/DoomBasic-v0')
game_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(env)),
                                               width=80,
                                               height=80,
                                               grayscale=True)
game_env = gym.wrappers.Monitor(game_env, "videos", force=True)
number_actions = game_env.action_space.n

# Building the AI
cnn = CNN(number_actions)
softmaxbody = SoftmaxBody(temp=1.0)
ai = AI(brain=cnn, body=softmaxbody)

# Setting up experience replay
n_steps = experience_replay.NStepProgress(env=game_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
Example #16
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
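
# A hedged training-loop sketch on top of the replay memory above; run_steps
# and sample_batch are assumed method names, and eligibility_trace is the
# n-step target function sketched under Example #14.
import torch.nn as nn
import torch.optim as optim

loss_fn = nn.MSELoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)
for epoch in range(1, 101):
    memory.run_steps(200)                   # collect 200 new 10-step series (assumed API)
    for batch in memory.sample_batch(128):  # iterate over minibatches (assumed API)
        inputs, targets = eligibility_trace(batch)
        inputs, targets = Variable(inputs), Variable(targets)
        predictions = cnn(inputs)
        td_loss = loss_fn(predictions, targets)
        optimizer.zero_grad()
        td_loss.backward()
        optimizer.step()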
Example #17
    def __init__(self,
                 name,
                 sess,
                 env='ppaquette/DoomDefendCenter-v0',
                 eps=100,
                 eps_save=0,
                 time=400,
                 learning=1e-6,
                 gamma=.96,
                 tau=.94,
                 seed=None,
                 render=False,
                 rec=False):
        self.name = 'agent_' + str(name)
        self.eps = eps
        self.eps_ran = 0
        self.time = time
        self.render = render
        self.learning = learning
        self.gamma = gamma
        self.tau = tau
        self.sess = sess
        self.eps_save = eps_save

        self.env = gym.make(env)
        self.env = ToDiscrete("minimal")(self.env)
        self.env = SkipWrapper(2)(self.env)
        self.env = PreprocessImage(self.env,
                                   height=80,
                                   width=80,
                                   grayscale=True)
        if rec:
            self.env = gym.wrappers.Monitor(self.env, 'videos', force=True)
        if seed is not None:
            self.env.seed(seed)
            # rd.seed(seed)
            # tf.set_random_seed(seed)

        self.output = self.env.action_space.n
        self.input = np.concatenate(
            ([None], list(self.env.observation_space.shape)))

        with tf.variable_scope('global', reuse=True):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            'global')

        if self.eps_save > 0:
            vars_save = {v.op.name: v for v in global_vars}
            self.saver = tf.train.Saver(vars_save)
            self.check_save = self.save
        else:
            self.check_save = lambda: None

        with tf.variable_scope(self.name):
            self.ac = AC(self.input, self.output, self.sess)
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           self.name)
            with tf.variable_scope('loss_critic'):
                self.target_critic = tf.placeholder(tf.float32, [None],
                                                    name='target_critic')
                self.batch = tf.placeholder(tf.int32, name='batch')
                self.loss_critic = tf.reduce_sum(
                    tf.square(self.target_critic -
                              tf.reshape(self.ac.value_critic, [self.batch]),
                              name='square_loss_critic') * .5,
                    name='reduce_sum_critic')

            with tf.variable_scope('loss_actor'):
                self.td_error = tf.placeholder(tf.float32, [None],
                                               name='td_error')
                self.action = tf.placeholder(tf.int32, [None], name='action')
                self.loss_actor = tf.reduce_sum(
                    self.td_error *
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.ac.action_logits, labels=self.action))
                entropy = -tf.reduce_sum(
                    tf.nn.softmax(self.ac.action_logits) *
                    tf.nn.log_softmax(self.ac.action_logits))
                self.loss_actor -= entropy * .02

            with tf.variable_scope('loss'):
                self.loss_total = self.loss_actor + .5 * self.loss_critic

        gradients = tf.gradients(self.loss_total, local_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, 10.)

        with tf.variable_scope('global', reuse=tf.AUTO_REUSE):
            opt = tf.train.AdagradOptimizer(self.learning)
            self.train_op = [
                opt.apply_gradients(zip(gradients, global_vars)),
                self.global_step.assign_add(self.batch)
            ]

        self.reset_model = [
            l.assign(g) for l, g in zip(local_vars, global_vars)
        ]
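
    # A hedged sketch of how the placeholders above could be fed after one
    # rollout; states, actions, rewards and values are assumed per-step lists
    # (values from self.ac.value_critic), and self.ac.input is an assumed
    # input placeholder of the AC network.
    def _train_from_rollout(self, states, actions, rewards, values):
        R = 0.0  # bootstrap with the critic's value of the last state if truncated
        targets, td_errors = [], []
        for reward, value in zip(reversed(rewards), reversed(values)):
            R = reward + self.gamma * R
            targets.append(R)
            td_errors.append(R - value)
        targets.reverse()
        td_errors.reverse()
        self.sess.run(self.train_op, feed_dict={
            self.ac.input: states,
            self.target_critic: targets,
            self.td_error: td_errors,
            self.action: actions,
            self.batch: len(targets),
        })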
Example #18
        n_colors = 1 if self.grayscale else 3
        self.observation_space = Box(0.0, 1.0, [n_colors, height, width])

    def _observation(self, img):
        img = self.crop(img)
        img = imresize(img, self.img_size)
        if self.grayscale:
            img = img.mean(-1, keepdims=True)
        img = np.transpose(img, (2, 0, 1))
        img = img.astype('float32') / 255.
        return img
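
# A worked shape example for _observation above, assuming an 80x80 img_size,
# grayscale enabled, and a raw 120x160 RGB frame (the resolution set in
# Example #10); shapes shown as comments only:
#   img: (120, 160, 3) uint8     original frame (after crop, if any)
#   imresize   -> (80, 80, 3)    resized to img_size
#   mean(-1)   -> (80, 80, 1)    grayscale, channel kept via keepdims=True
#   transpose  -> (1, 80, 80)    channels-first, matching Box([n_colors, height, width])
#   / 255.     -> float32 values in [0, 1]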

# Getting the Doom environment

doom_env = PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
    width=80, height=80, grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
print("Environment created!")
number_actions = doom_env.action_space.n

# Building an AI

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Replay

n_steps = NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = ReplayMemory(n_steps=n_steps, capacity=10000)
Example #19
    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        # Convert the input images into torch format: numpy array -> torch tensor
        # -> Variable (which holds both the tensor and its gradient)
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy() 


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
# gym.make("ppaquette/DoomCorridor-v0") loads the DoomCorridor-v0 environment;
# PreprocessImage (from image_preprocessing.py) converts each frame to an
# 80x80 grayscale image before it enters the neural network.
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
    width=80, height=80, grayscale=True)
# Record videos of the whole game into the "videos" folder; they can be watched after the run.
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n  # 7 actions: move left/right/forward, turn left/right, run, shoot

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)  # temperature = 1.0
ai = AI(brain=cnn, body=softmax_body)

# Setting up experience replay with an eligibility trace (step size = 10)
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
# The memory keeps the last 10000 steps performed by the AI, and the AI learns from a new 10-step series at a time.
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
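Example #20

    def __call__(self, inputs):
        # Convert the input images to a torch Variable
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))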
        output = self.brain(input)
        actions = self.body(output)

        # actions are in torch format, convert to np array and return
        return actions.data.numpy()


# Part 2 - Training the AI using Deep Q-Learning

# Getting the Doom environment
# We can change the environment by changing the argument to gym.make.
# Set the image width and height to 80x80 to match the network's input format,
# and grayscale to True since we take black-and-white input.

doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendCenter-v0"))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)

# Gets the number of actions allowed in that environment
number_actions = doom_env.action_space.n

# Building an AI

# Creating a Brain
cnn = CNN(number_actions)

# Creating a Body
softmax_body = SoftmaxBody(T=1.5)
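Example #21

    def __call__(self, input_data):
        # Converting the input images to a torch Variable
        input_data = Variable(torch.from_numpy(np.array(input_data, dtype=np.float32)))

        # Putting the input through the brain (the CNN)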
        output = self.brain(input_data)

        # Putting the output through the body to choose the actions
        actions = self.body(output)

        # Returning the actions as a numpy array
        return actions.data.numpy()


# Part 2 - Implementing Deep Convolutional Q-Learning

# Getting the Doom environment:
# gym.make imports the environment, and image_preprocessing resizes
# incoming frames to 80x80 grayscale
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete('minimal')(gym.make('ppaquette/DoomCorridor-v0'))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
# Saves videos of the AI playing Doom into the "videos" folder
doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True)

# Getting the number of actions from the Doom environment
number_actions = doom_env.action_space.n

# Building an AI
# Creating an object of our CNN class
cnn = CNN(number_actions)
# Creating an object of our SoftmaxBody class with its temperature
softmax_body = SoftmaxBody(T=1.0)
# Creating an object of our AI class, passing in the brain and the body
ai = AI(brain=cnn, body=softmax_body)
Example #22
    def __call__(self, inputs):
        # Converting the images to the correct format:
        # image -> numpy array of float32 -> torch tensor wrapped in a Variable
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)   # passing the tensor to the brain
        actions = self.body(output)  # passing the output to the body
        return actions.data.numpy()  # converting the actions back to numpy


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(
        gym.make("ppaquette/DoomCorridor-v0"))),
    width=80,
    height=80,
    grayscale=True)  # importing the environment and preprocessing it
# the dimensions must match the neural network's input
doom_env = gym.wrappers.Monitor(doom_env, "videos",
                                force=True)  # saving videos of the run
number_actions = doom_env.action_space.n  # number of actions possible in the environment

# Building an AI
cnn = CNN(number_actions)  # the brain
softmax_body = SoftmaxBody(T=1.0)  # the body, with the softmax temperature
ai = AI(brain=cnn, body=softmax_body)  # wiring the brain and body together

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)