def wrap_doom_84x84_v2(env, frame_skip=4, action_delay=None,
                       preproc_mask=False, preproc_mask_big=False,
                       preproc_mask_out=False, zclip=False, depth=False,
                       frame_flicker=False, frame_repeat=None,
                       history_len=4):
    """Assemble the v2 RGBD wrapper stack around a Doom environment.

    Wrappers are applied in this order: discrete "minimal" actions,
    frame skipping, action delay, one optional mask preprocessor, one
    frame processor, a transpose, and frame stacking.

    Args:
        env: environment to wrap.
        frame_skip: passed to ``SkipFrameEnv`` when greater than 1.
        action_delay: passed to ``DelayedActionEnv`` when not None and > 0.
        preproc_mask, preproc_mask_big, preproc_mask_out: select a mask
            preprocessor; the first truthy flag (in this order) wins and
            none is applied when all are falsy.
        zclip, depth, frame_flicker, frame_repeat: select the frame
            processor; the first matching option (in this order) wins and
            ``ProcessDoomRGBD`` is used when none matches.
        history_len: passed to ``StackFrameEnv`` when greater than 1.

    Returns:
        The wrapped environment.
    """
    env = ToDiscrete("minimal")(env)
    if frame_skip > 1:
        env = SkipFrameEnv(env, skip=frame_skip)
    if action_delay is not None and action_delay > 0:
        env = DelayedActionEnv(env, action_delay)

    # Optional mask stage: at most one preprocessor is chosen.
    mask_wrapper = None
    if preproc_mask:
        mask_wrapper = PreprocDoomRGBDMask
    elif preproc_mask_big:
        mask_wrapper = PreprocDoomRGBDMaskBig
    elif preproc_mask_out:
        mask_wrapper = PreprocDoomRGBDMaskOut
    if mask_wrapper is not None:
        env = mask_wrapper(env)

    # Mandatory frame-processing stage: exactly one processor is applied.
    if zclip:
        env = ProcessDoomRGBDHardZClip(env)
    elif depth:
        env = ProcessDoomRGBDDepth(env)
    elif frame_flicker:
        env = ProcessDoomRGBDFlicker(env, 0.5)
    elif frame_repeat is not None and frame_repeat > 1:
        env = ProcessDoomRGBDRepeat(env, frame_repeat)
    else:
        env = ProcessDoomRGBD(env)

    env = Transpose3DEnv(env)
    return StackFrameEnv(env, history_len) if history_len > 1 else env
def make_env():
    """Create the preprocessed ``ppaquette/DoomBasic-v0`` environment.

    The spec id is rewritten to ``'DoomBasic-v0'`` before the environment
    is instantiated. The result is wrapped with ``ToDiscrete("minimal")``,
    ``SkipWrapper(4)`` and an 80x80 grayscale ``PreprocessImage``.
    """
    spec = gym.spec('ppaquette/DoomBasic-v0')
    spec.id = 'DoomBasic-v0'
    raw_env = spec.make()

    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    return PreprocessImage(skip(discretize(raw_env)),
                           width=80, height=80, grayscale=True)
def wrap_doom_84x84(env, frame_skip=4, history_len=4):
    """Assemble the 84x84 wrapper stack around a Doom environment.

    Applies, in order: ``ToDiscrete("minimal")``, ``SkipFrameEnv`` (always,
    with ``skip=frame_skip``), ``ProcessFrame84x84Doom``, ``Transpose3DEnv``
    and, only when ``history_len`` exceeds 1, ``StackFrameEnv``.

    Returns:
        The wrapped environment.
    """
    env = ToDiscrete("minimal")(env)
    env = SkipFrameEnv(env, skip=frame_skip)
    for stage in (ProcessFrame84x84Doom, Transpose3DEnv):
        env = stage(env)
    return StackFrameEnv(env, history_len) if history_len > 1 else env
def make_env(arg_env_spec, arg_env_spec_id):
    """Create a preprocessed Doom environment from a gym spec name.

    Args:
        arg_env_spec: name handed to ``gym.spec``.
        arg_env_spec_id: value written to the spec's ``id`` before the
            environment is instantiated.

    Returns:
        The environment wrapped with ``ToDiscrete("minimal")``,
        ``SkipWrapper(4)`` and an 80x80 grayscale ``PreprocessImage``.
    """
    spec = gym.spec(arg_env_spec)
    spec.id = arg_env_spec_id
    raw_env = spec.make()

    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    return PreprocessImage(skip(discretize(raw_env)),
                           width=80, height=80, grayscale=True)
def make_env():
    """Create the configured environment, optionally under a gym Monitor.

    The environment name is read from ``run.get("defaults", "env")`` and
    the image settings from the enclosing scope (``im_width``,
    ``im_height``, ``grayscale``). When ``args.monitor`` is truthy it is
    used as the Monitor's directory argument.
    """
    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    base_env = gym.make(run.get("defaults", "env"))
    env = wrappers.PreprocessImage(
        skip(discretize(base_env)),
        width=im_width,
        height=im_height,
        grayscale=grayscale,
    )
    if not args.monitor:
        return env
    return gym.wrappers.Monitor(env, args.monitor)
def _thunk():
    """Build one seeded, monitored, reward-scaled DoomBasic environment.

    Reads ``seed`` and ``rank`` from the enclosing scope. The environment
    is seeded with ``seed + rank`` and, when the logger has a directory,
    wrapped in ``bench.Monitor`` writing to ``<dir>/<rank>.monitor.json``.
    """
    spec = gym.spec('ppaquette/DoomBasic-v0')
    spec.id = 'DoomBasic-v0'
    doom = spec.make()
    doom.seed(seed + rank)

    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    doom = PreprocessImage(skip(discretize(doom)))

    if logger.get_dir():
        monitor_file = os.path.join(logger.get_dir(),
                                    "{}.monitor.json".format(rank))
        doom = bench.Monitor(doom, monitor_file)

    # Quieten gym's own logger before handing the env back.
    gym.logger.setLevel(logging.WARN)
    return ScaleRewardEnv(doom)
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None, record_log=True, force_reset=False, screen_width=84, screen_height=84):
    """Create the gym environment and optionally attach a gym Monitor.

    NOTE(review): block nesting was reconstructed from a
    whitespace-collapsed source; confirm the branch boundaries.

    Args:
        env_name: name passed to ``gym.envs.make``; when it contains
            ``'Doom'`` the env is also wrapped with ``ToDiscrete('minimal')``.
        record_video: when False a ``NoVideoSchedule`` is used.
        video_schedule: video callable for the Monitor; defaults to
            ``CappedCubicVideoSchedule()`` when recording video.
        log_dir: Monitor directory; defaults to ``<snapshot_dir>/gym_log``
            when the logger has a snapshot dir, otherwise stays None.
        record_log: when False no Monitor is attached.
        force_reset: stored on ``self._force_reset``.
        screen_width, screen_height: stored, and used for the shape of the
            final observation space.

    Raises:
        AssertionError: if ``record_video`` is set while ``record_log`` is
            not.
    """
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log( "Warning: skipping Gym environment monitoring since snapshot_dir not configured." )
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    # Captures the constructor arguments via locals(); runs after log_dir
    # has been resolved, so it sees the resolved value.
    Serializable.quick_init(self, locals())
    env = gym.envs.make(env_name)
    if 'Doom' in env_name:
        # Imported lazily, so the dependency is only needed for Doom envs.
        from ppaquette_gym_doom.wrappers.action_space import ToDiscrete
        wrapper = ToDiscrete('minimal')
        env = wrapper(env)
    self.env = env
    self.env_id = env.spec.id
    monitor_manager.logger.setLevel(logging.WARNING)
    # Recording video without recording logs is not a supported combination.
    assert not (not record_log and record_video)
    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True
    # Spaces and horizon are read from the un-monitored `env`.
    self._observation_space = convert_gym_space(env.observation_space)
    self._action_space = convert_gym_space(env.action_space)
    self._horizon = env.spec.timestep_limit
    self._log_dir = log_dir
    self._force_reset = force_reset
    self.screen_width = screen_width
    self.screen_height = screen_height
    # Overrides the converted observation space assigned above.
    self._observation_space = Box(low=0, high=1, shape=(screen_width, screen_height, 1))
def make_env(scenario, grayscale, input_shape):
    """Create a preprocessed ``ppaquette/<scenario>`` environment.

    Args:
        scenario: scenario name (for example 'DoomBasic-v0'); it is looked
            up as ``'ppaquette/' + scenario`` and then written back as the
            spec id.
        grayscale: forwarded to ``PreprocessImage``.
        input_shape: ``(width, height)`` pair forwarded to
            ``PreprocessImage``.

    Returns:
        The environment wrapped with ``ToDiscrete("minimal")``,
        ``SkipWrapper(4)`` and ``PreprocessImage``.
    """
    width, height = input_shape
    spec = gym.spec('ppaquette/' + scenario)
    spec.id = scenario
    raw_env = spec.make()

    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    return PreprocessImage(skip(discretize(raw_env)),
                           width=width, height=height, grayscale=grayscale)
def wrap_doom_1x1_v2(env, frame_skip=4, history_len=4, zclip=False):
    """Assemble the 1x1 RGBD wrapper stack around a Doom environment.

    Args:
        env: environment to wrap.
        frame_skip: passed to ``SkipFrameEnv`` when greater than 1.
        history_len: passed to ``StackFrameEnv`` when greater than 1.
        zclip: not supported for this variant.

    Returns:
        The wrapped environment.

    Raises:
        NotImplementedError: if ``zclip`` is truthy (raised after the
            action and frame-skip wrappers have been applied).
    """
    env = ToDiscrete("minimal")(env)
    if frame_skip > 1:
        env = SkipFrameEnv(env, skip=frame_skip)
    if zclip:
        raise NotImplementedError
    env = ProcessDoomRGBD1x1(env)
    return StackFrameEnv(env, history_len) if history_len > 1 else env
def _make_env():
    """Build a 160x120 Doom env preprocessed to 80x80 with scaled rewards.

    Reads ``env_name`` from the enclosing scope. The spec id is set to the
    part of ``env_name`` after the first '/'. The reward scale passed to
    ``ScaleRewardEnv`` is 400.0 for names containing 'DoomBasic' and 1.0
    otherwise.
    """
    spec = gym.spec(env_name)
    spec.id = env_name.split('/')[1]
    raw_env = spec.make()

    set_resolution = SetResolution('160x120')
    sized_env = set_resolution(raw_env)

    skip = SkipWrapper(4)
    discretize = ToDiscrete("minimal")
    processed = PreprocessImage(skip(discretize(sized_env)),
                                width=80, height=80)

    reward_scale = 400.0 if 'DoomBasic' in env_name else 1.0
    return ScaleRewardEnv(processed, reward_scale)
def run(env, n_epochs, discount_factor, plot_stats=False, api_key=None,
        network=None, batch_size=32, buffer_len=10000, initial_epsilon=0.25,
        load=False, gpu_option=0.4, initial_lr=1e-4):
    """Train a DQN agent on a Doom environment and save its checkpoint.

    Args:
        env: gym environment name (for example 'ppaquette/DoomBasic-v0').
        n_epochs: forwarded to ``q_learning``.
        discount_factor: used as the agent's ``gamma``.
        plot_stats: when truthy, the training stats go to ``save_stats``.
        api_key: when not None, 300 sessions are recorded under a gym
            Monitor and uploaded with this key.
        network: network builder; falls back to ``conv_network``.
        batch_size, buffer_len, initial_lr: packed into the agent's
            ``special`` dict as ``batch_size``, ``buffer_len`` and ``lr``.
        initial_epsilon: forwarded to ``q_learning``.
        load: when truthy, restore variables from the saved checkpoint
            instead of initialising them.
        gpu_option: per-process GPU memory fraction for the TF session.

    Note:
        The checkpoint directory is ``./logs_<env name>`` with every
        punctuation character replaced by ``_``. The previous code called
        ``str.replace(string.punctuation, "_")``, which only matches the
        whole punctuation string as a substring, so names were never
        sanitised (a name such as ``a/b`` produced a nested directory).
        Checkpoints written under the old, unsanitised path must be moved
        before they can be loaded with ``load=True``.
    """
    env_name = env

    def make_env():
        return PreprocessImage(
            SkipWrapper(4)(ToDiscrete("minimal")(gym.make(env_name))),
            width=80, height=80, grayscale=True)

    env = make_env()
    n_actions = env.action_space.n
    state_shape = env.observation_space.shape

    special = {
        "batch_size": batch_size,
        "buffer_len": buffer_len,
        "lr": initial_lr,
    }
    network = network or conv_network
    agent = DqnAgent(state_shape, n_actions, network,
                     gamma=discount_factor, special=special)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_option)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver = tf.train.Saver()

        # Replace each punctuation character individually so the name is
        # safe to use as a single directory component.
        safe_name = "".join(
            "_" if ch in string.punctuation else ch for ch in env_name)
        model_dir = "./logs_" + safe_name
        checkpoint_path = "{}/model.ckpt".format(model_dir)

        if not load:
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, checkpoint_path)

        stats = q_learning(sess, agent, env, n_epochs,
                           initial_epsilon=initial_epsilon)

        create_if_need(model_dir)
        saver.save(sess, checkpoint_path)

        if plot_stats:
            save_stats(stats)

        if api_key is not None:
            monitor_dir = "{}/monitor".format(model_dir)
            env = gym.wrappers.Monitor(env, monitor_dir, force=True)
            # Sessions are generated only so the Monitor records them;
            # their return values are not needed.
            for _ in range(300):
                generate_session(sess, agent, env, 0.0, int(1e10))
            env.close()
            gym.upload(monitor_dir, api_key=api_key)
def wrap_doom_20x20_v2(env, frame_skip=4, action_delay=None, zclip=False,
                       depth=False, frame_flicker=False, history_len=4):
    """Assemble the 20x20 RGBD wrapper stack around a Doom environment.

    Args:
        env: environment to wrap.
        frame_skip: passed to ``SkipFrameEnv`` when greater than 1.
        action_delay: not supported when set to 1 or more.
        zclip, depth, frame_flicker: not supported for this variant.
        history_len: passed to ``StackFrameEnv`` when greater than 1.

    Returns:
        The wrapped environment.

    Raises:
        NotImplementedError: if an unsupported option is requested (raised
            after the action and frame-skip wrappers have been applied).
    """
    env = ToDiscrete("minimal")(env)
    if frame_skip > 1:
        env = SkipFrameEnv(env, skip=frame_skip)

    if action_delay is not None and action_delay >= 1:
        raise NotImplementedError
    if zclip or depth or frame_flicker:
        raise NotImplementedError

    env = Transpose3DEnv(ProcessDoomRGBD20(env))
    return StackFrameEnv(env, history_len) if history_len > 1 else env
def make_image_env(env_name, n_games=1, episode_limit=None, n_frames=1,
                   autoreload_envs=False, width=64, height=64,
                   grayscale=True, crop=None):
    """Create an image-preprocessed environment, optionally pooled.

    Args:
        env_name: name passed to ``gym.make``; names containing
            'ppaquette' also get ``ToDiscrete("minimal")`` and
            ``SkipWrapper(4)``.
        n_games: when greater than 0 the result is an ``EnvPool`` built
            with this value; otherwise the bare environment is returned.
        episode_limit: when not None, the ``.env`` attribute of the made
            environment is used and a ``TimeLimit`` with this many steps
            is applied at the end.
        n_frames: when greater than 1, a ``FrameBuffer`` is applied.
        autoreload_envs: forwarded to ``EnvPool``.
        width, height, grayscale, crop: forwarded to ``PreprocessImage``.
    """
    made_env = gym.make(env_name)
    env = made_env if episode_limit is None else made_env.env

    if "ppaquette" in env_name:
        skip = SkipWrapper(4)
        discretize = ToDiscrete("minimal")
        env = skip(discretize(env))

    env = PreprocessImage(env, width=width, height=height,
                          grayscale=grayscale, crop=crop)
    if n_frames > 1:
        env = FrameBuffer(env, n_frames=n_frames)
    if episode_limit is not None:
        env = TimeLimit(env, max_episode_steps=episode_limit)

    if n_games > 0:
        return EnvPool(env, n_games, autoreload_envs)
    return env
# The agent: a network ("brain") followed by an action selector ("body").
class AI:
    """Callable that maps a batch of observations to chosen actions."""

    def __init__(self, brain, body):
        self.brain, self.body = brain, body

    def __call__(self, inputs):
        """Run ``inputs`` through brain then body; return a numpy array."""
        batch = np.array(inputs, dtype=np.float32)
        tensor = Variable(torch.from_numpy(batch))
        selected = self.body.forward(self.brain.forward(tensor))
        return selected.data.numpy()


# Environment for training: DoomDefendLine with 80x80 grayscale frames.
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(
        ToDiscrete("minimal")(
            gym.make("ppaquette/DoomDefendLine-v0")
        )
    ),
    height=80,
    width=80,
    grayscale=True,
)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Agent: CNN brain plus a softmax body with T=1.0.
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Experience replay fed by 10-step progress over the environment.
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
class AI:
    """Callable agent: a network ("brain") followed by a selector ("body").

    Previously this class was named ``ai`` and the module then rebound
    ``ai`` to an instance of it, which left the class unreachable by name.
    The class now has its own PascalCase name; the module-level ``ai``
    below is still the agent instance, as before.
    """

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Run ``inputs`` through brain then body; return a numpy array."""
        # `tensor` rather than `input`, to avoid shadowing the builtin.
        tensor = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(tensor)
        actions = self.body(output)
        return actions.data.numpy()


# Environment: DoomBasic with 80x80 grayscale frames, recorded to "videos".
env = gym.make('ppaquette/DoomBasic-v0')
game_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(ToDiscrete("minimal")(env)),
    width=80, height=80, grayscale=True)
game_env = gym.wrappers.Monitor(game_env, "videos", force=True)
number_actions = game_env.action_space.n

# Building the AI
cnn = CNN(number_actions)
softmaxbody = SoftmaxBody(temp=1.0)
ai = AI(brain=cnn, body=softmaxbody)

# Setting up experience replay
n_steps = experience_replay.NStepProgress(env=game_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
# NOTE(review): the two methods below belong to a class whose header lies
# outside this view; indentation was lost in the source.
def __init__(self, brain, body):
    """Store the network ("brain") and the action selector ("body")."""
    self.brain, self.body = brain, body


def __call__(self, inputs):
    """Run ``inputs`` through brain then body; return a numpy array."""
    batch = np.array(inputs, dtype=np.float32)
    tensor = Variable(torch.from_numpy(batch))
    return self.body(self.brain(tensor)).data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Environment: DoomCorridor with 80x80 grayscale frames, recorded to "videos".
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(
        ToDiscrete("minimal")(
            gym.make("ppaquette/DoomCorridor-v0")
        )
    ),
    width=80,
    height=80,
    grayscale=True,
)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

# Agent: CNN brain plus a softmax body with T=1.0.
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Experience replay fed by 10-step progress over the environment.
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
def __init__(self, name, sess, env='ppaquette/DoomDefendCenter-v0', eps=100, eps_save=0, time=400, learning=1e-6, gamma=.96, tau=.94, seed=None, render=False, rec=False):
    """Set up the agent's environment and its TensorFlow training graph.

    NOTE(review): block nesting was reconstructed from a
    whitespace-collapsed source; confirm which statements belong to which
    variable scope.

    Args:
        name: suffix for the agent name (``'agent_' + str(name)``), which
            is also used as the local variable scope.
        sess: TensorFlow session, stored and passed to ``AC``.
        env: gym environment name.
        eps, time, gamma, tau, render: stored on the instance unchanged.
        eps_save: when greater than 0, a Saver over the global trainable
            variables is created and ``check_save`` is bound to
            ``self.save``; otherwise ``check_save`` is a no-op.
        learning: learning rate for the Adagrad optimizer.
        seed: when not None, passed to ``self.env.seed``.
        rec: when truthy, the env is wrapped in a gym Monitor writing to
            'videos'.
    """
    self.name = 'agent_' + str(name)
    self.eps = eps
    self.eps_ran = 0
    self.time = time
    self.render = render
    self.learning = learning
    self.gamma = gamma
    self.tau = tau
    self.sess = sess
    self.eps_save = eps_save
    # Environment: discrete "minimal" actions, SkipWrapper(2), 80x80 grayscale.
    self.env = gym.make(env)
    self.env = ToDiscrete("minimal")(self.env)
    self.env = SkipWrapper(2)(self.env)
    self.env = PreprocessImage(self.env, height=80, width=80, grayscale=True)
    if rec:
        self.env = gym.wrappers.Monitor(self.env, 'videos', force=True)
    if seed is not None:
        self.env.seed(seed)
        # rd.seed(seed)
        # tf.set_random_seed(seed)
    self.output = self.env.action_space.n
    # [None] followed by the observation shape; presumably the network
    # input shape with a free batch dimension -- confirm against AC.
    self.input = np.concatenate( ([None], list(self.env.observation_space.shape)))
    # NOTE(review): the 'global' trainable variables collected here are not
    # created in this block; presumably built elsewhere first -- confirm.
    with tf.variable_scope('global', reuse=True):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
    if self.eps_save > 0:
        vars_save = {v.op.name: v for v in global_vars}
        self.saver = tf.train.Saver(vars_save)
        self.check_save = self.save
    else:
        self.check_save = lambda: None
    with tf.variable_scope(self.name):
        self.ac = AC(self.input, self.output, self.sess)
        local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.name)
        with tf.variable_scope('loss_critic'):
            self.target_critic = tf.placeholder(tf.float32, [None], name='target_critic')
            self.batch = tf.placeholder(tf.int32, name='batch')
            # Critic loss: half the summed squared error between the target
            # and the critic value reshaped to [batch].
            self.loss_critic = tf.reduce_sum( tf.square(self.target_critic - tf.reshape(self.ac.value_critic, [self.batch]), name='square_loss_critic') * .5, name='reduce_sum_critic')
        with tf.variable_scope('loss_actor'):
            self.td_error = tf.placeholder(tf.float32, [None], name='td_error')
            self.action = tf.placeholder(tf.int32, [None], name='action')
            # Actor loss: TD-error-weighted cross-entropy of the taken actions.
            self.loss_actor = tf.reduce_sum( self.td_error * tf.nn.sparse_softmax_cross_entropy_with_logits( logits=self.ac.action_logits, labels=self.action))
            entropy = -tf.reduce_sum( tf.nn.softmax(self.ac.action_logits) * tf.nn.log_softmax(self.ac.action_logits))
            # Subtracting the entropy term (weight .02) from the loss
            # rewards higher-entropy policies.
            self.loss_actor -= entropy * .02
        with tf.variable_scope('loss'):
            self.loss_total = self.loss_actor + .5 * self.loss_critic
            # Gradients are taken w.r.t. this agent's local variables and
            # clipped to a global norm of 10.
            gradients = tf.gradients(self.loss_total, local_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, 10.)
    with tf.variable_scope('global', reuse=tf.AUTO_REUSE):
        opt = tf.train.AdagradOptimizer(self.learning)
        # Local gradients are applied to the global variables; the global
        # step advances by the batch size. Assumes local_vars and
        # global_vars are in matching order -- TODO confirm.
        self.train_op = [ opt.apply_gradients(zip(gradients, global_vars)), self.global_step.assign_add(self.batch) ]
    # Ops that copy each global variable into its paired local variable.
    self.reset_model = [ l.assign(g) for l, g in zip(local_vars, global_vars) ]
n_colors = 1 if self.grayscale else 3 self.observation_space = Box(0.0, 1.0, [n_colors, height, width]) def _observation(self, img): img = self.crop(img) img = imresize(img, self.img_size) if self.grayscale: img = img.mean(-1, keepdims=True) img = np.transpose(img, (2, 0, 1)) img = img.astype('float32') / 255. return img # Getting the Doom environment doom_env = PreprocessImage( SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True) doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) print("Environment created!") number_actions = doom_env.action_space.n # Building an AI cnn = CNN(number_actions) softmax_body = SoftmaxBody(T=1.0) ai = AI(brain=cnn, body=softmax_body) # Setting up Replay n_steps = NStepProgress(env=doom_env, ai=ai, n_step=10) memory = ReplayMemory(n_steps=n_steps, capacity=10000)
# NOTE(review): the two methods below belong to a class whose header lies
# outside this view; indentation was lost in the source.
def __init__(self, brain, body):
    """Store the network ("brain") and the action selector ("body")."""
    self.brain, self.body = brain, body


def __call__(self, inputs):
    """Run ``inputs`` through brain then body; return a numpy array.

    The inputs are converted to a float32 numpy array, then to a torch
    tensor wrapped in a ``Variable``, before being passed to the brain.
    """
    batch = np.array(inputs, dtype=np.float32)
    tensor = Variable(torch.from_numpy(batch))
    return self.body(self.brain(tensor)).data.numpy()


# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Environment: DoomCorridor-v0, preprocessed by PreprocessImage into 80x80
# grayscale frames.
doom_env = image_preprocessing.PreprocessImage(
    SkipWrapper(4)(
        ToDiscrete("minimal")(
            gym.make("ppaquette/DoomCorridor-v0")
        )
    ),
    width=80,
    height=80,
    grayscale=True,
)
# The Monitor is pointed at the "videos" folder (force=True).
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
# Number of discrete actions exposed by the wrapped environment.
number_actions = doom_env.action_space.n

# Building an AI: CNN brain plus a softmax body with temperature T=1.0.
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Experience replay: 10-step progress feeding a memory of capacity 10000.
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)
output = self.brain(input) actions = self.body(output) # actions are in torch format, convert to np array and return return actions.data.numpy() # Part 2 - Training the AI using Deep Q-Learning # getting the Doom environment # we can change the environment by changing the argument to gym.make # set image width and height to 80X80 as we set our input format to 80X80 # set grayscale to true as we take black and white input doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)( ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendCenter-v0"))), width=80, height=80, grayscale=True) doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) # Gets the number of actions allowed in that environment number_actions = doom_env.action_space.n # Building an AI # Creating a Brain cnn = CNN(number_actions) # Creating a Body softmax_body = SoftmaxBody(T=1.5)
output = self.brain(input_data) #Putting output into body and taking the actions actions = self.body(output) #Returning actions in right format return actions.data.numpy() #Part 2 - Implementing Deep COnvolutional Q-Learning #Getting the doom enviroment #gym.make imports the enviroment #image_preprocessing proccesses images coming in with 80 by 80 size in grayscale doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)( ToDiscrete('minimal')(gym.make('ppaquette/DoomCorridor-v0'))), width=80, height=80, grayscale=True) #Saves videos of AI playing doom into the videos folder doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True) #Getting number of actions from doom_enviroment number_actions = doom_env.action_space.n #Building an AI #Creating an object of our CNN class cnn = CNN(number_actions) #Creating an object of our SoftmaxBoddy class and inputing temperature softmax_body = SoftmaxBody(T=1.0) #Creating an object of our AI class and inputing the brain and body
def __call__(self, inputs): #converting images to correct format input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32))) #converting image to numpy array #converting pixels to float 32 #converting mupy to torch tensor(variable class) output = self.brain(input) #passing tensor to brain actions = self.body(output) #passing output to body return actions.data.numpy() #converting action to numpy # Part 2 - Training the AI with Deep Convolutional Q-Learning # Getting the Doom environment doom_env = image_preprocessing.PreprocessImage( SkipWrapper(4)(ToDiscrete("minimal")( gym.make("ppaquette/DoomCorridor-v0"))), width=80, height=80, grayscale=True) # importing the environment and preprocessing it #dimensions should be same as neural network doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True) #saving to videos number_actions = doom_env.action_space.n # number of actions possible in the environment # Building an AI cnn = CNN(number_actions) #calling cnn softmax_body = SoftmaxBody(T=1.0) #setting value of temperature for softmax ai = AI(brain=cnn, body=softmax_body) #calling brain # Setting up Experience Replay n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai,