def __init__(self, params):
    """Set up the trainer: logger, TF session, seeds, environment, and agent.

    Args:
        params: dict of run configuration; must contain 'logdir', 'use_gpu',
            'which_gpu', 'seed', 'env_name', 'ep_len', 'agent_class' and
            'agent_params'. Mutated in place ('ep_len' and 'agent_params'
            entries are filled in).
    """
    #############
    ## INIT
    #############

    # Get params, create logger, create TF session
    self.params = params
    self.logger = Logger(self.params['logdir'])
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds for reproducibility (TF graph ops and numpy).
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    #############
    ## ENV
    #############

    # Make and seed the gym environment.
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes: fall back to the env's registered limit
    # when 'ep_len' is unset/0.
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
    # NOTE(review): dropped dead local MAX_VIDEO_LEN (assigned, never read;
    # if a module-level global was intended it needs a `global` declaration).

    # Is this env discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation timestep, will be used for video saving: MuJoCo envs expose
    # a model with an explicit timestep; otherwise read fps from env metadata.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize all TF variables created above (agent networks etc.).
    tf.global_variables_initializer().run(session=self.sess)
def __init__(self, params):
    """Set up the trainer: logger, TF session, seeds, (wrapped) env, and agent.

    Args:
        params: dict of run configuration; must contain 'logdir', 'use_gpu',
            'which_gpu', 'seed', 'env_name', 'ep_len', 'agent_class' and
            'agent_params'; may contain 'env_wrappers' for Atari-style envs.
            Mutated in place ('ep_len' and 'agent_params' entries filled in).
    """
    #############
    # INIT
    #############

    # Get params, create logger, create TF session
    self.params = params
    self.logger = Logger(self.params['logdir'])
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds for reproducibility (TF graph ops and numpy).
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    #############
    # ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params['env_name'])
    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs: record rollouts
        # under <logdir>/gym via Monitor, then apply task-specific wrappers.
        self.env = wrappers.Monitor(self.env,
                                    os.path.join(self.params['logdir'], "gym"),
                                    force=True)
        self.env = params['env_wrappers'](self.env)
        # NOTE(review): -float('nan') is still NaN; -float('inf') (matching
        # best_mean_episode_reward) looks intended -- confirm before changing.
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    self.env.seed(seed)

    # Maximum length for episodes: fall back to the env's registered limit
    # when 'ep_len' is unset/0.
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
    # NOTE(review): dropped dead local MAX_VIDEO_LEN (assigned, never read;
    # if a module-level global was intended it needs a `global` declaration).

    # Is this env discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images? Image observations have rank > 2.
    img = len(self.env.observation_space.shape) > 2
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes: keep the full shape for image
    # observations, a flat dimension otherwise.
    ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation timestep, will be used for video saving: MuJoCo envs expose
    # a model with an explicit timestep; wrapped envs get a fixed fps.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # This is not actually used when using the Monitor wrapper
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    #############
    # AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.sess, self.env, self.params['agent_params'])

    #############
    # INIT VARS
    #############

    # Initialize all TF variables created above (agent networks etc.).
    tf.global_variables_initializer().run(session=self.sess)
def __init__(self, params):
    """Set up the trainer: TF session, seeds, vectorized env, wandb, and agent.

    Args:
        params: dict of run configuration; must contain 'use_gpu', 'which_gpu',
            'seed', 'env_name', 'n_worker', 'ep_len', 'use_wandb',
            'agent_class', 'agent_params' and the hw2 hyperparameters mirrored
            into wandb below. Mutated in place ('ep_len' and 'agent_params'
            entries are filled in).
    """
    #############
    ## INIT
    #############

    # Get params, create TF session
    self.params = params
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds for reproducibility (TF graph ops and numpy).
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    #############
    ## ENV
    #############

    # Make a vectorized environment: n_worker copies built from the same
    # seeded generator.
    self.env = make_envs(env_generator(self.params['env_name'], seed),
                         self.params['n_worker'])

    # Maximum length for episodes: fall back to the env's registered limit
    # when 'ep_len' is unset/0.
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

    # Is this env discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    print(
        "******************************************************************"
    )
    print("Action Dimension: ", self.params['agent_params']['ac_dim'])
    print("Observation Dimension: ", self.params['agent_params']['ob_dim'])
    print(
        "******************************************************************"
    )

    # Mirror the run configuration into wandb for experiment tracking.
    if self.params['use_wandb'] == 1:
        wandb.init(project="cs285_hw2", tensorboard=False)
        for key in ('env_name', 'exp_name', 'n_iter', 'reward_to_go',
                    'nn_baseline', 'standardize_advantages', 'discount',
                    'num_agent_train_steps_per_iter', 'learning_rate',
                    'n_layers', 'size', 'ep_len', 'seed', 'use_gpu',
                    'which_gpu', 'n_eval', 'render_after_training',
                    'n_worker', 'gae', 'gae_gamma', 'gae_lambda'):
            setattr(wandb.config, key, self.params[key])

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize all TF variables created above (agent networks etc.).
    tf.global_variables_initializer().run(session=self.sess)
def __init__(self, params):
    """Build the training setup: logger, TF session, seeded env, agent, wandb."""
    #############
    ## INIT
    #############

    # Stash the config, create the logger and the TF session.
    self.params = params
    self.logger = Logger(self.params['logdir'])
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Seed every source of randomness we control (TF and numpy).
    rng_seed = self.params['seed']
    tf.set_random_seed(rng_seed)
    np.random.seed(rng_seed)

    #############
    ## ENV
    #############

    # Instantiate and seed the gym environment.
    self.env = gym.make(self.params['env_name'])
    self.env.seed(rng_seed)

    # Episode cap: caller-provided value, else the env's registered limit.
    if not self.params['ep_len']:
        self.params['ep_len'] = self.env.spec.max_episode_steps

    # Discrete vs continuous action space.
    is_discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = is_discrete

    # Observation / action dimensionality for the agent's networks.
    obs_dim = self.env.observation_space.shape[0]
    if is_discrete:
        act_dim = self.env.action_space.n
    else:
        act_dim = self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = act_dim
    self.params['agent_params']['ob_dim'] = obs_dim

    print('\n ep_len: {0}'.format(self.params['ep_len']))
    print('\ndiscrete: {0}'.format(is_discrete))
    print('\nob_dim: {0}'.format(obs_dim))
    print('\nac_dim: {0}'.format(act_dim))

    # Frame rate for saved videos: MuJoCo timestep when the env exposes a
    # model, env metadata otherwise.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    #############
    ## AGENT
    #############

    agent_cls = self.params['agent_class']
    self.agent = agent_cls(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize every TF variable created above (agent networks etc.).
    self.sess.run(tf.global_variables_initializer())

    #############
    ## INIT WANDB
    #############

    self.init_wandb()
def __init__(self, params):
    """Set up the BC/DAgger trainer: TF session, seeds, env, wandb, and agent.

    Args:
        params: dict of run configuration (expert policy file, env name,
            batch sizes, network shape, logging options, ...). Mutated in
            place ('ep_len' and 'agent_params' entries are filled in).
    """
    #############
    ## INIT
    #############

    # Get params, create TF session
    self.params = params
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds for reproducibility (TF graph ops and numpy).
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    #############
    ## ENV
    #############

    # Make and seed the gym environment.
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes.
    # NOTE(review): unlike sibling trainers this unconditionally overrides any
    # user-supplied ep_len with the env's registered limit -- confirm intended.
    self.params['ep_len'] = self.env.spec.max_episode_steps

    # Is this env discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation timestep, will be used for video saving: MuJoCo envs expose
    # a model with an explicit timestep; otherwise read fps from env metadata.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    # Mirror the run configuration into wandb for experiment tracking.
    if self.params['use_wandb'] == 1:
        wandb.init(project="cs285_hw1", tensorboard=False)
        for key in ('expert_policy_file', 'env_name', 'exp_name', 'do_dagger',
                    'ep_len', 'num_agent_train_steps_per_iter', 'n_iter',
                    'batch_size', 'eval_batch_size', 'train_batch_size',
                    'n_layers', 'size', 'learning_rate', 'scalar_log_freq',
                    'use_gpu', 'which_gpu', 'max_replay_buffer_size', 'seed',
                    'n_eval'):
            setattr(wandb.config, key, self.params[key])

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize all of the TF variables (that were created by agent, etc.)
    self.sess.run(tf.global_variables_initializer())
def __init__(self, params):
    """Set up teacher/student training: logger, TF session, seeds, the three
    MiniGrid door-key envs, the two agents, and TF variables.

    Args:
        params: dict of run configuration; must contain 'logdir', 'use_gpu',
            'which_gpu', 'seed', 'ep_len', 'agent_class' and 'agent_params'.
            Mutated in place ('ep_len' and 'agent_params' entries filled in).
    """
    #############
    ## INIT
    #############

    # Get params, create logger, create TF session
    self.params = params
    self.logger = Logger(self.params['logdir'])
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds for reproducibility (TF graph ops and numpy).
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    #############
    ## ENV
    #############

    # Separate teacher/student training envs plus a fixed evaluation env.
    # NOTE(review): only the eval env is seeded (with a hard-coded seed);
    # the teacher/student envs never receive `seed` -- confirm intended.
    eval_seed = 10
    self.env_teacher, self.env_student = gym.make(
        "MiniGrid-TeacherDoorKeyEnv-5x5-v0"), gym.make(
            "MiniGrid-TeacherDoorKeyEnv-5x5-v0")
    self.eval_env = gym.make("MiniGrid-TeacherDoorKeyEnv-5x5-v0")
    # Evaluation uses a fixed goal position in student (non-teaching) mode.
    self.eval_env.end_pos = [3, 1]
    self.eval_env.is_teaching = False
    self.eval_env.seed(eval_seed)

    # Maximum length for episodes: fall back to the env's registered limit
    # when 'ep_len' is unset/0.
    self.params['ep_len'] = self.params['ep_len'] or self.env_teacher.spec.max_episode_steps
    # NOTE(review): dropped dead local MAX_VIDEO_LEN (assigned, never read).

    # Is this env discrete or continuous?
    discrete = isinstance(self.env_teacher.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes (debug print kept from original).
    print(self.env_teacher.observation_space)
    ob_dim = self.env_teacher.observation_space.shape[0]
    ac_dim = self.env_teacher.action_space.n if discrete else self.env_teacher.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    #############
    ## AGENT
    #############

    # Teacher and student share the same agent class and hyperparameters but
    # wrap their respective envs.
    agent_class = self.params['agent_class']
    self.agent_teacher = agent_class(self.sess, self.env_teacher,
                                     self.params['agent_params'])
    self.agent_student = agent_class(self.sess, self.env_student,
                                     self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize all TF variables created above (both agents' networks).
    tf.global_variables_initializer().run(session=self.sess)
def __init__(self, params):
    """Initialize session, (optionally wrapped) env, wandb logging, and agent."""
    #############
    ## INIT
    #############

    # Keep the config around, then spin up the TF session.
    self.params = params
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Seed TF and numpy from the configured seed.
    rng_seed = self.params['seed']
    tf.set_random_seed(rng_seed)
    np.random.seed(rng_seed)

    #############
    ## ENV
    #############

    self.env = gym.make(self.params['env_name'])
    if 'env_wrappers' in self.params:
        # Atari-style setup: record episodes under <logdir>/gym via the
        # Monitor wrapper, then apply the task-specific wrappers.
        # (Omit video_callable=False to render while training.)
        monitor_dir = os.path.join(self.params['logdir'], "gym")
        self.env = wrappers.Monitor(self.env, monitor_dir, force=True)
        self.env = params['env_wrappers'](self.env)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    self.env.seed(rng_seed)

    # Episode cap: explicit config value, else the env's registered limit.
    if not self.params['ep_len']:
        self.params['ep_len'] = self.env.spec.max_episode_steps

    # Discrete vs continuous actions; image vs flat observations.
    has_discrete_actions = isinstance(self.env.action_space, gym.spaces.Discrete)
    has_image_obs = len(self.env.observation_space.shape) > 2
    self.params['agent_params']['discrete'] = has_discrete_actions

    # Observation and action sizes.
    if has_image_obs:
        ob_dim = self.env.observation_space.shape
    else:
        ob_dim = self.env.observation_space.shape[0]
    if has_discrete_actions:
        ac_dim = self.env.action_space.n
    else:
        ac_dim = self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    print(
        "******************************************************************"
    )
    print("Action Dimension: ", self.params['agent_params']['ac_dim'])
    print("Observation Dimension: ", self.params['agent_params']['ob_dim'])
    print(
        "******************************************************************"
    )

    # Mirror the key run settings into wandb when tracking is enabled.
    if self.params['use_wandb'] == 1:
        wandb.init(project="cs285_hw3", tensorboard=False)
        wandb.config.env_name = self.params['env_name']
        wandb.config.ac_dim = self.params['agent_params']['ac_dim']
        wandb.config.ob_dim = self.params['agent_params']['ob_dim']

    #############
    ## AGENT
    #############

    agent_cls = self.params['agent_class']
    self.agent = agent_cls(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    # Initialize every TF variable created above (agent networks etc.).
    tf.global_variables_initializer().run(session=self.sess)
def __init__(self, params):
    """Wire up the trainer: logger, TF session, seeded env, agent, variables."""
    #############
    ## INIT
    #############

    # Hold on to the config; build the logger and TF session.
    self.params = params
    self.logger = Logger(self.params["logdir"])
    self.sess = create_tf_session(self.params["use_gpu"],
                                  which_gpu=self.params["which_gpu"])

    # One seed drives both TF and numpy randomness.
    rng_seed = self.params["seed"]
    tf.set_random_seed(rng_seed)
    np.random.seed(rng_seed)

    #############
    ## ENV
    #############

    # Create and seed the gym environment.
    self.env = gym.make(self.params["env_name"])
    self.env.seed(rng_seed)

    # Episode cap: explicit config value, else the env's registered limit.
    if not self.params["ep_len"]:
        self.params["ep_len"] = self.env.spec.max_episode_steps

    # Record whether the action space is discrete.
    has_discrete_actions = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params["agent_params"]["discrete"] = has_discrete_actions

    # Observation / action dimensionality for the agent's networks.
    obs_dim = self.env.observation_space.shape[0]
    if has_discrete_actions:
        act_dim = self.env.action_space.n
    else:
        act_dim = self.env.action_space.shape[0]
    self.params["agent_params"]["ac_dim"] = act_dim
    self.params["agent_params"]["ob_dim"] = obs_dim

    # Video frame rate: MuJoCo timestep when the env exposes a model,
    # env metadata otherwise.
    self.fps = (1 / self.env.model.opt.timestep if "model" in dir(self.env)
                else self.env.env.metadata["video.frames_per_second"])

    #############
    ## AGENT
    #############

    agent_cls = self.params["agent_class"]
    self.agent = agent_cls(self.sess, self.env, self.params["agent_params"])

    #############
    ## INIT VARS
    #############

    # Initialize all of the TF variables (that were created by agent, etc.)
    self.sess.run(tf.global_variables_initializer())