Example #1
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env.spec.max_episode_steps)
        global MAX_VIDEO_LEN  # assumed module-level constant; without this, the next line only binds a local
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # Simulation timestep, used for video saving: MuJoCo envs expose
        # .model, whose physics timestep gives the frame rate; otherwise fall
        # back to the metadata key used by classic gym envs.
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        tf.global_variables_initializer().run(session=self.sess)
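
A minimal sketch of how a trainer like this might be driven. The class name RL_Trainer and the agent class PGAgent are hypothetical names for illustration; only the params keys are taken from the example itself:

params = {
    'logdir': '/tmp/rl_run',     # where the Logger writes
    'use_gpu': False,
    'which_gpu': 0,
    'seed': 1,
    'env_name': 'CartPole-v0',
    'ep_len': None,              # None falls back to env.spec.max_episode_steps
    'agent_class': PGAgent,      # hypothetical agent class
    'agent_params': {},          # discrete / ob_dim / ac_dim are filled in by __init__
}
trainer = RL_Trainer(params)     # hypothetical class containing the __init__ above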
Example #2
    def __init__(self, params):

        #############
        # INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        # ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params['logdir'],
                                                     "gym"),
                                        force=True)
            self.env = params['env_wrappers'](self.env)
            self.mean_episode_reward = -float('nan')  # NaN until episodes are logged (negating NaN is a no-op)
            self.best_mean_episode_reward = -float('inf')
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env.spec.max_episode_steps)
        global MAX_VIDEO_LEN  # assumed module-level constant; without this, the next line only binds a local
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = (self.env.observation_space.shape if img
                  else self.env.observation_space.shape[0])
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        #############
        # AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        # INIT VARS
        #############

        tf.global_variables_initializer().run(session=self.sess)
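
The 'env_wrappers' entry used above is a callable applied on top of the Monitor-wrapped env. A minimal sketch of such a callable, assuming classic gym's wrapper API; the reward-clipping choice is illustrative, not what the example actually passes:

import gym
import numpy as np

class ClipReward(gym.RewardWrapper):
    # Clip rewards to {-1, 0, +1}, a common Atari preprocessing step.
    def reward(self, reward):
        return float(np.sign(reward))

def example_env_wrappers(env):
    # Same signature the example expects: env in, wrapped env out.
    return ClipReward(env)

# e.g. params['env_wrappers'] = example_env_wrappers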
Example #3
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = make_envs(env_generator(self.params['env_name'], seed),
                             self.params['n_worker'])
        # self.env = gym.make(self.params['env_name'])
        # self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env.spec.max_episode_steps)

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim
        print("******************************************************************")
        print("Action Dimension: ", self.params['agent_params']['ac_dim'])
        print("Observation Dimension: ", self.params['agent_params']['ob_dim'])
        print("******************************************************************")

        if self.params['use_wandb'] == 1:
            wandb.init(project="cs285_hw2", tensorboard=False)
            # Mirror the run's hyperparameters into the wandb config
            for key in ('env_name', 'exp_name', 'n_iter', 'reward_to_go',
                        'nn_baseline', 'standardize_advantages', 'discount',
                        'num_agent_train_steps_per_iter', 'learning_rate',
                        'n_layers', 'size', 'ep_len', 'seed', 'use_gpu',
                        'which_gpu', 'n_eval', 'render_after_training',
                        'n_worker', 'gae', 'gae_gamma', 'gae_lambda'):
                setattr(wandb.config, key, self.params[key])

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        tf.global_variables_initializer().run(session=self.sess)
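
make_envs and env_generator are helpers that are not shown in this example. A plausible sketch of env_generator, assuming it returns a zero-argument constructor ("thunk") that builds and seeds one worker env:

def env_generator(env_name, seed):
    def _thunk():
        env = gym.make(env_name)
        env.seed(seed)
        return env
    return _thunk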
Example #4
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env.spec.max_episode_steps)

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        print('\nep_len: {0}'.format(self.params['ep_len']))
        print('discrete: {0}'.format(discrete))
        print('ob_dim: {0}'.format(ob_dim))
        print('ac_dim: {0}'.format(ac_dim))

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        # Initialize all of the TF variables (created by the agent above)
        self.sess.run(tf.global_variables_initializer())

        #############
        ## INIT WANDB
        #############
        self.init_wandb()
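
init_wandb is called above but not shown. A minimal version consistent with the wandb usage in the other examples might look like this; the project name and the filtering rule are assumptions:

def init_wandb(self):
    wandb.init(project="cs285", tensorboard=False)
    # Log only plain scalar/string params; objects such as agent_class are skipped.
    wandb.config.update({k: v for k, v in self.params.items()
                         if isinstance(v, (int, float, str, bool))})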
Example #5
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes (note: unlike the other examples, this
        # overrides any user-provided ep_len unconditionally)
        self.params['ep_len'] = self.env.spec.max_episode_steps

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        if self.params['use_wandb'] == 1:
            wandb.init(project="cs285_hw1", tensorboard=False)
            # Mirror the run's hyperparameters into the wandb config
            for key in ('expert_policy_file', 'env_name', 'exp_name',
                        'do_dagger', 'ep_len',
                        'num_agent_train_steps_per_iter', 'n_iter',
                        'batch_size', 'eval_batch_size', 'train_batch_size',
                        'n_layers', 'size', 'learning_rate',
                        'scalar_log_freq', 'use_gpu', 'which_gpu',
                        'max_replay_buffer_size', 'seed', 'n_eval'):
                setattr(wandb.config, key, self.params[key])

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        # Initialize all of the TF variables (created by the agent above)
        self.sess.run(tf.global_variables_initializer())
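
create_tf_session is used by every example but never shown. A sketch of what such a TF1 helper typically does; all details here are assumptions, not the course's actual implementation:

import tensorflow as tf

def create_tf_session(use_gpu, which_gpu=0):
    if use_gpu:
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.visible_device_list = str(which_gpu)  # pin one GPU
        config.gpu_options.allow_growth = True  # allocate GPU memory lazily
    else:
        config = tf.ConfigProto(device_count={'GPU': 0})  # CPU only
    return tf.Session(config=config)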
Example #6
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        #self.env = FlatObsWrapper(gym.make(self.params['env_name']))
        #self.env = gym.make(self.params['env_name'])
        #self.env.seed(seed)

        eval_seed = 10

        self.env_teacher = gym.make("MiniGrid-TeacherDoorKeyEnv-5x5-v0")
        self.env_student = gym.make("MiniGrid-TeacherDoorKeyEnv-5x5-v0")

        self.eval_env = gym.make("MiniGrid-TeacherDoorKeyEnv-5x5-v0")
        # end_pos and is_teaching are custom attributes of this env, not
        # part of the standard gym API
        self.eval_env.end_pos = [3, 1]
        self.eval_env.is_teaching = False
        self.eval_env.seed(eval_seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env_teacher.spec.max_episode_steps)
        global MAX_VIDEO_LEN  # assumed module-level constant, as in the other examples
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env_teacher.action_space,
                              gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        print(self.env_teacher.observation_space)
        ob_dim = self.env_teacher.observation_space.shape[0]
        ac_dim = (self.env_teacher.action_space.n if discrete
                  else self.env_teacher.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # (the fps detection / video saving used in the other examples is
        # skipped for this env)

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        # Both agents share one TF session (and thus one graph), so
        # agent_class presumably scopes its variables to avoid name clashes.
        self.agent_teacher = agent_class(self.sess, self.env_teacher,
                                         self.params['agent_params'])
        self.agent_student = agent_class(self.sess, self.env_student,
                                         self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        tf.global_variables_initializer().run(session=self.sess)
Example #7
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params['logdir'], "gym"),
                force=True
            )  # passing video_callable=False here would disable video recording during training
            self.env = params['env_wrappers'](self.env)
            self.mean_episode_reward = -float('nan')  # NaN until episodes are logged (negating NaN is a no-op)
            self.best_mean_episode_reward = -float('inf')
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = (self.params['ep_len']
                                 or self.env.spec.max_episode_steps)

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = (self.env.observation_space.shape if img
                  else self.env.observation_space.shape[0])
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        print("******************************************************************")
        print("Action Dimension: ", self.params['agent_params']['ac_dim'])
        print("Observation Dimension: ", self.params['agent_params']['ob_dim'])
        print("******************************************************************")

        if self.params['use_wandb'] == 1:
            wandb.init(project="cs285_hw3", tensorboard=False)
            wandb.config.env_name = self.params['env_name']
            wandb.config.ac_dim = self.params['agent_params']['ac_dim']
            wandb.config.ob_dim = self.params['agent_params']['ob_dim']

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        tf.global_variables_initializer().run(session=self.sess)
Example #8
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params["logdir"])
        self.sess = create_tf_session(self.params["use_gpu"],
                                      which_gpu=self.params["which_gpu"])

        # Set random seeds
        seed = self.params["seed"]
        tf.set_random_seed(seed)
        np.random.seed(seed)

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params["env_name"])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params["ep_len"] = self.params[
            "ep_len"] or self.env.spec.max_episode_steps

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params["agent_params"]["discrete"] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = (self.env.action_space.n if discrete
                  else self.env.action_space.shape[0])
        self.params["agent_params"]["ac_dim"] = ac_dim
        self.params["agent_params"]["ob_dim"] = ob_dim

        # simulation timestep, will be used for video saving
        if "model" in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata["video.frames_per_second"]

        #############
        ## AGENT
        #############

        agent_class = self.params["agent_class"]
        self.agent = agent_class(self.sess, self.env,
                                 self.params["agent_params"])

        #############
        ## INIT VARS
        #############

        # Initialize all of the TF variables (created by the agent above)
        self.sess.run(tf.global_variables_initializer())
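
Note: the two initialization idioms that appear across these examples, tf.global_variables_initializer().run(session=self.sess) and self.sess.run(tf.global_variables_initializer()), are equivalent in TF1; both execute the op that assigns every variable its initial value.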