Example #1
    def __init__(self, params):

        #############
        # INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(
            use_gpu=not self.params['no_gpu'],
            gpu_id=self.params['which_gpu']
        )

        self.total_env_steps = 0
        self.start_time = None
        self.log_video = False
        self.log_metrics = False
        self.initial_return = None

        #############
        # ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation time step, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        #############
        # AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.env, self.params['agent_params'])
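
A minimal construction sketch for the trainer above; the dictionary keys mirror the reads in this __init__, while the concrete values, the RL_Trainer class name, and SomeAgent are illustrative placeholders rather than anything from the source:

params = {
    'logdir': 'data/example_run',   # consumed by Logger
    'seed': 1,
    'no_gpu': False,
    'which_gpu': 0,
    'env_name': 'CartPole-v0',
    'ep_len': None,                 # None falls back to env.spec.max_episode_steps
    'agent_class': SomeAgent,       # hypothetical agent class taking (env, agent_params)
    'agent_params': {},             # 'discrete', 'ac_dim', 'ob_dim' are filled in by __init__
}
trainer = RL_Trainer(params)        # assumes the __init__ above belongs to a class named RL_Trainer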
Example #2
def mp_worker(result_queue, env, serialized_policy, max_path_length, render,
              render_mode):
    ptu.init_gpu(use_gpu=False)
    cls = serialized_policy["__class__"]
    policy = cls.deserialize(serialized_policy)
    while True:
        result = sample_trajectory(env, policy, max_path_length, render,
                                   render_mode)
        result_queue.put(result)
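
A hedged sketch of how mp_worker could be launched with the standard multiprocessing module; the env factory, the policy's serialize()/deserialize() pair, and the worker count are assumptions made for illustration:

import multiprocessing as mp

def launch_sampling_workers(make_env, policy, max_path_length, n_workers=4):
    # One shared queue collects sampled trajectories from all workers.
    result_queue = mp.Queue()
    workers = []
    for _ in range(n_workers):
        p = mp.Process(
            target=mp_worker,
            args=(result_queue, make_env(), policy.serialize(),
                  max_path_length, False, None),  # render=False, render_mode=None
            daemon=True,
        )
        p.start()
        workers.append(p)
    return result_queue, workers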
Example #3
    def __init__(self, params):
        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu()

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

        # Observation and action sizes
        # ob_dim = self.env.observation_space.shape[0]
        # ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        # self.params['agent_params']['ac_dim'] = ac_dim
        # self.params['agent_params']['ob_dim'] = ob_dim

        #############
        ## AGENT
        #############

        n = 3
        m = self.params['bins']
        self.input_features = 11
        self.hidden_features = 3
        self.costs_vertices = np.random.randn(n, m) * 0.0005
        self.costs_edges = np.random.randn(n, n, m, m) * 0.0005

        self.edges = list(itertools.combinations(range(n), r=2))

        # instantiate the graphical model neural network
        self.gm_net = GraphicalModelNet(self.params['eta'], self.params['emp_epoch'], self.input_features,
                                        self.hidden_features,
                                        self.costs_vertices, self.costs_edges, self.edges, self.params['bins'],
                                        ptu.device)

        # load pre-trained dicts if available
        if self.params['load_dict']:
            print("Loading stored dict...")
            load_prefix = 'cs285/data/emp_dagger_buckets_Hopper-v2_20_epoch7_eta40'
            self.gm_net.load_state_dict(torch.load(load_prefix + '_state_dict.pt'))
            self.gm_net.w1 = torch.load(load_prefix + '_w1.pt', map_location=ptu.device)
            self.gm_net.w2 = torch.load(load_prefix + '_w2.pt', map_location=ptu.device)
Example #4
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params["logdir"])

        # Set random seeds
        seed = self.params["seed"]
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params["no_gpu"],
                     gpu_id=self.params["which_gpu"])

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params["env_name"])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params["ep_len"] = self.params[
            "ep_len"] or self.env.spec.max_episode_steps
        MAX_VIDEO_LEN = self.params[
            "ep_len"] if not "Humanoid" in self.params["env_name"] else 1000

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params["agent_params"]["discrete"] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params["agent_params"]["ac_dim"] = ac_dim
        self.params["agent_params"]["ob_dim"] = ob_dim

        # simulation timestep, will be used for video saving
        if "model" in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata["video.frames_per_second"]

        #############
        ## AGENT
        #############

        agent_class = self.params["agent_class"]
        self.agent = agent_class(self.env, self.params["agent_params"])
Example #5
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                     gpu_id=self.params['which_gpu'])

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # use matplotlib's non-interactive Agg backend unless this is the 'obstacles' env (it plots locally)
        if not (self.params['env_name'] == 'obstacles-cs285-v0'):
            import matplotlib
            matplotlib.use('Agg')

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        elif 'video.frames_per_second' in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata['video.frames_per_second']
        else:
            self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.env, self.params['agent_params'])
Example #6
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                     gpu_id=self.params['which_gpu'])

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params['env_name'])
        self.eval_env = gym.make(self.params['env_name'])
        if not ('pointmass' in self.params['env_name']):
            import matplotlib
            matplotlib.use('Agg')
            self.env.set_logdir(self.params['logdir'] + '/expl_')
            self.eval_env.set_logdir(self.params['logdir'] + '/eval_')

        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params['logdir'],
                                                     "gym"),
                                        force=True)
            self.eval_env = wrappers.Monitor(self.eval_env,
                                             os.path.join(
                                                 self.params['logdir'], "gym"),
                                             force=True)
            self.env = params['env_wrappers'](self.env)
            self.eval_env = params['env_wrappers'](self.eval_env)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
            self.env = wrappers.Monitor(self.env,
                                        os.path.join(self.params['logdir'],
                                                     "gym"),
                                        write_upon_reset=True)  #, force=True)
            self.eval_env = wrappers.Monitor(self.eval_env,
                                             os.path.join(
                                                 self.params['logdir'], "gym"),
                                             write_upon_reset=True)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        self.env.seed(seed)
        self.eval_env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        elif 'video.frames_per_second' in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata['video.frames_per_second']
        else:
            self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.env, self.params['agent_params'])
Example #7
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger, create TF session
        self.params = params
        self.logger = Logger(self.params['logdir'])
        self.sess = create_tf_session(self.params['use_gpu'],
                                      which_gpu=self.params['which_gpu'])

        # Set random seeds
        seed = self.params['seed']
        tf.set_random_seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                     gpu_id=self.params['which_gpu'])

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes
        ob_dim = self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        else:
            self.fps = self.env.env.metadata['video.frames_per_second']

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.sess, self.env,
                                 self.params['agent_params'])

        #############
        ## INIT VARS
        #############

        ## initialize all of the TF variables (that were created by the agent, etc.)
        self.sess.run(tf.global_variables_initializer())
Example #8
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                     gpu_id=self.params['which_gpu'])

        #############
        ## ENV
        #############

        # Make the gym environment
        #register_custom_envs()
        self.env = City((self.params['width'], self.params['height']),
                        self.params['n_drivers'], self.params['n_restaurants'])
        """
        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(self.env, os.path.join(self.params['logdir'], "gym"), force=True)
            self.env = params['env_wrappers'](self.env)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
            self.env = wrappers.Monitor(self.env, os.path.join(self.params['logdir'], "gym"), force=True)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        """
        self.env.seed(seed)

        # use matplotlib's non-interactive Agg backend unless this is the 'obstacles' env (it plots locally)
        if not (self.params['env_name'] == 'obstacles-cs285-v0'):
            import matplotlib
            matplotlib.use('Agg')

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env multi-binary, or discrete?
        #multi_bi = isinstance(self.env.action_space, gym.spaces.MultiBinary)
        is_city = True
        # Are the observations images?
        img = False

        self.params['agent_params']['is_city'] = is_city

        # Observation and action sizes

        #ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        #ac_dim = self.env.action_space.n if multi_bi else self.env.action_space.shape[0]
        #ob_dim = self.env.observation_space.shape[0]
        #ac_dim = self.env.action_space.shape[0]

        self.params['agent_params']['n_drivers'] = self.params['n_drivers']
        self.params['agent_params']['ac_dim'] = self.params['n_drivers']
        self.params['agent_params']['ob_dim'] = (self.params['n_drivers'],
                                                 (3 + 2 * MAX_CAP + 5 +
                                                  5 * MAX_CAND_NUM))
        self.params['agent_params']['shared_exp'] = self.params['shared_exp']
        self.params['agent_params']['shared_exp_lambda'] = self.params['shared_exp_lambda']
        self.params['agent_params']['size_ac'] = self.params['size_ac']
        self.params['agent_params']['size_cr'] = self.params['size_cr']
        # simulation timestep, will be used for video saving
        #if 'model' in dir(self.env):
        #    self.fps = 1/self.env.model.opt.timestep
        #elif 'env_wrappers' in self.params:
        #    self.fps = 30 # This is not actually used when using the Monitor wrapper
        #elif 'video.frames_per_second' in self.env.env.metadata.keys():
        #    self.fps = self.env.env.metadata['video.frames_per_second']
        #else:
        #    self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        self.agent = agent_class(self.env, self.params['agent_params'])
Example #9
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params["logdir"])

        # Set random seeds
        seed = self.params["seed"]
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(use_gpu=not self.params["no_gpu"],
                     gpu_id=self.params["which_gpu"])

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params["env_name"])
        if "env_wrappers" in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params["logdir"], "gym"),
                force=True,
                video_callable=(None if self.params["video_log_freq"] > 0 else
                                False),
            )
            self.env = params["env_wrappers"](self.env)
            self.mean_episode_reward = -float("nan")
            self.best_mean_episode_reward = -float("inf")
        if "non_atari_colab_env" in self.params and self.params[
                "video_log_freq"] > 0:
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params["logdir"], "gym"),
                force=True,
                video_callable=(None if self.params["video_log_freq"] > 0 else
                                False),
            )
            self.mean_episode_reward = -float("nan")
            self.best_mean_episode_reward = -float("inf")

        self.env.seed(seed)

        # use matplotlib's non-interactive Agg backend unless this is the 'obstacles' env (it plots locally)
        if not (self.params["env_name"] == "obstacles-cs285-v0"):
            import matplotlib

            matplotlib.use("Agg")

        # Maximum length for episodes
        self.params["ep_len"] = self.params[
            "ep_len"] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params["ep_len"]

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params["agent_params"]["discrete"] = discrete

        # Observation and action sizes

        ob_dim = (self.env.observation_space.shape
                  if img else self.env.observation_space.shape[0])
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params["agent_params"]["ac_dim"] = ac_dim
        self.params["agent_params"]["ob_dim"] = ob_dim

        # simulation timestep, will be used for video saving
        if "model" in dir(self.env):
            self.fps = 1 / self.env.model.opt.timestep
        elif "env_wrappers" in self.params:
            self.fps = 30  # This is not actually used when using the Monitor wrapper
        elif "video.frames_per_second" in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata["video.frames_per_second"]
        else:
            self.fps = 10

        #############
        ## AGENT
        #############

        agent_class = self.params["agent_class"]
        self.agent = agent_class(self.env, self.params["agent_params"])
Example #10
    def __init__(self, params):
        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu()

        #############
        ## ENV
        #############

        # Make the gym environment
        self.env = gym.make(self.params['env_name'])
        self.env.seed(seed)

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

        # Observation and action sizes
        # ob_dim = self.env.observation_space.shape[0]
        # ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        # self.params['agent_params']['ac_dim'] = ac_dim
        # self.params['agent_params']['ob_dim'] = ob_dim

        #############
        ## AGENT
        #############

        n = self.params['vertices'] * self.params['bins']
        m = 2
        self.input_features = self.params['obs_space']
        self.hidden_features = 10
        self.costs_vertices = np.ones((n, m)) * 0.005
        self.costs_edges = np.ones((n, n, m, m)) * 0.005
        for row in range(3):
            for i in range(row * self.params['bins'],
                           row * self.params['bins'] + self.params['bins']):
                for j in range(row * self.params['bins'],
                               row * self.params['bins'] +
                               self.params['bins']):
                    if i != j:
                        self.costs_edges[i, j] = [[.3, .03], [.03, -.3]]
                        self.costs_edges[j, i] = [[.3, .03], [.03, -.3]]

        for i in range(n):
            self.costs_vertices[i] = [.3, .03]

        self.edges = list(itertools.combinations(range(n), r=2))

        # instantiate the graphical model neural network
        self.gm_net = GraphicalModelNet(
            self.params['eta'], self.params['emp_epoch'], self.input_features,
            self.hidden_features, self.costs_vertices, self.costs_edges,
            self.edges, self.params['bins'], ptu.device)

        # load pre-trained dicts if available
        if self.params['load_dict']:
            print("Loading stored dict...")
            self.gm_net.load_state_dict(
                torch.load('cs285/data/' + self.params['load_path'] +
                           '_state_dict.pt'))
            self.gm_net.w1 = torch.load('cs285/data/' +
                                        self.params['load_path'] + '_w1.pt',
                                        map_location=ptu.device)
            self.gm_net.w2 = torch.load('cs285/data/' +
                                        self.params['load_path'] + '_w2.pt',
                                        map_location=ptu.device)
Example #11
    def __init__(self, params):

        #############
        ## INIT
        #############

        # Get params, create logger
        self.params = params
        self.logger = Logger(self.params['logdir'])

        # Set random seeds
        seed = self.params['seed']
        np.random.seed(seed)
        torch.manual_seed(seed)
        ptu.init_gpu(
            use_gpu=not self.params['no_gpu'],
            gpu_id=self.params['which_gpu']
        )

        #############
        ## ENV
        #############

        # Make the gym environment
        register_custom_envs()
        self.env = gym.make(self.params['env_name'])
        if 'env_wrappers' in self.params:
            # These operations are currently only for Atari envs
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params['logdir'], "gym"),
                force=True,
                video_callable=(None if self.params['video_log_freq'] > 0 else False),
            )
            self.env = params['env_wrappers'](self.env)
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')
        if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
            self.env = wrappers.Monitor(
                self.env,
                os.path.join(self.params['logdir'], "gym"),
                force=True,
                video_callable=(None if self.params['video_log_freq'] > 0 else False),
            )
            self.mean_episode_reward = -float('nan')
            self.best_mean_episode_reward = -float('inf')

        self.env.seed(seed)

        # use matplotlib's non-interactive Agg backend unless this is the 'obstacles' env (it plots locally)
        if not (self.params['env_name'] == 'obstacles-cs285-v0'):
            import matplotlib
            matplotlib.use('Agg')

        # Maximum length for episodes
        self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
        global MAX_VIDEO_LEN
        MAX_VIDEO_LEN = self.params['ep_len']

        # Is this env continuous, or discrete?
        discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
        # Are the observations images?
        img = len(self.env.observation_space.shape) > 2

        self.params['agent_params']['discrete'] = discrete

        # Observation and action sizes

        ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
        ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
        self.params['agent_params']['ac_dim'] = ac_dim
        self.params['agent_params']['ob_dim'] = ob_dim

        # simulation timestep, will be used for video saving
        if 'model' in dir(self.env):
            self.fps = 1/self.env.model.opt.timestep
        elif 'env_wrappers' in self.params:
            self.fps = 30 # This is not actually used when using the Monitor wrapper
        elif 'video.frames_per_second' in self.env.env.metadata.keys():
            self.fps = self.env.env.metadata['video.frames_per_second']
        else:
            self.fps = 10


        #############
        ## AGENT
        #############

        agent_class = self.params['agent_class']
        if 'hparam' in self.params['exp_name']:
            print('changing optimizer')
            self.params['agent_params']['optimizer_spec'] = lander_optimizer(self.params['lr'])
            print('using lr = ', self.params['lr'])
        self.agent = agent_class(self.env, self.params['agent_params'])
Example #12
    def __init__(self, params):
        self.params = params
        self.params['agent_params']['gamma'] = self.params['gamma']
        # Set random seeds
        seed = self.params['seed']
        torch.manual_seed(seed)
        # Setup GPU
        ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                     gpu_id=self.params['which_gpu'])
        self.logger = Logger(self.params['logdir'])
        #############
        ## ENV
        #############
        groups = self.params['groups']
        self.env_name = self.params['env']
        if self.env_name == 'StarCraft2Env':
            from smac.env import StarCraft2Env
            from cs285.infrastructure.wrappers import SC2Wrapper
            self.env = SC2Wrapper(
                StarCraft2Env(map_name=self.params['env_map'], seed=seed),
                groups=groups)
        elif self.env_name == "Paticles":
            from multiagent.environment import MultiAgentEnv
            import multiagent.scenarios as scenarios
            from cs285.infrastructure.wrappers import ParticlesWrapper
            # scenario name is assumed to come from params here, mirroring the SC2 branch above
            scenario = scenarios.load(self.params['env_map'] + ".py").Scenario()
            world = scenario.make_world()
            self.env = ParticlesWrapper(
                MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation),
                groups=groups)
        elif self.env_name == "Test":
            if self.params['random_init_state']:
                init_state = [
                    random.randint(2, 9),
                    random.randint(2, 9),
                    random.randint(2, 9)
                ]
            else:
                init_state = self.params['init_state']
            from cs285.infrastructure.wrappers import Test
            self.env = Test(groups,
                            init_state=init_state,
                            goal_state=self.params['goal_state'])

        #############
        ## AGENT
        #############
        self.agents = []
        agent_critics = []
        for g_idx in range(len(groups)):
            ob_dim = len(self.env.observation_space[g_idx]['obs'])
            ac_dim = len(self.env.action_space[g_idx])
            avail_ac_dim = sum([ac.n for ac in self.env.action_space[g_idx]])
            self.params['agent_params']['n_agents'] = groups[g_idx]
            self.params['agent_params']['actor']['avail_ac_dim'] = avail_ac_dim
            self.params['agent_params']['actor']['ac_dim'] = ac_dim
            self.params['agent_params']['actor']['ob_dim'] = ob_dim
            self.params['agent_params']['critic']['ob_dim'] = ob_dim
            self.params['agent_params']['critic']['gamma'] = self.params[
                'agent_params']['gamma']
            agent = GACAgent(self.env, self.params['agent_params'],
                             groups[g_idx])
            self.agents.append(agent)
            agent_critics.append(agent.critic)
        self.centralized_mixer = BootstrappedSumCriticMixer(
            self.params['agent_params']['critic'], agent_critics)