def __init__(self, index, seed, in_mdp_batch_size):
    """Per-worker replay buffer: empty storage, a fixed batch size, seeded RNG."""
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)
    # Deterministic and distinct seed for each worker index.
    set_seed(10 * seed + 2337 + index)
    self.in_mdp_batch_size = in_mdp_batch_size
    self.storage = []
    self.buffer_size = 0
def __init__(self, index, variant, candidate_size=10):
    """Evaluation worker: context encoder, Q/VAE/perturbation nets, seeded env.

    NOTE(review): the ``candidate_size`` argument is never read; the value
    used is ``variant['candidate_size']`` — confirm this is intentional.
    """
    ptu.set_gpu_mode(True)
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)

    # Blank out argv so libraries that parse CLI flags on import see nothing.
    import sys
    sys.argv = ['']
    del sys

    env_max_action = variant['env_max_action']
    obs_dim = variant['obs_dim']
    action_dim = variant['action_dim']
    latent_dim = variant['latent_dim']
    vae_latent_dim = 2 * action_dim

    # Context tuples are (obs, action, reward), optionally with next obs.
    if variant['use_next_obs_in_context']:
        encoder_input_size = 2 * obs_dim + action_dim + 1
    else:
        encoder_input_size = obs_dim + action_dim + 1
    mlp_encoder = MlpEncoder(
        hidden_sizes=[200, 200, 200],
        input_size=encoder_input_size,
        output_size=2 * variant['latent_dim'],
    )
    self.context_encoder = ProbabilisticContextEncoder(
        mlp_encoder, variant['latent_dim'])

    self.Qs = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=1,
    )
    self.vae_decoder = VaeDecoder(
        max_action=variant['env_max_action'],
        hidden_sizes=variant['vae_hidden_sizes'],
        input_size=obs_dim + vae_latent_dim + latent_dim,
        output_size=action_dim,
    )
    self.perturbation_generator = PerturbationGenerator(
        max_action=env_max_action,
        hidden_sizes=variant['perturbation_hidden_sizes'],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=action_dim,
    )

    self.use_next_obs_in_context = variant['use_next_obs_in_context']
    self.env = env_producer(variant['domain'], variant['seed'])
    self.num_evals = variant['num_evals']
    self.max_path_length = variant['max_path_length']
    self.vae_latent_dim = vae_latent_dim
    self.candidate_size = variant['candidate_size']

    # Deterministic, per-worker-distinct seeding of env and global RNGs.
    worker_seed = 10 * variant['seed'] + 1234 + index
    self.env.seed(worker_seed)
    set_seed(worker_seed)
    self.env.action_space.np_random.seed(123 + index)
def __init__(self, index, variant, candidate_size=10):
    """Evaluation worker with a deterministic context encoder ``self.f``.

    NOTE(review): the ``candidate_size`` argument is never read; the value
    used is ``variant['candidate_size']`` — confirm this is intentional.
    """
    ptu.set_gpu_mode(True)
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)

    # Blank out argv so libraries that parse CLI flags on import see nothing.
    import sys
    sys.argv = ['']
    del sys

    env_max_action = variant['env_max_action']
    obs_dim = variant['obs_dim']
    action_dim = variant['action_dim']
    latent_dim = variant['latent_dim']
    vae_latent_dim = 2 * action_dim

    # Context encoder over (obs, action, reward) transitions.
    self.f = MlpEncoder(
        g_hidden_sizes=variant['g_hidden_sizes'],
        g_input_sizes=obs_dim + action_dim + 1,
        g_latent_dim=variant['g_latent_dim'],
        h_hidden_sizes=variant['h_hidden_sizes'],
        latent_dim=latent_dim,
    )
    self.Qs = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=1,
    )
    self.vae_decoder = VaeDecoder(
        max_action=variant['env_max_action'],
        hidden_sizes=variant['vae_hidden_sizes'],
        input_size=obs_dim + vae_latent_dim + latent_dim,
        output_size=action_dim,
    )
    self.perturbation_generator = PerturbationGenerator(
        max_action=env_max_action,
        hidden_sizes=variant['perturbation_hidden_sizes'],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=action_dim,
    )

    self.env = env_producer(variant['domain'], variant['seed'])
    self.num_evals = variant['algo_params']['num_evals']
    self.max_path_length = variant['max_path_length']
    self.vae_latent_dim = vae_latent_dim
    self.num_trans_context = variant['num_trans_context']
    self.candidate_size = variant['candidate_size']
    self.seed = variant['seed']
    self.index = index

    # Deterministic, per-worker-distinct seeding of env and global RNGs.
    worker_seed = 10 * self.seed + 1234 + index
    self.env.seed(worker_seed)
    set_seed(worker_seed)
def __init__(self, index, seed, in_mdp_batch_size, use_next_obs_in_context=False):
    """Per-worker replay buffer; optionally keeps next obs in context tuples."""
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)
    # Deterministic and distinct seed for each worker index.
    set_seed(10 * seed + 2337 + index)
    self.in_mdp_batch_size = in_mdp_batch_size
    self.use_next_obs_in_context = use_next_obs_in_context
    self.storage = []
    self.buffer_size = 0
def __init__(self, index, seed, num_trans_context, in_mdp_batch_size,
             num_candidate_context=10, use_next_obs_in_context=True):
    """Per-worker replay buffer that also tracks candidate-context settings."""
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)
    # Deterministic and distinct seed for each worker index.
    set_seed(10 * seed + 2337 + index)
    self.num_candidate_context = num_candidate_context
    self.num_trans_context = num_trans_context
    self.in_mdp_batch_size = in_mdp_batch_size
    self.use_next_obs_in_context = use_next_obs_in_context
    self.storage = []
    self.buffer_size = 0
def __init__(self, index, variant, candidate_size=10):
    """BCQ evaluation worker: environment, policy, and per-worker seeding.

    NOTE(review): the ``candidate_size`` argument is unused in this
    constructor — confirm whether it can be dropped from the signature.
    """
    ptu.set_gpu_mode(True)
    # One CPU thread per worker so parallel workers don't oversubscribe cores.
    torch.set_num_threads(1)

    # Blank out argv so libraries that parse CLI flags on import see nothing.
    import sys
    sys.argv = ['']
    del sys

    self.env = env_producer(variant['domain'], variant['seed'])

    # The environment's spaces fix the BCQ network dimensions.
    state_dim = self.env.observation_space.low.size
    action_dim = self.env.action_space.low.size
    max_action = float(self.env.action_space.high[0])
    self.policy = BCQ(state_dim, action_dim, max_action,
                      **variant['policy_params'])

    self.num_evals = variant['num_evals']
    self.max_path_length = variant['max_path_length']
    self.seed = variant['seed']
    self.index = index

    # Deterministic, per-worker-distinct seeding of env and global RNGs.
    worker_seed = 10 * self.seed + 1234 + index
    self.env.seed(worker_seed)
    set_seed(worker_seed)
def run_experiment_here(
        experiment_function,
        variant,
        seed=None,
        use_gpu=True,
        gpu_id=0,
        # Logger params:
        snapshot_mode='last',
        snapshot_gap=1,
        force_randomize_seed=False,
        log_dir=None,
):
    """
    Run an experiment locally without any serialization, with resume support.

    :param experiment_function: Function. Called as
        ``experiment_function(variant, prev_exp_state)`` where
        ``prev_exp_state`` is the restored state (or None).
    :param variant: Dictionary passed in to `experiment_function`.
        Must contain 'log_dir'; its 'seed' entry is overwritten here.
    :param seed: Seed used for this experiment; randomized when None or
        when ``force_randomize_seed`` is set.
    :param use_gpu: Run with GPU.
    :param gpu_id: GPU index to use when ``use_gpu`` is set.
    :param snapshot_mode: Logger snapshot mode.
    :param snapshot_gap: Logger snapshot gap.
    :param force_randomize_seed: Ignore ``seed`` and draw a random one.
    :param log_dir: NOTE(review): this parameter is immediately shadowed by
        ``variant['log_dir']`` below and therefore has no effect — confirm.
    :return: whatever ``experiment_function`` returns.
    """
    torch.set_num_threads(1)
    if force_randomize_seed or seed is None:
        seed = random.randint(0, 100000)
    # NOTE(review): the seed is stored as a string here while set_seed()
    # below receives the int — confirm downstream consumers expect a str.
    variant['seed'] = str(seed)
    log_dir = variant['log_dir']
    # The logger's default mode is to
    # append to the text file if the file already exists
    # So this would not override and erase any existing
    # log file in the same log dir.
    logger.reset()
    setup_logger(
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        log_dir=log_dir,
    )
    # Assume this file is at the top level of the repo
    git_infos = get_git_infos([osp.dirname(__file__)])

    # Record the full call setting so a resumed run can verify it matches.
    run_experiment_here_kwargs = dict(
        variant=variant,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
    )
    exp_setting = dict(
        run_experiment_here_kwargs=run_experiment_here_kwargs
    )
    exp_setting_pkl_path = osp.join(log_dir, 'experiment.pkl')

    # Check if existing result exists
    prev_exp_state = None
    if osp.isfile(exp_setting_pkl_path):
        # Sanity check to make sure the experimental setting
        # of the saved data and the current experiment run is the same
        prev_exp_setting = load_pkl(exp_setting_pkl_path)
        logger.log(f'Log dir is not empty: {os.listdir(log_dir)}')
        if prev_exp_setting != exp_setting:
            # Refuse to resume into a directory produced by a different
            # configuration: restoring would mix incompatible state.
            logger.log("""Previous experimental setting is not the same as the current experimental setting. Very risky to try to reload the previous state. Exitting""")
            logger.log(f'Previous: {prev_exp_setting}')
            logger.log(f'Current: {exp_setting}')
            exit(1)
        try:
            prev_exp_state = load_gzip_pickle(
                osp.join(log_dir, 'params.zip_pkl'))
            logger.log('Trying to restore the state of the experiment program')
        except FileNotFoundError:
            # Settings matched but no saved state: start fresh.
            logger.log("""There is no previous experiment state available. Do not try to restore.""")
            prev_exp_state = None

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
    variant_log_path = osp.join(log_dir, 'variant.json')
    logger.log_variant(variant_log_path, variant)

    # Save the current experimental setting
    dump_pkl(exp_setting_pkl_path, exp_setting)
    log_git_infos(git_infos, log_dir)

    logger.log(f'Seed: {seed}')
    set_seed(seed)
    logger.log(f'Using GPU: {use_gpu}')
    set_gpu_mode(use_gpu, gpu_id)

    return experiment_function(variant, prev_exp_state)
assert len(bcq_buffers) == len(idx_list) # Load ensemble parameters ensemble_params_list = [] for idx in idx_list: params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl' params = load_gzip_pickle(params_dir) ensemble_params_list.extend( params['trainer']['network_ensemble_state_dict']) # set up logger variant['log_dir'] = get_log_dir(variant) logger.reset() setup_logger(log_dir=variant['log_dir'], snapshot_gap=100, snapshot_mode="gap") logger.log(f"Seed: {seed}") set_seed(seed) logger.log(f'Using GPU: {True}') set_gpu_mode(mode=True, gpu_id=0) experiment(variant, bcq_policies, bcq_buffers, ensemble_params_list, prev_exp_state=None)