def test_train_save_load() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, when training is resumed
    from a saved checkpoint.
    """

    # Check that desired results name is available.
    save_name = "test_train_save_load"
    check_results_name(save_name)

    # Load default training config and run training for the first time.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["save_name"] = save_name

    # Run training to get checkpoint.
    train(config)

    # Modify config for second training run.
    config["load_from"] = save_name
    config["save_name"] = None
    config["baseline_metrics_filename"] = "cartpole_save_load"

    # Run resumed training.
    train(config)

    # Clean up.
    os.system("rm -rf %s" % save_dir_from_name(save_name))

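# NOTE: `check_results_name` and `save_dir_from_name` are imported from elsewhere in
# this repository and aren't shown here. A minimal sketch of the assumed contract
# (hypothetical implementation, for illustration only):
#
#     def save_dir_from_name(save_name: str) -> str:
#         # Results for an experiment named `save_name` live in their own directory.
#         return os.path.join("results", save_name)
#
#     def check_results_name(save_name: str) -> None:
#         # Fail fast if results were already saved under this name.
#         if os.path.isdir(save_dir_from_name(save_name)):
#             raise ValueError("Results name '%s' is already in use." % save_name)
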
def test_train_cartpole() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "cartpole"

    # Run training.
    train(config)

def test_train_MT10() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10"

    # Run training.
    train(config)

def test_train_MT10_splitting_v2() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with splitting v2 network architecture.
    """

    # Load default training config.
    with open(SPLITTING_V2_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10_splitting_v2"

    # Run training.
    train(config)

def test_train_lunar_lander_gpu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process on GPU.
    """

    # Load default training config.
    with open(LUNAR_LANDER_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["cuda"] = True
    config["baseline_metrics_filename"] = "lunar_lander_gpu"

    # Run training.
    train(config)

def test_train_cartpole_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, with a recurrent policy.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["baseline_metrics_filename"] = "cartpole_recurrent"

    # Run training.
    train(config)

def test_train_MT10_splitting_v2_exclude_task() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with splitting v2 network architecture
    where the task index is excluded from the network input.
    """

    # Load default training config.
    with open(SPLITTING_V2_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["include_task_index"] = False
    config["baseline_metrics_filename"] = "MT10_splitting_v2_exclude_task"

    # Run training.
    train(config)

def test_train_MT10_trunk_exclude_task() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with shared trunk architecture, while
    excluding the task index from the network input.
    """

    # Load default training config.
    with open(TRUNK_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["include_task_index"] = False
    config["baseline_metrics_filename"] = "MT10_trunk_exclude_task"

    # Run training.
    train(config)

def test_train_MT10_save_memory() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process, while using the memory
    saving version of the MT10 benchmark.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10_save_memory"
    config["save_memory"] = True

    # Run training.
    train(config)

def test_train_MT10_trunk_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with recurrent shared trunk architecture.
    """

    # Load default training config.
    with open(TRUNK_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 32
    config["baseline_metrics_filename"] = "MT10_trunk_recurrent"

    # Run training.
    train(config)

def test_train_MT10_multi() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running multiple processes.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["baseline_metrics_filename"] = "MT10_multi"

    # Run training.
    train(config)

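# NOTE: The multi-process tests here scale the config in two directions at once:
# `num_updates` is divided by MP_FACTOR while `num_processes` is multiplied by it.
# The assumption is that this keeps the total amount of collected experience fixed:
#
#     (num_updates / MP_FACTOR) * (num_processes * MP_FACTOR)
#         == num_updates * num_processes
#
# so each multi-process run gathers as much experience as its single-process
# counterpart, just spread across more workers.
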
def test_train_cartpole_multi_gpu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running multiple processes on GPU.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["cuda"] = True
    config["baseline_metrics_filename"] = "cartpole_multi_gpu"

    # Run training.
    train(config)

def test_train_cartpole_relu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, with a relu activation
    function in the networks.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["actor_config"]["activation"] = "relu"
    config["architecture_config"]["critic_config"]["activation"] = "relu"
    config["baseline_metrics_filename"] = "cartpole_relu"

    # Run training.
    train(config)

def test_train_lunar_lander_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process, with a recurrent
    policy.
    """

    # Load default training config.
    with open(LUNAR_LANDER_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["num_minibatch"] = 1
    config["baseline_metrics_filename"] = "lunar_lander_recurrent"

    # Run training.
    train(config)

def test_train_MT10_multi_gpu_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running multiple processes on GPU, with a
    recurrent policy.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["cuda"] = True
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["baseline_metrics_filename"] = "MT10_multi_gpu_recurrent"

    # Run training.
    train(config)

def test_save_load_multi() -> None:
    """
    Test saving/loading functionality for training when multiprocessing.
    """

    # Check that desired results name is available.
    save_name = "test_save_load_multi"
    check_results_name(save_name)

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config and run training to save checkpoint.
    config["save_name"] = save_name
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    checkpoint = train(config)
    first_metrics = checkpoint["metrics"].state()

    # Run training for the second time, and load from checkpoint.
    config["load_from"] = save_name
    config["save_name"] = None
    config["num_updates"] *= 2
    checkpoint = train(config)
    second_metrics = checkpoint["metrics"].state()

    # Compare metrics.
    assert list(first_metrics.keys()) == list(second_metrics.keys())
    for metric_name in first_metrics.keys():
        first_metric = first_metrics[metric_name]
        second_metric = second_metrics[metric_name]
        assert first_metric["maximum"] <= second_metric["maximum"]
        for key in ["history", "mean", "stdev"]:
            n = len(first_metric[key])
            assert first_metric[key][:n] == second_metric[key][:n]

    # Clean up.
    os.system("rm -rf %s" % save_dir_from_name(save_name))

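# NOTE: The comparisons in `test_save_load_multi` assume that `metrics.state()`
# returns a nested dictionary keyed by metric name, e.g. (metric name and values here
# are hypothetical):
#
#     {
#         "train_reward": {
#             "maximum": 200.0,
#             "history": [12.3, 48.7, ...],
#             "mean": [12.3, 30.5, ...],
#             "stdev": [0.0, 18.2, ...],
#         },
#         ...
#     }
#
# Since the resumed run extends the original run, each metric's "history", "mean",
# and "stdev" from the first run must be a prefix of the second run's.
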
if __name__ == "__main__":
    # Parse config filename from command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        type=str,
        help="Command to run. One of 'train', 'tune', or 'meta_train'.",
    )
    parser.add_argument(
        "config_filename",
        type=str,
        help="Name of config file to load from.",
    )
    args = parser.parse_args()

    # Load config file.
    with open(args.config_filename, "r") as config_file:
        config = json.load(config_file)

    # Run specified command.
    if args.command == "train":
        train(config)
    elif args.command == "tune":
        tune(config)
    elif args.command == "meta_train":
        meta_train(config)
    else:
        raise ValueError("Unsupported command: '%s'" % args.command)

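# Example invocations of the entry point above (script and config filenames are
# hypothetical; substitute the actual paths used in this repository):
#
#     python train.py train configs/cartpole.json
#     python train.py tune configs/tune_cartpole.json
#     python train.py meta_train configs/meta_train_MT10.json
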
def meta_train(config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """
    Main function for meta_train.py, runs meta-training and meta-testing over the
    train() function from meta/train/train.py. The expected entries of `config` are
    documented below. Returns a dictionary holding values of performance metrics from
    training and evaluation.

    Parameters
    ----------
    meta_train_config : Dict[str, Any]
        Config to pass to train() for meta-training, without the common settings
        listed below such as `cuda` and `seed`.
    meta_test_config : Dict[str, Any]
        Config to pass to train() for meta-testing, without the common settings
        listed below such as `cuda` and `seed`. Note that if any architecture
        configuration is present within `meta_test_config`, it will be ignored and
        the architecture specified in `meta_train_config` will be used instead.
    cuda : bool
        Whether or not to train on GPU.
    seed : int
        Random seed.
    load_from : str
        Path of checkpoint file (as saved by this function) to load from in order to
        resume training. NOTE: This should be included in the config file but isn't
        yet supported for meta-training.
    print_freq : int
        Number of training iterations between metric printing.
    save_freq : int
        Number of training iterations between saving of intermediate progress. If
        None, no saving of intermediate progress will occur. Note that if `save_name`
        is None, this value is ignored.
    save_name : str
        Name to save experiments under. Each experiment (meta-train and meta-test)
        will be given its own value of `save_name` derived from this one.
    """

    # Check for unsupported options.
    unsupported_options = ["load_from"]
    for unsupported in unsupported_options:
        if config[unsupported] is not None:
            raise NotImplementedError
    if config["meta_train_config"]["architecture_config"]["include_task_index"]:
        raise NotImplementedError

    # Add common settings to meta-train config and meta-test config.
    meta_train_config = config["meta_train_config"]
    meta_test_config = config["meta_test_config"]
    common_settings = list(config.keys())
    common_settings.remove("meta_train_config")
    common_settings.remove("meta_test_config")
    common_settings.remove("save_name")
    for setting in common_settings:
        meta_train_config[setting] = config[setting]
        meta_test_config[setting] = config[setting]

    # Construct save names for meta-training and meta-testing.
    if config["save_name"] is None:
        meta_train_config["save_name"] = None
        meta_test_config["save_name"] = None
    else:
        meta_train_config["save_name"] = "%s_meta_train" % config["save_name"]
        meta_test_config["save_name"] = "%s_meta_test" % config["save_name"]

    # Perform meta-training.
    print("Meta-Training:")
    checkpoint = train(meta_train_config)

    # Convert policy for meta-test time.
    num_test_tasks = get_num_tasks(meta_test_config["env_name"])
    policy = checkpoint["policy"]
    policy.meta_conversion(num_test_tasks)

    # Perform meta-testing.
    print("\nMeta-Testing:")
    checkpoint = train(meta_test_config, policy)

    return checkpoint

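# A minimal sketch of a `meta_train` config with hypothetical values; the expected
# entries are documented in the docstring above:
#
#     config = {
#         "meta_train_config": {...},  # full train() config for meta-training
#         "meta_test_config": {...},   # full train() config for meta-testing
#         "cuda": False,
#         "seed": 1,
#         "load_from": None,  # resuming isn't yet supported for meta-training
#         "print_freq": 10,
#         "save_freq": None,
#         "save_name": "my_meta_experiment",
#     }
#     checkpoint = meta_train(config)
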
def train_single_config(
    train_config: Dict[str, Any],
    trials_per_config: int,
    fitness_fn: Callable,
    seed: int,
    checkpoint: Dict[str, Any],
    save_dir: str,
    config_save_name: str = None,
    metrics_filename: str = None,
    baseline_metrics_filename: str = None,
    early_stop_trials: int = None,
) -> Tuple[float, Dict[str, Any], Dict[str, Any]]:
    """
    Run training with a fixed config for ``trials_per_config`` trials, and return
    fitness and a dictionary holding results.
    """

    # Load in checkpoint, if necessary.
    fitness = 0.0
    trial = 0
    config_results: Dict[str, Any] = {}
    config_results["trials"] = []
    config_results["config"] = dict(train_config)
    if checkpoint is not None and checkpoint["config_checkpoint"] is not None:
        config_results = checkpoint["config_checkpoint"]["config_results"]
        fitness = checkpoint["config_checkpoint"]["fitness"]
        trial = checkpoint["config_checkpoint"]["trial"]

    # Perform training and compute resulting fitness for multiple trials.
    while trial < trials_per_config:

        # Check for early stop.
        if early_stop_trials is not None and trial == early_stop_trials:
            break

        trial_results: Dict[str, Any] = {}

        # Set trial name, seed, and metrics filenames for saving/comparison, if
        # necessary.
        get_save_name = (
            lambda name: "%s_%d" % (name, trial) if name is not None else None
        )
        train_config["save_name"] = get_save_name(config_save_name)
        train_config["metrics_filename"] = get_save_name(metrics_filename)
        train_config["baseline_metrics_filename"] = get_save_name(
            baseline_metrics_filename
        )
        train_config["seed"] = seed + trial

        # Run training and get fitness.
        checkpoint = train(train_config)
        metrics = checkpoint["metrics"].state()
        trial_fitness = fitness_fn(metrics)
        fitness += trial_fitness

        # Fill in trial results.
        trial_results["trial"] = trial
        trial_results["metrics"] = dict(metrics)
        trial_results["fitness"] = trial_fitness
        config_results["trials"].append(dict(trial_results))

        # Save checkpoint, if necessary. We increment the trial index here so that
        # when training resumes, it will start with the next trial after the last
        # completed one.
        if save_dir is not None:
            config_checkpoint: Dict[str, Any] = {}
            config_checkpoint["config_results"] = dict(config_results)
            config_checkpoint["fitness"] = fitness
            config_checkpoint["trial"] = trial + 1
            checkpoint["config_checkpoint"] = dict(config_checkpoint)
            checkpoint_filename = os.path.join(save_dir, "checkpoint.pkl")
            with open(checkpoint_filename, "wb") as checkpoint_file:
                pickle.dump(checkpoint, checkpoint_file)

        # Update trial index.
        trial += 1

    fitness /= trials_per_config
    config_results["fitness"] = fitness
    return fitness, config_results, checkpoint

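# A minimal usage sketch for `train_single_config` with hypothetical values. The
# `fitness_fn` argument is assumed to map the dictionary returned by
# `metrics.state()` to a scalar, matching the call `fitness_fn(metrics)` above;
# the "eval_reward" key below is illustrative only:
#
#     fitness, config_results, checkpoint = train_single_config(
#         train_config=config,
#         trials_per_config=3,
#         fitness_fn=lambda metrics: metrics["eval_reward"]["mean"][-1],
#         seed=0,
#         checkpoint=None,
#         save_dir=None,
#     )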