def recommender(
    experiment: str,
    config_path: str,
    data_path: str,
    evaluations_path: str,
    nskip: int,
):
    """
    Evaluates the performance of the models proposed by a recommender on a hold-out dataset.
    Recommendations are made for each dataset by providing recommenders with the offline
    evaluations on the remaining datasets in the registry.

    This call runs the Sacred script for each provided configuration sequentially and returns
    only once all runs have completed.
    """
    with Path(config_path).open("r", encoding="utf-8") as f:
        content = yaml.safe_load(f)

    configs = explode_key_values("recommender", content)
    for configuration in iterate_configurations(configs, nskip):
        run_sacred_script(
            "recommender.py",
            experiment=experiment,
            data_path=data_path,
            evaluations_path=evaluations_path,
            **configuration,
        )

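# Hedged illustration (not used by the functions in this module): the YAML files
# consumed above are expected to map option names to lists of values which are
# expanded into a grid of individual configurations. The exact schema handled by
# `explode_key_values` is an assumption here; the sketch below only mirrors the
# general idea of such a grid expansion.
def _example_expand_grid(grid):
    """Illustrative only: expand {key: [v1, v2, ...]} into a list of dicts."""
    from itertools import product

    keys = list(grid)
    values = [v if isinstance(v, list) else [v] for v in grid.values()]
    return [dict(zip(keys, combination)) for combination in product(*values)]
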
def ensemble(
    experiment: str,
    config_path: str,
    data_path: str,
    evaluations_path: str,
    nskip: int,
):
    """
    Evaluates the performance of an ensemble of best-performing models. This also allows
    considering only models of a particular type (thus building hyper-ensembles).

    This call runs the Sacred script for each provided configuration sequentially and returns
    only once all runs have completed.
    """
    with Path(config_path).open("r", encoding="utf-8") as f:
        content = yaml.safe_load(f)

    configs = explode_key_values("__", content)
    for configuration in iterate_configurations(configs, nskip):
        run_sacred_script(
            "ensemble.py",
            experiment=experiment,
            data_path=data_path,
            evaluations_path=evaluations_path,
            # Drop the "__" placeholder key so it is not forwarded to the script.
            **{k: v for k, v in configuration.items() if k != "__"},
        )

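# Hedged sketch of the idea described in the docstring above (the actual selection
# logic lives in `ensemble.py` and is not shown here): pick the best-performing
# configurations, optionally restricted to a single model type to build a
# hyper-ensemble. The field names "model" and "score" are assumptions for
# illustration only.
def _example_select_ensemble_members(evaluations, size=10, model_type=None):
    """Illustrative only: return the `size` best evaluations (lower score is better)."""
    candidates = [
        e for e in evaluations if model_type is None or e["model"] == model_type
    ]
    return sorted(candidates, key=lambda e: e["score"])[:size]
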
def surrogate(
    experiment: str,
    config_path: str,
    data_path: str,
    evaluations_path: str,
    nskip: int,
):
    """
    Evaluates the performance of a set of surrogate models using the available offline
    evaluations. Performance is measured via ranking metrics, computed with stratified
    leave-one-out cross-validation where each stratum consists of the evaluations on a
    single evaluation dataset.

    This call runs the Sacred script for each provided configuration sequentially and returns
    only once all runs have completed.
    """
    with Path(config_path).open("r", encoding="utf-8") as f:
        content = yaml.safe_load(f)

    configs = explode_key_values("surrogate", content)
    for configuration in iterate_configurations(configs, nskip):
        run_sacred_script(
            "surrogate.py",
            experiment=experiment,
            data_path=data_path,
            evaluations_path=evaluations_path,
            **configuration,
        )

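# Minimal sketch of the stratified leave-one-out scheme mentioned above: the
# evaluations of each dataset form one stratum which is held out in turn while the
# surrogate is fitted on all remaining strata. This helper is illustrative only and
# is not used by `surrogate.py`.
def _example_leave_one_dataset_out(evaluations_by_dataset):
    """Illustrative only: yield (train, test) splits, one per held-out dataset."""
    for held_out, test in evaluations_by_dataset.items():
        train = [
            evaluation
            for dataset, evaluations in evaluations_by_dataset.items()
            if dataset != held_out
            for evaluation in evaluations
        ]
        yield train, test
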
def schedule(
    config_path: str,
    sagemaker_role: str,
    experiment: str,
    data_bucket: str,
    data_bucket_prefix: str,
    output_bucket: str,
    output_bucket_prefix: str,
    instance_type: str,
    docker_image: str,
    max_runtime: int,
    nskip: int,
    local: bool,
):
    """
    Schedules evaluations on AWS Sagemaker by running a grid search over the configurations
    provided in the given file(s). As AWS Sagemaker does not allow queuing jobs, this script
    keeps running until all evaluation runs have been scheduled.
    """
    assert instance_type[:5] not in (
        "ml.p3",
        "ml.p2",
        "ml.g4",
    ), "Cannot schedule experiments on GPU instances."

    # First, setup Sagemaker connection
    boto_session = default_session()
    if local:
        sm_session = sagemaker.LocalSession(boto_session)
    else:
        sm_session = sagemaker.Session(boto_session)

    def job_factory() -> str:
        date_str = datetime.now(tz=timezone.utc).strftime("%d-%m-%Y-%H-%M-%S-%f")
        job_name = f"{experiment}-{date_str}"
        return job_name

    # Then, generate configs
    all_configurations = generate_configurations(Path(config_path))

    # Then, we can run the training, passing parameters as required
    for configuration in iterate_configurations(all_configurations, nskip):
        # Create the estimator
        estimator = CustomFramework(
            sagemaker_session=sm_session,
            role=sagemaker_role,
            tags=[{"Key": "Experiment", "Value": experiment}],
            instance_type=(
                "local"
                if local
                else configuration.get("__instance_type__", instance_type)
            ),
            instance_count=1,
            volume_size=30,
            max_run=max_runtime * 60 * 60,
            image_uri=image_uri(docker_image),
            source_dir=str(Path(os.path.realpath(__file__)).parent.parent.parent),
            output_path=f"s3://{output_bucket}/{output_bucket_prefix}/{experiment}",
            entry_point="evaluate.py",
            debugger_hook_config=False,
            metric_definitions=metric_definitions(),
            hyperparameters={
                k: v for k, v in configuration.items() if not k.startswith("__")
            },
        )

        # Try fitting the estimator until scheduling succeeds
        while True:
            try:
                estimator.fit(
                    job_name=job_factory(),
                    inputs={
                        configuration["dataset"]: (
                            f"s3://{data_bucket}/{data_bucket_prefix}/"
                            f"{configuration['dataset']}"
                        )
                    },
                    wait=False,
                )
                break
            except ClientError as err:
                # Scheduling can fail transiently, e.g. when the account's limit on
                # concurrent training jobs is reached; wait and retry.
                print(f"+++ Scheduling failed: {err}")
                print("+++ Sleeping for 5 minutes.")
                time.sleep(300)

        print(f">>> Launched job: {estimator.latest_training_job.name}")  # type: ignore

    print(">>> Successfully scheduled all training jobs.")

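# Hedged reference (illustrative only; the real values come from the
# `metric_definitions()` and `image_uri()` helpers defined elsewhere in this
# package): SageMaker metric definitions are regex patterns matched against the
# training job's log output, and the `hyperparameters` dictionary is typically
# forwarded by the SageMaker Python SDK to the `evaluate.py` entry point as
# command-line arguments. The name and regex below are assumptions for
# illustration, not the ones used by this package.
_EXAMPLE_METRIC_DEFINITIONS = [
    {"Name": "example_test_loss", "Regex": r"test_loss=([0-9\.]+)"},
]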