def _dummy_performance() -> Performance:
    return Performance.from_dict(
        {
            mm: np.nan
            for m in Performance.metrics()
            for mm in [f"{m}_mean", f"{m}_std"]
        }
    )


def performance(cls, evaluations: List[Evaluation]) -> Performance:
    """
    Aggregates the provided evaluations into a single performance object.

    Args:
        evaluations: The evaluations to aggregate.

    Returns:
        The aggregated performance object. Since the number of model parameters is
        not part of the evaluations, the `num_model_parameters` attribute is left
        unset (i.e. set to zero).
    """
    metrics = [e.summary for e in evaluations]
    kwargs = {
        m: (
            Metric(0, 0)
            if m == "num_model_parameters"
            else Metric(
                np.mean(
                    [metric[m] if m in metric else np.nan for metric in metrics]
                ),
                np.std(
                    [metric[m] if m in metric else np.nan for metric in metrics]
                ),
            )
        )
        for m in Performance.metrics()
    }
    return Performance(**kwargs)  # type: ignore
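

# Illustrative sketch (not part of the original module): `performance` reduces each
# metric to its mean and standard deviation across the evaluation summaries, padding
# missing metrics with NaN. The helper below mirrors that reduction on plain
# dictionaries using only numpy (assumed imported as `np`, as above);
# `_aggregate_summaries` and the example summaries are hypothetical names introduced
# for this sketch.
def _aggregate_summaries(
    summaries: list[dict[str, float]], metric_names: list[str]
) -> dict[str, tuple[float, float]]:
    aggregated = {}
    for name in metric_names:
        # A missing metric becomes NaN, so a single incomplete summary turns the
        # aggregate into NaN as well (the same behavior as `performance` above).
        values = [s[name] if name in s else np.nan for s in summaries]
        aggregated[name] = (float(np.mean(values)), float(np.std(values)))
    return aggregated


# Example usage: two seeds reporting an "ncrps" metric.
# _aggregate_summaries([{"ncrps": 0.21}, {"ncrps": 0.25}], ["ncrps"])
# -> {"ncrps": (0.23, 0.02)} (up to floating-point rounding)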


def extract_job_infos(
    training_jobs: List[Job],
    validation_metric: Optional[ValidationMetric],
    group_seeds: bool,
    data_path: Union[str, Path] = DEFAULT_DATA_PATH,
) -> List[JobInfo]:
    """
    Returns a list of the job information objects available for all training jobs
    provided.
    """
    # We group the jobs by hyperparameters, excluding the seed
    if group_seeds:
        grouped_jobs = defaultdict(list)
        for job in training_jobs:
            hypers = {
                "model": job.model,
                "dataset": job.dataset,
                **job.hyperparameters,
            }
            grouped_jobs[tuple(sorted(hypers.items()))].append(job)
        all_jobs = grouped_jobs.values()
    else:
        all_jobs = [[job] for job in training_jobs]

    # Then, we can instantiate the info objects by iterating over groups of jobs
    runs = []
    for jobs in tqdm(all_jobs):
        ref_job = jobs[0]
        model_name = ref_job.model
        base_hyperparams = {**ref_job.hyperparameters}

        # First, we reconstruct the training times
        if issubclass(MODEL_REGISTRY[model_name], TrainConfig):
            training_fractions = [1 / 81, 1 / 27] + [i / 9 for i in range(1, 10)]
        else:
            training_fractions = [0]

        assert all(
            len(job.metrics) == len(training_fractions) for job in jobs
        ), "Job does not provide sufficiently many models."

        # Then, we select the Hyperband training times to evaluate
        if len(training_fractions) == 1:
            training_fraction_indices = [0]
        else:
            training_fraction_indices = [0, 1, 2, 4, 10]

        # Then, we iterate over all training times, construct the hyperparameters and
        # collect the performance metrics
        for i in training_fraction_indices:
            # Create the config object
            hyperparams = {
                **base_hyperparams,
                "training_fraction": training_fractions[i],
            }
            model_config = get_model_config(model_name, **hyperparams)
            config = Config(
                model_config, get_dataset_config(ref_job.dataset, data_path)
            )

            # Get the indices of the models that should be used to derive the performance
            if validation_metric is None or len(training_fractions) == 1:
                # If the model does not require training, or we don't look at the
                # validation performance, we just choose the current index
                choices = [i] * len(jobs)
            else:
                # Otherwise, we get the minimum value for the metric up to this point
                # in time
                choices = [
                    np.argmin(
                        [p["evaluation"][validation_metric] for p in job.metrics][: i + 1]
                    ).item()
                    for job in jobs
                ]

            # Get the performances of the chosen models
            performances = [
                job.performances[choice] for choice, job in zip(choices, jobs)
            ]

            # And average the performance
            averaged_performance = Performance(
                **{
                    metric: Metric(
                        np.mean([getattr(p, metric).mean for p in performances]),
                        np.std([getattr(p, metric).mean for p in performances]),
                    )
                    for metric in Performance.metrics()
                }
            )

            # Get validation scores if available
            try:
                val_ncrps = np.mean(
                    [
                        job.metrics[c]["evaluation"]["val_ncrps"]
                        for (job, c) in zip(jobs, choices)
                    ]
                )
                val_loss = np.mean(
                    [
                        job.metrics[c]["evaluation"]["val_loss"]
                        for (job, c) in zip(jobs, choices)
                    ]
                ).item()
                val_scores = ValidationScores(val_ncrps, val_loss)
            except KeyError:
                val_scores = None

            # Initialize the info object
            runs.append(
                JobInfo(config, averaged_performance, val_scores, jobs, choices)
            )

    return runs
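

# Illustrative sketch (not part of the original module): when a validation metric is
# given, `extract_job_infos` selects, for every seed, the checkpoint with the lowest
# validation value among the first i + 1 recorded evaluations (a prefix argmin over
# each job's metric history). The helper below reproduces that selection on plain
# lists of floats using only numpy (assumed imported as `np`, as above);
# `_best_checkpoints_up_to` and the example histories are hypothetical names
# introduced for this sketch.
def _best_checkpoints_up_to(
    validation_histories: list[list[float]], index: int
) -> list[int]:
    return [int(np.argmin(history[: index + 1])) for history in validation_histories]


# Example usage: two seeds evaluated at three checkpoints each. Looking at the first
# three evaluations, the first seed is best at checkpoint 1, the second at checkpoint 2.
# _best_checkpoints_up_to([[0.9, 0.4, 0.6], [0.8, 0.7, 0.5]], 2) -> [1, 2]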