def lookup_or_fetch_experiment_data_multi(
    cls,
    experiment: core.experiment.Experiment,
    metrics: Iterable[Metric],
    trials: Optional[Iterable[core.base_trial.BaseTrial]] = None,
    **kwargs: Any,
) -> Tuple[Data, bool]:
    """Fetch or lookup (with fallback to fetching) data for given metrics,
    depending on whether they are available while running. Return a tuple
    containing the data, along with a boolean that will be True if new data
    was fetched, and False if all data was looked up from cache.

    If metric is available while running, its data can change (and therefore
    we should always re-fetch it). If metric is available only upon trial
    completion, its data does not change, so we can look up that data on the
    experiment and only fetch the data that is not already attached to the
    experiment.

    NOTE: This function does not attach anything to the experiment itself;
    the returned boolean reports whether any new data was fetched.

    Args:
        experiment: Experiment whose cached data is looked up and which is
            forwarded to `fetch_experiment_data_multi`.
        metrics: Metrics to obtain data for. Any iterable is accepted,
            including one-shot iterators/generators.
        trials: Trials to consider. On the completion-only path, trials that
            are not completed are skipped; if None, the experiment's
            COMPLETED trials are used.
        **kwargs: Forwarded unchanged to `fetch_experiment_data_multi`.

    Returns:
        A `(data, contains_new_data)` tuple. `contains_new_data` is always
        True when the metric class is available while running; otherwise it
        is True only if at least one fetch call was made and did not raise
        `NotImplementedError`.
    """
    # If this metric is available while trial is running, just default to
    # `fetch_experiment_data_multi`.
    if cls.is_available_while_running():
        fetched_data = cls.fetch_experiment_data_multi(
            experiment=experiment, metrics=metrics, trials=trials, **kwargs
        )
        return fetched_data, True

    # If this metric is available only upon trial completion, look up data
    # on experiment and only fetch data that is not already cached.
    if trials is None:
        completed_trials = experiment.trials_by_status[
            core.base_trial.TrialStatus.COMPLETED
        ]
    else:
        completed_trials = [t for t in trials if t.status.is_completed]

    if not completed_trials:
        return cls.data_constructor(), False

    # `metrics` is walked once per trial and once more for `subset_metrics`;
    # materialize it so a one-shot iterator is not exhausted after the first
    # trial.
    metrics = list(metrics)

    trials_data = []
    contains_new_data = False
    for trial in completed_trials:
        cached_trial_data = experiment.lookup_data_for_trial(
            trial_index=trial.index,
        )[0]

        cached_metric_names = cached_trial_data.metric_names
        metrics_to_fetch = [m for m in metrics if m.name not in cached_metric_names]
        if not metrics_to_fetch:
            # If all needed data fetched from cache, no need to fetch any
            # other data for trial.
            trials_data.append(cached_trial_data)
            continue

        try:
            fetched_trial_data = cls.fetch_experiment_data_multi(
                experiment=experiment,
                metrics=metrics_to_fetch,
                trials=[trial],
                **kwargs,
            )
            contains_new_data = True
        except NotImplementedError:
            # Metric does not implement fetching logic and only uses lookup.
            fetched_trial_data = cls.data_constructor()

        final_data = cls.data_constructor.from_multiple_data(
            [cached_trial_data, fetched_trial_data]
        )
        trials_data.append(final_data)

    return (
        cls.data_constructor.from_multiple_data(
            trials_data, subset_metrics=[m.name for m in metrics]
        ),
        contains_new_data,
    )
def lookup_or_fetch_experiment_data_multi(
    cls,
    experiment: core.experiment.Experiment,
    metrics: Iterable[Metric],
    trials: Optional[Iterable[core.base_trial.BaseTrial]] = None,
    **kwargs: Any,
) -> AbstractDataFrameData:
    """Fetch or lookup (with fallback to fetching) data for given metrics,
    depending on whether they are available while running.

    If metric is available while running, its data can change (and therefore
    we should always re-fetch it). If metric is available only upon trial
    completion, its data does not change, so we can look up that data on the
    experiment and only fetch the data that is not already attached to the
    experiment.

    NOTE: Non-empty data obtained in this function is attached to the
    experiment via `experiment.attach_data`: the freshly fetched data on the
    available-while-running path, and the per-trial combination of cached and
    newly fetched data on the completion-only path.

    Args:
        experiment: Experiment whose cached data is looked up, to which data
            is attached, and which is forwarded to
            `fetch_experiment_data_multi`.
        metrics: Metrics to obtain data for. Any iterable is accepted,
            including one-shot iterators/generators.
        trials: Trials to consider. On the completion-only path, trials that
            are not completed are skipped; if None, the experiment's
            COMPLETED trials are used.
        **kwargs: Forwarded unchanged to `fetch_experiment_data_multi`.

    Returns:
        The fetched data (available-while-running path), or the per-trial
        data combined with `from_multiple_data` and restricted to the
        requested metric names (completion-only path). An empty
        `cls.data_constructor()` is returned when there are no completed
        trials.
    """
    # If this metric is available while trial is running, just default to
    # `fetch_experiment_data_multi`.
    if cls.is_available_while_running():
        fetched_data = cls.fetch_experiment_data_multi(
            experiment=experiment, metrics=metrics, trials=trials, **kwargs
        )
        if not fetched_data.df.empty:
            experiment.attach_data(
                fetched_data,
                overwrite_existing_data=cls.overwrite_existing_data(),
                combine_with_last_data=cls.combine_with_last_data(),
            )
        return fetched_data

    # If this metric is available only upon trial completion, look up data
    # on experiment and only fetch data that is not already cached.
    if trials is None:
        completed_trials = experiment.trials_by_status[
            core.base_trial.TrialStatus.COMPLETED
        ]
    else:
        completed_trials = [t for t in trials if t.status.is_completed]

    if not completed_trials:
        return cls.data_constructor()

    # `metrics` is walked once per trial and once more for `subset_metrics`;
    # materialize it so a one-shot iterator is not exhausted after the first
    # trial.
    metrics = list(metrics)

    trials_data = []
    for trial in completed_trials:
        cached_trial_data = experiment.lookup_data_for_trial(
            trial_index=trial.index,
        )[0]

        cached_metric_names = cached_trial_data.metric_names
        metrics_to_fetch = [m for m in metrics if m.name not in cached_metric_names]
        if not metrics_to_fetch:
            # If all needed data fetched from cache, no need to fetch any
            # other data for trial.
            trials_data.append(cached_trial_data)
            continue

        try:
            fetched_trial_data = cls.fetch_experiment_data_multi(
                experiment=experiment,
                metrics=metrics_to_fetch,
                trials=[trial],
                **kwargs,
            )
        except NotImplementedError:
            # Metric does not implement fetching logic and only uses lookup.
            fetched_trial_data = cls.data_constructor()

        final_data = cls.data_constructor.from_multiple_data(
            # pyre-fixme [6]: Incompatible paramtype: Expected `Data`
            #  but got `AbstractDataFrameData`.
            [cached_trial_data, fetched_trial_data]
        )
        if not final_data.df.empty:
            experiment.attach_data(
                final_data,
                overwrite_existing_data=cls.overwrite_existing_data(),
                combine_with_last_data=cls.combine_with_last_data(),
            )
        trials_data.append(final_data)

    return cls.data_constructor.from_multiple_data(
        trials_data, subset_metrics=[m.name for m in metrics]
    )
def lookup_or_fetch_experiment_data_multi(
    cls,
    experiment: core.experiment.Experiment,
    metrics: Iterable[Metric],
    trials: Optional[Iterable[core.base_trial.BaseTrial]] = None,
    **kwargs: Any,
) -> AbstractDataFrameData:
    """Fetch or lookup (with fallback to fetching) data for given metrics,
    depending on whether they are available while running.

    If metric is available while running, its data can change (and therefore
    we should always re-fetch it). If metric is available only upon trial
    completion, its data does not change, so we can look up that data on the
    experiment and only fetch the data that is not already attached to the
    experiment.

    NOTE: Data obtained in this function is attached to the experiment via
    `experiment.attach_data`. On the available-while-running path the
    attached data may be the fetched data combined with data already cached
    for the given trials (see inline comments); on the completion-only path
    it is the per-trial combination of cached and newly fetched data.

    Args:
        experiment: Experiment whose cached data is looked up, to which data
            is attached, and which is forwarded to
            `fetch_experiment_data_multi`.
        metrics: Metrics to obtain data for. Any iterable is accepted,
            including one-shot iterators/generators.
        trials: Trials to consider. On the completion-only path, trials that
            are not completed are skipped; if None, the experiment's
            COMPLETED trials are used. Any iterable is accepted.
        **kwargs: Forwarded unchanged to `fetch_experiment_data_multi`.

    Returns:
        The fetched data (available-while-running path; note this is the
        fetched data, not the possibly-combined data that was attached), or
        the per-trial data combined with `from_multiple_data` and restricted
        to the requested metric names (completion-only path). An empty
        `cls.data_constructor()` is returned when there are no completed
        trials.
    """
    # Both arguments are iterated more than once below (and also handed to
    # `fetch_experiment_data_multi`), so materialize them up front; otherwise
    # a one-shot iterator would already be exhausted on its second use.
    metrics = list(metrics)
    if trials is not None:
        trials = list(trials)

    # If this metric is available while trial is running, just default to
    # `fetch_experiment_data_multi`.
    if cls.is_available_while_running():
        cached_data = (
            experiment.lookup_data(
                trial_indices=[trial.index for trial in trials],
                keep_latest_map_values_only=False,
            )
            if trials
            else None
        )
        fetched_data = cls.fetch_experiment_data_multi(
            experiment=experiment, metrics=metrics, trials=trials, **kwargs
        )

        # If there is cached data, consider combining it with fetched data.
        # That way, if this function is being called from within a loop over
        # multiple metric classes (as is currently the case in lookup_data),
        # we'll combine data from all metric classes into a single dataframe
        # before attaching it to the experiment.
        if cached_data:
            cached_metric_names = cached_data.metric_names
            # If there is a collision (i.e. fetched = A, cached = AB), just
            # use the recently fetched. That way, if we call `fetch_data`
            # twice in a row (not within a for loop), we don't end up with
            # duplicate data.
            requested_metric_names = {m.name for m in metrics}
            if len(cached_metric_names.intersection(requested_metric_names)) > 0:
                final_data = fetched_data
            else:
                final_data = cls.data_constructor.from_multiple_data(
                    # pyre-fixme [6]: Incompatible paramtype: Expected `Data`
                    #  but got `AbstractDataFrameData`.
                    [cached_data, fetched_data]
                )
        else:
            final_data = fetched_data

        # Only attach when the fetch actually produced something.
        if not fetched_data.df.empty:
            experiment.attach_data(
                final_data,
                overwrite_existing_data=cls.overwrite_existing_data(),
                combine_with_last_data=cls.combine_with_last_data(),
            )
        return fetched_data

    # If this metric is available only upon trial completion, look up data
    # on experiment and only fetch data that is not already cached.
    if trials is None:
        completed_trials = experiment.trials_by_status[
            core.base_trial.TrialStatus.COMPLETED
        ]
    else:
        completed_trials = [t for t in trials if t.status.is_completed]

    if not completed_trials:
        return cls.data_constructor()

    trials_data = []
    for trial in completed_trials:
        cached_trial_data = experiment.lookup_data_for_trial(
            trial_index=trial.index,
            keep_latest_map_values_only=False,
        )[0]

        cached_metric_names = cached_trial_data.metric_names
        metrics_to_fetch = [m for m in metrics if m.name not in cached_metric_names]
        if not metrics_to_fetch:
            # If all needed data fetched from cache, no need to fetch any
            # other data for trial.
            trials_data.append(cached_trial_data)
            continue

        try:
            fetched_trial_data = cls.fetch_experiment_data_multi(
                experiment=experiment,
                metrics=metrics_to_fetch,
                trials=[trial],
                **kwargs,
            )
        except NotImplementedError:
            # Metric does not implement fetching logic and only uses lookup.
            fetched_trial_data = cls.data_constructor()

        final_data = cls.data_constructor.from_multiple_data(
            # pyre-fixme [6]: Incompatible paramtype: Expected `Data`
            #  but got `AbstractDataFrameData`.
            [cached_trial_data, fetched_trial_data]
        )
        if not final_data.df.empty:
            experiment.attach_data(
                final_data,
                overwrite_existing_data=cls.overwrite_existing_data(),
                combine_with_last_data=cls.combine_with_last_data(),
            )
        trials_data.append(final_data)

    return cls.data_constructor.from_multiple_data(
        trials_data, subset_metrics=[m.name for m in metrics]
    )