def make_dataset(
    self,
    dag: FugueWorkflow,
    dataset: Any,
    df: Any = None,
    df_name: str = TUNE_DATASET_DF_DEFAULT_NAME,
    test_df: Any = None,
    test_df_name: str = TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME,
    partition_keys: Optional[List[str]] = None,
    temp_path: str = "",
) -> TuneDataset:
    assert_or_throw(dataset is not None, TuneCompileError("dataset can't be None"))
    # An existing TuneDataset is passed through unchanged; df must not be set.
    if isinstance(dataset, TuneDataset):
        assert_or_throw(
            df is None, TuneCompileError("can't set df when dataset is TuneDataset")
        )
        return dataset
    # A Space is compiled into a TuneDataset, optionally joined with train/test dataframes.
    if isinstance(dataset, Space):
        path = self.get_path_or_temp(temp_path)
        builder = TuneDatasetBuilder(dataset, path)
        if df is not None:
            wdf = dag.df(df)
            if partition_keys is not None and len(partition_keys) > 0:
                wdf = wdf.partition_by(*partition_keys)
            builder.add_df(df_name, wdf)
        if test_df is not None:
            wdf = dag.df(test_df)
            # Without partition keys the test dataframe is cross joined;
            # with partition keys it is inner joined on those keys.
            how = "cross"
            if partition_keys is not None and len(partition_keys) > 0:
                wdf = wdf.partition_by(*partition_keys)
                how = "inner"
            builder.add_df(test_df_name, wdf, how=how)
        return builder.build(dag, batch_size=1, shuffle=True)
    raise TuneCompileError(f"{dataset} can't be converted to TuneDataset")
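# A minimal usage sketch (not from the source): `train` and "/tmp/tune" are
# hypothetical, and TUNE_OBJECT_FACTORY is assumed to be the module-level
# factory instance referenced by the suggest_* functions further down.
import pandas as pd
from fugue import FugueWorkflow
from tune import Grid, Space

train = pd.DataFrame({"x": [1, 2, 3], "y": [0.1, 0.2, 0.3]})
space = Space(alpha=Grid(0.1, 1.0))
dag = FugueWorkflow()
dataset = TUNE_OBJECT_FACTORY.make_dataset(dag, space, df=train, temp_path="/tmp/tune")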
def _object_to_iterative_objective(self, obj: Any) -> IterativeObjectiveFunc:
    assert_or_throw(obj is not None, TuneCompileError("objective can't be None"))
    if isinstance(obj, IterativeObjectiveFunc):
        return obj
    raise TuneCompileError(
        f"{obj} can't be converted to iterative objective function"
    )
def make_noniterative_objective(self, obj: Any) -> NonIterativeObjectiveFunc:
    assert_or_throw(obj is not None, TuneCompileError("objective can't be None"))
    if isinstance(obj, NonIterativeObjectiveFunc):
        return obj
    return self._noniterative_objective_converter(obj)
def _object_to_stopper(self, obj: Any) -> Optional[NonIterativeStopper]:
    if isinstance(obj, NonIterativeStopper):
        return obj
    if obj is None:
        return None
    raise TuneCompileError(f"{obj} can't be converted to NonIterativeStopper")
def _object_to_noniterative_local_optimizer(
    self, obj: Any
) -> NonIterativeObjectiveLocalOptimizer:
    if isinstance(obj, NonIterativeObjectiveLocalOptimizer):
        return obj
    if obj is None:
        return NonIterativeObjectiveLocalOptimizer()
    raise TuneCompileError(
        f"{obj} can't be converted to non iterative objective optimizer"
    )
def add_df(
    self, name: str, df: WorkflowDataFrame, how: str = ""
) -> "TuneDatasetBuilder":
    """Add a dataframe to the dataset

    :param name: name of the dataframe, it will also create a
      ``__tune_df__<name>`` column in the dataset dataframe
    :param df: the dataframe to add
    :param how: join type, can accept ``semi``, ``left_semi``, ``anti``,
      ``left_anti``, ``inner``, ``left_outer``, ``right_outer``,
      ``full_outer``, ``cross``
    :returns: the builder itself

    .. note::
        For the first dataframe you add, ``how`` should be empty.
        From the second dataframe on, ``how`` must be set.

    .. note::
        If ``df`` is prepartitioned, the partition keys will be used to
        join with the added dataframes. Read
        :ref:`TuneDataset Tutorial </notebooks/tune_dataset.ipynb>`
        for more details.
    """
    assert_or_throw(
        not any(r[0] == name for r in self._dfs_spec),
        TuneCompileError(name + " already exists"),
    )
    if len(self._dfs_spec) == 0:
        assert_or_throw(
            how == "", TuneCompileError("first dataframe can't specify how to join")
        )
    else:
        assert_or_throw(
            how != "",
            TuneCompileError("must specify how to join after first dataframe"),
        )
    self._dfs_spec.append((name, df, how))
    return self
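# A hedged sketch of the builder flow described in the docstring above
# (dataframes and the temp path are illustrative): the first add_df call
# leaves `how` empty, later calls must set a join type, and build() assembles
# the TuneDataset the same way make_dataset does internally.
import pandas as pd
from fugue import FugueWorkflow
from tune import Grid, Space

train = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
test = pd.DataFrame({"a": [5], "b": [6]})

dag = FugueWorkflow()
builder = TuneDatasetBuilder(Space(alpha=Grid(0.1, 1.0)), "/tmp/tune")
builder.add_df("df", dag.df(train))                   # first df: `how` stays empty
builder.add_df("test_df", dag.df(test), how="cross")  # later dfs: `how` is required
dataset = builder.build(dag, batch_size=1, shuffle=True)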
def _get_distributed(self, distributed: Optional[bool]) -> bool:
    if distributed is None:
        return self._optimizer.distributable
    if distributed:
        assert_or_throw(
            self._optimizer.distributable,
            TuneCompileError(
                f"can't distribute non-distributable optimizer {self._optimizer}"
            ),
        )
        return True
    return False
def suggest_by_hyperband(
    objective: Any,
    space: Space,
    plans: List[List[Tuple[float, int]]],
    train_df: Any = None,
    temp_path: str = "",
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    assert_or_throw(
        not space.has_stochastic,
        TuneCompileError(
            "space can't contain random parameters, "
            "use sample method before calling this function"
        ),
    )
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    study = optimize_by_hyperband(
        objective=objective,
        dataset=dataset,
        plans=plans,
        checkpoint_path=temp_path,
        distributed=distributed,
        monitor=monitor,
    )
    study.result(top_n).yield_dataframe_as("result")
    # Run the workflow, collect the yielded result dataframe, and return the
    # reports sorted by metric.
    rows = list(
        dag.run(
            execution_engine,
            conf=execution_engine_conf,
        )["result"].as_dict_iterable()
    )
    return [
        TrialReport.from_jsondict(json.loads(r[TUNE_REPORT]))
        for r in sorted(rows, key=lambda r: r[TUNE_REPORT_METRIC])
    ]
def to_noniterative_objective(
    obj: Any,
    min_better: bool = True,
    global_vars: Optional[Dict[str, Any]] = None,
    local_vars: Optional[Dict[str, Any]] = None,
) -> NonIterativeObjectiveFunc:
    if isinstance(obj, NonIterativeObjectiveFunc):
        return copy.copy(obj)
    global_vars, local_vars = get_caller_global_local_vars(global_vars, local_vars)
    try:
        f = to_function(obj, global_vars=global_vars, local_vars=local_vars)
        # this is for string expression of function with decorator
        if isinstance(f, NonIterativeObjectiveFunc):
            return copy.copy(f)
        # this is for functions without decorator
        return _NonIterativeObjectiveFuncWrapper.from_func(f, min_better)
    except Exception as e:
        exp = e
    raise TuneCompileError(f"{obj} is not a valid tunable function", exp)
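# Hedged example (assumption: a plain function returning a single numeric
# metric is accepted by _NonIterativeObjectiveFuncWrapper.from_func):
def quadratic(a: float, b: float) -> float:
    return (a - 1) ** 2 + (b + 2) ** 2

func = to_noniterative_objective(quadratic)                         # minimize (default)
func_max = to_noniterative_objective(quadratic, min_better=False)   # maximize instead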
def suggest_by_sha(
    objective: Any,
    space: Space,
    plan: List[Tuple[float, int]],
    train_df: Any = None,
    temp_path: str = "",
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    assert_or_throw(
        not space.has_stochastic,
        TuneCompileError(
            "space can't contain random parameters, "
            "use sample method before calling this function"
        ),
    )
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    study = optimize_by_sha(
        objective=objective,
        dataset=dataset,
        plan=plan,
        checkpoint_path=temp_path,
        distributed=distributed,
        monitor=monitor,
    )
    study.result(top_n).yield_dataframe_as("result")
    return _run(
        dag=dag,
        execution_engine=execution_engine,
        execution_engine_conf=execution_engine_conf,
    )
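# Hedged call-shape sketch only: MyIterativeObjective stands in for a
# hypothetical IterativeObjectiveFunc implementation (not defined here), the
# plan is a hypothetical list of (budget, n) tuples matching the signature
# above, and the space uses only Grid so the "no random parameters"
# assertion passes.
from tune import Grid, Space

reports = suggest_by_sha(
    objective=MyIterativeObjective(),
    space=Space(lr=Grid(0.1, 0.01), layers=Grid(2, 3)),
    plan=[(1.0, 8), (2.0, 4), (4.0, 2)],
    temp_path="/tmp/tune",
    top_n=1,
)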
def _object_to_monitor(self, obj: Any) -> Optional[Monitor]:
    if obj is None:
        return None
    raise TuneCompileError(f"{obj} can't be converted to Monitor")
def _object_to_noniterative_objective_runner(
    self, obj: Any
) -> NonIterativeObjectiveRunner:
    if obj is None:
        return NonIterativeObjectiveRunner()
    raise TuneCompileError(
        f"{obj} can't be converted to non iterative objective runner"
    )
def _object_to_noniterative_objective(
    self, obj: Any
) -> NonIterativeObjectiveFunc:
    raise TuneCompileError(
        f"{obj} can't be converted to non iterative objective function"
    )
def get_path_or_temp(self, path: str) -> str:
    if path is None or path == "":
        path = self._tmp
    assert_or_throw(path != "", TuneCompileError("path or temp path must be set"))
    return path