def test_irreducible_1(self):
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import Nystroem
    from lale.lib.sklearn import MinMaxScaler
    from lale.lib.sklearn import LogisticRegression
    from lale.lib.sklearn import KNeighborsClassifier
    from lale.operators import get_pipeline_of_applicable_type
    choice = PCA | Nystroem
    pipeline = get_pipeline_of_applicable_type(
        steps=[choice, MinMaxScaler, LogisticRegression, KNeighborsClassifier],
        edges=[(choice, LogisticRegression),
               (MinMaxScaler, LogisticRegression),
               (MinMaxScaler, KNeighborsClassifier)])
    expected = \
"""from lale.lib.sklearn import PCA
from lale.lib.sklearn import Nystroem
from lale.lib.sklearn import MinMaxScaler
from lale.lib.sklearn import LogisticRegression
from lale.lib.sklearn import KNeighborsClassifier
from lale.operators import get_pipeline_of_applicable_type
import lale
lale.wrap_imported_operators()
choice = PCA | Nystroem
pipeline = get_pipeline_of_applicable_type(steps=[choice, MinMaxScaler, LogisticRegression, KNeighborsClassifier], edges=[(choice,LogisticRegression), (MinMaxScaler,LogisticRegression), (MinMaxScaler,KNeighborsClassifier)])"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline))
def _unfold(self, op: Operator, n: int) -> Optional[Operator]:
    """Unfold all possible operators from the grammar, starting from
    non-terminal `op`, after at most `n` derivations.

    Parameters
    ----------
    op : Operator
        starting rule (e.g., `g.start`)
    n : int
        number of derivations

    Returns
    -------
    Optional[Operator]
        The unfolded operator, or None if the derivation budget `n`
        was exhausted before reaching terminals.
    """
    if isinstance(op, BasePipeline):
        steps = op.steps()
        new_steps = [self._unfold(sop, n) for sop in steps]
        step_map = {steps[i]: new_steps[i] for i in range(len(steps))}
        new_edges = [(step_map[s], step_map[d]) for s, d in op.edges()]
        if None not in new_steps:
            return get_pipeline_of_applicable_type(new_steps, new_edges, True)
        return None
    if isinstance(op, OperatorChoice):
        steps = [s for s in (self._unfold(sop, n) for sop in op.steps()) if s]
        return make_choice(*steps) if steps else None
    if isinstance(op, NonTerminal):
        return self._unfold(self._variables[op.name()], n - 1) if n > 0 else None
    if isinstance(op, IndividualOp):
        return op
    assert False, f"Unknown operator {op}"
def _sample(self, op: Operator, n: int) -> Optional[Operator]:
    """Sample the grammar starting from non-terminal `op`, that is,
    pick one element at random at each choice point.

    Parameters
    ----------
    op : Operator
        starting rule (e.g., `g.start`)
    n : int
        number of derivations

    Returns
    -------
    Optional[Operator]
        The sampled operator, or None if the derivation budget `n`
        was exhausted before reaching terminals.
    """
    if isinstance(op, BasePipeline):
        steps = op.steps()
        new_steps = [self._sample(sop, n) for sop in steps]
        step_map = {steps[i]: new_steps[i] for i in range(len(steps))}
        new_edges = [(step_map[s], step_map[d]) for s, d in op.edges()]
        if None not in new_steps:
            return get_pipeline_of_applicable_type(new_steps, new_edges, True)
        return None
    if isinstance(op, OperatorChoice):
        return self._sample(random.choice(op.steps()), n)
    if isinstance(op, NonTerminal):
        return self._sample(getattr(self, op.name()), n - 1) if n > 0 else None
    if isinstance(op, IndividualOp):
        return op
    assert False, f"Unknown operator {op}"
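# Hedged usage sketch (illustrative, not from the original file): it assumes
# `_unfold` and `_sample` live on a `Grammar` class that exposes public
# `unfold(n)` / `sample(n)` wrappers and binds non-terminals by attribute
# access, as `getattr(self, op.name())` above suggests. The rule names and
# derivation budget below are assumptions.
def _demo_grammar_usage():
    from lale.grammar import Grammar
    from lale.lib.sklearn import PCA, MinMaxScaler, LogisticRegression

    g = Grammar()
    g.start = g.prep >> LogisticRegression  # `g.prep` resolves to a NonTerminal
    g.prep = PCA | MinMaxScaler

    planned = g.unfold(6)  # every pipeline reachable within 6 derivations
    sampled = g.sample(6)  # one pipeline with all choices resolved at random
    return planned, sampled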
def set_operator_params(op: 'Ops.Operator', **impl_params) -> Ops.TrainableOperator:
    """May return a new operator, in which case the old one should be overwritten."""
    if isinstance(op, Ops.PlannedIndividualOp):
        return op.set_params(**impl_params)
    elif isinstance(op, Ops.Pipeline):
        steps = op.steps()
        partitioned_params: Dict[str, Dict[str, Any]] = \
            partition_sklearn_params(impl_params)
        found_names: Set[str] = set()
        step_map: Dict[Ops.Operator, Ops.TrainableOperator] = {}
        for s in steps:
            name = s.name()
            found_names.add(name)
            params: Dict[str, Any] = {}
            if name in partitioned_params:
                params = partitioned_params[name]
            new_s = set_operator_params(s, **params)
            if s != new_s:
                step_map[s] = new_s
        # make sure that no parameters were passed in for operations
        # that are not actually part of this pipeline
        assert set(partitioned_params.keys()).issubset(found_names)
        if step_map:
            op.subst_steps(step_map)
            if not isinstance(op, Ops.TrainablePipeline):
                # As a result of choices made, we may now be a TrainableIndividualOp
                ret = Ops.get_pipeline_of_applicable_type(
                    op.steps(), op.edges(), ordered=True)
                assert isinstance(ret, Ops.TrainableOperator)
                return ret
            else:
                return op
        else:
            assert isinstance(op, Ops.TrainableOperator)
            return op
    elif isinstance(op, Ops.OperatorChoice):
        discriminant_name: str = "_lale_discriminant"
        assert discriminant_name in impl_params
        choice_name = impl_params[discriminant_name]
        choices: List[Ops.Operator] = [
            step for step in op.steps() if step.name() == choice_name]
        assert len(choices) == 1, \
            f"found {len(choices)} operators with the same name: {choice_name}"
        choice: Ops.Operator = choices[0]
        chosen_params = dict(impl_params)
        del chosen_params[discriminant_name]
        new_step = set_operator_params(choice, **chosen_params)
        # we remove the OperatorChoice, replacing it with the branch that was taken
        return new_step
    else:
        assert False, f"Not yet supported operation of type: {op.__class__.__name__}"
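# Hedged sketch (illustrative, not from the original file): in this version
# of `set_operator_params`, an OperatorChoice is resolved through the special
# `_lale_discriminant` key, which names the branch to keep; the remaining
# keyword arguments then configure that branch. The operators and
# hyperparameter values below are assumptions.
def _demo_choice_discriminant():
    from lale.lib.sklearn import PCA, Nystroem

    choice = PCA | Nystroem
    # keeps the PCA branch and forwards n_components to PCA.set_params
    trainable = set_operator_params(
        choice, _lale_discriminant='PCA', n_components=2)
    return trainable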
def test_irreducible_2(self):
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import MinMaxScaler as MMS
    from lale.lib.lale import ConcatFeatures as HStack
    from lale.lib.sklearn import KNeighborsClassifier as KNN
    from lale.lib.sklearn import LogisticRegression as LR
    from lale.operators import get_pipeline_of_applicable_type
    pipeline_0 = HStack >> LR
    pipeline = get_pipeline_of_applicable_type(
        steps=[PCA, MMS, KNN, pipeline_0],
        edges=[(PCA, KNN), (PCA, pipeline_0), (MMS, pipeline_0)])
    expected = \
"""from lale.lib.sklearn import PCA
from lale.lib.sklearn import MinMaxScaler as MMS
from lale.lib.sklearn import KNeighborsClassifier as KNN
from lale.lib.lale import ConcatFeatures as HStack
from lale.lib.sklearn import LogisticRegression as LR
from lale.operators import get_pipeline_of_applicable_type
import lale
lale.wrap_imported_operators()
pipeline_0 = HStack >> LR
pipeline = get_pipeline_of_applicable_type(steps=[PCA, MMS, KNN, pipeline_0], edges=[(PCA,KNN), (PCA,pipeline_0), (MMS,pipeline_0)])"""
    self._roundtrip(expected, lale.pretty_print.to_string(pipeline))
def set_operator_params(op: Ops.Operator, **impl_params) -> Ops.TrainableOperator:
    """May return a new operator, in which case the old one should be overwritten."""
    if isinstance(op, Ops.PlannedIndividualOp):
        main_params, partitioned_sub_params = partition_sklearn_params(impl_params)
        hyper = op._hyperparams
        if hyper is None:
            hyper = {}
        # we set the sub params first
        for sub_key, sub_params in partitioned_sub_params.items():
            set_structured_params(sub_key, sub_params, hyper)
        # we have now updated any nested operators
        # (if this is a higher order operator)
        # and can work on the main operator
        all_params = {**main_params, **hyper}
        return op.set_params(**all_params)
    elif isinstance(op, Ops.BasePipeline):
        steps = op.steps()
        main_params, partitioned_sub_params = partition_sklearn_params(impl_params)
        assert not main_params, f"Unexpected non-nested arguments {main_params}"
        found_names: Dict[str, int] = {}
        step_map: Dict[Ops.Operator, Ops.TrainableOperator] = {}
        for s in steps:
            name = s.name()
            name_index = 0
            params: Dict[str, Any] = {}
            if name in found_names:
                name_index = found_names[name] + 1
                found_names[name] = name_index
                uname = make_indexed_name(name, name_index)
                if uname in partitioned_sub_params:
                    params = partitioned_sub_params[uname]
            else:
                found_names[name] = 0
                uname = make_degen_indexed_name(name, 0)
                if uname in partitioned_sub_params:
                    params = partitioned_sub_params[uname]
                    assert name not in partitioned_sub_params
                elif name in partitioned_sub_params:
                    params = partitioned_sub_params[name]
            new_s = set_operator_params(s, **params)
            if s != new_s:
                step_map[s] = new_s
        # make sure that no parameters were passed in for operations
        # that are not actually part of this pipeline
        for k in partitioned_sub_params.keys():
            n, i = get_name_and_index(k)
            assert n in found_names and i <= found_names[n]
        if step_map:
            op.subst_steps(step_map)
            if not isinstance(op, Ops.TrainablePipeline):
                # As a result of choices made, we may now be a TrainableIndividualOp
                ret = Ops.get_pipeline_of_applicable_type(
                    op.steps(), op.edges(), ordered=True)
                assert isinstance(ret, Ops.TrainableOperator)
                return ret
            else:
                return op
        else:
            assert isinstance(op, Ops.TrainableOperator)
            return op
    elif isinstance(op, Ops.OperatorChoice):
        choices = op.steps()
        choice_index: int
        chosen_params: Dict[str, Any]
        if len(choices) == 1:
            choice_index = 0
            chosen_params = impl_params
        else:
            (choice_index, chosen_params) = \
                partition_sklearn_choice_params(impl_params)
        assert 0 <= choice_index < len(choices)
        choice: Ops.Operator = choices[choice_index]
        new_step = set_operator_params(choice, **chosen_params)
        # we remove the OperatorChoice, replacing it with the branch that was taken
        return new_step
    else:
        assert False, f"Not yet supported operation of type: {op.__class__.__name__}"
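# Hedged sketch (illustrative, not from the original file): this later version
# routes sklearn-style double-underscore names to the matching pipeline step
# via `partition_sklearn_params`; steps whose names repeat are disambiguated
# by the indexed names from `make_indexed_name`. The step names and
# hyperparameter values below are assumptions.
def _demo_nested_params():
    from lale.lib.sklearn import PCA, LogisticRegression

    planned = PCA >> LogisticRegression
    # `PCA__n_components` reaches the PCA step and `LogisticRegression__C`
    # the classifier, mirroring sklearn's Pipeline.set_params convention
    trainable = set_operator_params(
        planned, PCA__n_components=2, LogisticRegression__C=0.1)
    return trainable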