def transform_schema(self, s_X):
    """Compute the output schema; used by Lale to type-check downstream operators.

    If ``s_X`` is not a schema it is treated as data: a schema is attached
    to it, the operator is fitted on it, and the output schema is derived
    from the fitted columns.

    If ``s_X`` is a schema and the operator already has ``_fit_columns``,
    the output schema is derived from those.  Otherwise the keep/drop
    column specifications are inspected; only when both are statically
    known (``None`` or a schema wrapped in a ``_CallableMonoidFactory``)
    is a projected schema computed.  In every other case ``s_X`` is
    returned unchanged.
    """
    if not is_schema(s_X):
        # Actual data rather than a schema: fit on it and use the result.
        data = lale.datasets.data_schemas.add_schema(s_X)
        assert data is not None
        self.fit(data)
        return self._transform_schema_fit_columns(data.json_schema)

    if hasattr(self, "_fit_columns"):
        return self._transform_schema_fit_columns(s_X)

    def _static_view(spec):
        """Return ``(spec_or_unwrapped_schema, known)`` for a column spec."""
        if spec is None:
            return spec, True
        if isinstance(spec, _CallableMonoidFactory):
            wrapped = spec._c
            if is_schema(wrapped):
                return wrapped, True
        return spec, False

    # Read both attributes before resolving either one.
    keep_spec = self._columns
    drop_spec = self._drop_columns
    keep_spec, keep_known = _static_view(keep_spec)
    drop_spec, drop_known = _static_view(drop_spec)

    if not (keep_known and drop_known):
        # At least one spec cannot be resolved without data.
        return s_X
    return self._transform_schema_schema(s_X, keep_spec, drop_spec)
def transform_schema(self, s_X):
    """Compute the output schema; used by Lale to type-check downstream operators.

    If ``s_X`` is not a schema it is treated as data: a schema is attached,
    the operator is fitted on it, and the output schema is derived from the
    fitted columns.

    If ``s_X`` is a schema and ``_fit_columns`` already exists, those are
    used.  Otherwise a projected schema is computed only when both the
    ``columns`` and ``drop_columns`` hyperparameters are ``None`` or a
    schema; in any other case ``s_X`` is returned unchanged.
    """
    if not is_schema(s_X):
        # Actual data rather than a schema: fit on it and use the result.
        data = lale.datasets.data_schemas.add_schema(s_X)
        self.fit(data)
        return self._transform_schema_fit_columns(data.json_schema)

    if hasattr(self, '_fit_columns'):
        return self._transform_schema_fit_columns(s_X)

    def _statically_known(spec):
        """True when the spec can be applied to a schema without data."""
        return spec is None or is_schema(spec)

    keep_spec = self._hyperparams['columns']
    drop_spec = self._hyperparams['drop_columns']
    if not (_statically_known(keep_spec) and _statically_known(drop_spec)):
        return s_X
    return self._transform_schema_schema(s_X, keep_spec, drop_spec)
def to_monoid(self, df):
    """Resolve the stored column spec against ``df`` and wrap it in a ``_StaticMonoid``.

    ``self._c`` may be a callable (invoked with ``df``), a schema
    (converted with ``_columns_schema_to_list``), or a list.  In the first
    two cases the resolved value is written back to ``self._c`` before
    being wrapped; a list is wrapped as-is and ``self._c`` is left alone.
    """
    spec = self._c
    if callable(spec):
        resolved = spec(df)
    elif is_schema(spec):
        resolved = _columns_schema_to_list(df, spec)
    else:
        # Nothing to resolve and nothing to store back.
        assert isinstance(spec, list)
        return _StaticMonoid(spec)

    # Store the resolved value in place of the callable/schema.
    self._c = resolved
    return _StaticMonoid(resolved)
def get_column_factory(columns, kind):
    """Wrap a column specification in the matching monoid factory.

    Dispatch, in order:

    * ``None``  -> ``_StaticMonoidFactory(None)`` when ``kind`` is
      ``"passthrough"``, otherwise ``_StaticMonoidFactory([])``
    * ``list``  -> ``_StaticMonoidFactory(columns)``
    * ``MonoidFactory`` instance -> returned unchanged
    * callable  -> ``_AllDataMonoidFactory(columns)``
    * schema    -> ``_CallableMonoidFactory(columns)``

    Raises:
        TypeError: if ``columns`` matches none of the cases above.
    """
    if columns is None:
        default = None if kind == "passthrough" else []
        return _StaticMonoidFactory(default)
    if isinstance(columns, list):
        return _StaticMonoidFactory(columns)
    if isinstance(columns, MonoidFactory):
        return columns
    # The callable check must precede the schema check, as in the original chain.
    if callable(columns):
        return _AllDataMonoidFactory(columns)
    if is_schema(columns):
        return _CallableMonoidFactory(columns)
    raise TypeError(f"type {type(columns)}, columns {columns}")
def _columns_to_list(columns, kind, X):
    """Resolve a column specification against ``X``.

    ``columns`` may be ``None`` (every column index of ``X`` when ``kind``
    is ``"passthrough"``, otherwise an empty list), a list (used as-is), a
    callable (invoked with ``X``), or a schema (converted with
    ``_columns_schema_to_list``).

    If the resolved selection is non-empty and its first element is a
    string, every entry is looked up in ``X.columns`` and replaced by its
    position; otherwise the selection is returned unchanged.

    Raises:
        TypeError: if ``columns`` is none of the supported kinds.
    """
    if columns is None:
        selected = list(range(X.shape[1])) if kind == "passthrough" else []
    elif isinstance(columns, list):
        selected = columns
    elif callable(columns):
        selected = columns(X)
    elif is_schema(columns):
        selected = _columns_schema_to_list(X, columns)
    else:
        raise TypeError(f"type {type(columns)}, columns {columns}")

    # Only the first element decides whether the selection holds names.
    if len(selected) == 0 or not isinstance(selected[0], str):
        return selected

    position_of = {}
    for position, label in enumerate(X.columns):
        position_of[label] = position
    return [position_of[label] for label in selected]