Example #1
0
 def transform_schema(self, s_X):
     """Used internally by Lale for type-checking downstream operators.

     When ``s_X`` is not a schema it is treated as data: it is wrapped with
     ``add_schema``, this operator is fitted on it, and the schema of the
     wrapped data is passed to ``_transform_schema_fit_columns``.

     When ``s_X`` is a schema and ``_fit_columns`` is already set, the
     result comes from ``_transform_schema_fit_columns``. Otherwise the
     output is computed with ``_transform_schema_schema`` only if both
     ``_columns`` and ``_drop_columns`` are statically known (None, or a
     ``_CallableMonoidFactory`` wrapping a schema); if either is not,
     ``s_X`` is returned unchanged.
     """

     def _static_view(cols):
         # Returns (value to use, whether it is known without seeing data).
         if cols is None:
             return cols, True
         if isinstance(cols, _CallableMonoidFactory):
             wrapped = cols._c
             if is_schema(wrapped):
                 return wrapped, True
         return cols, False

     if not is_schema(s_X):
         # Actual data rather than a schema: fit on it, then use the fitted columns.
         X = lale.datasets.data_schemas.add_schema(s_X)
         assert X is not None
         self.fit(X)
         return self._transform_schema_fit_columns(X.json_schema)

     if hasattr(self, "_fit_columns"):
         return self._transform_schema_fit_columns(s_X)

     raw_keep, raw_drop = self._columns, self._drop_columns
     keep_spec, keep_is_static = _static_view(raw_keep)
     drop_spec, drop_is_static = _static_view(raw_drop)
     if keep_is_static and drop_is_static:
         return self._transform_schema_schema(s_X, keep_spec, drop_spec)
     # At least one selection is not statically known: pass the schema through.
     return s_X
Example #2
0
 def transform_schema(self, s_X):
     """Used internally by Lale for type-checking downstream operators.

     When ``s_X`` is not a schema it is treated as data: it is wrapped with
     ``add_schema``, this operator is fitted on it, and the schema of the
     wrapped data is passed to ``_transform_schema_fit_columns``.

     When ``s_X`` is a schema and ``_fit_columns`` is already set, the
     result comes from ``_transform_schema_fit_columns``. Otherwise the
     output is computed with ``_transform_schema_schema`` only if each of
     the 'columns' and 'drop_columns' hyperparameters is None or a schema;
     if either is not, ``s_X`` is returned unchanged.
     """
     if not is_schema(s_X):
         # Actual data rather than a schema: fit on it, then use the fitted columns.
         X = lale.datasets.data_schemas.add_schema(s_X)
         self.fit(X)
         return self._transform_schema_fit_columns(X.json_schema)

     if hasattr(self, '_fit_columns'):
         return self._transform_schema_fit_columns(s_X)

     keep_spec = self._hyperparams['columns']
     drop_spec = self._hyperparams['drop_columns']
     # A selection that is neither None nor a schema cannot be resolved
     # here, so the input schema is passed through untouched.
     if (keep_spec is not None and not is_schema(keep_spec)) or (
         drop_spec is not None and not is_schema(drop_spec)
     ):
         return s_X
     return self._transform_schema_schema(s_X, keep_spec, drop_spec)
Example #3
0
    def to_monoid(self, df):
        """Build a ``_StaticMonoid`` from the stored column specification.

        A callable specification is invoked with ``df``; a schema is
        converted with ``_columns_schema_to_list(df, schema)``. In both
        cases the resolved value replaces ``self._c``, so later calls see
        the resolved value. Any other specification must already be a list
        and is wrapped unchanged.
        """
        spec = self._c
        if callable(spec):
            resolved = spec(df)
        elif is_schema(spec):
            resolved = _columns_schema_to_list(df, spec)
        else:
            assert isinstance(spec, list)
            return _StaticMonoid(spec)

        # Remember the resolved columns in place of the original specification.
        self._c = resolved
        return _StaticMonoid(resolved)
Example #4
0
def get_column_factory(columns, kind):
    """Wrap a column specification in the matching monoid factory.

    Dispatch, in order:
      * None      -> ``_StaticMonoidFactory(None)`` when ``kind`` is
                     "passthrough", else ``_StaticMonoidFactory([])``
      * list      -> ``_StaticMonoidFactory(columns)``
      * an existing ``MonoidFactory`` -> returned as is
      * callable  -> ``_AllDataMonoidFactory(columns)``
      * schema    -> ``_CallableMonoidFactory(columns)``

    Raises:
        TypeError: if ``columns`` matches none of the cases above.
    """
    if columns is None:
        default_columns = None if kind == "passthrough" else []
        return _StaticMonoidFactory(default_columns)
    if isinstance(columns, list):
        return _StaticMonoidFactory(columns)
    if isinstance(columns, MonoidFactory):
        return columns
    if callable(columns):
        return _AllDataMonoidFactory(columns)
    if is_schema(columns):
        return _CallableMonoidFactory(columns)
    raise TypeError(f"type {type(columns)}, columns {columns}")
Example #5
0
def _columns_to_list(columns, kind, X):
    """Resolve a column specification against ``X`` into a list.

    ``columns`` may be:
      * None     -> every column position of ``X`` (``range(X.shape[1])``)
                    when ``kind`` is "passthrough", otherwise an empty list
      * a list   -> used as is
      * callable -> called with ``X``
      * a schema -> converted with ``_columns_schema_to_list(X, columns)``

    If the first element of the resolved list is a string, every element
    is looked up by name in ``X.columns`` and replaced by its position.

    Raises:
        TypeError: if ``columns`` matches none of the cases above.
    """
    if columns is None:
        selected = list(range(X.shape[1])) if kind == "passthrough" else []
    elif isinstance(columns, list):
        selected = columns
    elif callable(columns):
        selected = columns(X)
    elif is_schema(columns):
        selected = _columns_schema_to_list(X, columns)
    else:
        raise TypeError(f"type {type(columns)}, columns {columns}")

    # Only the first element decides whether the selection is by name.
    selected_by_name = len(selected) > 0 and isinstance(selected[0], str)
    if not selected_by_name:
        return selected

    position_of = {}
    for position, label in enumerate(X.columns):
        position_of[label] = position
    return [position_of[label] for label in selected]