Example #1
0
def test_make_pandas_transformer_transform(transformer, kwargs, sk_path,
                                           input_csv):
    """Check that a wrapped transformer transforms like its sklearn original.

    The wrapped transformer and the class of the same name loaded from
    ``sk_path`` are both built with ``kwargs`` and fit on the ``crim``
    column of the dataset. Their transformed outputs must then be equal
    element-wise.

    Args:
        transformer: wrapped transformer class name
        kwargs: keyword arguments used to initialize both transformers
        sk_path: path to the module containing the wrapped sklearn
            transformer
        input_csv: dataset (CSV) to test on

    """
    import numpy as np
    import pandas as pd
    from scipy.sparse import issparse

    reference = get_transformer(transformer, source_lib=sk_path)(**kwargs)
    wrapped = get_transformer(transformer)(**kwargs)

    crim_df = pd.read_csv(input_csv)[["crim"]]
    wrapped.fit(crim_df)
    reference.fit(crim_df)

    expected = reference.transform(crim_df)
    # A sparse reference result is densified before the element-wise
    # comparison against the wrapped output's ``.values``.
    expected = expected.toarray() if issparse(expected) else expected
    assert np.array_equal(wrapped.transform(crim_df).values, expected)
Example #2
0
    def pick_transformer(self, X, y=None, **fit_params):
        """Return a new instance of the intent transformer for a column.

        The column is the first column of ``X``. An intent override stored
        in the cache manager under ``"<Override.INTENT>_<column>"`` takes
        precedence; otherwise the intent comes from ``_resolve_intent``.

        Args:
            X: input DataFrame; only its first column name is read here
            y: input labels, forwarded to ``_resolve_intent``
            **fit_params: accepted for interface compatibility; not used
                in this method body

        Returns:
            An instance of the selected intent transformer class, created
            with no arguments.

        """
        first_column = X.columns[0]
        lookup_key = "_".join((Override.INTENT, first_column))

        if lookup_key in self.cache_manager[AcceptedKey.OVERRIDE]:
            # An explicit override wins over automatic resolution.
            intent_name = self.cache_manager[AcceptedKey.OVERRIDE][lookup_key]
        else:
            intent_name = self._resolve_intent(X, y=y)

        return get_transformer(intent_name)()
Example #3
0
def test_make_pandas_transformer_init(transformer, sk_path):
    """Check that a wrapped transformer accepts the sklearn init parameters.

    The default parameters of the original sklearn transformer are read with
    ``get_params`` and passed to the wrapped class. The test passes when that
    construction does not raise; the ``is_wrapped`` method is not asserted.

    Args:
        transformer: wrapped transformer class name
        sk_path: path to the module containing the wrapped sklearn
            transformer

    """
    sk_class = get_transformer(transformer, source_lib=sk_path)
    default_params = sk_class().get_params()

    wrapped_class = get_transformer(transformer)
    # Construction alone is the check: any rejected parameter raises here.
    wrapped_class(**default_params)
Example #4
0
    def transformer(self, value):
        """Validate and store the transformer.

        A string value is first resolved through ``get_transformer`` and
        instantiated with ``self.kwargs``, after which ``unset_resolve`` is
        called. The resulting value is accepted when at least one of the
        checks below holds, and is then stored on ``self._transformer``.

        Args:
            value (object): The selected transformer that SmartTransformer
                should use, or the name of one as a string.

        Raises:
            ValueError: If none of the checks pass, i.e. the value is not
                recognised as a transformer, a scikit-learn Pipeline, a
                wrapped transformer, or None.

        """
        if isinstance(value, str):
            transformer_class = get_transformer(value)
            value = transformer_class(**self.kwargs)
            self.unset_resolve()

        is_trans = is_transformer(value)
        # With check_wrapped disabled, the wrapped check counts as passed.
        if getattr(self, "check_wrapped", True):
            trans_wrapped = is_wrapped(value)
        else:
            trans_wrapped = True
        checks = [
            is_trans,
            isinstance(value, Pipeline),
            value is None,
            trans_wrapped,
        ]

        # NOTE(review): the checks are OR-ed, so one passing check is enough;
        # with check_wrapped disabled every value is accepted. Confirm this
        # is the intended strictness.
        if any(checks):
            self._transformer = value
            return

        logging.error("transformer: {} failed checks: {}".format(
            value, checks))
        raise ValueError(
            "{} is neither a scikit-learn Pipeline, FeatureUnion, a "
            "wrapped foreshadow transformer, nor None.".format(value))
Example #5
0
def test_make_pandas_transformer_meta(transformer, expected_path):
    """Check that a wrapped transformer keeps the original's metadata.

    The wrapped instance must be an instance of the original class, and its
    class name and docstring must match the original's.

    Args:
        transformer: wrapped transformer class name
        expected_path: path to the initial transformer

    """
    original_class = get_transformer(transformer, source_lib=expected_path)
    wrapped = get_transformer(transformer)()
    wrapped_class = type(wrapped)

    # The wrapper should remain a subclass of the original transformer.
    assert isinstance(wrapped, original_class)
    assert wrapped_class.__name__ == original_class.__name__
    assert wrapped.__doc__ == original_class.__doc__
Example #6
0
    def get_config(self):
        """Resolve a config instance.

        The system, user, and local configurations are merged, with later
        sources overriding earlier ones. The merged result is keyed by the
        hash of its sorted JSON dump in ``self._cfg_list``, so an identical
        merged configuration is resolved only once.

        Returns:
            dict: The resolved configuration, in which every transformer
                name has been replaced by the result of
                ``get_transformer``. Empty values are kept as they are;
                non-empty values that are neither a list nor a dict are
                omitted.

        """
        local_config = load_config(os.path.abspath(""))

        # Precedence, lowest to highest: system, user, local.
        merged = dict(self.system_config)
        merged.update(self.user_config)
        merged.update(local_config)

        cache_key = hash(json.dumps(merged, sort_keys=True))
        if cache_key in self._cfg_list:
            return self._cfg_list[cache_key]

        resolved = {}
        # Per the original author's note, each key is cleaner, resolver, or
        # intent; all individual steps are converted to classes.
        for section, entries in merged.items():
            if len(entries) == 0:
                resolved[section] = entries
                continue
            if isinstance(entries, list):
                resolved[section] = list(map(get_transformer, entries))
            elif isinstance(entries, dict):
                resolved[section] = {
                    step: list(map(get_transformer, names))
                    for step, names in entries.items()
                }

        self._cfg_list[cache_key] = resolved
        return resolved
Example #7
0
 def _summarize(self, X_df):
     """Build a per-column summary of ``X_df``.

     When ``self.y_var`` is truthy, only the first column is reported, with
     the intent ``"Label"`` and the result of ``standard_col_summary`` on
     the whole frame. Otherwise each column is reported with the intent
     stored in the cache manager and that intent's ``column_summary``.

     Args:
         X_df: input DataFrame

     Returns:
         dict: Maps column name to ``{"intent": ..., "data": ...}``.

     """
     if self.y_var:
         label_data = standard_col_summary(X_df)
         return {X_df.columns[0]: {"intent": "Label", "data": label_data}}

     summary = {}
     for column in X_df.columns.values.tolist():
         column_intent = self.cache_manager[AcceptedKey.INTENT, column]
         intent_class = get_transformer(column_intent)
         # The summary is computed on a single-column frame.
         column_data = intent_class.column_summary(X_df[[column]])
         summary[column] = {"intent": column_intent, "data": column_data}
     return summary
Example #8
0
def test_make_pandas_transformer_fit(transformer, input_csv):
    """Test pandas_wrap has initial transformer fit functionality.

    ``fit`` is expected to return the very instance it was called on, so the
    result is compared by identity rather than equality.

    Args:
        transformer: wrapped transformer class name
        input_csv: dataset to test on

    """
    import pandas as pd

    transformer = get_transformer(transformer)()
    df = pd.read_csv(input_csv)
    # Identity, not equality: ``==`` could be satisfied by a distinct object
    # that merely compares equal, which would hide a fit() that returns a
    # copy instead of self.
    assert transformer.fit(df) is transformer