def test_make_pandas_transformer_transform(
    transformer, kwargs, sk_path, input_csv
):
    """Test wrapped transformer has the initial transform functionality.

    Args:
        transformer: wrapped transformer class name
        kwargs: keyword arguments for transformer initialization
        sk_path: path to the module containing the wrapped sklearn
            transformer
        input_csv: dataset to test on

    """
    import pandas as pd
    import numpy as np
    from scipy.sparse import issparse

    sk_transformer = get_transformer(transformer, source_lib=sk_path)(
        **kwargs
    )
    transformer = get_transformer(transformer)(**kwargs)

    df = pd.read_csv(input_csv)
    crim_df = df[["crim"]]
    transformer.fit(crim_df)
    sk_transformer.fit(crim_df)
    sk_out = sk_transformer.transform(crim_df)
    if issparse(sk_out):
        sk_out = sk_out.toarray()

    assert np.array_equal(transformer.transform(crim_df).values, sk_out)
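# Hedged sketch: the arguments above are presumably supplied through
# pytest.mark.parametrize. The transformer name, kwargs, source module, and
# CSV path below are illustrative stand-ins, not the repo's actual fixtures.
import pytest


@pytest.mark.parametrize(
    "transformer,kwargs,sk_path,input_csv",
    [
        (
            "StandardScaler",
            {},
            "sklearn.preprocessing",
            "tests/data/boston_housing.csv",
        )
    ],
)
def test_transform_example(transformer, kwargs, sk_path, input_csv):
    ...  # body would mirror test_make_pandas_transformer_transform above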
def pick_transformer(self, X, y=None, **fit_params):
    """Get best intent transformer for a given column.

    Note: This function also sets the cache_manager

    Args:
        X: input DataFrame
        y: input labels
        **fit_params: fit_params

    Returns:
        Best intent transformer.

    """
    column = X.columns[0]
    override_key = "_".join([Override.INTENT, column])
    if override_key in self.cache_manager[AcceptedKey.OVERRIDE]:
        intent_override = self.cache_manager[AcceptedKey.OVERRIDE][
            override_key
        ]
        intent_class = get_transformer(intent_override)
    else:
        intent_class = get_transformer(self._resolve_intent(X, y=y))
    return intent_class()
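# Hedged sketch of the override lookup in pick_transformer, using plain dicts
# in place of the real cache_manager. Override.INTENT and AcceptedKey.OVERRIDE
# are assumed to be string-like constants, and the column and intent names are
# made up for illustration.
cache_manager = {"override": {"intent_age": "NumericIntent"}}

column = "age"
override_key = "_".join(["intent", column])
if override_key in cache_manager["override"]:
    intent_name = cache_manager["override"][override_key]
    # -> "NumericIntent"; pick_transformer would then return
    #    get_transformer("NumericIntent")()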
def test_make_pandas_transformer_init(transformer, sk_path):
    """Test pandas_wrap has initial transformer init functionality.

    Should be able to accept any parameters from the sklearn transformer
    and initialize on the wrapped instance. They should also possess the
    is_wrapped method.

    Args:
        transformer: wrapped transformer
        sk_path: path to the module containing the wrapped sklearn
            transformer

    """
    sk_transformer = get_transformer(transformer, source_lib=sk_path)()
    params = sk_transformer.get_params()
    transformer = get_transformer(transformer)(**params)
def transformer(self, value):
    """Validate transformer initialization.

    Args:
        value (object): The selected transformer that SmartTransformer
            should use.

    Raises:
        ValueError: If input is neither a valid foreshadow wrapped
            transformer, scikit-learn Pipeline, scikit-learn FeatureUnion,
            nor None.

    """
    if isinstance(value, str):
        value = get_transformer(value)(**self.kwargs)
        self.unset_resolve()

    # Check transformer type
    is_trans = is_transformer(value)
    trans_wrapped = (
        is_wrapped(value)
        if getattr(self, "check_wrapped", True)
        else True
    )  # True by default passes this check if we don't want it.
    is_pipe = isinstance(value, Pipeline)
    is_none = value is None
    checks = [is_trans, is_pipe, is_none, trans_wrapped]

    # Check the transformer inheritance status
    if not any(checks):
        logging.error(
            "transformer: {} failed checks: {}".format(value, checks)
        )
        raise ValueError(
            "{} is neither a scikit-learn Pipeline, FeatureUnion, a "
            "wrapped foreshadow transformer, nor None.".format(value)
        )

    self._transformer = value
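# Illustrative sketch of the validation above: a value is accepted when any
# single check passes (transformer, wrapped transformer, Pipeline, or None);
# ValueError is raised only when every check fails. The helper and flag names
# here are stand-ins, not part of the class.
def _passes_checks(is_trans, trans_wrapped, is_pipe, is_none):
    checks = [is_trans, is_pipe, is_none, trans_wrapped]
    return any(checks)


assert _passes_checks(False, False, False, True)  # None is accepted
assert not _passes_checks(False, False, False, False)  # setter would raise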
def test_make_pandas_transformer_meta(transformer, expected_path):
    """Test that the wrapped transformer has proper metadata.

    Args:
        transformer: wrapped transformer class name
        expected_path: path to the initial transformer

    """
    expected_class = get_transformer(transformer, source_lib=expected_path)
    transformer = get_transformer(transformer)()

    assert isinstance(transformer, expected_class)  # should remain a subclass
    assert type(transformer).__name__ == expected_class.__name__
    assert transformer.__doc__ == expected_class.__doc__
def get_config(self):
    """Resolve a config instance.

    Returns:
        dict: A resolved version of the system configuration that merges \
            system, user, and local configuration setups.

    """
    local_path = os.path.abspath("")
    local_config = load_config(local_path)

    # Expand the dictionaries in order of precedence
    resolved_strs = {
        **self.system_config,
        **self.user_config,
        **local_config,
    }

    resolved_hash = hash(json.dumps(resolved_strs, sort_keys=True))
    if resolved_hash in self._cfg_list:
        return self._cfg_list[resolved_hash]

    resolved = {}
    # key is cleaner, resolver, or intent
    # all individual steps are converted to classes
    for key, data in resolved_strs.items():
        if not len(data):
            resolved[key] = data
        elif isinstance(data, list):
            resolved[key] = [
                get_transformer(transformer) for transformer in data
            ]
        elif isinstance(data, dict):
            resolved[key] = {
                step: [
                    get_transformer(transformer)
                    for transformer in transformer_list
                ]
                for step, transformer_list in data.items()
            }

    self._cfg_list[resolved_hash] = resolved

    return resolved
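# Illustrative sketch (not foreshadow's actual config contents) of the
# precedence merge above: dict unpacking lets local entries override user
# entries, which in turn override system entries. The step and class names
# below are made up.
system_config = {"cleaner": ["DropCleaner"], "resolver": ["AutoResolver"]}
user_config = {"cleaner": ["StandardCleaner"]}
local_config = {"resolver": []}

resolved_strs = {**system_config, **user_config, **local_config}
# {'cleaner': ['StandardCleaner'], 'resolver': []}
# Each remaining string would then be mapped to a class via get_transformer.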
def _summarize(self, X_df):
    """Summarize each column of X_df by its resolved intent.

    Args:
        X_df: input DataFrame

    Returns:
        dict: Mapping of column name to its intent and summary statistics.

    """
    summary = {}
    if self.y_var:
        intent = "Label"
        data = standard_col_summary(X_df)
        summary[X_df.columns[0]] = {"intent": intent, "data": data}
    else:
        for k in X_df.columns.values.tolist():
            intent = self.cache_manager[AcceptedKey.INTENT, k]
            data = get_transformer(intent).column_summary(X_df[[k]])
            summary[k] = {"intent": intent, "data": data}
    return summary
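# Illustrative sketch of the structure _summarize returns (column names,
# intents, and statistics here are made up): one entry per column, holding the
# resolved intent name and that intent's column summary.
example_summary = {
    "age": {"intent": "Numeric", "data": {"count": 506, "nan": 0}},
    "town": {"intent": "Categorical", "data": {"count": 506, "unique": 92}},
}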
def test_make_pandas_transformer_fit(transformer, input_csv):
    """Test pandas_wrap has initial transformer fit functionality.

    Args:
        transformer: wrapped transformer class name
        input_csv: dataset to test on

    """
    import pandas as pd

    transformer = get_transformer(transformer)()
    df = pd.read_csv(input_csv)
    assert transformer.fit(df) == transformer