def execute(self, source=config.FILE, **kwargs):
    """Load a dataset from a file or from a database query.

    Args:
        source: ``config.FILE`` to read via ``self.load_from_file`` or
            ``config.DATABASE`` to read via ``self.load_from_server``.
        **kwargs: ``filename`` is required for the file source, ``sql``
            for the database source; each is checked with
            ``kwargs_not_empty`` before loading.

    Returns:
        Whatever the selected loader returns. An empty ``pd.DataFrame`` is
        returned when ``source`` matches neither constant or when any
        exception is raised (the traceback is logged, not propagated).
    """
    loaded = pd.DataFrame()
    try:
        if source == config.FILE:
            filename = kwargs.get("filename")
            kwargs_not_empty(filename, "filename")
            loaded = self.load_from_file(filename)
        elif source == config.DATABASE:
            query = kwargs.get("sql")
            kwargs_not_empty(query, "sql")
            loaded = self.load_from_server(query)
    except Exception:
        # Best-effort job: record the failure and fall back to the last
        # value of ``loaded`` (the empty frame unless a loader succeeded).
        self.logger.error(traceback.format_exc())
    return loaded
def execute(self, data, **kwargs):
    """Split ``data`` into train and test partitions.

    Args:
        data: Frame to split; must not be empty.
        **kwargs: ``test_size`` (required, checked with
            ``kwargs_not_empty``) and optional ``stratify``; both are
            forwarded to ``train_test_split``.

    Returns:
        The result of ``train_test_split``, or ``None`` when ``data`` is
        empty or any exception is raised. Failures are printed and logged
        rather than propagated.
    """
    try:
        if data.empty:
            raise AttributeError("No data provided")

        test_size = kwargs.get("test_size")
        kwargs_not_empty(test_size, "test_size")
        return train_test_split(
            data,
            test_size=test_size,
            stratify=kwargs.get("stratify"),
        )
    except Exception as error:
        print(error)
        self.logger.error(traceback.format_exc())
    return None
def execute(self, data, **kwargs):
    """Run every requested explainer against every trained model.

    Args:
        data: Frame holding the target column and the feature columns.
        **kwargs:
            models: Mapping of model name to fitted model (required).
            explainers: Iterable of explainer callables, or names that are
                resolved through ``self.get_callable('morpher.explainers',
                name)`` (required).
            target: Name of the target column (required).
            models_features: Optional mapping of model name to the ordered
                list of feature names that model expects.
            exp_kwargs: Optional dict forwarded both to the explainer
                constructor and to its ``explain`` method. A ``"test"``
                entry that is a ``pd.DataFrame`` is aligned to the model's
                columns the same way ``data`` is.

    Returns:
        A ``defaultdict`` mapping model name -> explainer callable ->
        explanation, or ``None`` if anything inside the guarded section
        fails (the traceback is logged, not propagated).

    Raises:
        Whatever ``kwargs_not_empty`` raises for a missing required
        argument; those checks run before the guarded section.
    """

    def _with_columns(frame, columns):
        # Return a new frame restricted to ``columns`` in that order;
        # columns missing from ``frame`` are added filled with 0.0.
        missing = [col for col in columns if col not in frame.columns]
        if missing:
            frame = frame.copy()
            for col in missing:
                frame[col] = 0.0
        return frame[columns]

    models = kwargs.get("models")
    explainers = kwargs.get("explainers")
    target = kwargs.get("target")
    kwargs_not_empty(models, "models")
    kwargs_not_empty(explainers, "explainers")
    kwargs_not_empty(target, "target")

    models_features = kwargs.get("models_features") or {}
    exp_kwargs = kwargs.get("exp_kwargs") or {}
    test = exp_kwargs.get("test")
    if not isinstance(test, pd.DataFrame):
        test = pd.DataFrame()

    try:
        if data.empty or not (models and target and explainers):
            raise AttributeError(
                "No data provided, models or target not available")

        explanations = defaultdict(dict)
        for clf_name in models:
            # Start every model from the ORIGINAL inputs. Rebinding the
            # shared frame would leave later models with only the previous
            # model's columns and zero-fill features that really exist.
            model_data = data
            model_kwargs = dict(exp_kwargs)

            # include zero-out features, in case not all are available
            # get the features in the correct order that model expects them
            feats = models_features.get(clf_name)
            if feats:
                # include target, because explain job needs it (skip it in
                # feats so the column is not selected twice)
                columns = [target] + [f for f in feats if f != target]
                model_data = _with_columns(data, columns)
                if not test.empty:
                    model_kwargs["test"] = _with_columns(test, columns)

            model = models[clf_name]
            for exp_name in explainers:
                if not callable(exp_name):
                    exp_name = self.get_callable(
                        'morpher.explainers', exp_name)
                # instantiate the algorithm in runtime
                explainer = exp_name(
                    model_data, model, target, **model_kwargs)
                explanations[clf_name][exp_name] = explainer.explain(
                    **model_kwargs)
        return explanations
    except Exception:
        self.logger.error(traceback.format_exc())
    return None
def scale(data, **kwargs):
    """Scale ``data`` with the scaler class passed as ``method``.

    Args:
        data: Frame to scale; must not be empty.
        **kwargs: ``method`` (required, checked with ``kwargs_not_empty``)
            is called with no arguments to build the scaler, whose
            ``fit_transform`` is then applied to ``data``.

    Returns:
        A new frame with the transformed values and the column labels and
        index of ``data``. If ``data`` is empty or any exception is
        raised, the traceback is logged and the input ``data`` is returned
        unchanged.
    """
    try:
        # TODO: test if this is null
        if data.empty:
            raise AttributeError("No data provided")

        scaler_factory = kwargs.get("method")
        kwargs_not_empty(scaler_factory, "method")

        # TODO: think about how to solve the issue of fit x fit_transform()
        transformed = pd.DataFrame(scaler_factory().fit_transform(data))
        # Restore the labels of the input on the freshly built frame.
        transformed.columns = data.columns
        transformed.index = data.index
        data = transformed
    except Exception:
        logging.error(traceback.format_exc())
    return data
def execute(self, data, target, **kwargs):
    """Fit every requested algorithm on ``data``.

    Args:
        data: Frame containing the ``target`` column plus the features.
        target: Name of the label column.
        **kwargs:
            algorithms: Iterable of algorithm callables, or names resolved
                through ``self.get_callable('morpher.algorithms', name)``
                (required, checked with ``kwargs_not_empty``).
            hyperparams, optimize, param_grid, crossval, n_splits,
            verbose: Passed as keyword arguments to each algorithm
                constructor.
            drop: Optional list of feature columns to remove before
                fitting; names not present in the features are ignored.
            persist: When exactly ``True``, ``self.persist`` is called
                with the trained models and a dict holding the target and
                the feature columns.

    Returns:
        A dict mapping algorithm callable -> fitted classifier. When
        ``crossval`` is truthy, a tuple of that dict and a second dict
        mapping algorithm callable -> the value returned by ``fit``.
        If ``data`` is empty or any exception is raised, the traceback is
        printed and logged and the input ``data`` is returned instead.
    """
    try:
        if data.empty:
            raise AttributeError("No data provided")

        # if split_data was called beforehand, data contains a subset of
        # the original available data
        y = data[target]
        X = data.drop(target, axis=1)

        algorithms = kwargs.get("algorithms")
        kwargs_not_empty(algorithms, "algorithms")

        crossval = kwargs.get("crossval")
        clf_options = {
            "hyperparams": kwargs.get("hyperparams"),
            "optimize": kwargs.get("optimize"),
            "param_grid": kwargs.get("param_grid"),
            "crossval": crossval,
            "n_splits": kwargs.get("n_splits"),
            "verbose": kwargs.get("verbose"),  # verbose mode
        }

        drop = kwargs.get("drop")  # list of features to drop
        if drop:
            X = X.drop([col for col in drop if col in X.columns], axis=1)

        fitted = {}
        cv_scores = {}
        for algorithm in algorithms:
            # here for compatibility purposes
            if not callable(algorithm):
                algorithm = self.get_callable('morpher.algorithms', algorithm)

            # instantiate the algorithm in runtime
            clf = algorithm(**clf_options)

            # if fit returns anything, it will be the cross_validated metrics
            fit_result = clf.fit(X, y)
            if crossval:
                cv_scores[algorithm] = fit_result
            fitted[algorithm] = clf

        if kwargs.get("persist") is True:
            self.persist(fitted, {"target": target, "features": X.columns})

        return (fitted, cv_scores) if crossval else fitted
    except Exception:
        print(traceback.format_exc())
        logging.error(traceback.format_exc())
    return data