Esempio n. 1
0
    def execute(self, source=config.FILE, **kwargs):
        """Load data from a file or from a database query.

        Args:
            source: ``config.FILE`` (default) to load through
                ``self.load_from_file``, or ``config.DATABASE`` to load
                through ``self.load_from_server``.
            **kwargs: ``filename`` is required for the file source and
                ``sql`` for the database source. Each is checked with
                ``kwargs_not_empty`` (presumably raising when missing --
                confirm against its definition).

        Returns:
            Whatever the selected loader returns. An empty ``DataFrame`` is
            returned when the source is not recognised or loading fails;
            failures are logged through ``self.logger`` and never raised.
        """
        data = pd.DataFrame()

        try:
            if source == config.FILE:
                filename = kwargs.get("filename")
                kwargs_not_empty(filename, "filename")
                data = self.load_from_file(filename)

            elif source == config.DATABASE:
                sql = kwargs.get("sql")
                kwargs_not_empty(sql, "sql")
                data = self.load_from_server(sql)

            else:
                # An unknown source used to fall through silently, leaving the
                # caller with an empty frame and no hint why. Raise inside the
                # try so it is logged like every other loading failure.
                raise ValueError("Unsupported source: {!r}".format(source))

        except Exception:
            self.logger.error(traceback.format_exc())

        return data
Esempio n. 2
0
    def execute(self, data, **kwargs):
        """Split ``data`` into train and test partitions.

        Args:
            data: object exposing ``.empty`` (a DataFrame in practice) that is
                handed to ``train_test_split``.
            **kwargs: ``test_size`` is required (checked with
                ``kwargs_not_empty``); ``stratify`` is optional. Both are
                forwarded to ``train_test_split``.

        Returns:
            The result of ``train_test_split``, or ``None`` when ``data`` is
            empty or any step fails. Failures are printed and logged through
            ``self.logger`` rather than raised.
        """
        try:
            # Guard first: nothing to split without data.
            if data.empty:
                raise AttributeError("No data provided")

            test_size = kwargs.get("test_size")
            kwargs_not_empty(test_size, "test_size")
            return train_test_split(
                data,
                test_size=test_size,
                stratify=kwargs.get("stratify"),
            )
        except Exception as err:
            print(err)
            self.logger.error(traceback.format_exc())

        return None
Esempio n. 3
0
    def execute(self, data, **kwargs):
        """Run every requested explainer against every provided model.

        Args:
            data: DataFrame handed to each explainer.
            **kwargs:
                models: mapping of model name to model object (required).
                explainers: iterable of explainer callables, or names that are
                    resolved with
                    ``self.get_callable('morpher.explainers', name)``
                    (required).
                target: name of the target column (required).
                models_features: optional mapping of model name to the ordered
                    list of feature names that model expects.
                exp_kwargs: optional dict forwarded both to the explainer
                    constructor and to ``explain()``. It may carry a ``test``
                    DataFrame.

        Returns:
            ``explanations[model_name][explainer_callable]`` holding the
            result of ``explain()``, or ``None`` when the inputs are unusable
            or any explainer fails (the traceback is logged).

        Note:
            ``models``, ``explainers`` and ``target`` are checked with
            ``kwargs_not_empty`` before the ``try`` block, so an error raised
            there is not caught here (assuming that helper raises -- confirm).
        """

        def _with_columns(frame, columns):
            """Return ``frame`` restricted to ``columns``; absent ones become 0.0."""
            absent = [col for col in columns if col not in frame.columns]
            if absent:
                # Work on a copy so the caller's frame is never modified.
                frame = frame.copy()
                for col in absent:
                    frame[col] = 0.0
            return frame[columns]

        models = kwargs.get("models")
        explainers = kwargs.get("explainers")
        target = kwargs.get("target")
        kwargs_not_empty(models, "models")
        kwargs_not_empty(explainers, "explainers")
        kwargs_not_empty(target, "target")
        models_features = kwargs.get("models_features") or {}

        # Copy so the per-model "test" override below never leaks into the
        # dict owned by the caller.
        exp_kwargs = dict(kwargs.get("exp_kwargs") or {})
        test = exp_kwargs.get("test")
        if not isinstance(test, pd.DataFrame):
            test = pd.DataFrame()

        try:
            if data.empty or not (models and target and explainers):
                raise AttributeError(
                    "No data provided, models or target not available")

            explanations = defaultdict(dict)
            for clf_name in models:
                # Always start from the full input. Rebinding ``data`` here
                # (as the code used to) narrowed it to the previous model's
                # columns, so later models got zero-filled features that were
                # actually available.
                model_data = data
                model_kwargs = dict(exp_kwargs)

                # Features in the order the model expects them; any that are
                # not available are zeroed out.
                feats = models_features.get(clf_name)
                if feats:
                    # include target, because explain job needs it
                    columns = [target] + list(feats)
                    model_data = _with_columns(data, columns)
                    if not test.empty:
                        model_kwargs["test"] = _with_columns(test, columns)

                model = models[clf_name]
                for exp_name in explainers:
                    if not callable(exp_name):
                        exp_name = self.get_callable(
                            'morpher.explainers', exp_name)

                    # instantiate the algorithm in runtime
                    explainer = exp_name(
                        model_data, model, target, **model_kwargs)
                    explanations[clf_name][exp_name] = explainer.explain(
                        **model_kwargs)

            return explanations

        except Exception:
            self.logger.error(traceback.format_exc())
            return None
Esempio n. 4
0
def scale(data, **kwargs):
    """Scale ``data`` with the scaler class passed as ``method``.

    Args:
        data: DataFrame to scale.
        **kwargs: ``method`` is required (checked with ``kwargs_not_empty``).
            It is called with no arguments to build the scaler, whose
            ``fit_transform`` is then applied to ``data``.

    Returns:
        A new DataFrame with the scaled values and the original column and
        index labels. If ``data`` is empty or anything fails, the traceback is
        logged and the input is returned unchanged.
    """
    try:
        # TODO: test if this is null
        if data.empty:
            raise AttributeError("No data provided")

        scaler_factory = kwargs.get("method")
        kwargs_not_empty(scaler_factory, "method")
        scaler = scaler_factory()

        # TODO: think about how to solve the issue of fit x fit_transform()
        transformed = pd.DataFrame(scaler.fit_transform(data))
        # Re-attach the original labels to the transformed values.
        transformed.columns = data.columns
        transformed.index = data.index

        data = transformed

    except Exception:
        logging.error(traceback.format_exc())

    return data
Esempio n. 5
0
    def execute(self, data, target, **kwargs):
        """Train every requested algorithm on ``data``.

        Args:
            data: DataFrame containing the feature columns and ``target``.
            target: name of the label column.
            **kwargs:
                algorithms: iterable of algorithm callables, or names resolved
                    with ``self.get_callable('morpher.algorithms', name)``
                    (required, checked with ``kwargs_not_empty``).
                hyperparams, optimize, param_grid, crossval, n_splits,
                verbose: forwarded unchanged to each algorithm constructor.
                drop: optional list of feature names to drop; names not
                    present in the features are ignored.
                persist: when exactly ``True``, ``self.persist`` is called
                    with the trained models, the target and the feature
                    columns.

        Returns:
            A dict mapping each algorithm callable to its fitted instance.
            When ``crossval`` is truthy, a tuple of that dict and a dict of
            the values returned by ``fit``. On any failure the traceback is
            printed and logged and the input ``data`` is returned instead.
        """
        try:
            if data.empty:
                raise AttributeError("No data provided")

            # if split_data was called beforehand, data contains a subset of
            # the original available data
            labels = data[target]
            features = data.drop(target, axis=1)

            algorithms = kwargs.get("algorithms")
            kwargs_not_empty(algorithms, "algorithms")
            crossval = kwargs.get("crossval")
            drop = kwargs.get("drop")  # list of features to drop

            # Options handed to every algorithm constructor.
            clf_options = {
                "hyperparams": kwargs.get("hyperparams"),
                "optimize": kwargs.get("optimize"),
                "param_grid": kwargs.get("param_grid"),
                "crossval": crossval,
                "n_splits": kwargs.get("n_splits"),
                "verbose": kwargs.get("verbose"),  # verbose mode
            }

            if drop:
                droppable = [col for col in drop if col in features.columns]
                features = features.drop(droppable, axis=1)

            trained_models = {}
            crossval_metrics = {}
            for algorithm in algorithms:
                # here for compatibility purposes
                if not callable(algorithm):
                    algorithm = self.get_callable('morpher.algorithms',
                                                  algorithm)

                # instantiate the algorithm in runtime
                clf = algorithm(**clf_options)

                # if fit returns anything, it will be the cross_validated
                # metrics
                fit_result = clf.fit(features, labels)
                if crossval:
                    crossval_metrics[algorithm] = fit_result

                trained_models[algorithm] = clf

            if kwargs.get("persist") is True:
                params = {"target": target, "features": features.columns}
                self.persist(trained_models, params)

            if crossval:
                return trained_models, crossval_metrics
            return trained_models

        except Exception:
            print(traceback.format_exc())
            logging.error(traceback.format_exc())

        return data