Example 1
 def save_parameters(path, params=None):
     original = gorilla.get_original_attribute(nn, 'save_parameters')
     original(path, params)
     run_id = mlflow.active_run().info.run_id
     uri = 'runs:{}/{}'.format(run_id, 'parameters')
     try_mlflow_log(mlflow.log_artifact, path, uri)
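For context, a patch like save_parameters above only takes effect once it is registered and applied through gorilla. A minimal sketch of that wiring, assuming nn is the patched destination and using gorilla's Patch/Settings API (this registration step is not part of the excerpt):

    import gorilla

    # Sketch (assumption, not from the excerpt): register the patch with store_hit=True so that
    # gorilla.get_original_attribute(nn, 'save_parameters') can later retrieve the original.
    settings = gorilla.Settings(allow_hit=True, store_hit=True)
    patch = gorilla.Patch(nn, 'save_parameters', save_parameters, settings=settings)
    gorilla.apply(patch)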
Example 2
    def train(*args, **kwargs):
        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """

            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + "-" + eval_name
                    res[key] = value

                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        original = gorilla.get_original_attribute(lightgbm, "train")

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "train_set",
            "valid_sets",
            "valid_names",
            "fobj",
            "feval",
            "init_model",
            "evals_result",
            "learning_rates",
            "callbacks",
        ]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, "stopped_iteration", len(eval_results))
            # best_iteration is set even if training does not stop early.
            try_mlflow_log(mlflow.log_metric, "best_iteration", model.best_iteration)
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(
                mlflow.log_metrics, eval_results[model.best_iteration - 1], step=extra_step
            )

        # logging feature importance as artifacts.
        for imp_type in ["split", "gain"]:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:  # pylint: disable=broad-except
                _logger.exception(
                    "Failed to log feature importance plot. LightGBM autologging "
                    "will ignore the failure and continue. Exception: "
                )

            imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f, indent=2)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        # train_set must exist as the original train function already ran successfully
        train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(train_set, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(input_example)
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_example,
            log_model_signature,
            _logger,
        )

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
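For reference, the record_eval_results callback above flattens LightGBM's per-iteration env.evaluation_result_list, which holds (data_name, eval_name, value, is_higher_better) tuples, into a metrics dict keyed by "data_name-eval_name". A minimal sketch of that conversion with made-up values:

    # Hypothetical values, mirroring the dict built inside callback(env) above.
    evaluation_result_list = [("valid_0", "l2", 0.25, False), ("valid_0", "auc", 0.91, True)]
    res = {data_name + "-" + eval_name: value
           for data_name, eval_name, value, _ in evaluation_result_list}
    # res == {"valid_0-l2": 0.25, "valid_0-auc": 0.91}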
Example 3
 def train_begin(self, estimator, *args, **kwargs):
     try_mlflow_log(mlflow.log_param, "num_layers", len(estimator.net))
     if estimator.max_epoch is not None:
         try_mlflow_log(mlflow.log_param, "epochs", estimator.max_epoch)
     if estimator.max_batch is not None:
         try_mlflow_log(mlflow.log_param, "batches",
                        estimator.max_batch)
     try_mlflow_log(mlflow.log_param, "optimizer_name",
                    type(estimator.trainer.optimizer).__name__)
     if hasattr(estimator.trainer.optimizer, "lr"):
         try_mlflow_log(mlflow.log_param, "learning_rate",
                        estimator.trainer.optimizer.lr)
     if hasattr(estimator.trainer.optimizer, "epsilon"):
         try_mlflow_log(mlflow.log_param, "epsilon",
                        estimator.trainer.optimizer.epsilon)
Example 4
    def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
        opt = self.model.optimizer
        if hasattr(opt, '_name'):
            try_mlflow_log(mlflow.log_param, 'optimizer_name', opt._name)
        # The elif handles the case of a TensorFlow optimizer rather than a Keras one.
        elif hasattr(opt, 'optimizer'):
            # TensorFlow optimizer parameters are associated with the inner optimizer variable.
            # Therefore, we assign opt to be opt.optimizer for logging parameters.
            opt = opt.optimizer
            try_mlflow_log(mlflow.log_param, 'optimizer_name',
                           type(opt).__name__)
        if hasattr(opt, 'lr'):
            lr = opt.lr if type(
                opt.lr) is float else tensorflow.keras.backend.eval(opt.lr)
            try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
        elif hasattr(opt, '_lr'):
            lr = opt._lr if type(
                opt._lr) is float else tensorflow.keras.backend.eval(opt._lr)
            try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
        if hasattr(opt, 'epsilon'):
            epsilon = opt.epsilon if type(opt.epsilon) is float \
                else tensorflow.keras.backend.eval(opt.epsilon)
            try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)
        elif hasattr(opt, '_epsilon'):
            epsilon = opt._epsilon if type(opt._epsilon) is float \
                else tensorflow.keras.backend.eval(opt._epsilon)
            try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)

        sum_list = []
        self.model.summary(print_fn=sum_list.append)
        summary = '\n'.join(sum_list)
        tempdir = tempfile.mkdtemp()
        try:
            summary_file = os.path.join(tempdir, "model_summary.txt")
            with open(summary_file, 'w') as f:
                f.write(summary)
            try_mlflow_log(mlflow.log_artifact, local_path=summary_file)
        finally:
            shutil.rmtree(tempdir)
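As a side note on the temp-file pattern above: on MLflow versions that provide mlflow.log_text, the same summary could be logged without the temporary directory. A sketch under that assumption:

    # Sketch assuming an MLflow version that exposes mlflow.log_text.
    try_mlflow_log(mlflow.log_text, summary, artifact_file="model_summary.txt")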
Example 5
 def on_train_end(self, logs=None):  # pylint: disable=unused-argument
     try_mlflow_log(mlflow.keras.log_model,
                    self.model,
                    artifact_path='model')
Example 6
 def on_train_end(self, logs=None):
     try_mlflow_log(log_model, self.model, artifact_path="model")
Example 7
 def on_train_end(self, **kwargs):
     try_mlflow_log(log_model, self.learner, artifact_path="model")
Example 8
    def train(*args, **kwargs):

        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + '-' + eval_name
                    res[key] = value

                eval_results.append(res)
            return callback

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        original = gorilla.get_original_attribute(lightgbm, 'train')

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs['params']
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = ['params', 'train_set', 'valid_sets', 'valid_names', 'fobj', 'feval',
                           'init_model', 'evals_result', 'learning_rates', 'callbacks']

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index('callbacks')
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
            kwargs['callbacks'] += [callback]
        else:
            kwargs['callbacks'] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index('early_stopping_rounds')
        early_stopping = (num_pos_args >= early_stopping_index + 1 or
                          'early_stopping_rounds' in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results))
            # best_iteration is set even if training does not stop early.
            try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration - 1],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in ['split', 'gain']:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
                with open(filepath, 'w') as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        try_mlflow_log(log_model, model, artifact_path='model')

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
Example 9
 def on_checkpoint(self, checkpoint_path):
     #os.makedirs(checkpoint_path, exist_ok=True)
     try_mlflow_log(self.client.log_artifact, self._run_id, checkpoint_path)
Example 10
def _log_estimator_params(param_map):
    # Chunk model parameters to avoid hitting the log_batch API limit
    for chunk in _chunk_dict(param_map, chunk_size=MAX_PARAMS_TAGS_PER_BATCH):
        truncated = _truncate_dict(chunk, MAX_ENTITY_KEY_LENGTH, MAX_PARAM_VAL_LENGTH)
        try_mlflow_log(mlflow.log_params, truncated)
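The _chunk_dict and _truncate_dict helpers are not included in these excerpts; a minimal sketch of what they could look like (hypothetical reimplementations, not the actual MLflow code):

    def _chunk_dict(d, chunk_size):
        # Yield successive sub-dicts of at most chunk_size items.
        items = list(d.items())
        for i in range(0, len(items), chunk_size):
            yield dict(items[i:i + chunk_size])

    def _truncate_dict(d, max_key_length, max_value_length):
        # Truncate keys and stringified values to the given maximum lengths.
        return {str(k)[:max_key_length]: str(v)[:max_value_length] for k, v in d.items()}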
Example 11
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            @exception_safe_function
            def callback(env):
                metrics_logger.record_metrics(dict(env.evaluation_result_list),
                                              env.iteration)
                eval_results.append(dict(env.evaluation_result_list))

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)
                metrics_logger.record_metrics(
                    {"stopped_iteration": extra_step - 1})
                metrics_logger.record_metrics(
                    {"best_iteration": model.best_iteration})
                metrics_logger.record_metrics(
                    eval_results[model.best_iteration], extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    try_mlflow_log(mlflow.log_artifact, filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model
Example 12
        def on_train_start(self, trainer, pl_module):
            """
            Logs Optimizer related metrics when the train begins

            :param trainer: pytorch lightning trainer instance
            :param pl_module: pytorch lightning base module
            """
            try_mlflow_log(mlflow.set_tag, "Mode", "training")
            try_mlflow_log(mlflow.log_param, "epochs", trainer.max_epochs)

            for callback in trainer.callbacks:
                if isinstance(callback, pl.callbacks.early_stopping.EarlyStopping):
                    self.early_stopping = True
                    self._log_early_stop_params(callback)

            if hasattr(trainer, "optimizers"):
                for optimizer in trainer.optimizers:
                    try_mlflow_log(mlflow.log_param, "optimizer_name", type(optimizer).__name__)
                    optimizer_name = type(optimizer).__name__.lower() + "_optimizer"

                    if hasattr(optimizer, "defaults"):
                        optim_dict = optimizer.defaults

                        if "lr" in optim_dict:
                            try_mlflow_log(
                                mlflow.log_param,
                                "learning_rate_" + optimizer_name,
                                optim_dict["lr"],
                            )

                        if "eps" in optim_dict:
                            try_mlflow_log(
                                mlflow.log_param, "epsilon_" + optimizer_name, optim_dict["eps"]
                            )

                        if "betas" in optim_dict:
                            try_mlflow_log(
                                mlflow.log_param, "betas_" + optimizer_name, optim_dict["betas"]
                            )

                        if "weight_decay" in optim_dict:
                            try_mlflow_log(
                                mlflow.log_param,
                                "weight_decay_" + optimizer_name,
                                optim_dict["weight_decay"],
                            )

            summary = str(ModelSummary(pl_module, mode="full"))
            tempdir = tempfile.mkdtemp()
            try:
                summary_file = os.path.join(tempdir, "model_summary.txt")
                with open(summary_file, "w") as f:
                    f.write(summary)

                try_mlflow_log(mlflow.log_artifact, local_path=summary_file)
            finally:
                shutil.rmtree(tempdir)
Example 13
        def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
            try_mlflow_log(mlflow.log_param, 'num_layers', len(self.model.layers))
            try_mlflow_log(mlflow.log_param, 'optimizer_name', type(self.model.optimizer).__name__)
            if hasattr(self.model.optimizer, 'lr'):
                lr = self.model.optimizer.lr if \
                    type(self.model.optimizer.lr) is float \
                    else keras.backend.eval(self.model.optimizer.lr)
                try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
            if hasattr(self.model.optimizer, 'epsilon'):
                epsilon = self.model.optimizer.epsilon if \
                    type(self.model.optimizer.epsilon) is float \
                    else keras.backend.eval(self.model.optimizer.epsilon)
                try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)

            sum_list = []
            self.model.summary(print_fn=sum_list.append)
            summary = '\n'.join(sum_list)
            try_mlflow_log(mlflow.set_tag, 'model_summary', summary)

            tempdir = tempfile.mkdtemp()
            try:
                summary_file = os.path.join(tempdir, "model_summary.txt")
                with open(summary_file, 'w') as f:
                    f.write(summary)
                try_mlflow_log(mlflow.log_artifact, local_path=summary_file)
            finally:
                shutil.rmtree(tempdir)
Example 14
    def fit_mlflow(self, func_name, *args, **kwargs):
        should_start_run = mlflow.active_run() is None
        if should_start_run:
            try_mlflow_log(mlflow.start_run)

        # TODO: We should not log nested estimator parameters for
        # parameter search estimators (GridSearchCV, RandomizedSearchCV)

        # Chunk and truncate model parameters to avoid hitting the log_batch API limit
        for chunk in _chunk_dict(self.get_params(deep=True),
                                 chunk_size=MAX_PARAMS_TAGS_PER_BATCH):
            truncated = _truncate_dict(chunk, MAX_ENTITY_KEY_LENGTH,
                                       MAX_PARAM_VAL_LENGTH)
            try_mlflow_log(mlflow.log_params, truncated)

        try_mlflow_log(
            mlflow.set_tags,
            {
                "estimator_name":
                self.__class__.__name__,
                "estimator_class":
                self.__class__.__module__ + "." + self.__class__.__name__,
            },
        )

        original_fit = gorilla.get_original_attribute(self, func_name)
        try:
            fit_output = original_fit(*args, **kwargs)
        except Exception as e:
            if should_start_run:
                try_mlflow_log(mlflow.end_run,
                               RunStatus.to_string(RunStatus.FAILED))

            raise e

        if hasattr(self, "score"):
            try:
                score_args = _get_args_for_score(self.score, self.fit, args,
                                                 kwargs)
                training_score = self.score(*score_args)
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    self.score.__qualname__ +
                    " failed. The 'training_score' metric will not be recorded. Scoring error: "
                    + str(e))
                _logger.warning(msg)
            else:
                try_mlflow_log(mlflow.log_metric, "training_score",
                               training_score)

        try_mlflow_log(log_model, self, artifact_path="model")

        if should_start_run:
            try_mlflow_log(mlflow.end_run)

        return fit_output
Example 15
def _log_specialized_estimator_content(fitted_estimator,
                                       run_id,
                                       prefix,
                                       X,
                                       y_true,
                                       sample_weight=None):
    import sklearn

    mlflow_client = MlflowClient()
    metrics = dict()
    try:
        if sklearn.base.is_classifier(fitted_estimator):
            metrics = _get_classifier_metrics(fitted_estimator, prefix, X,
                                              y_true, sample_weight)
        elif sklearn.base.is_regressor(fitted_estimator):
            metrics = _get_regressor_metrics(fitted_estimator, prefix, X,
                                             y_true, sample_weight)
    except Exception as err:
        msg = ("Failed to autolog metrics for " +
               fitted_estimator.__class__.__name__ + ". Logging error: " +
               str(err))
        _logger.warning(msg)
    else:
        # batch log all metrics
        try_mlflow_log(
            mlflow_client.log_batch,
            run_id,
            metrics=[
                Metric(key=str(key),
                       value=value,
                       timestamp=int(time.time() * 1000),
                       step=0) for key, value in metrics.items()
            ],
        )

    if sklearn.base.is_classifier(fitted_estimator):
        try:
            artifacts = _get_classifier_artifacts(fitted_estimator, prefix, X,
                                                  y_true, sample_weight)
        except Exception as e:
            msg = ("Failed to autolog artifacts for " +
                   fitted_estimator.__class__.__name__ + ". Logging error: " +
                   str(e))
            _logger.warning(msg)
            return

        with TempDir() as tmp_dir:
            for artifact in artifacts:
                try:
                    display = artifact.function(**artifact.arguments)
                    display.ax_.set_title(artifact.title)
                    artifact_path = "{}.png".format(artifact.name)
                    filepath = tmp_dir.path(artifact_path)
                    display.figure_.savefig(filepath)
                    import matplotlib.pyplot as plt

                    plt.close(display.figure_)
                except Exception as e:
                    _log_warning_for_artifacts(artifact.name,
                                               artifact.function, e)

            try_mlflow_log(mlflow_client.log_artifacts, run_id, tmp_dir.path())

    return metrics
Example 16
    def _log_posttraining_metadata(estimator, *args, **kwargs):
        """
        Records metadata for a scikit-learn estimator after training has completed.
        This is intended to be invoked within a patched scikit-learn training routine
        (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
        MLflow run that can be referenced via the fluent Tracking API.

        :param estimator: The scikit-learn estimator for which to log metadata.
        :param args: The arguments passed to the scikit-learn training routine (e.g.,
                     `fit()`, `fit_transform()`, ...).
        :param kwargs: The keyword arguments passed to the scikit-learn training routine.
        """
        if hasattr(estimator, "score"):
            try:
                score_args = _get_args_for_score(estimator.score, estimator.fit, args, kwargs)
                training_score = estimator.score(*score_args)
            except Exception as e:
                msg = (
                    estimator.score.__qualname__
                    + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                    + str(e)
                )
                _logger.warning(msg)
            else:
                try_mlflow_log(mlflow.log_metric, "training_score", training_score)

        # log common metrics and artifacts for estimators (classifier, regressor)
        _log_specialized_estimator_content(estimator, mlflow.active_run().info.run_id, args, kwargs)

        def get_input_example():
            # Fetch an input example using the first several rows of the array-like
            # training data supplied to the training routine (e.g., `fit()`)
            fit_arg_names = _get_arg_names(estimator.fit)
            X_var_name, y_var_name = fit_arg_names[:2]
            input_example = _get_Xy(args, kwargs, X_var_name, y_var_name)[0][
                :INPUT_EXAMPLE_SAMPLE_ROWS
            ]
            return input_example

        def infer_model_signature(input_example):
            if not hasattr(estimator, "predict"):
                raise Exception(
                    "the trained model does not specify a `predict` function, "
                    + "which is required in order to infer the signature"
                )

            return infer_signature(input_example, estimator.predict(input_example))

        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                estimator,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        if _is_parameter_search_estimator(estimator):
            if hasattr(estimator, "best_estimator_") and log_models:
                try_mlflow_log(
                    log_model,
                    estimator.best_estimator_,
                    artifact_path="best_estimator",
                    signature=signature,
                    input_example=input_example,
                )

            if hasattr(estimator, "best_score_"):
                try_mlflow_log(mlflow.log_metric, "best_cv_score", estimator.best_score_)

            if hasattr(estimator, "best_params_"):
                best_params = {
                    "best_{param_name}".format(param_name=param_name): param_value
                    for param_name, param_value in estimator.best_params_.items()
                }
                try_mlflow_log(mlflow.log_params, best_params)

            if hasattr(estimator, "cv_results_"):
                try:
                    # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                    # information is consistent with the parent run
                    child_tags = context_registry.resolve_tags()
                    child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
                    _create_child_runs_for_parameter_search(
                        cv_estimator=estimator,
                        parent_run=mlflow.active_run(),
                        child_tags=child_tags,
                    )
                except Exception as e:

                    msg = (
                        "Encountered exception during creation of child runs for parameter search."
                        " Child runs may be missing. Exception: {}".format(str(e))
                    )
                    _logger.warning(msg)

                try:
                    cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
                    _log_parameter_search_results_as_artifact(
                        cv_results_df, mlflow.active_run().info.run_id
                    )
                except Exception as e:

                    msg = (
                        "Failed to log parameter search results as an artifact."
                        " Exception: {}".format(str(e))
                    )
                    _logger.warning(msg)
Example 17
 def on_epoch_end(self, epoch, logs=None):
     if not logs:
         return
     try_mlflow_log(mlflow.log_metrics, logs, step=epoch)
Example 18
    def train(*args, **kwargs):

        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + '-' + eval_name
                    res[key] = value

                eval_results.append(res)
            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align='center', height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel('Importance')
            ax.set_title('Feature Importance ({})'.format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, 'feature_importance_{}.png'.format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        original = gorilla.get_original_attribute(lightgbm, 'train')

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs['params']
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = ['params', 'train_set', 'valid_sets', 'valid_names', 'fobj', 'feval',
                           'init_model', 'evals_result', 'learning_rates', 'callbacks']

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index('callbacks')
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
            kwargs['callbacks'] += [callback]
        else:
            kwargs['callbacks'] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index('early_stopping_rounds')
        early_stopping = (num_pos_args >= early_stopping_index + 1 or
                          'early_stopping_rounds' in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results))
            # best_iteration is set even if training does not stop early.
            try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration - 1],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in ['split', 'gain']:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:  # pylint: disable=broad-except
                _logger.exception('Failed to log feature importance plot. LightGBM autologging '
                                  'will ignore the failure and continue. Exception: ')

            imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
                with open(filepath, 'w') as f:
                    json.dump(imp, f, indent=2)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        try_mlflow_log(log_model, model, artifact_path='model')

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
Example 19
        def on_train_begin(self, **kwargs):
            info = layers_info(self.learner)
            try_mlflow_log(mlflow.log_param, "num_layers", len(info))
            try_mlflow_log(mlflow.log_param, "opt_func",
                           self.opt_func.func.__name__)

            if hasattr(self.opt, "true_wd"):
                try_mlflow_log(mlflow.log_param, "true_wd", self.opt.true_wd)

            if hasattr(self.opt, "bn_wd"):
                try_mlflow_log(mlflow.log_param, "bn_wd", self.opt.bn_wd)

            if hasattr(self.opt, "train_bn"):
                try_mlflow_log(mlflow.log_param, "train_bn", self.train_bn)

            summary = model_summary(self.learner)
            try_mlflow_log(mlflow.set_tag, "model_summary", summary)

            tempdir = tempfile.mkdtemp()
            try:
                summary_file = os.path.join(tempdir, "model_summary.txt")
                with open(summary_file, "w") as f:
                    f.write(summary)
                try_mlflow_log(mlflow.log_artifact, local_path=summary_file)
            finally:
                shutil.rmtree(tempdir)
Example 20
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
            #       https://github.com/dmlc/xgboost/issues/6984
            if Version(xgboost.__version__.replace("SNAPSHOT", "dev")) >= Version("1.3.0"):
                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

                class Callback(
                    xgboost.callback.TrainingCallback, metaclass=ExceptionSafeAbstractClass,
                ):
                    def after_iteration(self, model, epoch, evals_log):
                        """
                        Run after each iteration. Return True when training should stop.
                        """
                        # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                        # that looks like this:
                        # {
                        #   "train": {
                        #     "auc": [0.5, 0.6, 0.7, ...],
                        #     ...
                        #   },
                        #   ...
                        # }
                        evaluation_result_dict = {}
                        for data_name, metric_dict in evals_log.items():
                            for metric_name, metric_values_on_each_iter in metric_dict.items():
                                key = "{}-{}".format(data_name, metric_name)
                                # The last element in `metric_values_on_each_iter` corresponds to
                                # the metric on the current iteration
                                evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                        metrics_logger.record_metrics(evaluation_result_dict, epoch)
                        eval_results.append(evaluation_result_dict)

                        # Return `False` to indicate training should not stop
                        return False

                return Callback()

            else:

                @exception_safe_function
                def callback(env):
                    metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                    eval_results.append(dict(env.evaluation_result_list))

                return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (
                num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
            )
            if early_stopping:
                extra_step = len(eval_results)
                metrics_logger.record_metrics({"stopped_iteration": extra_step - 1})
                metrics_logger.record_metrics({"best_iteration": model.best_iteration})
                metrics_logger.record_metrics(eval_results[model.best_iteration], extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: "
                )

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    try_mlflow_log(mlflow.log_artifact, filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model
Example 21
    def before_fit(self):
        from fastai.callback.all import ParamScheduler

        # Do not record in case of predicting or lr_finder
        if hasattr(self, "lr_finder") or hasattr(self, "gather_preds"):
            return

        if self.is_fine_tune and len(self.opt.param_lists) == 1:
            _logger.warning(
                "Using `fine_tune` with model which cannot be frozen."
                " Current model have only one param group which makes it impossible to freeze."
                " Because of this it will record some fitting params twice (overriding exception)"
            )

        frozen = self.opt.frozen_idx != 0
        if frozen and self.is_fine_tune:
            self.freeze_prefix = "freeze_"
            try_mlflow_log(mlflow.log_param, "frozen_idx", self.opt.frozen_idx)
        else:
            self.freeze_prefix = ""

        # Extract function name when `opt_func` is partial function
        if isinstance(self.opt_func, partial):
            try_mlflow_log(
                mlflow.log_param,
                self.freeze_prefix + "opt_func",
                self.opt_func.keywords["opt"].__name__,
            )
        else:
            try_mlflow_log(mlflow.log_param, self.freeze_prefix + "opt_func",
                           self.opt_func.__name__)

        params_not_to_log = []
        for cb in self.cbs:
            if isinstance(cb, ParamScheduler):
                params_not_to_log = list(cb.scheds.keys())
                for param, f in cb.scheds.items():
                    values = []
                    for step in np.linspace(0, 1, num=100, endpoint=False):
                        values.append(f(step))
                    values = np.array(values)

                    # Log params main values from scheduling
                    try_mlflow_log(mlflow.log_param,
                                   self.freeze_prefix + param + "_min",
                                   np.min(values, 0))
                    try_mlflow_log(mlflow.log_param,
                                   self.freeze_prefix + param + "_max",
                                   np.max(values, 0))
                    try_mlflow_log(mlflow.log_param,
                                   self.freeze_prefix + param + "_init",
                                   values[0])
                    try_mlflow_log(mlflow.log_param,
                                   self.freeze_prefix + param + "_final",
                                   values[-1])

                    # Plot and save image of scheduling
                    fig = plt.figure()
                    plt.plot(values)
                    plt.ylabel(param)

                    tempdir = tempfile.mkdtemp()
                    try:
                        scheds_file = os.path.join(
                            tempdir, self.freeze_prefix + param + ".png")
                        plt.savefig(scheds_file)
                        plt.close(fig)
                        try_mlflow_log(mlflow.log_artifact,
                                       local_path=scheds_file)
                    finally:
                        shutil.rmtree(tempdir)
                break

        for param in self.opt.hypers[0]:
            if param not in params_not_to_log:
                try_mlflow_log(
                    mlflow.log_param,
                    self.freeze_prefix + param,
                    [h[param] for h in self.opt.hypers],
                )

        if hasattr(self.opt, "true_wd"):
            try_mlflow_log(mlflow.log_param, self.freeze_prefix + "true_wd",
                           self.opt.true_wd)

        if hasattr(self.opt, "bn_wd"):
            try_mlflow_log(mlflow.log_param, self.freeze_prefix + "bn_wd",
                           self.opt.bn_wd)

        if hasattr(self.opt, "train_bn"):
            try_mlflow_log(mlflow.log_param, self.freeze_prefix + "train_bn",
                           self.opt.train_bn)
Example 22
def test_no_force_try_mlflow_log_to_fail():
    with mlflow.start_run():
        try_mlflow_log(lambda: 1 / 0)
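The test above only passes if try_mlflow_log swallows logging failures instead of raising them. Its implementation is not part of these excerpts, but a minimal sketch consistent with that behavior could look like this (an assumption, not the actual MLflow helper):

    import logging

    _logger = logging.getLogger(__name__)

    def try_mlflow_log(fn, *args, **kwargs):
        # Call fn and downgrade any failure to a warning so training is never interrupted.
        try:
            return fn(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            _logger.warning("Logging to MLflow failed: %s", e)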
Example 23
 def on_epoch_end(self, epoch, logs=None):
     if (epoch - 1) % _LOG_EVERY_N_STEPS == 0:
         try_mlflow_log(mlflow.log_metrics, logs, step=epoch)
Example 24
    def on_train_end(self, logs=None):
        try_mlflow_log(mlflow.log_param, 'num_layers',
                       len(self.model.layers))
        try_mlflow_log(mlflow.log_param, 'optimizer_name',
                       type(self.model.optimizer).__name__)
        if hasattr(self.model.optimizer, 'lr'):
            lr = self.model.optimizer.lr if \
                type(self.model.optimizer.lr) is float \
                else keras.backend.eval(self.model.optimizer.lr)
            try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
        if hasattr(self.model.optimizer, 'epsilon'):
            epsilon = self.model.optimizer.epsilon if \
                type(self.model.optimizer.epsilon) is float \
                else keras.backend.eval(self.model.optimizer.epsilon)
            try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)
        sum_list = []
        self.model.summary(print_fn=sum_list.append)
        summary = '\n'.join(sum_list)
        try_mlflow_log(mlflow.set_tag, 'summary', summary)
        try_mlflow_log(log_model, self.model, artifact_path='model')
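
The on_epoch_end and on_train_end hooks in the two examples above are methods of a Keras callback. A hedged skeleton of how such a class is typically declared is shown below; the class name MlflowKerasCallback is an assumption, not the name used in the source, and the real method bodies are the ones shown above.

import keras


class MlflowKerasCallback(keras.callbacks.Callback):
    # Skeleton only; see the on_epoch_end / on_train_end bodies above.
    def on_epoch_end(self, epoch, logs=None):
        pass

    def on_train_end(self, logs=None):
        pass

Such a callback is passed to Keras via the standard mechanism, e.g. model.fit(x, y, callbacks=[MlflowKerasCallback()]), so the hooks run during training.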
Example n. 25
0
def _log_artifacts_with_warning(**kwargs):
    try_mlflow_log(mlflow.log_artifacts, **kwargs)
Example n. 26
0
    def _log_posttraining_metadata(estimator, spark_model, params):

        if _is_parameter_search_estimator(estimator):
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                child_tags = context_registry.resolve_tags()
                child_tags.update(
                    {MLFLOW_AUTOLOGGING: AUTOLOGGING_INTEGRATION_NAME})
                _create_child_runs_for_parameter_search(
                    parent_estimator=estimator,
                    parent_model=spark_model,
                    parent_run=mlflow.active_run(),
                    child_tags=child_tags,
                )
            except Exception:
                import traceback

                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(
                        traceback.format_exc()))
                _logger.warning(msg)

            estimator_param_maps = _get_tuning_param_maps(
                estimator,
                estimator._autologging_metadata.uid_to_indexed_name_map)

            metrics_dict, best_index = _get_param_search_metrics_and_best_index(
                estimator, spark_model)
            _log_parameter_search_results_as_artifact(
                estimator_param_maps, metrics_dict,
                mlflow.active_run().info.run_id)

            # Log best_param_map as JSON artifact
            best_param_map = estimator_param_maps[best_index]
            try_mlflow_log(mlflow.log_dict,
                           best_param_map,
                           artifact_file="best_parameters.json")

            # Log best_param_map as autologging parameters as well
            _log_estimator_params({
                f"best_{param_name}": param_value
                for param_name, param_value in best_param_map.items()
            })

        if log_models:
            if _should_log_model(spark_model):
                # TODO: support model signature
                try_mlflow_log(
                    mlflow.spark.log_model,
                    spark_model,
                    artifact_path="model",
                )
                if _is_parameter_search_model(spark_model):
                    try_mlflow_log(
                        mlflow.spark.log_model,
                        spark_model.bestModel,
                        artifact_path="best_model",
                    )
            else:
                _logger.warning(
                    _get_warning_msg_for_skip_log_model(spark_model))
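
From the user's side, _log_posttraining_metadata above runs after fit() once Spark ML autologging is enabled. A hedged end-to-end sketch follows; the toy DataFrame and local Spark session are purely illustrative.

import mlflow
import mlflow.pyspark.ml
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
mlflow.pyspark.ml.autolog()

df = spark.createDataFrame(
    [(Vectors.dense([0.0, 1.0]), 0.0), (Vectors.dense([1.0, 0.0]), 1.0)],
    ["features", "label"],
)
with mlflow.start_run():
    LogisticRegression(maxIter=5).fit(df)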
Example n. 27
0
    def train(*args, **kwargs):
        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                eval_results.append(dict(env.evaluation_result_list))

            return callback

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # Name the file after the importance_type argument rather than
                # relying on the enclosing loop variable.
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        original = gorilla.get_original_attribute(xgboost, "train")

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getfullargspec(original).args
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (num_pos_args >= early_stopping_index + 1
                          or "early_stopping_rounds" in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, "stopped_iteration",
                           len(eval_results) - 1)
            try_mlflow_log(mlflow.log_metric, "best_iteration",
                           model.best_iteration)
            try_mlflow_log(mlflow.log_metrics,
                           eval_results[model.best_iteration],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:  # pylint: disable=broad-except
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    try_mlflow_log(mlflow.log_artifact, filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        input_example = None
        signature = None
        try:
            # it is possible that the dataset was constructed before the patched
            #   constructor was applied, so we cannot assume the input_example_info exists
            input_example_info = getattr(dtrain, "input_example_info", None)

            if input_example_info is None:
                raise Exception("please ensure that autologging is " +
                                "enabled before constructing the dataset.")

            input_example = input_example_info.input_example
            if input_example is None:
                # input example collection failed
                raise Exception(input_example_info.error_msg)

            model_output = model.predict(xgboost.DMatrix(input_example))
            signature = infer_signature(input_example, model_output)
        except Exception as e:  # pylint: disable=broad-except
            input_example = None
            msg = "Failed to gather example input and model signature: " + str(
                e)
            _logger.warning(msg)

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
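
The patched train function above is installed over xgboost.train with the gorilla monkey-patching library, which is also what gorilla.get_original_attribute relies on to recover the unpatched function. A hedged sketch of how such a patch is typically applied is below; the exact Settings used by the surrounding autologging module are an assumption.

import gorilla
import xgboost

# allow_hit=True lets the patch overwrite the existing xgboost.train;
# store_hit=True keeps the original so get_original_attribute can return it.
settings = gorilla.Settings(allow_hit=True, store_hit=True)
patch = gorilla.Patch(xgboost, "train", train, settings=settings)
gorilla.apply(patch)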
Example n. 28
0
    def _log_posttraining_metadata(estimator, *args, **kwargs):
        """
        Records metadata for a scikit-learn estimator after training has completed.
        This is intended to be invoked within a patched scikit-learn training routine
        (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
        MLflow run that can be referenced via the fluent Tracking API.

        :param estimator: The scikit-learn estimator for which to log metadata.
        :param args: The arguments passed to the scikit-learn training routine (e.g.,
                     `fit()`, `fit_transform()`, ...).
        :param kwargs: The keyword arguments passed to the scikit-learn training routine.
        """
        if hasattr(estimator, "score"):
            try:
                score_args = _get_args_for_score(estimator.score,
                                                 estimator.fit, args, kwargs)
                training_score = estimator.score(*score_args)
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    estimator.score.__qualname__ +
                    " failed. The 'training_score' metric will not be recorded. Scoring error: "
                    + str(e))
                _logger.warning(msg)
            else:
                try_mlflow_log(mlflow.log_metric, "training_score",
                               training_score)

        # log common metrics and artifacts for estimators (classifier, regressor)
        _log_specialized_estimator_content(estimator,
                                           mlflow.active_run().info.run_id,
                                           args, kwargs)

        input_example = None
        signature = None
        if hasattr(estimator, "predict"):
            try:
                # Fetch an input example using the first several rows of the array-like
                # training data supplied to the training routine (e.g., `fit()`)
                SAMPLE_ROWS = 5
                fit_arg_names = _get_arg_names(estimator.fit)
                X_var_name, y_var_name = fit_arg_names[:2]
                input_example = _get_Xy(args, kwargs, X_var_name,
                                        y_var_name)[0][:SAMPLE_ROWS]

                model_output = estimator.predict(input_example)
                signature = infer_signature(input_example, model_output)
            except Exception as e:  # pylint: disable=broad-except
                input_example = None
                msg = "Failed to infer an input example and model signature: " + str(
                    e)
                _logger.warning(msg)

        try_mlflow_log(
            log_model,
            estimator,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

        if _is_parameter_search_estimator(estimator):
            if hasattr(estimator, "best_estimator_"):
                try_mlflow_log(
                    log_model,
                    estimator.best_estimator_,
                    artifact_path="best_estimator",
                    signature=signature,
                    input_example=input_example,
                )

            if hasattr(estimator, "best_params_"):
                best_params = {
                    "best_{param_name}".format(param_name=param_name):
                    param_value
                    for param_name, param_value in
                    estimator.best_params_.items()
                }
                try_mlflow_log(mlflow.log_params, best_params)

            if hasattr(estimator, "cv_results_"):
                try:
                    # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                    # information is consistent with the parent run
                    environment_tags = context_registry.resolve_tags()
                    _create_child_runs_for_parameter_search(
                        cv_estimator=estimator,
                        parent_run=mlflow.active_run(),
                        child_tags=environment_tags,
                    )
                except Exception as e:  # pylint: disable=broad-except

                    msg = (
                        "Encountered exception during creation of child runs for parameter search."
                        " Child runs may be missing. Exception: {}".format(
                            str(e)))
                    _logger.warning(msg)

                try:
                    cv_results_df = pd.DataFrame.from_dict(
                        estimator.cv_results_)
                    _log_parameter_search_results_as_artifact(
                        cv_results_df,
                        mlflow.active_run().info.run_id)
                except Exception as e:  # pylint: disable=broad-except

                    msg = (
                        "Failed to log parameter search results as an artifact."
                        " Exception: {}".format(str(e)))
                    _logger.warning(msg)
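
From the user's side, _log_posttraining_metadata above runs inside the patched fit() once scikit-learn autologging is enabled. A short usage sketch with toy data:

import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.linear_model import LinearRegression

mlflow.sklearn.autolog()

X = np.random.rand(20, 3)
y = np.random.rand(20)
with mlflow.start_run():
    # fit() is patched, so params, the training_score metric, the model
    # artifact, its inferred signature and an input example are logged
    # automatically, as in the routine above.
    LinearRegression().fit(X, y)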
Example n. 29
0
    def train_end(self, estimator, *args, **kwargs):
        if isinstance(estimator.net, HybridSequential):
            try_mlflow_log(log_model, estimator.net, artifact_path="model")
Example n. 30
0
    def add_series(self, index, value):
        if _check_interval(index, self.flush_at, self.interval):
            # Average the buffered values together with the current one and
            # log the smoothed metric at this step.
            value = sum(self.buf + [value]) / (len(self.buf) + 1)
            try_mlflow_log(mlflow.log_metric, self.name, value, step=index)
        original = gorilla.get_original_attribute(MonitorSeries, 'add')
        original(self, index, value)