コード例 #1
0
    def test_get_original_attribute(self):
        """Original attribute stays retrievable after applying a patch twice.

        The attribute is fetched with ``gorilla.get_attribute`` before any
        patching. The same patch (created with ``allow_hit=True``) is then
        applied two times, and after each application the unfolded result of
        ``gorilla.get_original_attribute`` must be that very same object.
        """
        patched_cls = _tomodule.Class
        attr_name = 'method'
        pristine = gorilla.get_attribute(patched_cls, attr_name)
        replacement = gorilla.get_attribute(_frommodule, 'unbound_method')
        patch = gorilla.Patch(patched_cls, attr_name, replacement,
                              settings=gorilla.Settings(allow_hit=True))

        # Two rounds: the first application and a repeated one on top of it.
        for _ in range(2):
            gorilla.apply(patch)
            recovered = gorilla.get_original_attribute(patched_cls, attr_name)
            self.assertIs(_unfold(recovered), pristine)
コード例 #2
0
 def __init__(self):
     """frommodule.Class.__init__"""
     # Look up the pre-patch initializer through gorilla and run it first.
     original_init = gorilla.get_original_attribute(self, '__init__')
     original_init()
     # Remember what instance_value reads as after the original initializer
     # ran, then replace it with this module's marker string.
     value_after_original = self.instance_value
     self.stored_init_instance_value = value_after_original
     self.instance_value = "frommodule.Class.instance_value"
コード例 #3
0
    def patched__init__(self,
                        *,
                        size=100,
                        alpha=0.025,
                        window=5,
                        min_count=5,
                        max_vocab_size=None,
                        sample=1e-3,
                        seed=1,
                        workers=3,
                        min_alpha=0.0001,
                        sg=0,
                        hs=0,
                        negative=5,
                        cbow_mean=1,
                        hashfxn=hash,
                        iter=5,
                        null_word=0,
                        trim_rule=None,
                        sorted_vocab=1,
                        batch_words=10000,
                        mlinspect_caller_filename=None,
                        mlinspect_lineno=None,
                        mlinspect_optional_code_reference=None,
                        mlinspect_optional_source_code=None,
                        mlinspect_fit_transform_active=False):
        """
        Patch for ('example_pipelines.healthcare.healthcare_utils', 'MyW2VTransformer')

        Stores the ``mlinspect_*`` keyword arguments on the instance, then hands the
        original ``W2VTransformer.__init__`` and a wrapper around it to
        ``execute_patched_func_no_op_id`` together with the constructor keyword
        arguments, returning whatever that helper returns.
        """
        # pylint: disable=no-method-argument, attribute-defined-outside-init, too-many-locals, redefined-builtin,
        # pylint: disable=invalid-name
        original = gorilla.get_original_attribute(sklearn_api.W2VTransformer,
                                                  '__init__')

        # The constructor arguments are forwarded unchanged in two places below,
        # so they are collected once here.
        init_kwargs = {
            'size': size,
            'alpha': alpha,
            'window': window,
            'min_count': min_count,
            'max_vocab_size': max_vocab_size,
            'sample': sample,
            'seed': seed,
            'workers': workers,
            'min_alpha': min_alpha,
            'sg': sg,
            'hs': hs,
            'negative': negative,
            'cbow_mean': cbow_mean,
            'hashfxn': hashfxn,
            'iter': iter,
            'null_word': null_word,
            'trim_rule': trim_rule,
            'sorted_vocab': sorted_vocab,
            'batch_words': batch_words,
        }

        # Bookkeeping values supplied by the caller are stored before anything else runs.
        self.mlinspect_caller_filename = mlinspect_caller_filename
        self.mlinspect_lineno = mlinspect_lineno
        self.mlinspect_optional_code_reference = mlinspect_optional_code_reference
        self.mlinspect_optional_source_code = mlinspect_optional_source_code
        self.mlinspect_fit_transform_active = mlinspect_fit_transform_active

        def execute_inspections(_, caller_filename, lineno,
                                optional_code_reference, optional_source_code):
            """ Run the original __init__ and record the call-site details on the instance """
            original(self, **init_kwargs)

            self.mlinspect_caller_filename = caller_filename
            self.mlinspect_lineno = lineno
            self.mlinspect_optional_code_reference = optional_code_reference
            self.mlinspect_optional_source_code = optional_source_code

        return execute_patched_func_no_op_id(original,
                                             execute_inspections,
                                             self,
                                             **init_kwargs)
コード例 #4
0
 def fit(self, *args, **kwargs):
     """Patched ``Learner.fit``: delegate to ``_run_and_log_function`` with the original method."""
     # Names handed to the helper as its "unlogged params" list.
     skipped_names = ["self", "callbacks", "learner"]
     wrapped_fit = gorilla.get_original_attribute(Learner, "fit")
     # The trailing 3 is forwarded as-is; its meaning is defined by the helper.
     return _run_and_log_function(
         self, wrapped_fit, args, kwargs, skipped_names, 3
     )
コード例 #5
0
ファイル: keras.py プロジェクト: avflor/mlflow-deploy
 def fit_generator(self, *args, **kwargs):
     """Patched ``keras.Model.fit_generator``: delegate to ``_run_and_log_function``."""
     # Names handed to the helper as its "unlogged params" list.
     skipped_names = [
         'self',
         'generator',
         'callbacks',
         'validation_data',
         'verbose',
     ]
     wrapped_fit_generator = gorilla.get_original_attribute(keras.Model, 'fit_generator')
     # The trailing 4 is forwarded as-is; its meaning is defined by the helper.
     return _run_and_log_function(
         self, wrapped_fit_generator, args, kwargs, skipped_names, 4
     )
コード例 #6
0
ファイル: tensorflow.py プロジェクト: saikat1506/mlflow
 def add_summary(self, *args, **kwargs):
     """Call the original ``FileWriter.add_summary``, then ``_flush_queue``, and return the result."""
     wrapped_add_summary = gorilla.get_original_attribute(FileWriter, 'add_summary')
     outcome = wrapped_add_summary(self, *args, **kwargs)
     # Flushing happens only after the original call has returned.
     _flush_queue()
     return outcome
コード例 #7
0
ファイル: tensorflow.py プロジェクト: saikat1506/mlflow
 def add_event(self, event):
     """Pass ``event`` to ``_log_event``, then forward it to the original ``add_event``."""
     # Logging comes first, before the original writer sees the event.
     _log_event(event)
     wrapped_add_event = gorilla.get_original_attribute(EventFileWriter, 'add_event')
     outcome = wrapped_add_event(self, event)
     return outcome
コード例 #8
0
    def train(*args, **kwargs):
        """
        Patched replacement for ``lightgbm.train`` that logs the training to MLflow.

        Starts an MLflow run when none is active (and ends it at the end), logs the
        booster params and the remaining call arguments, injects a callback that
        records the evaluation results of every iteration, calls the original
        ``lightgbm.train`` and finally logs the metrics, the 'split' and 'gain' feature
        importances (as a plot and as JSON) and the trained model.

        :param args: Positional arguments forwarded to the original ``lightgbm.train``.
        :param kwargs: Keyword arguments forwarded to the original ``lightgbm.train``.
                       ``params`` has to be given either as the first positional
                       argument or as a keyword (a ``KeyError`` is raised otherwise).
        :return: The value returned by the original ``lightgbm.train``.
        """
        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + '-' + eval_name
                    res[key] = value

                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align='center', height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel('Importance')
            ax.set_title('Feature Importance ({})'.format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # Name the file after the helper's own argument instead of relying on
                # the loop variable of the enclosing function.
                filepath = os.path.join(
                    tmpdir, 'feature_importance_{}.png'.format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        original = gorilla.get_original_attribute(lightgbm, 'train')

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs['params']
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            'params', 'train_set', 'valid_sets', 'valid_names', 'fobj',
            'feval', 'init_model', 'evals_result', 'learning_rates',
            'callbacks'
        ]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        # inspect.getargspec was removed in Python 3.11; prefer getfullargspec and only
        # fall back to the legacy function on interpreters that lack it.
        get_arg_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
        all_arg_names = get_arg_spec(original)[0]
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        # A new list is always built so the caller's own callbacks list is never mutated
        # (in-place extension would make the recorder pile up across repeated calls).
        eval_results = []
        callbacks_index = all_arg_names.index('callbacks')
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            user_callbacks = tmp_list[callbacks_index]
            # callbacks may have been passed positionally as None.
            tmp_list[callbacks_index] = (
                [callback] if user_callbacks is None else list(user_callbacks) + [callback])
            args = tuple(tmp_list)
        elif kwargs.get('callbacks') is not None:
            kwargs['callbacks'] = list(kwargs['callbacks']) + [callback]
        else:
            kwargs['callbacks'] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index('early_stopping_rounds')
        early_stopping = (num_pos_args >= early_stopping_index + 1
                          or 'early_stopping_rounds' in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, 'stopped_iteration',
                           len(eval_results))
            # best_iteration is set even if training does not stop early.
            try_mlflow_log(mlflow.log_metric, 'best_iteration',
                           model.best_iteration)
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(mlflow.log_metrics,
                           eval_results[model.best_iteration - 1],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in ['split', 'gain']:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:  # pylint: disable=broad-except
                _logger.exception(
                    'Failed to log feature importance plot. LightGBM autologging '
                    'will ignore the failure and continue. Exception: ')

            # feature name -> importance value, written out as JSON.
            imp = dict(zip(features, importance.tolist()))
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, 'feature_importance_{}.json'.format(imp_type))
                with open(filepath, 'w') as f:
                    json.dump(imp, f, indent=2)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        try_mlflow_log(log_model, model, artifact_path='model')

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
コード例 #9
0
 def fit(self, *args, **kwargs):
     """
     Patched ``pl.Trainer.fit``.

     Looks up the original ``fit`` through gorilla and passes it, together with
     this trainer and the call arguments, to ``_run_and_log_function``.
     NOTE(review): any callback registration presumably happens inside that
     helper -- it is not visible here.
     """
     wrapped_fit = gorilla.get_original_attribute(pl.Trainer, "fit")
     outcome = _run_and_log_function(self, wrapped_fit, args, kwargs)
     return outcome
コード例 #10
0
ファイル: xgboost.py プロジェクト: shyamsunder0072/mlflow
    def train(*args, **kwargs):
        """
        Patched replacement for ``xgboost.train`` that logs the training to MLflow.

        Starts an MLflow run when none is active (and ends it at the end), logs the
        booster params and the remaining call arguments, injects a callback that
        records the evaluation results of every iteration, calls the original
        ``xgboost.train`` and finally logs the metrics, the feature importances (as a
        plot and as JSON) for every entry of ``importance_types`` (a name taken from
        the enclosing scope) and the trained model.

        :param args: Positional arguments forwarded to the original ``xgboost.train``.
        :param kwargs: Keyword arguments forwarded to the original ``xgboost.train``.
                       ``params`` has to be given either as the first positional
                       argument or as a keyword (a ``KeyError`` is raised otherwise).
        :return: The value returned by the original ``xgboost.train``.
        """
        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                eval_results.append(dict(env.evaluation_result_list))

            return callback

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # Name the file after the helper's own argument instead of relying on
                # the loop variable of the enclosing function.
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        original = gorilla.get_original_attribute(xgboost, "train")

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        # inspect.getargspec was removed in Python 3.11; prefer getfullargspec and only
        # fall back to the legacy function on interpreters that lack it.
        get_arg_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        all_arg_names = get_arg_spec(original)[0]
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        # A new list is always built so the caller's own callbacks list is never mutated
        # (in-place extension would make the recorder pile up across repeated calls).
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            user_callbacks = tmp_list[callbacks_index]
            # callbacks may have been passed positionally as None.
            tmp_list[callbacks_index] = (
                [callback] if user_callbacks is None else list(user_callbacks) + [callback])
            args = tuple(tmp_list)
        elif kwargs.get("callbacks") is not None:
            kwargs["callbacks"] = list(kwargs["callbacks"]) + [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (num_pos_args >= early_stopping_index + 1
                          or "early_stopping_rounds" in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, "stopped_iteration",
                           len(eval_results) - 1)
            try_mlflow_log(mlflow.log_metric, "best_iteration",
                           model.best_iteration)
            try_mlflow_log(mlflow.log_metrics,
                           eval_results[model.best_iteration],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = model.get_score(importance_type=imp_type)
            try:
                # Unpacking sits inside the try block: an empty score dict makes it
                # raise ValueError, which must not abort the training call.
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:  # pylint: disable=broad-except
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        try_mlflow_log(log_model, model, artifact_path="model")

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
コード例 #11
0
 def save_parameters(path, params=None):
     """Run the original ``nn.save_parameters`` and log the written file to MLflow.

     The original function is called with ``path`` and ``params``; afterwards
     ``path`` is passed to ``mlflow.log_artifact`` via ``try_mlflow_log``.
     """
     wrapped_save = gorilla.get_original_attribute(nn, 'save_parameters')
     wrapped_save(path, params)
     active = mlflow.active_run()
     # NOTE(review): this 'runs:<id>/parameters' string is handed to
     # log_artifact as its second positional argument -- confirm that this
     # is the intended artifact path.
     artifact_target = 'runs:{}/{}'.format(active.info.run_id, 'parameters')
     try_mlflow_log(mlflow.log_artifact, path, artifact_target)
コード例 #12
0
 def add_series(self, index, value):
     """Log an averaged metric when ``_check_interval`` says so, then call the original ``add``.

     When the interval check passes, ``value`` is replaced by the mean of the
     buffered values plus the new one and logged under ``self.name`` at step
     ``index``.
     NOTE(review): the replaced (averaged) value is also what gets forwarded
     to the original ``MonitorSeries.add`` -- confirm this is intended.
     """
     should_log = _check_interval(index, self.flush_at, self.interval)
     if should_log:
         samples = self.buf + [value]
         value = sum(samples) / (len(self.buf) + 1)
         try_mlflow_log(mlflow.log_metric, self.name, value, step=index)
     wrapped_add = gorilla.get_original_attribute(MonitorSeries, 'add')
     wrapped_add(self, index, value)
コード例 #13
0
    def train(*args, **kwargs):
        """
        Patched replacement for ``xgboost.train`` that logs the training to MLflow.

        Starts an MLflow run when none is active (and ends it at the end), logs the
        booster params and the remaining call arguments, injects a callback that
        records the evaluation results of every iteration, calls the original
        ``xgboost.train`` and finally logs the metrics, the feature importances (as
        JSON) for every entry of ``importance_types`` (a name taken from the enclosing
        scope) and the trained model.

        :param args: Positional arguments forwarded to the original ``xgboost.train``.
        :param kwargs: Keyword arguments forwarded to the original ``xgboost.train``.
                       ``params`` has to be given either as the first positional
                       argument or as a keyword (a ``KeyError`` is raised otherwise).
        :return: The value returned by the original ``xgboost.train``.
        """

        def record_eval_results(eval_results):
            """
            Create a callback function that records evaluation results.
            """
            def callback(env):
                eval_results.append(dict(env.evaluation_result_list))
            return callback

        if not mlflow.active_run():
            try_mlflow_log(mlflow.start_run)
            auto_end_run = True
        else:
            auto_end_run = False

        original = gorilla.get_original_attribute(xgboost, 'train')

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs['params']
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = ['params', 'dtrain', 'evals', 'obj', 'feval', 'evals_result',
                           'xgb_model', 'callbacks', 'learning_rates']
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        # inspect.getargspec was removed in Python 3.11; prefer getfullargspec and only
        # fall back to the legacy function on interpreters that lack it.
        get_arg_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
        all_arg_names = get_arg_spec(original)[0]
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        # A new list is always built so the caller's own callbacks list is never mutated
        # (in-place extension would make the recorder pile up across repeated calls).
        eval_results = []
        callbacks_index = all_arg_names.index('callbacks')
        callback = record_eval_results(eval_results)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            user_callbacks = tmp_list[callbacks_index]
            # callbacks may have been passed positionally as None.
            tmp_list[callbacks_index] = (
                [callback] if user_callbacks is None else list(user_callbacks) + [callback])
            args = tuple(tmp_list)
        elif kwargs.get('callbacks') is not None:
            kwargs['callbacks'] = list(kwargs['callbacks']) + [callback]
        else:
            kwargs['callbacks'] = [callback]

        # training model
        model = original(*args, **kwargs)

        # logging metrics on each iteration.
        for idx, metrics in enumerate(eval_results):
            try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index('early_stopping_rounds')
        early_stopping = (num_pos_args >= early_stopping_index + 1 or
                          'early_stopping_rounds' in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results) - 1)
            try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration],
                           step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = model.get_score(importance_type=imp_type)
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
                with open(filepath, 'w') as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        try_mlflow_log(log_model, model, artifact_path='model')

        if auto_end_run:
            try_mlflow_log(mlflow.end_run)
        return model
コード例 #14
0
ファイル: frommodule.py プロジェクト: jonike/gorilla
 def __init__(self):
     """frommodule.Class.__init__"""
     # Look up the pre-patch initializer through gorilla and run it first.
     original_init = gorilla.get_original_attribute(self, '__init__')
     original_init()
     # Remember what instance_value reads as after the original initializer
     # ran, then replace it with this module's marker string.
     value_after_original = self.instance_value
     self.stored_init_instance_value = value_after_original
     self.instance_value = "frommodule.Class.instance_value"
コード例 #15
0
 def new_add(self, *args, **kwargs):
     """new add"""
     # Fetch the pre-patch "add" from this object and double whatever it returns.
     original_add = gorilla.get_original_attribute(self, "add")
     original_result = original_add(*args, **kwargs)
     return 2 * original_result