def test_get_original_attribute(self):
    destination = _tomodule.Class
    name = 'method'
    target = gorilla.get_attribute(destination, name)
    obj = gorilla.get_attribute(_frommodule, 'unbound_method')
    settings = gorilla.Settings(allow_hit=True)
    patch = gorilla.Patch(destination, name, obj, settings=settings)
    gorilla.apply(patch)
    self.assertIs(_unfold(gorilla.get_original_attribute(destination, name)), target)
    gorilla.apply(patch)
    self.assertIs(_unfold(gorilla.get_original_attribute(destination, name)), target)
def __init__(self):
    """frommodule.Class.__init__"""
    gorilla.get_original_attribute(self, '__init__')()
    self.stored_init_instance_value = self.instance_value
    self.instance_value = "frommodule.Class.instance_value"
def patched__init__(self, *, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                    sample=1e-3, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                    cbow_mean=1, hashfxn=hash, iter=5, null_word=0, trim_rule=None, sorted_vocab=1,
                    batch_words=10000, mlinspect_caller_filename=None, mlinspect_lineno=None,
                    mlinspect_optional_code_reference=None, mlinspect_optional_source_code=None,
                    mlinspect_fit_transform_active=False):
    """ Patch for ('example_pipelines.healthcare.healthcare_utils', 'MyW2VTransformer') """
    # pylint: disable=no-method-argument, attribute-defined-outside-init, too-many-locals, redefined-builtin
    # pylint: disable=invalid-name
    original = gorilla.get_original_attribute(sklearn_api.W2VTransformer, '__init__')

    self.mlinspect_caller_filename = mlinspect_caller_filename
    self.mlinspect_lineno = mlinspect_lineno
    self.mlinspect_optional_code_reference = mlinspect_optional_code_reference
    self.mlinspect_optional_source_code = mlinspect_optional_source_code
    self.mlinspect_fit_transform_active = mlinspect_fit_transform_active

    def execute_inspections(_, caller_filename, lineno, optional_code_reference, optional_source_code):
        """ Execute inspections, add DAG node """
        original(self, size=size, alpha=alpha, window=window, min_count=min_count,
                 max_vocab_size=max_vocab_size, sample=sample, seed=seed, workers=workers,
                 min_alpha=min_alpha, sg=sg, hs=hs, negative=negative, cbow_mean=cbow_mean,
                 hashfxn=hashfxn, iter=iter, null_word=null_word, trim_rule=trim_rule,
                 sorted_vocab=sorted_vocab, batch_words=batch_words)
        self.mlinspect_caller_filename = caller_filename
        self.mlinspect_lineno = lineno
        self.mlinspect_optional_code_reference = optional_code_reference
        self.mlinspect_optional_source_code = optional_source_code

    return execute_patched_func_no_op_id(original, execute_inspections, self, size=size, alpha=alpha,
                                         window=window, min_count=min_count,
                                         max_vocab_size=max_vocab_size, sample=sample, seed=seed,
                                         workers=workers, min_alpha=min_alpha, sg=sg, hs=hs,
                                         negative=negative, cbow_mean=cbow_mean, hashfxn=hashfxn,
                                         iter=iter, null_word=null_word, trim_rule=trim_rule,
                                         sorted_vocab=sorted_vocab, batch_words=batch_words)
def fit(self, *args, **kwargs):
    original = gorilla.get_original_attribute(Learner, "fit")
    unlogged_params = ["self", "callbacks", "learner"]
    return _run_and_log_function(self, original, args, kwargs, unlogged_params, 3)
def fit_generator(self, *args, **kwargs):
    original = gorilla.get_original_attribute(keras.Model, 'fit_generator')
    unlogged_params = ['self', 'generator', 'callbacks', 'validation_data', 'verbose']
    return _run_and_log_function(self, original, args, kwargs, unlogged_params, 4)
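# The fit/fit_generator patches above (and the pl.Trainer.fit patch further below) hand
# the actual work to a _run_and_log_function helper that is not included in these
# snippets. The following is only a hypothetical sketch of what such a helper might do,
# not the real implementation: log the call arguments as MLflow params, skip the names
# listed in unlogged_params, then delegate to the original method.
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index=None):
    # Hypothetical sketch. callback_arg_index is accepted for signature compatibility
    # with the callers above but is ignored here.
    import inspect
    import mlflow

    arg_names = inspect.getfullargspec(original).args[1:]  # drop 'self'
    logged = {name: val for name, val in zip(arg_names, args) if name not in unlogged_params}
    logged.update({key: val for key, val in kwargs.items() if key not in unlogged_params})
    mlflow.log_params({key: str(val) for key, val in logged.items()})
    return original(self, *args, **kwargs)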
def add_summary(self, *args, **kwargs):
    original = gorilla.get_original_attribute(FileWriter, 'add_summary')
    result = original(self, *args, **kwargs)
    _flush_queue()
    return result
def add_event(self, event):
    _log_event(event)
    original = gorilla.get_original_attribute(EventFileWriter, 'add_event')
    return original(self, event)
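# The FileWriter/EventFileWriter patches above only define the replacement methods;
# they still have to be installed with gorilla. A minimal sketch of how that could be
# wired up (wrap_patch is an illustrative helper name, not taken from these snippets):
import gorilla

def wrap_patch(destination, name, patch_obj):
    # Overwrite an existing attribute (allow_hit=True) and keep the original stored
    # (store_hit=True) so gorilla.get_original_attribute can still reach it.
    settings = gorilla.Settings(allow_hit=True, store_hit=True)
    patch = gorilla.Patch(destination, name, patch_obj, settings=settings)
    gorilla.apply(patch)

# e.g. wrap_patch(FileWriter, 'add_summary', add_summary)
#      wrap_patch(EventFileWriter, 'add_event', add_event)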
def train(*args, **kwargs):
    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """
        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + '-' + eval_name
                res[key] = value
            eval_results.append(res)
        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        indices = np.argsort(importance)
        features = np.array(features)[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align='center', height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel('Importance')
        ax.set_title('Feature Importance ({})'.format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, 'feature_importance_{}.png'.format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(lightgbm, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        'params', 'train_set', 'valid_sets', 'valid_names', 'fobj', 'feval',
        'init_model', 'evals_result', 'learning_rates', 'callbacks'
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] += [callback]
        args = tuple(tmp_list)
    elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
        kwargs['callbacks'] += [callback]
    else:
        kwargs['callbacks'] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or 'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results))
        # best_iteration is set even if training does not stop early.
        try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
        # iteration starts from 1 in LightGBM.
        try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration - 1], step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in ['split', 'gain']:
        features = model.feature_name()
        importance = model.feature_importance(importance_type=imp_type)
        try:
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception('Failed to log feature importance plot. LightGBM autologging '
                              'will ignore the failure and continue. Exception: ')

        imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f, indent=2)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)

    return model
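# If the train patch above mirrors MLflow's LightGBM autologging, it is usually
# activated through the public autolog entry point rather than applied by hand; a
# hedged usage sketch with synthetic data (the exact wiring is an assumption):
import numpy as np
import lightgbm
import mlflow.lightgbm

mlflow.lightgbm.autolog()

X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)
train_set = lightgbm.Dataset(X, label=y)
model = lightgbm.train({'objective': 'binary'}, train_set, valid_sets=[train_set])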
def fit(self, *args, **kwargs):
    """
    Patching trainer.fit method to add autolog class into callback
    """
    original = gorilla.get_original_attribute(pl.Trainer, "fit")
    return _run_and_log_function(self, original, args, kwargs)
def train(*args, **kwargs):
    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """
        def callback(env):
            eval_results.append(dict(env.evaluation_result_list))
        return callback

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    original = gorilla.get_original_attribute(xgboost, "train")

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")
    callback = record_eval_results(eval_results)
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] += [callback]
        args = tuple(tmp_list)
    elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
        kwargs["callbacks"] += [callback]
    else:
        kwargs["callbacks"] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index("early_stopping_rounds")
    early_stopping = (num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, "stopped_iteration", len(eval_results) - 1)
        try_mlflow_log(mlflow.log_metric, "best_iteration", model.best_iteration)
        try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration], step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = model.get_score(importance_type=imp_type)
        features, importance = zip(*imp.items())
        try:
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception("Failed to log feature importance plot. XGBoost autologging "
                              "will ignore the failure and continue. Exception: ")

        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
            with open(filepath, "w") as f:
                json.dump(imp, f)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path="model")

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)

    return model
def save_parameters(path, params=None):
    original = gorilla.get_original_attribute(nn, 'save_parameters')
    original(path, params)
    run_id = mlflow.active_run().info.run_id
    uri = 'runs:{}/{}'.format(run_id, 'parameters')
    try_mlflow_log(mlflow.log_artifact, path, uri)
def add_series(self, index, value):
    # At flush points, replace the value with the running average of the buffer and
    # log it to MLflow; then delegate to the original add.
    if _check_interval(index, self.flush_at, self.interval):
        value = sum(self.buf + [value]) / (len(self.buf) + 1)
        try_mlflow_log(mlflow.log_metric, self.name, value, step=index)
    original = gorilla.get_original_attribute(MonitorSeries, 'add')
    original(self, index, value)
def train(*args, **kwargs):
    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """
        def callback(env):
            eval_results.append(dict(env.evaluation_result_list))
        return callback

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(xgboost, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = ['params', 'dtrain', 'evals', 'obj', 'feval', 'evals_result',
                       'xgb_model', 'callbacks', 'learning_rates']
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] += [callback]
        args = tuple(tmp_list)
    elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
        kwargs['callbacks'] += [callback]
    else:
        kwargs['callbacks'] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or 'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results) - 1)
        try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
        try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration], step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = model.get_score(importance_type=imp_type)
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)

    return model
def new_add(self, *args, **kwargs):
    """new add"""
    orig = gorilla.get_original_attribute(self, "add")
    return 2 * orig(*args, **kwargs)
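# A short usage sketch for new_add above (Calculator is a made-up example class):
# once the patch is applied, calls go through new_add, while the original add stays
# reachable through gorilla.get_original_attribute.
import gorilla

class Calculator:
    def add(self, a, b):
        return a + b

settings = gorilla.Settings(allow_hit=True)
gorilla.apply(gorilla.Patch(Calculator, "add", new_add, settings=settings))

calc = Calculator()
assert calc.add(1, 2) == 6  # 2 * (1 + 2), via the patched method
assert gorilla.get_original_attribute(calc, "add")(1, 2) == 3  # original still available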