def load_data(data_path):
    data = pd.read_json(data_path)
    data["valid_row"] = data["semantic_data"].apply(verify_row)
    log("Number of rows dropped (empty semantic_data): {}".format(
        (~data["valid_row"]).sum()))
    data = data[data["valid_row"]]
    return data, []
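# Hypothetical usage sketch (not part of the pipeline): `load_data` expects a JSON file
# readable by `pandas.read_json` with a "semantic_data" field per record; rows for which
# `verify_row` returns False are dropped. The file name and helper name are assumptions.
def _example_load_data(path="annotations.json"):
    # Returns the filtered DataFrame and the (currently always empty) error list.
    data, errors = load_data(path)
    log("Loaded {} valid rows, {} errors".format(len(data), len(errors)))
    return data, errors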
def _get_mlflow_run_id(tracking_uri, current_experiment, clean_experiment_dir, version_name):
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(current_experiment.name)
    # Delete runs with the same name as the current version
    mlflow_client = mlflow.tracking.MlflowClient(tracking_uri)
    experiment_ids = [exp.experiment_id for exp in mlflow_client.list_experiments()
                      if current_experiment.name == exp.name]
    current_experiment.mlflow_client = mlflow_client
    if mlflow.active_run() is not None:
        log("Ending spurious run", 30)
        try:
            mlflow.end_run()
        except mlflow.exceptions.MlflowException:
            mlflow.tracking.fluent._active_run_stack = []
    run_id = None
    if len(experiment_ids) > 0:
        runs = mlflow_client.search_runs(experiment_ids,
                                         f"tags.mlflow.runName = '{version_name}'")
        assert len(runs) <= 1, "There cannot be more than one active run for a version"
        if len(runs) > 0:
            if clean_experiment_dir and current_experiment.allow_delete_experiment_dir:
                mlflow_client.delete_run(runs[0].info.run_uuid)
            else:
                run_id = runs[0].info.run_id
    return run_id
def load_semantic_classes(path):
    with open(path) as f:
        labels = json.load(f)
    label_coding = {}
    for idx, label in enumerate(labels.keys()):
        label_coding[label] = idx
    log("labels used: {}".format(label_coding))
    return label_coding
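# Hypothetical usage sketch: `load_semantic_classes` only uses the top-level keys of the
# JSON file, mapping each label name to a consecutive integer index. The file content
# written below is an illustrative assumption.
def _example_load_semantic_classes():
    import json
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"road": [], "building": [], "vegetation": []}, f)
    # Expected result: {"road": 0, "building": 1, "vegetation": 2}
    return load_semantic_classes(f.name)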
def _save_training_time(experiment, version_, config):
    if config.experiment_mode == ExperimentModeKeys.TEST:
        return
    name = experiment.name
    with open(config.training_history_log_file, "a") as log_file:
        time = datetime.now().timestamp()
        config.executed_experiments[name].version.addExecutingVersion(version_, time)
        log("Executing version: {0}".format(
            config.executed_experiments[experiment.name].version.executing_version),
            log_to_file=False)
        log_file.write("{0}::{1}::{2}\n".format(
            name,
            config.executed_experiments[name].version.executing_version,
            time))
def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_dir, config):
    '''
    Returns False if there are no more versions to execute or a version resulted in an exception.
    Returns True otherwise.
    '''
    _add_to_and_return_result_string = _AddToAndReturnResultString()
    if current_experiment is None:
        if config.cmd_mode:
            sys.exit(3)
        else:
            return False
    log_special_tokens.log_experiment_started()
    log("Experiment loaded: {0}".format(current_experiment.name))
    if config.experiment_mode == ExperimentModeKeys.TEST:
        log_special_tokens.log_mode_test()
    elif config.experiment_mode == ExperimentModeKeys.EXPORT:
        log_special_tokens.log_mode_exporting()
    else:
        log_special_tokens.log_mode_train()
    if config.experiment_mode == ExperimentModeKeys.EXPORT:
        for version_name, version_spec in version_name_s:
            experiment_dir, _ = _get_experiment_dir(Path(current_experiment.name).stem,
                                                    version_spec,
                                                    config.experiment_mode,
                                                    config)
            current_experiment._current_version = version_spec
            current_experiment._experiment_dir = experiment_dir
            dataloader = version_spec[version_parameters.DATALOADER]
            current_experiment._dataloader = dataloader()
            try:
                current_experiment.setup_model()
            except NotImplementedError:
                log("`setup_model` not implemented. Ignoring.")
            log("Exporting model for version: {}".format(version_name))
            current_experiment.export_model()
            log("Exported model {}".format(version_name))
        log_special_tokens.log_experiment_ended()
        if config.cmd_mode:
            sys.exit(3)
        else:
            return False
    else:
        version_name = version_name_s
        _add_to_and_return_result_string("Experiment: {0}".format(current_experiment.name), True)
        _add_to_and_return_result_string("Version: {0}".format(version_name))
        if version_name is None:
            log("No Version Specifications",
                logging.WARNING,
                modifier_1=console_colors.RED_FG,
                modifier_2=console_colors.BOLD)
        else:
            log("version loaded: {0} [{1}/{2}]".format(
                version_name,
                len(config.executed_experiments[current_experiment.name].version.executed_versions) + 1,
                len(current_experiment.versions.get_version_names())),
                modifier_1=console_colors.GREEN_FG,
                modifier_2=console_colors.BOLD)
        version_spec = current_experiment.versions.get_version(version_name)
        dataloader = version_spec[version_parameters.DATALOADER]
        if dataloader is not None:
            dataloader = dataloader()
        else:
            dataloader = DummyDataloader()
        log("Version_spec: {}".format(version_spec))
        experiment_dir, tracking_uri = _get_experiment_dir(Path(current_experiment.name).stem,
                                                           version_spec,
                                                           config.experiment_mode,
                                                           config)
        record_training = config.experiment_mode != ExperimentModeKeys.TEST
        if clean_experiment_dir and current_experiment.allow_delete_experiment_dir:
            try:
                current_experiment.clean_experiment_dir(experiment_dir)
                log("Cleaned experiment dir", modifier_1=console_colors.RED_BG)
            except NotImplementedError:
                log("`experiment.clean_experiment_dir` not implemented. "
                    "Contents of the experiment_dir will not be changed",
                    level=logging.WARNING)
        run_id = _get_mlflow_run_id(tracking_uri, current_experiment, clean_experiment_dir, version_name)
        current_experiment._current_version = version_spec
        current_experiment._experiment_dir = experiment_dir
        current_experiment._dataloader = dataloader
        mlflow.start_run(run_name=version_name, run_id=run_id)
        # Logging the version's params
        for k, v in version_spec.items():
            if k != version_parameters.DATALOADER:
                mlflow.log_param(k, _get_non_default_str(v))
        _dataloader_wrapper = version_spec[version_parameters.DATALOADER]
        if isinstance(_dataloader_wrapper, DataLoaderCallableWrapper):
            mlflow.log_param(version_parameters.DATALOADER, _dataloader_wrapper.dataloader_class)
            mlflow.log_param("dataloader_args", _dataloader_wrapper.args)
            for k, v in _dataloader_wrapper.kwargs.items():
                mlflow.log_param("dataloader_" + k, _get_non_default_str(v))
        else:
            mlflow.log_param(version_parameters.DATALOADER, _get_non_default_str(_dataloader_wrapper))
        # eval_complete=False
        # LOGGER.setLevel(logging.INFO)
        train_results = ""
        eval_results = ""
        try:
            try:
                current_experiment.setup_model()
            except NotImplementedError:
                log("`setup_model` not implemented. Ignoring.")
            try:
                current_experiment.pre_execution_hook(mode=config.experiment_mode)
            except NotImplementedError:
                log("`pre_execution_hook` not implemented. Ignoring.")
            os.makedirs(experiment_dir, exist_ok=True)
            current_experiment.copy_related_files(experiment_dir)
            try:
                test__eval_steps = dataloader.get_test_sample_count()
            except NotImplementedError:
                test__eval_steps = None
            try:
                train_eval_steps = dataloader.get_train_sample_count()
            except NotImplementedError:
                train_eval_steps = None
            if config.experiment_mode == ExperimentModeKeys.TEST:
                test__eval_steps = 1 if test__eval_steps is not None else None
                train_eval_steps = 1 if train_eval_steps is not None else None
            _save_training_time(current_experiment, version_name, config)

            # Training
            try:
                input_fn = dataloader.get_train_input(mode=ExecutionModeKeys.TRAIN)
            except NotImplementedError:
                log('`get_train_input` not implemented for training. Setting training input to `None`.',
                    level=logging.WARNING)
                input_fn = None
            if input_fn is None:
                log("input to `train_loop` is `None`", level=logging.WARNING)
            try:
                train_output = current_experiment.train_loop(input_fn=input_fn)
                if isinstance(train_output, MetricContainer):
                    train_output = train_output.log_metrics(log_to_file=False, complete_epoch=True)
                if isinstance(train_output, str):
                    log("Experiment training loop output: {0}".format(train_output))
                log(log_special_tokens.TRAINING_COMPLETE)
            except NotImplementedError:
                log("`train_loop` not implemented.")
            except Exception as e:
                train_results = "Training loop failed: {0}".format(str(e))
                log(train_results, logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
                if config.experiment_mode == ExperimentModeKeys.TEST:
                    raise

            # Evaluation on the training set
            try:
                input_fn = dataloader.get_train_input(mode=ExecutionModeKeys.TEST)
            except NotImplementedError:
                log('`get_train_input` not implemented for evaluation. Setting training input to `None`.',
                    level=logging.WARNING)
                input_fn = None
            if input_fn is None:
                log('Input to `evaluate_loop` is `None` for training input data.', level=logging.WARNING)
            try:
                log("Training evaluation started: {0} steps".format(
                    train_eval_steps if train_eval_steps is not None else 'unspecified'))
                train_results = current_experiment.evaluate_loop(input_fn=input_fn)
                log("Eval on train set:")
                if isinstance(train_results, MetricContainer):
                    train_results = train_results.log_metrics(complete_epoch=True, name_prefix="TRAIN_")
                elif isinstance(train_results, str):
                    log("{0}".format(train_results))
                else:
                    raise ValueError("The output of `evaluate_loop` should be"
                                     " a string or a `MetricContainer`")
            except NotImplementedError:
                log('`evaluate_loop` not implemented. Ignoring')
            except Exception as e:
                train_results = "Training evaluation failed: {0}".format(str(e))
                log(train_results, logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
                if config.experiment_mode == ExperimentModeKeys.TEST:
                    raise

            # Evaluation on the test set
            try:
                input_fn = dataloader.get_test_input()
            except NotImplementedError:
                log('`get_test_input` not implemented.')
                input_fn = None
            if input_fn is not None:
                try:
                    log("Testing evaluation started: {0} steps".format(
                        test__eval_steps if test__eval_steps is not None else 'unspecified'))
                    eval_results = current_experiment.evaluate_loop(input_fn=input_fn)
                    log("Eval on test set:")
                    if isinstance(eval_results, MetricContainer):
                        eval_results = eval_results.log_metrics(complete_epoch=True, name_prefix="TEST_")
                    elif isinstance(eval_results, str):
                        log("{0}".format(eval_results))
                    else:
                        raise ValueError("The output of `evaluate_loop` should"
                                         " be a string or a `MetricContainer`")
                except NotImplementedError:
                    log('`evaluate_loop` not implemented. Ignoring')
                except Exception as e:
                    eval_results = "Test evaluation failed: {0}".format(str(e))
                    log(eval_results, logging.ERROR)
                    log(traceback.format_exc(), logging.ERROR)
                    if config.experiment_mode == ExperimentModeKeys.TEST:
                        raise
            else:
                log('Not executing `evaluate_loop` as testing input data is `None`')

            try:
                current_experiment.post_execution_hook(mode=config.experiment_mode)
            except NotImplementedError:
                log("`post_execution_hook` not implemented. Ignoring.")
            log("Experiment evaluation complete")
            _add_to_and_return_result_string("Eval on train set: {0}".format(train_results))
            _add_to_and_return_result_string("Eval on test set: {0}".format(eval_results))
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("EXECUTION SUMMARY:")
            _add_to_and_return_result_string("Number of epochs: {0}".format(
                version_spec[version_parameters.EPOCH_COUNT]))
            _add_to_and_return_result_string("Parameters for this version: {0}".format(version_spec))
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("EXPERIMENT SUMMARY:")
            _add_to_and_return_result_string(current_experiment.summery)
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("DATALOADER SUMMARY:")
            _add_to_and_return_result_string(dataloader.summery)
            if record_training and not config.no_log:
                _save_results_to_file(_add_to_and_return_result_string(), current_experiment, config)
        except Exception as e:
            mlflow.end_run(mlflow.entities.RunStatus.to_string(mlflow.entities.RunStatus.FAILED))
            if config.experiment_mode == ExperimentModeKeys.TEST:
                raise
            else:
                log("Exception: {0}".format(str(e)), logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
            if config.cmd_mode:
                sys.exit(1)
            else:
                return False
        mlflow.end_run()
        log_special_tokens.log_experiment_ended()
        return True
def _get_experiment(file_path, whitelist_versions=None, blacklist_versions=None, just_return_experiment=False):
    # Import and load the experiment
    module = _load_file_as_module(file_path)
    clean_experiment_dir = False
    experiment = None
    try:
        experiment = module.EXPERIMENT
        experiment.name = file_path
    except Exception:
        log("{0} is not an experiment script. "
            "It does not contain a `EXPERIMENT` global variable".format(file_path))
        return None, None, False
    if just_return_experiment:
        return experiment, None, None
    experiment._collect_related_files(CONFIG.experiments_dir, [os.path.abspath(module.__file__)])

    # Figure out which version should be executed next
    returning_version = None
    try:
        versions = experiment.versions
    except Exception:
        versions = None
    if whitelist_versions is not None or blacklist_versions is not None:
        versions.filter_versions(whitelist_versions=whitelist_versions,
                                 blacklist_versions=blacklist_versions)
    log("{0}{1}Processing experiment: {2}{3}".format(console_colors.BOLD,
                                                     console_colors.BLUE_FG,
                                                     experiment.name,
                                                     console_colors.RESET))
    if CONFIG.experiment_mode == ExperimentModeKeys.EXPORT:
        return experiment, versions.get_versions(), False

    # Get the training history, i.e. the time stamps of each training launched
    with open(CONFIG.training_history_log_file, "r") as t_hist_file:
        t_history = [line.rstrip("\n") for line in t_hist_file]
    all_history = [t_entry.split("::") for t_entry in t_history]
    module_history = [(v, float(t)) for n, v, t in all_history if n == experiment.name]

    modified_time = os.path.getmtime(file_path)
    if file_path not in CONFIG.executed_experiments:
        CONFIG.executed_experiments[experiment.name] = _ExecutedExperiment(version=_VersionLog(),
                                                                           modified_time=modified_time)
    else:
        CONFIG.executed_experiments[experiment.name].modified_time = modified_time

    # Determine whether training should start from scratch or resume.
    # The modified time is used as the indicator: if a training-history entry is newer than
    # the script's modified time, the experiment has not been modified since that training,
    # so training can resume; otherwise it should restart from scratch.
    reset_experiment_dir = True
    modified_time = os.path.getmtime(file_path)
    for v, t in module_history:
        if t > modified_time:
            reset_experiment_dir = False
    if reset_experiment_dir:
        clean_experiment_dir = True
        CONFIG.executed_experiments[experiment.name].version.clean()
    else:
        # If a training had started and not completed, resume the training of that version
        versions__ = versions.get_version_names()
        for v, t in module_history:
            if t > modified_time:
                if CONFIG.executed_experiments[experiment.name].version.executed(v)\
                   is not _VersionLog.EXECUTED and v in versions__:
                    modified_time = t
                    returning_version = v

    # If there are no training sessions to be resumed, decide which version to execute next
    # based on the ORDER set in the version
    if returning_version is None:
        # TODO: check if this line works:
        for v, k in versions.get_versions():
            if CONFIG.executed_experiments[experiment.name].version.executed(v) is not _VersionLog.EXECUTED:
                returning_version = v
                clean_experiment_dir = True
    log("Executed versions: {0}".format(
        CONFIG.executed_experiments[experiment.name].version.executed_versions),
        log_to_file=False)
    if returning_version is None:
        return None, None, False
    return experiment, returning_version, clean_experiment_dir
def __call__(self, message, *args, **kwargs):
    log(message, *args, **kwargs)
    with open(self.file_path, "a") as f:
        f.write(str(message) + "\n")
def export_model(model, data_codes_mapping, used_labels, export_path, model_summery,
                 dataloader_summery, sample_data, name_prefix=None):
    directory = "{}/{}".format(export_path.rstrip("/"), time.time())
    export_path_file = os.path.join(directory, "model_params.tch")
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise Exception("The directory {} already exists.".format(directory))
    log("Exporting model to: {}".format(export_path_file), modifier_1=console_colors.GREEN_FG)
    log("-- Data codes mapping: {}".format(data_codes_mapping))
    log("-- Used labels: {}".format(used_labels))
    log("-- Model summary: {}".format(model_summery))
    log("-- Dataloader summary: {}".format(dataloader_summery))
    log("-- Sample data shape: {}".format(sample_data.shape))
    torch.save(
        {
            # 'model': model
            'state_dict': model.state_dict(),
            'data_codes_mapping': data_codes_mapping,
            'used_labels': used_labels,
            'model_summery': model_summery,
            'dataloader_summery': dataloader_summery
        },
        export_path_file)
    log("Exporting onnx model")
    # Converting to CPU to make sure there are no device conflicts.
    model = model.cpu()
    torch.onnx.export(model, sample_data, os.path.join(directory, "model.onnx"))
    return directory
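# Hypothetical usage sketch for `export_model`, assuming torch is importable. The tiny
# linear model, label mapping and sample tensor below are placeholders, not part of the
# pipeline; the call writes "model_params.tch" and "model.onnx" into a timestamped
# sub-directory of `export_path`.
def _example_export_model(export_path="outputs/exports"):
    model = torch.nn.Linear(4, 2)
    sample = torch.randn(1, 4)
    return export_model(model,
                        data_codes_mapping={0: "background", 1: "object"},
                        used_labels=["background", "object"],
                        export_path=export_path,
                        model_summery="Linear(4 -> 2) placeholder",
                        dataloader_summery="random sample data",
                        sample_data=sample)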
def log(self, message, log_to_file=False, **kargs):
    log("{}DataLoader- {}{}".format(console_colors.CYAN_FG,
                                    console_colors.RESET,
                                    message),
        log=log_to_file, **kargs)
import logging

from mlpipeline.utils import add_script_dir_to_PATH
from mlpipeline.entities import ExecutionModeKeys
from mlpipeline import Versions
from mlpipeline.base import ExperimentABC
from mlpipeline.base import DataLoaderABC
from mlpipeline.entities import version_parameters
from mlpipeline import log
from mlpipeline.entities import console_colors
from mlpipeline.entities import ExperimentModeKeys

try:
    import cv2
    cv2_available = True
except ImportError:
    cv2_available = False
    log("opencv cannot be imported. Check the installation.", level=logging.WARN)

try:
    import torch
    import torch.nn as nn
    torch_available = True
except ImportError:
    torch_available = False
    log("pytorch cannot be imported. Check the installation.", level=logging.WARN)


class BaseTorchExperiment(ExperimentABC):
    def __init__(self, versions, **args):
        super().__init__(versions, **args)
        self.model = None
def _config_update():
    log("Updating configuration")
    if CONFIG.experiment_mode == ExperimentModeKeys.TEST:
        config_from = "experiments_test.config"
    else:
        config_from = "experiments.config"
    config = configparser.ConfigParser(allow_no_value=True)
    config_file = config.read(config_from)
    if len(config_file) == 0:
        log("\033[1;031mWARNING:\033[0:031mNo 'experiments.config' file found\033[0m",
            log_to_file=True)
    else:
        try:
            config["MLP"]
        except KeyError:
            log("\033[1;031mWARNING:\033[0:031mNo MLP section in 'experiments.config' file\033[0m",
                log_to_file=True, level=logging.WARNING)
        CONFIG.use_blacklist = config.getboolean("MLP", "use_blacklist", fallback=CONFIG.use_blacklist)
        try:
            if CONFIG.use_blacklist:
                CONFIG.listed_experiments = config["BLACKLISTED_EXPERIMENTS"]
            else:
                CONFIG.listed_experiments = config["WHITELISTED_EXPERIMENTS"]
            listed_experiments = []
            for experiment in CONFIG.listed_experiments:
                listed_experiments.append(experiment)
            # Iterate over a copy so removing missing scripts does not skip entries
            for experiment in list(listed_experiments):
                experiment_script_path = os.path.join(CONFIG.experiments_dir, experiment)
                if not os.path.exists(experiment_script_path):
                    listed_experiments.remove(experiment)
                    log("Script missing: {}".format(experiment_script_path), level=logging.WARNING)
            CONFIG.listed_experiments = listed_experiments
            section_name = "BLACKLISTED_EXPERIMENTS" if CONFIG.use_blacklist else "WHITELISTED_EXPERIMENTS"
            log("\033[1;036m{0}\033[0;036m: {1}\033[0m".format(
                section_name.replace("_", " "), CONFIG.listed_experiments).lower(),
                log_to_file=True)
        except KeyError:
            log("\033[1;031mWARNING:\033[0:031mNo {0} section in 'experiments.config' file\033[0m"
                .format("BLACKLISTED_EXPERIMENTS" if CONFIG.use_blacklist else "WHITELISTED_EXPERIMENTS"),
                log_to_file=True, level=logging.ERROR)
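# Illustrative `experiments.config` layout read by `_config_update` (the experiment script
# names are placeholders). `allow_no_value=True` lets the listed sections hold bare script
# names; `use_blacklist` selects which of the two sections is honoured:
#
#     [MLP]
#     use_blacklist = false
#
#     [WHITELISTED_EXPERIMENTS]
#     experiment_a.py
#     experiment_b.py
#
#     [BLACKLISTED_EXPERIMENTS]
#     experiment_wip.py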
def mlpipeline_execute_exeperiment(experiment,
                                   experiment_mode=ExperimentModeKeys.TEST,
                                   whitelist_versions=None,
                                   blacklist_versions=None,
                                   pipeline_config=None):
    '''
    Warning: Experimental interface
    '''
    if pipeline_config is None:
        pipeline_config = PipelineConfig(experiments_dir="",
                                         experiments_outputs_dir="outputs",
                                         mlflow_tracking_uri=".mlruns")
    pipeline_config.experiment_mode = experiment_mode
    pipeline_config.output_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "output"))
    pipeline_config.history_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "history"))
    pipeline_config.training_history_log_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "t_history"))
    pipeline_config.log_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "log"))
    pipeline_config.output_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.history_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.training_history_log_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.log_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.output_file.touch()
    pipeline_config.history_file.touch()
    pipeline_config.training_history_log_file.touch()
    pipeline_config.log_file.touch()
    pipeline_config.logger = set_logger(experiment_mode=experiment_mode,
                                        no_log=False,
                                        log_file=pipeline_config.log_file)
    if not isinstance(experiment, ExperimentABC):
        log("`experiment` is not of type `mlpipeline.base.ExperimentABC`", 20)
    experiment.name = experiment.__class__.__name__
    experiment._collect_related_files(pipeline_config.experiments_dir)
    versions = experiment.versions
    log("{0}{1}Processing experiment: {2}{3}".format(console_colors.BOLD,
                                                     console_colors.BLUE_FG,
                                                     experiment.name,
                                                     console_colors.RESET))
    if whitelist_versions is not None or blacklist_versions is not None:
        versions.filter_versions(whitelist_versions=whitelist_versions,
                                 blacklist_versions=blacklist_versions)
    pipeline_config.executed_experiments[experiment.name] = _ExecutedExperiment(_VersionLog(), 0)
    if experiment_mode != ExperimentModeKeys.RUN:
        versions_list = versions.get_versions()
        _experiment_main_loop(
            experiment,
            versions_list if experiment_mode == ExperimentModeKeys.EXPORT else versions_list[0][0],
            True,
            pipeline_config)
    else:
        for v, k in versions.get_versions():
            if _experiment_main_loop(experiment, v, True, pipeline_config):
                pipeline_config.executed_experiments[experiment.name].version.addExecutingVersion(v, 0)
            else:
                log("Pipeline Stopped", 30)
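# Hypothetical call sketch for the experimental interface above: `my_experiment` is
# assumed to be an instance of an `ExperimentABC` subclass that already defines its
# `versions`; how versions are declared is up to the experiment and not shown here, and
# the version name passed to `whitelist_versions` is a placeholder.
#
#     mlpipeline_execute_exeperiment(my_experiment,
#                                    experiment_mode=ExperimentModeKeys.RUN,
#                                    whitelist_versions=["version1"])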