def load_data(data_path):
    data = pd.read_json(data_path)
    data["valid_row"] = data["semantic_data"].apply(verify_row)
    log("Dropped number of rows (empty semantic_data): {}".format(
        (~data["valid_row"]).sum()))
    data = data[data["valid_row"]]
    return data, []
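# Illustrative call (hypothetical path; assumes pandas as `pd`, with `verify_row` and `log` available as above):
#   data, _ = load_data("data/annotations.json")
# Only rows whose `semantic_data` passes `verify_row` are kept; the dropped count is logged.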
def _get_mlflow_run_id(tracking_uri, current_experiment, clean_experiment_dir, version_name):
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(current_experiment.name)
    # Delete runs with the same name as the current version
    mlflow_client = mlflow.tracking.MlflowClient(tracking_uri)
    experiment_ids = [exp.experiment_id
                      for exp in mlflow_client.list_experiments()
                      if current_experiment.name == exp.name]
    current_experiment.mlflow_client = mlflow_client
    if mlflow.active_run() is not None:
        log("Ending spurious run", 30)
        try:
            mlflow.end_run()
        except mlflow.exceptions.MlflowException:
            mlflow.tracking.fluent._active_run_stack = []

    run_id = None
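    # If a run with this version's name already exists, either delete it (when the experiment
    # directory is being cleaned) or reuse its id so training resumes under the same MLflow run.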
    if len(experiment_ids) > 0:
        runs = mlflow_client.search_runs(experiment_ids,
                                         f"tags.mlflow.runName = '{version_name}'")
        assert len(runs) <= 1, "There cannot be more than one active run for a version"
        if len(runs) > 0:
            if clean_experiment_dir and current_experiment.allow_delete_experiment_dir:
                mlflow_client.delete_run(runs[0].info.run_uuid)
            else:
                run_id = runs[0].info.run_id
    return run_id
def load_semantic_classes(path):
    with open(path) as f:
        labels = json.load(f)

    label_coding = {}
    for idx, label in enumerate(labels.keys()):
        label_coding[label] = idx

    log("labels used:  {}".format(label_coding))
    return label_coding
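# Illustrative result: for a hypothetical labels file with keys {"car": ..., "person": ..., "tree": ...}
# the returned coding is {"car": 0, "person": 1, "tree": 2}.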
def _save_training_time(experiment, version_, config):
    if config.experiment_mode == ExperimentModeKeys.TEST:
        return
    name = experiment.name
    with open(config.training_history_log_file, "a") as log_file:
        time = datetime.now().timestamp()
        config.executed_experiments[name].version.addExecutingVersion(version_, time)
        log("Executing version: {0}".format(
            config.executed_experiments[experiment.name].version.executing_version),
            log_to_file=False)
        log_file.write("{0}::{1}::{2}\n".format(name,
                                                config.executed_experiments[name].version.executing_version,
                                                time))
def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_dir, config):
    '''
    Returns False if there are no more versions to execute or a version resulted in an exception
    Returns True otherwise.
    '''
    _add_to_and_return_result_string = _AddToAndReturnResultString()
    if current_experiment is None:
        if config.cmd_mode:
            sys.exit(3)
        else:
            return False
    log_special_tokens.log_experiment_started()
    log("Experiment loaded: {0}".format(current_experiment.name))
    if config.experiment_mode == ExperimentModeKeys.TEST:
        log_special_tokens.log_mode_test()
    elif config.experiment_mode == ExperimentModeKeys.EXPORT:
        log_special_tokens.log_mode_exporting()
    else:
        log_special_tokens.log_mode_train()

    if config.experiment_mode == ExperimentModeKeys.EXPORT:
        for version_name, version_spec in version_name_s:
            experiment_dir, _ = _get_experiment_dir(Path(current_experiment.name).stem,
                                                    version_spec,
                                                    config.experiment_mode,
                                                    config)
            current_experiment._current_version = version_spec
            current_experiment._experiment_dir = experiment_dir
            dataloader = version_spec[version_parameters.DATALOADER]
            current_experiment._dataloader = dataloader()

            try:
                current_experiment.setup_model()
            except NotImplementedError:
                log("`setup_model` not implemented. Ignoring.")
            log("Exporting model for version: {}".format(version_name))
            current_experiment.export_model()
            log("Exported model {}".format(version_name))
        log_special_tokens.log_experiment_ended()
        if config.cmd_mode:
            sys.exit(3)
        else:
            return False
    else:
        version_name = version_name_s
        _add_to_and_return_result_string("Experiment: {0}".format(current_experiment.name), True)
        _add_to_and_return_result_string("Version: {0}".format(version_name))
        if version_name is None:
            log("No Version Specifications",
                logging.WARNING,
                modifier_1=console_colors.RED_FG,
                modifier_2=console_colors.BOLD)
        else:
            log("version loaded: {0} [{1}/{2}]".format(
                version_name,
                len(config.executed_experiments[current_experiment.name].version.executed_versions) + 1,
                len(current_experiment.versions.get_version_names())),
                modifier_1=console_colors.GREEN_FG,
                modifier_2=console_colors.BOLD)

        version_spec = current_experiment.versions.get_version(version_name)
        dataloader = version_spec[version_parameters.DATALOADER]
        if dataloader is not None:
            dataloader = dataloader()
        else:
            dataloader = DummyDataloader()

        log("Version_spec: {}".format(version_spec))

        experiment_dir, tracking_uri = _get_experiment_dir(Path(current_experiment.name).stem,
                                                           version_spec,
                                                           config.experiment_mode,
                                                           config)
        record_training = config.experiment_mode != ExperimentModeKeys.TEST
        if clean_experiment_dir and current_experiment.allow_delete_experiment_dir:
            try:
                current_experiment.clean_experiment_dir(experiment_dir)
                log("Cleaned experiment dir", modifier_1=console_colors.RED_BG)
            except NotImplementedError:
                log("`experiment.clean_experiment_dir` not implemened."
                    "contents in the experiment_dir will not be changed", level=logging.WARNING)

        run_id = _get_mlflow_run_id(tracking_uri, current_experiment, clean_experiment_dir, version_name)

        current_experiment._current_version = version_spec
        current_experiment._experiment_dir = experiment_dir
        current_experiment._dataloader = dataloader
        
        mlflow.start_run(run_name=version_name, run_id=run_id)
        # Logging the versions params
        for k, v in version_spec.items():
            if k != version_parameters.DATALOADER:
                mlflow.log_param(k, _get_non_default_str(v))

        _dataloader_wrapper = version_spec[version_parameters.DATALOADER]
        if isinstance(_dataloader_wrapper, DataLoaderCallableWrapper):
            mlflow.log_param(version_parameters.DATALOADER, _dataloader_wrapper.dataloader_class)
            mlflow.log_param("dataloader_args", _dataloader_wrapper.args)
            for k, v in _dataloader_wrapper.kwargs.items():
                mlflow.log_param("dataloader_" + k, _get_non_default_str(v))
        else:
            mlflow.log_param(version_parameters.DATALOADER, _get_non_default_str(_dataloader_wrapper))

        # eval_complete=False
        # LOGGER.setLevel(logging.INFO)
        train_results = ""
        eval_results = ""

        try:
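            # Full execution path: model setup, optional hooks, the training loop, then
            # evaluation on the train input followed by evaluation on the test input.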
            try:
                current_experiment.setup_model()
            except NotImplementedError:
                log("`setup_model` not implemented. Ignoring.")
            try:
                current_experiment.pre_execution_hook(mode=config.experiment_mode)
            except NotImplementedError:
                log("`pre_execution_hook` not implemented. Ignoring.")
            os.makedirs(experiment_dir, exist_ok=True)
            current_experiment.copy_related_files(experiment_dir)
            try:
                test__eval_steps = dataloader.get_test_sample_count()
            except NotImplementedError:
                test__eval_steps = None
            try:
                train_eval_steps = dataloader.get_train_sample_count()
            except NotImplementedError:
                train_eval_steps = None
            if config.experiment_mode == ExperimentModeKeys.TEST:
                test__eval_steps = 1 if test__eval_steps is not None else None
                train_eval_steps = 1 if train_eval_steps is not None else None

            _save_training_time(current_experiment, version_name, config)

            try:
                input_fn = dataloader.get_train_input(mode=ExecutionModeKeys.TRAIN)
            except NotImplementedError:
                log('`get_train_input` not implemented for training. Setting training input to `None`.',
                    level=logging.WARNING)
                input_fn = None

            if input_fn is None:
                log("input to `train_loop` is `None`",
                    level=logging.WARNING)
            try:
                train_output = current_experiment.train_loop(
                    input_fn=input_fn)
                if isinstance(train_output, MetricContainer):
                    train_output = train_output.log_metrics(log_to_file=False, complete_epoch=True)
                if isinstance(train_output, str):
                    log("Experiment traning loop output: {0}".format(train_output))
                log(log_special_tokens.TRAINING_COMPLETE)
            except NotImplementedError:
                log("`train_loop` not implemeted.")
            except Exception as e:
                train_results = "Training loop failed: {0}".format(str(e))
                log(train_results, logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
                if config.experiment_mode == ExperimentModeKeys.TEST:
                    raise

            try:
                input_fn = dataloader.get_train_input(mode=ExecutionModeKeys.TEST)
            except NotImplementedError:
                log('`get_train_input` not implemented for evaluation. Setting training input to `None`.',
                    level=logging.WARNING)
                input_fn = None

            if input_fn is None:
                log('Input to `evaluate_loop` is `None` for training input data.',
                    level=logging.WARNING)
            try:
                log("Training evaluation started: {0} steps"
                    .format(train_eval_steps if train_eval_steps is not None else 'unspecified'))
                train_results = current_experiment.evaluate_loop(
                    input_fn=input_fn)
                log("Eval on train set: ")
                if isinstance(train_results, MetricContainer):
                    train_results = train_results.log_metrics(complete_epoch=True, name_prefix="TRAIN_")
                elif isinstance(train_results, str):
                    log("{0}".format(train_results))
                else:
                    raise ValueError("The output of `evaluate_loop` should be"
                                     " a string or a `MetricContainer`")
            except NotImplementedError:
                log('`evaluate_loop` not implemented. Ignoring')
            except Exception as e:
                train_results = "Training evaluation failed: {0}".format(str(e))
                log(train_results, logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
                if config.experiment_mode == ExperimentModeKeys.TEST:
                    raise

            try:
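                # Fetch the held-out test input; the evaluation below is skipped when it is unavailable.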
                input_fn = dataloader.get_test_input()
            except NotImplementedError:
                log('`get_test_input` not implemented.')
                input_fn = None
            if input_fn is not None:
                try:
                    log("Testing evaluation started: {0} steps".
                        format(test__eval_steps if test__eval_steps is not None else 'unspecified'))
                    eval_results = current_experiment.evaluate_loop(input_fn=input_fn)
                    log("Eval on train set:")
                    if isinstance(eval_results, MetricContainer):
                        eval_results = eval_results.log_metrics(complete_epoch=True, name_prefix="TEST_")
                    elif isinstance(eval_results, str):
                        log("{0}".format(eval_results))
                    else:
                        raise ValueError("The output of `evaluate_loop` should"
                                         " be a string or a `MetricContainer`")
                except NotImplementedError:
                    log('`evaluate_loop` not implemented. Ignoring')
                except Exception as e:
                    eval_results = "Test evaluation failed: {0}".format(str(e))
                    log(eval_results, logging.ERROR)
                    log(traceback.format_exc(), logging.ERROR)
                    if config.experiment_mode == ExperimentModeKeys.TEST:
                        raise
            else:
                log('Not executing `evaluate_loop` as testing input data is `None`')

            try:
                current_experiment.post_execution_hook(mode=config.experiment_mode)
            except NotImplementedError:
                log("`post_execution_hook` not implemented. Ignoring.")

            log("Experiment evaluation complete")
            _add_to_and_return_result_string("Eval on train set: {0}".format(train_results))
            _add_to_and_return_result_string("Eval on test  set: {0}".format(eval_results))
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("EXECUTION SUMMERY:")
            _add_to_and_return_result_string("Number of epochs: {0}".format(
                version_spec[version_parameters.EPOCH_COUNT]))
            _add_to_and_return_result_string("Parameters for this version: {0}".format(version_spec))
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("EXPERIMENT SUMMERY:")
            _add_to_and_return_result_string(current_experiment.summery)
            _add_to_and_return_result_string("-------------------------------------------")
            _add_to_and_return_result_string("DATALOADER	 SUMMERY:")
            _add_to_and_return_result_string(dataloader.summery)
            if record_training and not config.no_log:
                _save_results_to_file(_add_to_and_return_result_string(), current_experiment, config)

        except Exception as e:
            mlflow.end_run(mlflow.entities.RunStatus.to_string(mlflow.entities.RunStatus.FAILED))
            if config.experiment_mode == ExperimentModeKeys.TEST:
                raise
            else:
                log("Exception: {0}".format(str(e)), logging.ERROR)
                log(traceback.format_exc(), logging.ERROR)
                if config.cmd_mode:
                    sys.exit(1)
                else:
                    return False
        mlflow.end_run()
    log_special_tokens.log_experiment_ended()
    return True
def _get_experiment(file_path,
                    whitelist_versions=None,
                    blacklist_versions=None,
                    just_return_experiment=False):
    # Import and load the experiment
    module = _load_file_as_module(file_path)
    clean_experiment_dir = False
    experiment = None
    try:
        experiment = module.EXPERIMENT
        experiment.name = file_path
    except Exception:
        log("{0} is not a experiment script. "
            "It does not contain a `EXPERIMENT` global variable".format(file_path))
        return None, None, False

    if just_return_experiment:
        return experiment, None, None

    experiment._collect_related_files(CONFIG.experiments_dir, [os.path.abspath(module.__file__)])
    # Figure out which version should be executed next
    returning_version = None
    try:
        versions = experiment.versions
    except Exception:
        versions = None

    if whitelist_versions is not None or blacklist_versions is not None:
        versions.filter_versions(whitelist_versions=whitelist_versions,
                                 blacklist_versions=blacklist_versions)

    log("{0}{1}Processing experiment: {2}{3}".format(console_colors.BOLD,
                                                     console_colors.BLUE_FG,
                                                     experiment.name,
                                                     console_colors.RESET))

    if CONFIG.experiment_mode == ExperimentModeKeys.EXPORT:
        return experiment, versions.get_versions(), False

    # Get the training history, i.e. the timestamps of each training run launched
    with open(CONFIG.training_history_log_file, "r") as t_hist_file:
        t_history = [line.rstrip("\n") for line in t_hist_file]
        all_history = [t_entry.split("::") for t_entry in t_history]
        module_history = [(v, float(t)) for n, v, t in all_history if n == experiment.name]

    modified_time = os.path.getmtime(file_path)
    if file_path not in CONFIG.executed_experiments:
        CONFIG.executed_experiments[experiment.name] = _ExecutedExperiment(version=_VersionLog(),
                                                                           modified_time=modified_time)
    else:
        CONFIG.executed_experiments[experiment.name].modified_time = modified_time

    # Determine whether training should start from scratch or resume.
    # The script's modified time is used as the indicator: if no entry in the training
    # history is newer than the modified time, the experiment was modified after the
    # last training run, hence training should restart from scratch.
    reset_experiment_dir = True
    modified_time = os.path.getmtime(file_path)
    for v, t in module_history:
        if t > modified_time:
            reset_experiment_dir = False
    if reset_experiment_dir:
        clean_experiment_dir = True
        CONFIG.executed_experiments[experiment.name].version.clean()
    else:
        # If a training had started and not completed, resume the training of that version
        versions__ = versions.get_version_names()
        for v, t in module_history:
            if t > modified_time:
                if CONFIG.executed_experiments[experiment.name].version.executed(v)\
                   is not _VersionLog.EXECUTED and v in versions__:
                    modified_time = t
                    returning_version = v
    # If there are no training sessions to be resumed, decide which version to execute next
    # based on the ORDER set in the version
    if returning_version is None:
        # TODO: check if this line works:
        for v, k in versions.get_versions():
            if CONFIG.executed_experiments[experiment.name].version.executed(v) is not _VersionLog.EXECUTED:
                returning_version = v
                clean_experiment_dir = True
    log("Executed versions: {0}".format(
        CONFIG.executed_experiments[experiment.name].version.executed_versions),
        log_to_file=False)
    if returning_version is None:
        return None, None, False
    return experiment, returning_version, clean_experiment_dir
Example #7
 def __call__(self, message, *args, **kwargs):
     log(message, *args, **kwargs)
     with open(self.file_path, "a") as f:
         f.write(str(message) + "\n")
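The `__call__` above is a method lifted out of its class; a minimal self-contained host for it might look like the following sketch (the class name and constructor are assumptions, `log` is mlpipeline's logger):

from mlpipeline import log


class ResultLogger:
    # Hypothetical host class: forwards the message to the pipeline logger and
    # also appends it to a plain-text results file.
    def __init__(self, file_path):
        self.file_path = file_path

    def __call__(self, message, *args, **kwargs):
        log(message, *args, **kwargs)
        with open(self.file_path, "a") as f:
            f.write(str(message) + "\n")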
Example #8
def export_model(model,
                 data_codes_mapping,
                 used_labels,
                 export_path,
                 model_summery,
                 dataloader_summery,
                 sample_data,
                 name_prefix=None):
    directory = "{}/{}".format(export_path.rstrip("/"), time.time())
    export_path_file = os.path.join(directory, "model_params.tch")
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise Exception("The directory {} already exists.".format(directory))
    log("Exporting model to : {}".format(export_path_file),
        modifier_1=console_colors.GREEN_FG)
    log("-- Data codes mapping: {}".format(data_codes_mapping))
    log("-- Used labels: {}".format(used_labels))
    log("-- Model Summery: {}".format(model_summery))
    log("-- Dataloder Summery: {}".format(dataloader_summery))
    log("-- Sample Data Shape: {}".format(sample_data.shape))
    torch.save(
        {
            # 'model': model
            'state_dict': model.state_dict(),
            'data_codes_mapping': data_codes_mapping,
            'used_labels': used_labels,
            'model_summery': model_summery,
            'dataloader_summery': dataloader_summery
        },
        export_path_file)
    log("Exporting onnx model")
    # Converting to CPU to make sure there is no device conflicts.
    model = model.cpu()
    torch.onnx.export(model, sample_data, os.path.join(directory,
                                                       "model.onnx"))
    return directory
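A minimal usage sketch for `export_model`, assuming the function above and its imports (`os`, `time`, `torch`, `log`, `console_colors`) are available; the toy model, shapes, and labels here are hypothetical:

import torch
import torch.nn as nn

# Toy model with a matching random input; shapes are arbitrary for illustration.
toy_model = nn.Linear(4, 2)
sample = torch.randn(1, 4)

export_dir = export_model(toy_model,
                          data_codes_mapping={"background": 0, "object": 1},
                          used_labels=["background", "object"],
                          export_path="exports",
                          model_summery="Linear(4, 2) toy model",
                          dataloader_summery="single random sample",
                          sample_data=sample)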
Example #9
 def log(self, message, log_to_file=False, **kwargs):
     log("{}DataLoader- {}{}".format(console_colors.CYAN_FG,
                                     console_colors.RESET, message),
         log=log_to_file,
         **kwargs)
Example #10
import logging

from mlpipeline.utils import add_script_dir_to_PATH
from mlpipeline.entities import ExecutionModeKeys
from mlpipeline import Versions
from mlpipeline.base import ExperimentABC
from mlpipeline.base import DataLoaderABC
from mlpipeline.entities import version_parameters
from mlpipeline import log
from mlpipeline.entities import console_colors
from mlpipeline.entities import ExperimentModeKeys

try:
    import cv2
    cv2_available = True
except ImportError:
    cv2_available = False
    log("opencv cannot be imported. Check for installation.",
        level=logging.WARN)

try:
    import torch
    import torch.nn as nn
    torch_available = True
except ImportError:
    torch_available = False
    log("pytorch cannot be imported. Check for installation.",
        level=logging.WARN)


class BaseTorchExperiment(ExperimentABC):
    def __init__(self, versions, **args):
        super().__init__(versions, **args)
        self.model = None
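A hypothetical concrete experiment built on the base class above; `setup_model` is the hook the pipeline calls before training (as in `_experiment_main_loop` earlier), while the layer sizes here are arbitrary:

class LinearExperiment(BaseTorchExperiment):
    def setup_model(self):
        # Attach a trivial model; a real experiment would build its network here.
        if not torch_available:
            raise RuntimeError("pytorch is required for this experiment")
        self.model = nn.Linear(10, 2)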
Example #11
def _config_update():
    log("Updating configuration")
    if CONFIG.experiment_mode == ExperimentModeKeys.TEST:
        config_from = "experiments_test.config"
    else:
        config_from = "experiments.config"
    config = configparser.ConfigParser(allow_no_value=True)
    config_file = config.read(config_from)

    if len(config_file) == 0:
        log("\033[1;031mWARNING:\033[0:031mNo 'experiments.config' file found\033[0m",
            log_to_file=True)
    else:
        try:
            config["MLP"]
        except KeyError:
            log("\033[1;031mWARNING:\033[0:031mNo MLP section in 'experiments.config' file\033[0m",
                log_to_file=True,
                level=logging.WARNING)
        CONFIG.use_blacklist = config.getboolean("MLP",
                                                 "use_blacklist",
                                                 fallback=CONFIG.use_blacklist)
        try:
            if CONFIG.use_blacklist:
                CONFIG.listed_experiments = config["BLACKLISTED_EXPERIMENTS"]
            else:
                CONFIG.listed_experiments = config["WHITELISTED_EXPERIMENTS"]
            listed_experiments = list(CONFIG.listed_experiments)

            existing_experiments = []
            for experiment in listed_experiments:
                experiment_script_path = os.path.join(CONFIG.experiments_dir,
                                                      experiment)
                if os.path.exists(experiment_script_path):
                    existing_experiments.append(experiment)
                else:
                    log("Script missing: {}".format(experiment_script_path),
                        level=logging.WARNING)
            CONFIG.listed_experiments = existing_experiments
            log("\033[1;036m{0}\033[0;036m: {1}\033[0m".format([
                "BLACKLISTED_EXPERIMENTS"
                if CONFIG.use_blacklist else "WHITELISTED_EXPERIMENTS"
            ][0].replace("_", " "), CONFIG.listed_experiments).lower(),
                log_to_file=True)
        except KeyError:
            log("\033[1;031mWARNING:\033[0:031mNo {0} section in 'cnn.config' file\033[0m"
                .format([
                    "BLACKLISTED_EXPERIMENTS"
                    if CONFIG.use_blacklist else "WHITELISTED_EXPERIMENTS"
                ][0]),
                log_to_file=True,
                level=logging.ERROR)
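For reference, a hypothetical experiments.config that satisfies the sections read above, shown being parsed the same way `_config_update` does:

import configparser

SAMPLE_CONFIG = """
[MLP]
use_blacklist = false

[WHITELISTED_EXPERIMENTS]
my_experiment.py

[BLACKLISTED_EXPERIMENTS]
broken_experiment.py
"""

config = configparser.ConfigParser(allow_no_value=True)
config.read_string(SAMPLE_CONFIG)
print(list(config["WHITELISTED_EXPERIMENTS"]))  # ['my_experiment.py']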
Example #12
def mlpipeline_execute_exeperiment(experiment,
                                   experiment_mode=ExperimentModeKeys.TEST,
                                   whitelist_versions=None,
                                   blacklist_versions=None,
                                   pipeline_config=None):
    '''
    Warning: Experimental interface
    '''
    if pipeline_config is None:
        pipeline_config = PipelineConfig(experiments_dir="",
                                         experiments_outputs_dir="outputs",
                                         mlflow_tracking_uri=".mlruns")
    pipeline_config.experiment_mode = experiment_mode
    pipeline_config.output_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "output"))
    pipeline_config.history_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "history"))
    pipeline_config.training_history_log_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "t_history"))
    pipeline_config.log_file = Path(
        os.path.join(pipeline_config.experiments_outputs_dir, "log"))

    pipeline_config.output_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.history_file.parent.mkdir(parents=True, exist_ok=True)
    pipeline_config.training_history_log_file.parent.mkdir(parents=True,
                                                           exist_ok=True)
    pipeline_config.log_file.parent.mkdir(parents=True, exist_ok=True)

    pipeline_config.output_file.touch()
    pipeline_config.history_file.touch()
    pipeline_config.training_history_log_file.touch()
    pipeline_config.log_file.touch()

    pipeline_config.logger = set_logger(experiment_mode=experiment_mode,
                                        no_log=False,
                                        log_file=pipeline_config.log_file)
    if not isinstance(experiment, ExperimentABC):
        log("`experiment` is not of type `mlpipeline.base.ExperimentABC`", 20)
    experiment.name = experiment.__class__.__name__
    experiment._collect_related_files(pipeline_config.experiments_dir)
    versions = experiment.versions

    log("{0}{1}Processing experiment: {2}{3}".format(console_colors.BOLD,
                                                     console_colors.BLUE_FG,
                                                     experiment.name,
                                                     console_colors.RESET))
    if whitelist_versions is not None or blacklist_versions is not None:
        versions.filter_versions(whitelist_versions=whitelist_versions,
                                 blacklist_versions=blacklist_versions)

    pipeline_config.executed_experiments[
        experiment.name] = _ExecutedExperiment(_VersionLog(), 0)
    if experiment_mode != ExperimentModeKeys.RUN:
        versions_list = versions.get_versions()
        _experiment_main_loop(
            experiment, versions_list if experiment_mode
            == ExperimentModeKeys.EXPORT else versions_list[0][0], True,
            pipeline_config)
    else:
        for v, k in versions.get_versions():
            if _experiment_main_loop(experiment, v, True, pipeline_config):
                pipeline_config.executed_experiments[
                    experiment.name].version.addExecutingVersion(v, 0)
            else:
                log("Pipeline Stoped", 30)