Ejemplo n.º 1
0
def register_model(model_id, host_name, did_pass_acceptance_test):
    """Report a model's specification, tags and result files to MLflow.

    The tracking URI is set to ``host_name``, the experiment is selected
    from the ``problem_name`` entry of the model specification, and all
    logging happens inside one run named after ``model_id``.

    Args:
        model_id: Identifier handed to ``get_model_files``; also the run name.
        host_name: URI passed to ``mlflow.set_tracking_uri``.
        did_pass_acceptance_test: Value stored under the
            ``DidPassAcceptanceTest`` tag.
    """
    logger = logging.getLogger(__name__)
    mlflow.set_tracking_uri(uri=host_name)

    logger.info(f"Reporting data for model {model_id}")

    paths = get_model_files(model_id)
    spec = get_json(paths['model_specification'])

    mlflow.set_experiment(spec['problem_name'])

    # (MLflow parameter name, key in the specification), in logging order.
    spec_params = (
        ("ProblemName", 'problem_name'),
        ("MLPipelineParamsName", 'ml_pipeline_params_name'),
        ("FeatureSetName", 'feature_set_name'),
        ("AlgorithmName", 'algorithm_name'),
        ("AlgorithmParamsName", 'algorithm_params_name'),
    )

    with mlflow.start_run(run_name=model_id):
        for param_name, spec_key in spec_params:
            log_param(param_name, spec[spec_key])

        set_tag("DidPassAcceptanceTest", did_pass_acceptance_test)
        # BUILD_NUMBER is read from the environment; os.getenv yields None
        # when it is not set.
        set_tag("BuildNumber", os.getenv('BUILD_NUMBER'))

        log_model_metrics_file(paths["model_metrics"])
        log_ml_pipeline_params_file(paths["ml_pipeline_params"])
        log_artifacts(paths['results_folder'])
Ejemplo n.º 2
0
def get_last_model_subdir():
    """Return the most recently modified sub-directory of the results root.

    Returns:
        The entry of ``get_subdirs(results_folder)`` with the greatest
        ``os.path.getmtime`` value, or ``None`` when there are no entries.
    """
    # An empty model id makes get_model_files describe the root results dir.
    root_results_dir = get_model_files('')['results_folder']
    candidates = get_subdirs(root_results_dir)

    return max(candidates, key=os.path.getmtime) if len(candidates) > 0 else None
Ejemplo n.º 3
0
def get_model_id_location(model_id_arg):
    """Resolve the model id to use, defaulting to the latest model directory.

    Args:
        model_id_arg: Explicit model id, or ``None`` to derive the id from
            the directory returned by ``get_last_model_subdir``.

    Returns:
        ``model_id_arg`` unchanged when it is not ``None``; otherwise the
        final path component of the latest model sub-directory.

    Raises:
        IOError: If ``model_id_arg`` is ``None`` and no model sub-directory
            exists.
    """
    # Function-scope import keeps this block self-contained.
    import os

    if model_id_arg is not None:
        return model_id_arg

    latest_subdir = get_last_model_subdir()
    if latest_subdir is None:
        results_dir = get_model_files('')['results_folder']
        raise IOError(f'No models found in {results_dir}')

    # basename(normpath(...)) honours the platform's path separator and
    # ignores a trailing separator, neither of which split('/')[-1] does.
    model_id = os.path.basename(os.path.normpath(latest_subdir))
    logging.getLogger(__name__).info(f"Using model_id {model_id}")
    return model_id
def test_acceptance_with_no_arguments(tmp_path):
    """With no arguments, the acceptance script selects the newest model folder.

    Two result folders are created under ``tmp_path`` and given explicit
    modification times; ``parse_arguments([])`` is expected to return the
    name of the one stamped with ``later_time``.
    """
    os.environ["CD4ML_DATA_DIR"] = str(tmp_path)
    try:
        files = get_model_files('', base_data_dir=tmp_path)
        base_results_directory = files['results_folder']

        earlier_folder = Path(base_results_directory, "earlier")
        earlier_folder.mkdir(parents=True)
        os.utime(earlier_folder, (earlier_time, earlier_time))

        latest_folder = Path(base_results_directory, "later")
        latest_folder.mkdir(parents=True)
        # Stamp the newer folder itself so the ordering is set explicitly
        # instead of depending on when the folder happened to be created.
        # NOTE(review): assumes later_time > earlier_time — confirm where
        # these module-level values are defined.
        os.utime(latest_folder, (later_time, later_time))

        model_id = acceptance_script.parse_arguments([])
    finally:
        # Always remove the variable so a failure cannot leak it into
        # other tests.
        os.environ.pop("CD4ML_DATA_DIR", None)

    assert model_id == 'later'
    def test_tracking_dictionaries_only(self, tmp_path):
        """Saving a Track with only a param and a metric writes three JSON files.

        Checks the set of files in the model's results folder and that the
        logged param and metric can be read back from their JSON files.
        """
        os.environ["CD4ML_DATA_DIR"] = str(tmp_path)

        tracker = Track("unit-test-id", {'problem_name': "foo_problem"})
        tracker.log_param("my_param", 1)
        tracker.log_metrics({"my_metric": 2})
        tracker.save_results()

        paths = get_model_files('unit-test-id', base_data_dir=tmp_path)
        written = set(os.listdir(paths['results_folder']))

        assert written == {"model_metrics.json", "ml_pipeline_params.json", "model_specification.json"}
        assert get_json(paths['model_metrics'])["my_metric"] == 2
        assert get_json(paths['ml_pipeline_params'])["my_param"] == 1
Ejemplo n.º 6
0
def is_model_accepted(model_id):
    """Check a model's acceptance metric against its configured thresholds.

    The metric name and the min/max thresholds are read from the model's
    ``ml_pipeline_params`` JSON; the metric value is read from its
    ``model_metrics`` JSON.

    Args:
        model_id: Identifier handed to ``get_model_files``.

    Returns:
        A ``(accepted, message)`` tuple, where ``accepted`` is ``True`` when
        the metric value lies within the inclusive threshold range and
        ``message`` is the result of ``get_message``.
    """
    paths = get_model_files(model_id)
    pipeline_params = get_json(paths['ml_pipeline_params'])

    # Single quotes are removed from the configured metric name before
    # it is used as a key.
    metric_name = pipeline_params['acceptance_metric'].replace("'", "")
    lower_bound = float(pipeline_params['acceptance_threshold_min'])
    upper_bound = float(pipeline_params['acceptance_threshold_max'])

    metric_value = get_json(paths['model_metrics'])[metric_name]
    accepted = lower_bound <= metric_value <= upper_bound

    return accepted, get_message(model_id, metric_name, metric_value,
                                 lower_bound, upper_bound, accepted)
Ejemplo n.º 7
0
    def save_results(self):
        """Write the tracked model, plot and dictionaries to the model's files.

        The model and the validation plot are saved only when they are not
        ``None``. The params, metrics and specification dictionaries are
        then passed to ``_write_dictionary_to_file`` in that order.
        """
        paths = get_model_files(self.model_id)
        self.logger.info("Recording run information for model %s" %
                         self.model_id)

        if self.model is not None:
            self.model.save(paths['full_model'])

        if self.plot is not None:
            # Imported here so bokeh is only loaded when a plot is saved.
            from bokeh.plotting import save as save_plot
            save_plot(obj=self.plot,
                      filename=paths['validation_plot'],
                      title='Validation Plot')

        # (attribute holding the dictionary, key of its target file).
        dictionary_targets = (
            ('params', 'ml_pipeline_params'),
            ('metrics', 'model_metrics'),
            ('specification', 'model_specification'),
        )
        for attribute, file_key in dictionary_targets:
            self._write_dictionary_to_file(getattr(self, attribute),
                                           paths[file_key])
Ejemplo n.º 8
0
def get_trained_encoder(stream, ml_fields, problem_name, write=True,
                        read_from_file=False, base_features_omitted=None):
    """Return an encoder, loaded from file or built from ``stream``.

    Args:
        stream: Data passed to ``get_encoder_from_stream``.
        ml_fields: Field description passed to ``get_encoder_from_stream``.
        problem_name: Identifier handed to ``get_model_files`` to locate
            the encoder file.
        write: When true, save a newly built encoder if an encoder file
            path is known.
        read_from_file: When true, load the encoder from the encoder file
            if that file exists, instead of building a new one.
        base_features_omitted: Forwarded as ``omit_cols`` to the encoder.

    Returns:
        The loaded or newly built encoder.
    """
    encoder_file = get_model_files(problem_name).get('encoder')

    use_saved_encoder = (encoder_file
                         and os.path.exists(encoder_file)
                         and read_from_file)
    if use_saved_encoder:
        logger.info('Reading encoder from : {}'.format(encoder_file))
        logger.info(base_features_omitted)
        loaded_encoder = OneHotEncoder([], [], omit_cols=base_features_omitted)
        loaded_encoder.load_from_file(encoder_file)
        return loaded_encoder

    logger.info('Building encoder')
    built_encoder = get_encoder_from_stream(stream, ml_fields,
                                            omit_cols=base_features_omitted)

    if write and encoder_file:
        logger.info('Writing encoder to: %s' % encoder_file)
        built_encoder.save(encoder_file)

    return built_encoder
    def test_writing_bokeh_plot(self, tmp_path):
        """Saving a Track with only a validation plot writes the plot and spec.

        Builds a bokeh scatter plot from the iris sample data, logs it as
        the validation plot and checks the set of files in the model's
        results folder.
        """
        from bokeh.plotting import figure, output_file
        from bokeh.sampledata.iris import flowers

        species_colors = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}
        point_colors = [species_colors[species] for species in flowers['species']]

        output_file(NamedTemporaryFile().name)
        plot = figure(title="Iris Morphology")
        plot.xaxis.axis_label = 'Petal Length'
        plot.yaxis.axis_label = 'Petal Width'
        plot.circle(flowers["petal_length"], flowers["petal_width"],
                    color=point_colors, fill_alpha=0.2, size=10)

        os.environ["CD4ML_DATA_DIR"] = str(tmp_path)
        tracker = Track("unit-test-id", {'problem_name': "foo_problem"})
        tracker.log_validation_plot(plot)
        tracker.save_results()

        paths = get_model_files('unit-test-id', base_data_dir=tmp_path)
        written = set(os.listdir(paths['results_folder']))
        assert written == {"validation_plot.html", "model_specification.json"}