Example #1
def load_model_from_mlflow(
    run_id,
    mlflow_conf,
    wrapped_model,
    model_path="model_checkpoint",
):
    """Method to load a trained model from mlflow

    Parameters
    ----------
    run_id: str
        mlflow run id for the trained model
    mlflow_conf: dict
        mlflow configuration, e.g. MLFLOW_URI
    wrapped_model: PyModel
        model architecture to be logged
    model_path: str
        output path where model checkpoints are logged

    Returns
    -------
    PyModel:
        wrapped model with saved weights and parameters from the run
    """
    mlflow_setup = helper.setup_mlflow(**mlflow_conf)
    #  model_uri = f"runs:/{run_id}/{mlflow_setup['experiment_name']}_{model_path}"
    # run_data = helper.fetch_mlflow_run(run_id,
    #                                    mlflow_uri=mlflow_setup['mlflow_uri'],
    #                                    parse_params=True
    #                                    )

    # hparams = run_data['params']
    # model_name = run_data['tags']['model']
    # if wrapped_model is None:
    #     base_model = mlflow.sklearn.load_model(model_uri)
    #     wrapped_model = SKModel(base_model, hparams, name=model_name)

    run_data = helper.fetch_mlflow_run(run_id,
                                       mlflow_uri=mlflow_setup['mlflow_uri'],
                                       artifacts_prefix=[model_path])

    ckpt_path = helper.get_artifact_path(
        run_data['artifact_paths'][0],
        artifact_uri=run_data['info'].artifact_uri)
    wrapped_model = load_model(wrapped_model, ckpt_path)
    return wrapped_model
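
A minimal usage sketch for the loader above. The configuration keys, the `PyModel` construction, and the run id are illustrative assumptions, not part of the snippet:

# Hypothetical usage; adapt the config keys and the wrapped model to your setup.
mlflow_conf = {
    'mlflow_uri': 'http://localhost:5000',   # assumed key (e.g. MLFLOW_URI)
    'experiment_name': 'my_experiment',      # assumed key
}
wrapped_model = PyModel(Net(), hparams={})   # placeholder architecture wrapper
restored_model = load_model_from_mlflow(
    run_id='0123456789abcdef',               # an existing mlflow run id
    mlflow_conf=mlflow_conf,
    wrapped_model=wrapped_model,
    model_path='model_checkpoint',
)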
Example #2
def register_model_with_mlflow(run_id,
                               mlflow_conf,
                               wrapped_model,
                               registered_model_name,
                               model_path='model_checkpoint',
                               **artifacts):
    """Method to register a trained model

    Parameters
    ----------
    run_id: str
        mlflow run id for the trained model
    mlflow_conf: dict
        mlflow configuration, e.g. MLFLOW_URI
    wrapped_model: PyModel
        model architecture to be logged
    registered_model_name: str
        name for registering the model
    model_path: str
        output path where model will be logged
    artifacts: dict
        dictionary of objects to log with the model
    """
    # Getting run info
    mlflow_setup = helper.setup_mlflow(**mlflow_conf)
    wrapped_model = load_model_from_mlflow(run_id, mlflow_conf, wrapped_model,
                                           model_path)
    # Registering model
    try:
        # str.rstrip strips a character set, not a suffix; drop '_checkpoint' explicitly
        model_name = (model_path[:-len('_checkpoint')]
                      if model_path.endswith('_checkpoint') else model_path)
        mlflow.pytorch.log_model(wrapped_model,
                                 model_name,
                                 registered_model_name=registered_model_name)
    except Exception as e:
        log.error(
            f'Exception during logging model: {e}. Continuing to dump artifacts'
        )

    # logging other artifacts
    dumper = helper.model_register_dumper(
        registered_model_name=registered_model_name)
    helper.log_artifacts(artifacts,
                         run_id,
                         mlflow_uri=mlflow_setup['mlflow_uri'],
                         dumper=dumper,
                         delete=True)
    return
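
A hedged usage sketch for the registration helper above; the config keys and artifact names are illustrative assumptions:

# Hypothetical call; extra keyword arguments are logged as artifacts.
register_model_with_mlflow(
    run_id='0123456789abcdef',
    mlflow_conf={'mlflow_uri': 'http://localhost:5000',
                 'experiment_name': 'my_experiment'},   # assumed keys
    wrapped_model=wrapped_model,                        # a trained PyModel
    registered_model_name='my_experiment_classifier',
    model_path='model_checkpoint',
    feature_names=['age', 'bmi'],                       # example artifact
)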
Example #3
def register_model_with_mlflow(run_id, 
                               mlflow_conf,
                               wrapped_model=None,
                               registered_model_name=None,
                               model_path='model',
                               **artifacts
                               ):
    """Method to register a trained model

    Parameters
    ----------
    run_id: str
        mlflow run id for the trained model
    mlflow_conf: dict
        mlflow configuration, e.g. MLFLOW_URI
    wrapped_model: SKModel, optional
        model architecture to be logged. If not provided, the model is directly read from mlflow
    registered_model_name: str
        name for registering the model
    model_path: str
        output path where model will be logged
    artifacts: dict
        dictionary of objects to log with the model
    """
    # Getting run info
    mlflow_setup = helper.setup_mlflow(**mlflow_conf)
    wrapped_model = load_model_from_mlflow(run_id, mlflow_conf, 
                                           wrapped_model=wrapped_model, model_path=model_path)

    if registered_model_name is None:
        model_name = wrapped_model.__class__.__name__
        registered_model_name = f"{mlflow_setup['experiment_name']}_{model_name}_v{uuid.uuid4()}"

    # Registering model
    with mlflow.start_run(run_id):
        try:
            mlflow.sklearn.log_model(wrapped_model.model, model_path, registered_model_name=registered_model_name)
        except Exception as e:
            log.error(f'Exception during logging model: {e}. Continuing to dump artifacts')

    # logging other artifacts
    dumper = helper.model_register_dumper(registered_model_name=registered_model_name)
    helper.log_artifacts(artifacts, run_id, mlflow_uri=mlflow_setup['mlflow_uri'], dumper=dumper, delete=True)
    return 
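
Because `wrapped_model` and `registered_model_name` are optional in this sklearn variant, a minimal call (with assumed config keys) can be as short as:

# Hypothetical minimal call: the model is read back from mlflow and a
# registered name is derived from the experiment name when none is given.
register_model_with_mlflow(
    run_id='0123456789abcdef',
    mlflow_conf={'mlflow_uri': 'http://localhost:5000',
                 'experiment_name': 'my_experiment'},   # assumed keys
)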
Example #4
def run_training_with_mlflow(mlflow_conf, 
                             wrapped_model,
                             train_dataloader, 
                             val_dataloader=None, 
                             test_dataloader=None,
                             **kwargs):
    """
    Function to run supervised training for classification

    Parameters
    ----------
    mlflow_conf: dict
        mlflow configuration, e.g. MLFLOW_URI
    wrapped_model: SKModel
        wrapped SKModel 
    train_dataloader:
        training dataloader
    val_dataloader: optional
        validation dataloader
    test_dataloader: optional
        test dataloader
    kwargs: dict of dicts, optional
        can contain `artifacts` to log with the model and `model_path` to specify the model output path; remaining entries are used as experiment tags
        
    Returns
    -------
    tuple:
        (run_id, run_metrics, val_y, val_yhat, val_pred_proba, test_y, test_yhat, test_pred_proba)
    """
    tune = kwargs.get('tune', False)
    if tune:
        inner_cv = kwargs.get('inner_cv', C.DEFAULT_CV)
        h_search = kwargs.pop('h_search', None)
        if h_search is None:
            raise AttributeError('if tuning is requested, h_search must be provided')
        scoring = kwargs.get('scoring', C.DEFAULT_SCORING_CLASSIFIER)
        
    model_path = kwargs.pop('model_path', 'model')
    # model_save_dir = Path(kwargs.get('model_save_dir', C.MODEL_SAVE_DIR))
    # model_save_dir.mkdir(parents=True, exist_ok=True)
    artifacts = kwargs.pop('artifacts', dict())

    mlflow_conf.setdefault('problem_type', 'classifier')
    mlflow_setup = setup_mlflow(**mlflow_conf)

    calculate_metrics = Metrics(mlflow_conf['problem_type'])
    log.debug(f"Mlflow setup: {mlflow_setup}")
    log.debug(f"Used metrics: {calculate_metrics}")

    experiment_name = mlflow_setup['experiment_name']

    experiment_tags = dict()
    experiment_tags.update(**kwargs)

    with mlflow.start_run():
        run_id = mlflow.active_run().info.run_id 
        _start_time = time.time()

        X_train, y_train = train_dataloader.get_data()
        
        if val_dataloader is not None:
            X_val, y_val = val_dataloader.get_data()
            outer_cv, _X, _y = get_predefined_split(X_train, y_train, X_val, y_val)
        else:
            warnings.warn("This path is untested...use with caution")
            outer_cv = kwargs.get('outer_cv', None)
            if outer_cv is None:
                warnings.warn(f'Neither a validation set nor outer_cv provided; using KFold({C.DEFAULT_CV}) to get a validation split')
                outer_cv = KFold(C.DEFAULT_CV)
            _X = X_train.values if hasattr(X_train, 'values') else X_train
            _y = y_train.values if hasattr(y_train, 'values') else y_train

        if test_dataloader is not None:
            X_test, y_test = test_dataloader.get_data()

        # mlflow.log_params(wrapped_model.model.get_params())
        if tune:
            m, gs = wrapped_model.tune(X=_X, y=_y,
                                       hyper_params=h_search,
                                       cv=inner_cv, 
                                       experiment_name=experiment_name, 
                                       scoring=scoring)
            
            mlflow.sklearn.log_model(m, experiment_name + '_model')
            mlflow.sklearn.log_model(gs, experiment_name + '_GridSearchCV')
            
            log.info(f"Experiment: {experiment_name} has finished hyperparameter tuning")
            log.info("Hyperparameter search space: " + str(h_search))
            # log params
            mlflow.log_params(wrapped_model.params)
            print(f"Best_params:\n {gs.best_params_}")
        else:
            wrapped_model.fit(X=X_train, y=y_train)
        
            mlflow.sklearn.log_model(wrapped_model.model, experiment_name + '_model')
            mlflow.log_params(wrapped_model.params)
            log.info(f"Experiment: {experiment_name} has finished training")

        for split_id, (train_index, val_index) in enumerate(outer_cv.split(_X, _y)):
            if split_id >= 1:
                warnings.warn("Current logic for tune and implicit outer_cv not correct")
                break

            _X_train, _X_val = _X[train_index, :], _X[val_index, :]
            _y_train, _y_val = _y[train_index], _y[val_index]
            # use the labels of the evaluated split so the metrics below see the right y_val
            y_val = _y_val

            y_val_proba = wrapped_model.predict_proba(_X_val)
            if y_val_proba.ndim > 1:
                y_val_proba = y_val_proba[:, 1]

            y_val_hat = wrapped_model.predict(_X_val)
            val_score = wrapped_model.score(_X_val, _y_val)

        if test_dataloader is not None:
            y_test_proba = wrapped_model.predict_proba(X_test)
            if y_test_proba.ndim > 1:
                y_test_proba = y_test_proba[:, 1]
            y_test_hat = wrapped_model.predict(X_test)
            test_score = wrapped_model.score(X_test, y_test)
        else:
            y_test=None
            y_test_hat=None
            y_test_proba=None
            test_score =None

        # Calculate metrics
        wrapped_model.metrics = calculate_metrics(y_val=y_val, 
                                             y_val_proba=y_val_proba, 
                                             y_val_hat=y_val_hat,
                                             val_score=val_score, 
                                             y_test=y_test, 
                                             y_test_proba=y_test_proba, 
                                             y_test_hat=y_test_hat,
                                             test_score=test_score
                                            )
        _end_time = time.time()
        run_time = (_end_time - _start_time)
        
        # log metrics
        mlflow.log_metrics(wrapped_model.metrics)

        experiment_tags.update(dict(run_time=run_time))
        if experiment_tags is not None:
            mlflow.set_tags(experiment_tags)

        # Other artifacts
        _tmp = {f"artifact/{art_name}": art_val 
                for art_name, art_val in six.iteritems(artifacts)}
        helper.log_artifacts(_tmp, run_id, mlflow_uri=mlflow_setup['mlflow_uri'], delete=True) 

        return (run_id,
                wrapped_model.metrics,
                y_val, y_val_hat, y_val_proba,
                y_test, y_test_hat, y_test_proba,
                )
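
A sketch of how this trainer might be invoked, assuming dataloader objects that expose `get_data()` as the function expects; the `SKModel` construction and the search space are illustrative assumptions:

# Hypothetical setup; SKModel and the dataloaders are placeholders that follow
# the interfaces used above (get_data, fit, tune, predict_proba, score, ...).
from sklearn.linear_model import LogisticRegression

wrapped_model = SKModel(LogisticRegression(), {'C': 1.0}, name='logreg')
(run_id, metrics,
 y_val, y_val_hat, y_val_proba,
 y_test, y_test_hat, y_test_proba) = run_training_with_mlflow(
    mlflow_conf={'mlflow_uri': 'http://localhost:5000',
                 'experiment_name': 'my_experiment'},   # assumed keys
    wrapped_model=wrapped_model,
    train_dataloader=train_dl,           # objects exposing get_data()
    val_dataloader=val_dl,
    test_dataloader=test_dl,
    tune=True,                           # enables the hyperparameter search branch
    h_search={'C': [0.1, 1.0, 10.0]},    # search space handed to wrapped_model.tune
    scoring='roc_auc',
    artifacts={'feature_names': ['age', 'bmi']},
    cohort='demo',                       # any remaining kwarg becomes a run tag
)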
Example #5
def run_training_with_mlflow(mlflow_conf, trainer, wrapped_model, **kwargs):
    """
    Function to run supervised training for classification

    Parameters
    ----------
    mlflow_conf: dict
        mlflow configuration, e.g. MLFLOW_URI
    trainer: pl.Trainer
        a pytorch lightning trainer implementing `fit` function
    wrapped_model: PyModel
        wrapped PyModel 
    kwargs: dict of dicts, optional
        can contain `artifacts` to log with the model and `model_path` to specify the model output path; remaining entries are used as experiment tags
        
    Returns
    -------
    tuple:
        (run_id, run_metrics, val_y, val_yhat, val_pred_proba, test_y, test_yhat, test_pred_proba)
    """
    model_path = kwargs.pop('model_path', 'model')
    artifacts = kwargs.pop('artifacts', dict())

    mlflow_conf.setdefault('problem_type', 'classifier')
    mlflow_setup = setup_mlflow(**mlflow_conf)

    calculate_metrics = Metrics(mlflow_conf['problem_type'])
    print(mlflow_setup, calculate_metrics)

    experiment_name = mlflow_setup['experiment_name']

    experiment_tags = dict()
    experiment_tags.update(**kwargs)

    with mlflow.start_run():
        run_id = mlflow.active_run().info.run_id
        _start_time = time.time()
        trainer.fit(wrapped_model)

        mlflow.log_metric('train_score',
                          trainer.callback_metrics['train_score'])
        mlflow.log_metric(
            'train_loss',
            float(trainer.callback_metrics['loss'].data.cpu().numpy()))
        mlflow.log_metric(
            'val_loss',
            float(trainer.callback_metrics['val_loss'].data.cpu().numpy()))
        mlflow.log_params(wrapped_model.get_params())

        try:
            ckpt_path = None
            for callback in trainer.callbacks:
                if isinstance(callback, pl.callbacks.ModelCheckpoint):
                    ckpt_path = callback.best_model_path
                    break
            if ckpt_path is None:
                raise Exception("couldn't determine the best model")
        except Exception as e:
            ckpt_path = _find_checkpoint(wrapped_model)
        print(f"Best model is temporarily in {ckpt_path}")

        try:
            checkpoint = pl.utilities.cloud_io.load(ckpt_path)['state_dict']
            wrapped_model.load_state_dict(checkpoint)
        except Exception as e:
            raise Exception(
                f"could not restore the model properly from {ckpt_path}. Error={e}")

        # Calibrating if calibration requested
        cal_dataloader = wrapped_model.cal_dataloader()
        if len(cal_dataloader) > 0 and kwargs.get('calibrate', False):
            wrapped_model.set_temperature(cal_dataloader)
            # manually setting the best model and the model mode
            if ckpt_path is not None:
                # str.rstrip strips a character set, not the '.ckpt' suffix; use splitext instead
                base_name = os.path.splitext(os.path.basename(ckpt_path))[0]
                ckpt_path = os.path.join(
                    os.path.dirname(ckpt_path),
                    f"{base_name}-calibrated.ckpt"
                )
            else:
                ckpt_path = os.path.join(os.getcwd(),
                                         f"{model_path}-calibrated.ckpt")
            trainer.save_checkpoint(ckpt_path)
            print(f"Calibrated model saved to {ckpt_path}")

        wrapped_model.eval()

        # Collecting metrics
        val_dataloader = wrapped_model.val_dataloader()
        (val_pred_proba, val_yhat, val_y, val_score) = wrapped_model.run(
            val_dataloader, overfit_pct=kwargs.get('overfit_pct', 0))
        y_val = val_y.data.cpu().numpy()
        y_val_proba = val_pred_proba[:, 1].data.cpu().numpy()
        y_val_hat = val_yhat.data.cpu().numpy()

        test_dataloader = wrapped_model.test_dataloader()
        if len(test_dataloader) > 0:
            log.warning("For now supporting only one test dataloader")

            (test_pred_proba, test_yhat, test_y,
             test_score) = wrapped_model.run(test_dataloader,
                                             overfit_pct=kwargs.get(
                                                 'overfit_pct', 0))
            y_test = test_y.data.cpu().numpy()
            y_test_proba = test_pred_proba[:, 1].data.cpu().numpy()
            y_test_hat = test_yhat.data.cpu().numpy()
        else:
            test_y, test_pred_proba, test_yhat = None, None, None
            y_test, y_test_proba, y_test_hat = None, None, None

        try:
            run_metrics = calculate_metrics(y_val,
                                            y_val_hat,
                                            y_val_proba=y_val_proba,
                                            y_test=y_test,
                                            y_test_hat=y_test_hat,
                                            y_test_proba=y_test_proba)

            mlflow.log_metrics(run_metrics)
        except Exception as e:
            warnings.warn(f"{e}")
            log.warning(f"something went wrong while computing metrics: {e}")
            run_metrics = None

        _end_time = time.time()
        run_time = (_end_time - _start_time)

        experiment_tags.update(dict(run_time=run_time))
        if experiment_tags is not None:
            mlflow.set_tags(experiment_tags)

        # Pytorch log model not working
        # *****************************
        #  mlflow.pytorch.log_model(wrapped_model, model_path, registered_model_name=problem_type)     # <------ use mlflow.pytorch.log_model to log trained sklearn model
        #  print("Model saved in run {}, and registered on {} as a new version of model name {}"
        #       .format(active_run, os.environ['MLFLOW_URI'], problem_type))
        _tmp = {
            f"artifact/{art_name}": art_val
            for art_name, art_val in six.iteritems(artifacts)
        }
        _tmp['model_checkpoint'] = ckpt_path
        helper.log_artifacts(_tmp,
                             run_id,
                             mlflow_uri=mlflow_setup['mlflow_uri'],
                             delete=True)

    return (run_id, run_metrics, val_y, val_yhat, val_pred_proba, test_y,
            test_yhat, test_pred_proba)
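
A usage sketch for the lightning variant, assuming the wrapped model provides its own train/val/cal/test dataloaders as the code above expects; the trainer arguments and config keys are illustrative assumptions:

# Hypothetical call; PyModel wraps a torch module together with its dataloaders.
import pytorch_lightning as pl

trainer = pl.Trainer(
    max_epochs=10,
    callbacks=[pl.callbacks.ModelCheckpoint(monitor='val_loss')],
)
(run_id, run_metrics,
 val_y, val_yhat, val_pred_proba,
 test_y, test_yhat, test_pred_proba) = run_training_with_mlflow(
    mlflow_conf={'mlflow_uri': 'http://localhost:5000',
                 'experiment_name': 'my_experiment'},   # assumed keys
    trainer=trainer,
    wrapped_model=wrapped_model,         # a PyModel instance
    calibrate=True,                      # temperature scaling if a cal loader exists
    artifacts={'feature_names': ['age', 'bmi']},
)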