Example #1
def run_synthesis(synthesis_args):
	n, s, synth_args, d, epsilons, datasets, cat_cols, save_models_path, run_name = synthesis_args
	res = []
	with mlflow.start_run(nested=True):
		synth = s(epsilon=float(epsilons[0]), **synth_args)
		for i, e in enumerate(epsilons):
			start_time = time.time()		

			#Need to save: Epochs, Epsilons traversed, Loss 
			sampled = synth.fit_sample(datasets[d]["data"],categorical_columns=cat_cols.split(','), update_epsilon=float(e), verbose=conf.VERBOSE, mlflow=True)
			end_time = time.time()

			mlflow.set_tags({"synthesizer": type(synth),
				"args": str(synth_args),
				"dataset": str(d),
				"epsilon": str(e),
				"duration_seconds": str(end_time - start_time)})

			res.append((n, d, str(e), sampled))
			print("Epsilon " + str(e) + " finished for Synthesizer " + n + " in " + str(end_time - start_time) + "s")

						
			datapath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_dataset.csv")
			modelpath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_model.ckpt")

			sampled.to_csv(datapath)
			mlflow.log_artifact(datapath)

			synth.save(modelpath)
			mlflow.log_artifact(modelpath)

	return res
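The helper above expects its arguments packed into a single tuple. A minimal driver sketch follows; the synthesizer tuples, dataset dictionary, and epsilon grid are illustrative assumptions, not part of the original code.

def run_all_synthesizers(synthesizers, datasets, epsilons, cat_cols,
                         save_models_path, run_name):
    # Hypothetical driver: 'synthesizers' is assumed to be a list of
    # (name, class, hyperparameter-dict) tuples and 'datasets' the same
    # dictionary that run_synthesis indexes into.
    results = []
    with mlflow.start_run(run_name=run_name):
        for n, s, synth_args in synthesizers:
            for d in datasets:
                args = (n, s, synth_args, d, epsilons, datasets,
                        cat_cols, save_models_path, run_name)
                results.extend(run_synthesis(args))
    return results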
Example #2
def train_model(model, X_train, y_train, name, config):
    """train
    train a single model.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), Input data for train.
        y_train: ndarray(number, ), result data for train.
        name: String, name of model.
        config: Dict, parameter for train.
    """
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    tracking_uri = mlflow.get_tracking_uri()
    print("Current tracking uri: {}".format(tracking_uri))

    tags = {"usuario": "Anonymous"}

    mlflow.set_experiment("traffic_flow-saes")
    with mlflow.start_run() as run:
        mlflow.set_tags(tags)
        mlflow.keras.autolog()

        model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        #early = EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto')
        hist = model.fit(X_train,
                         y_train,
                         batch_size=config["batch"],
                         epochs=config["epochs"],
                         validation_split=0.05)

        model.save('model/' + name + '.h5')
        df = pd.DataFrame.from_dict(hist.history)
        df.to_csv('model/' + name + ' loss.csv', encoding='utf-8', index=False)
        mlflow.log_param("Run_id", run.info.run_id)
Example #3
def train_model(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    model_dir: Optional[Path] = Path(config.MODEL_DIR),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        params_fp (Path, optional): Parameters to use for training. Defaults to `config/params.json`.
        model_dir (Path): location of model artifacts. Defaults to config.MODEL_DIR.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Set experiment and start run
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name):
        run_id = mlflow.active_run().info.run_id

        # Train
        artifacts = main.run(params=params)

        # Set tags
        tags = {}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": performance["behavioral"]["score"],
            "slices_f1": performance["slices"]["overall"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            utils.save_dict(vars(artifacts["params"]),
                            Path(dp, "params.json"),
                            cls=NumpyEncoder)
            utils.save_dict(performance, Path(dp, "performance.json"))
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["params"]))

    # Save for repo
    with open(Path(model_dir, "run_id.txt"), "w") as fp:
        fp.write(run_id)
    utils.save_dict(vars(params),
                    Path(model_dir, "params.json"),
                    cls=NumpyEncoder)
    utils.save_dict(performance, Path(model_dir, "performance.json"))
Example #4
def log_sk_model(sk_model,
                 model_artifact_path: str = 'model',
                 registered_model_name: str = None,
                 params: dict = None,
                 metrics: dict = None,
                 tags: dict = None,
                 artifacts: dict = None) -> None:

    if params is None:
        params = {}
    if metrics is None:
        metrics = {}
    if tags is None:
        tags = {}
    if artifacts is None:
        artifacts = {}

    _logger.info("Logging Scikit-Learn model to MLflow")
    mlflow.sklearn.log_model(sk_model=sk_model,
                             artifact_path=model_artifact_path,
                             conda_env='./environment.yml',
                             registered_model_name=registered_model_name)
    mlflow.log_params(params)
    mlflow.set_tags(tags)
    mlflow.log_metrics(metrics)
    for local_path, artifact_path in artifacts.items():
        _logger.debug(
            f"Logging artifact to MLflow: {local_path} - {artifact_path}")
        mlflow.log_artifact(local_path, artifact_path)
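A possible call site for log_sk_model, assuming an active run, a fitted scikit-learn estimator, and that the './environment.yml' the helper references exists; all values below are illustrative.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=200).fit(X, y)

with mlflow.start_run(run_name="sk-demo"):
    log_sk_model(clf,
                 params={"max_iter": 200},
                 metrics={"train_accuracy": clf.score(X, y)},
                 tags={"stage": "experiment"})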
Example #5
    def set_tags(self, tags, optuna_log=True):
        """Wrapper of the corresponding MLflow function.

        The data is also added to Optuna as a user attribute.

        Args:
            tags ([Dict]): Dict of tags.
            optuna_log (bool, optional): Internal parameter that should be ignored by the API user.
                Defaults to True.
        """
        for key, value in tags.items():
            if optuna_log:
                self._trial.set_user_attr(key, value)
            _logger.info(f"Tag: {key}: {value}")
            value = str(value)  # make sure it is a string
            if len(value) > self._max_mlflow_tag_length:
                tags[key] = textwrap.shorten(value,
                                             self._max_mlflow_tag_length)
        try:
            mlflow.set_tags(normalize_mlflow_entry_names_in_dict(tags))
        except Exception as e:
            _logger.error(
                "Exception raised during MLflow communication! Exception: {}".
                format(e),
                exc_info=True,
            )
Example #6
def log_experiment(params,
                   metrics=None,
                   tags=None,
                   model=None,
                   experiment_name=EXPERIMENT_NAME):
    """
    Logs the model and related parameters and metrics as an experiment.
    :param params: (dict): key-value pairs of named parameters used by the model
    :param tags: (dict): key-value pairs of tags
    :param metrics: (dict): key-value pairs of metrics produced by the model
    :param model: (str): file location of the saved model
    :param experiment_name: (str): name of the experiment for which data is being logged.
        The tracking URI is not a parameter here; for permanent URIs such as a Postgres
        database, set the MLFLOW_TRACKING_URI environment variable.
    """
    try:
        mlflow.create_experiment(experiment_name)
    except MlflowException:
        logger.info("Found existing experiment. Adding new version to that.")

    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_params(params)
        if metrics is not None:
            mlflow.log_metrics(metrics)
        if tags is not None:
            mlflow.set_tags(tags)
        if model is not None:
            mlflow.log_artifact(model)
    try:
        mlflow.end_run()
    except MlflowException:
        pass
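A hypothetical invocation of log_experiment; the parameter values, metric numbers, and model path are placeholders.

log_experiment(
    params={"n_estimators": 100, "max_depth": 6},
    metrics={"rmse": 0.42},
    tags={"owner": "data-team"},
    model="models/random_forest.pkl",   # assumed to exist on disk
    experiment_name="demo-experiment",
)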
Example #7
def log_mlflow(experiment, model, params={}, metrics={}, tags={}):
    """
    Function to log models, params, metrics and tags to MLFlow
    """
    print("=== Logging in MLFlow Server...")
    mlflow.set_experiment(experiment)
    with mlflow.start_run():
        ## LOG PARAMS
        mlflow.log_params(params)
        print("Params logged")

        ## LOG METRICS
        mlflow.log_metrics(metrics)
        print("Metric logged.")

        ## LOG TAGS
        mlflow.set_tags(tags)
        print("Tags logged.")

        ## LOG MODEL
        if model is not None:
            mlflow.pyfunc.log_model(artifact_path="model",
                                    python_model=model,
                                    conda_env="config/conda.yaml")
            runid = mlflow.active_run().info.run_uuid
            print("Model saved in run: {}.".format(runid))
Example #8
def main(args: argparse.Namespace):
    keras_model = KerasModel(args.model_dir)

    raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
        f"{args.dataset_dir}/test",
        label_mode="int",
        batch_size=args.batch_size,
        class_names=keras_model.class_names,
        seed=SEED_VALUE,
    )

    y_true, y_pred = [], []

    for x, y in raw_test_ds:
        predictions = keras_model.predict(x, return_id=True)
        predictions = [max(p.keys(), key=lambda k: p[k]) for p in predictions]

        y_true.extend(y.numpy().tolist())
        y_pred.extend(predictions)
        assert len(y_true) == len(y_pred)

        if not len(y_true) % 1_000:
            logging.info(f"Tested {len(y_true)} samples.")

    scores = {
        "accuracy_score": accuracy_score(y_true, y_pred),
        "f1_score": f1_score(y_true, y_pred),
        "precision_score": precision_score(y_true, y_pred),
        "recall_score": recall_score(y_true, y_pred),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }

    logging.info(scores)
    mlflow.set_tags(scores)
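The args namespace above is only consumed via model_dir, dataset_dir, and batch_size; a hedged parser sketch with those flag names (assumptions, since the original parser is not shown) could look like this.

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", dest="model_dir", required=True)
    parser.add_argument("--dataset-dir", dest="dataset_dir", required=True)
    parser.add_argument("--batch-size", dest="batch_size", type=int, default=32)
    main(parser.parse_args())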
Example #9
def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "Baseline"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})

        X, y = fetch_censusdata()
        clf = make_pipeline(
            make_linear_preprocessor(),
            LogisticRegression(C=regularisation,
                               max_iter=max_iter,
                               class_weight="balanced",
                               random_state=0),
        )

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")

        mlflow.log_metrics({
            "precision": precision,
            "recall": recall,
            "fscore": fscore
        })
Example #10
 def detailed_score(self, generation): 
     """
     A more detailed run with mlflow.
     While using this function, it is recommended that you also enable the mlflow tracking integration for the model you're using,
     e.g. mlflow tracking for TensorFlow, torch, scikit-learn, etc.

     TODO: Merge this function with easy_score, and pass the parameter used on the run as a parameter for easy_score.
     This fix will be available 
     """
     if self.score_function is None:
         raise ValueError("No score function set; set it with set_score(func) or by passing score=func during class instantiation")
     for i, elem in enumerate(self.population): 
         with mlflow.start_run() as run: 
             tags = {
                 'generation': generation + 1, 
                 'individue' : i  + 1
             }
             mlflow.set_tags(tags)
             mlflow.log_param("Generation", generation + 1)
             score, model = self.score_function(elem)
         if score > self.top_score: 
             print(f"*** New optimal model founded with a score of {score} ***")
             self.top_score = score
             self.top_model = (score, model)
         elem['score'] = score
     sorted_list = sorted(self.population, key= itemgetter('score'), reverse= True)
     self.first_parent = sorted_list[0]
     self.second_parent = sorted_list[1]
     generation_top_score = self.first_parent['score']
     self.genetic_tree.append(self.first_parent)
     print(f"Better parent {self.first_parent}")
     print("DONE")
Example #11
    def run_experiment(self, **kwargs):

        if self.options.opt:
            self.optimize()

        seed = self.config['general']['seed']
        np.random.seed(seed)
        tf.random.set_seed(seed)

        if kwargs:
            self.adjust_pars(kwargs)
            self.data = Data(self.config, self.options, self.pars)

        if self.options.mlflow:
            mlflow.set_experiment(self.config['general']['experiment'])
            mlflow.start_run()
            self.log_pars()
            mlflow.set_tags({'seed': seed, 'mode': self.options.mode})

        if self.feature_type == 'combined' and self.fusion == 'late':
            y_true, y_pred, mae = self.train_bimodal()
        else:
            _, y_pred, _, y_true, mae = self.train_model(self.feature_type)

        if self.options.verbose:
            run_validation(y_true, y_pred)

        if self.options.mlflow:
            mlflow.end_run()

        return -mae
Example #12
    def before_pipeline_run(self, run_params: Dict[str, Any],
                            pipeline: Pipeline, catalog: DataCatalog) -> None:
        """Hook to be invoked before a pipeline runs.
        Args:
            run_params: The params needed for the given run.
                Should be identical to the data logged by Journal.
                # @fixme: this needs to be modelled explicitly as code, instead of comment
                Schema: {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
            pipeline: The ``Pipeline`` that will be run.
            catalog: The ``DataCatalog`` to be used during the run.
        """
        self.context = load_context(
            project_path=run_params["project_path"],
            env=run_params["env"],
            extra_params=run_params["extra_params"],
        )

        mlflow_conf = get_mlflow_config(self.context)
        mlflow_conf.setup(self.context)

        run_name = (mlflow_conf.run_opts["name"]
                    if mlflow_conf.run_opts["name"] is not None else
                    run_params["pipeline_name"])
        mlflow.start_run(
            run_id=mlflow_conf.run_opts["id"],
            experiment_id=mlflow_conf.experiment.experiment_id,
            run_name=run_name,
            nested=mlflow_conf.run_opts["nested"],
        )
        # Set tags only for run parameters that have values.
        mlflow.set_tags({k: v for k, v in run_params.items() if v})
        # add manually git sha for consistency with the journal
        # TODO : this does not take into account not committed files, so it
        # does not ensure reproducibility. Define what to do.
        mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
        mlflow.set_tag(
            "kedro_command",
            _generate_kedro_command(
                tags=run_params["tags"],
                node_names=run_params["node_names"],
                from_nodes=run_params["from_nodes"],
                to_nodes=run_params["to_nodes"],
                from_inputs=run_params["from_inputs"],
                load_versions=run_params["load_versions"],
                pipeline_name=run_params["pipeline_name"],
            ),
        )
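The docstring's @fixme asks for the run_params schema to be modelled as code; a hedged TypedDict sketch of that schema (field names taken from the docstring, the class name is an assumption) might look like this.

from typing import Any, Dict, List, Optional, TypedDict

class KedroRunParams(TypedDict):
    run_id: str
    project_path: str
    env: str
    kedro_version: str
    tags: Optional[List[str]]
    from_nodes: Optional[List[str]]
    to_nodes: Optional[List[str]]
    node_names: Optional[List[str]]
    from_inputs: Optional[List[str]]
    load_versions: Optional[List[str]]
    pipeline_name: str
    extra_params: Optional[Dict[str, Any]]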
Example #13
def mlflow_log(history, config, machine_type, out_path, tb_log_dir):
    mlflow.set_tracking_uri(config['IO_OPTION']['MLFLOW_PATH']+'/mlruns')
    run_name = config['IO_OPTION']['model_name']+'_'+machine_type
    with mlflow.start_run(run_name=run_name) as run:
        # IO_OPTION and etc into mlflow
        mlflow.set_tags(config['IO_OPTION'])
        mlflow.set_tags(config['etc'])
        mlflow.set_tag('machine_type', machine_type)
        mlflow.set_tag('tb_log_dir', tb_log_dir)
        # Log spectrogram_param into mlflow
        for key, value in config['mel_spectrogram_param'].items():
            mlflow.log_param(key, value)
        # log fit param
        for key, value in config['fit'].items():
            mlflow.log_param(key, value)
        # Log other info
        mlflow.log_param('loss_type', 'MSE')
        
        # Log results into mlflow
        mlflow.log_metric('train_epoch_score', history['epoch_score_lists']['train'][-1])
        mlflow.log_metric('valid_epoch_score', history['epoch_score_lists']['valid'][-1])

        # Log model
        mlflow.log_artifact(out_path)
    mlflow.end_run()
Example #14
def run_synthesis(synthesis_args):
    """
    A parallel run of the synthesis step

    :param synthesis_args: 
        n = name of the synthesizer, 
        s = synthesizer object,
        synth_args = dictionary of hyperparams for the synthesizer
        d = name of dataset
        e = epsilon value 
        datasets = dataset dictionary 
        cat_cols = list of categorical columns in dataset d
    :type synthesis_args: tuple
    :return: (n, d, str(e), sampled = synthesized data of size len(d))
    :rtype: tuple
    """
    n, s, synth_args, d, e, datasets, cat_cols = synthesis_args
    with mlflow.start_run(nested=True):
        start_time = time.time()
        synth = s(epsilon=float(e), **synth_args)
        d_copy = datasets[d]["data"].copy()
        sampled = synth.fit_sample(d_copy, categorical_columns=cat_cols.split(','))
        end_time = time.time()
        mlflow.set_tags({"synthesizer": type(synth),
                         "args": str(synth_args),
                         "epsilon": str(e),
                         "dataset": str(d),
                         "duration_seconds": str(end_time - start_time)})
        print(datasets[d]["name"] + ' finished. Epsilon: ' + str(e))
        datasets[d][n][str(e)] = sampled
    return (n, d, str(e), sampled)
Example #15
 def log(self):
     """Log params, metrics, and tags to MLFlow if is_def is False"""
     if not self.is_dev:
         mlflow.log_params(self.params)
         mlflow.log_metrics(self.metrics)
         mlflow.set_tags(self.tags)
         self._save_model()
Example #16
def setup_mlflow(cfg, features_cfg, data_cfg):
    import mlflow

    experiment_id = OmegaConf.select(cfg, "experiment_id", default=None)

    if experiment_id is None and cfg.experiment_name is not None:
        mlflow.set_experiment(cfg.experiment_name)
        experiment = mlflow.get_experiment_by_name(cfg.experiment_name)
        logger.debug(f"MLFlow Experiment: {cfg.experiment_name}")
        experiment_id = experiment.experiment_id

    orig_cwd = get_original_cwd()
    tracking_uri = f"file://{orig_cwd}/mlruns"
    mlflow.set_tracking_uri(tracking_uri)
    logger.info(f"MLFlow Tracking URI: {tracking_uri}")

    # if cfg.model == "xgboost":
    #     import mlflow.xgboost

    #     logger.debug("Turning on MLFlow autologging for XGBoost...")
    #     mlflow.xgboost.autolog()

    run = mlflow.start_run(experiment_id=experiment_id)

    # tracking_uri = mlflow.get_tracking_uri()
    # logger.info(f"MLFlow Tracking URI: {tracking_uri}")

    processed_data_dir = Path(to_absolute_path(data_cfg.hydra.run.dir))
    if processed_data_dir is not None:
        data_hydra_dir = processed_data_dir / ".hydra"
        mlflow.log_artifacts(data_hydra_dir,
                             artifact_path="processed_data_configs")
        data_cfg = OmegaConf.load(data_hydra_dir / "config.yaml")
        for name, param_name in DATA_CONFIGS_TO_LOG.items():
            param = OmegaConf.select(data_cfg, param_name)
            if param is not None:
                if isinstance(param, list):
                    param = ", ".join([str(p) for p in param])
                mlflow.log_param(name, param)

    model_hydra_dir = Path(".hydra")
    mlflow.log_artifacts(model_hydra_dir, artifact_path="model_configs")

    mlflow.log_params({
        "model": cfg.model,
        "lag": features_cfg.lag,
        "exog_lag": features_cfg.exog_lag,
        "lead": features_cfg.lead,
        "cv_method": cfg.cv.method,
    })
    mlflow.log_params(cfg.cv.params)

    tags = OmegaConf.select(cfg, "tags", default={})
    if bool(tags):
        mlflow.set_tags(tags)

    return run
Example #17
def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "FeatureImportance"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})

        X, y = fetch_censusdata()
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)

        p1 = make_nonlinear_preprocessor()
        p2 = make_nonlinear_to_linear_preprocessor()
        clf = LogisticRegression(C=regularisation,
                                 max_iter=max_iter,
                                 class_weight="balanced",
                                 random_state=0)

        pp = make_pipeline(p1, p2, clf)

        pp.fit(X_train, y_train)

        # Log reference metrics
        y_pred = pp.predict(X_test)

        ref_precision, ref_recall, ref_fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")

        mlflow.log_metrics({
            "precision": ref_precision,
            "recall": ref_recall,
            "fscore": ref_fscore
        })

        for i in range(len(FEATURES)):
            with mlflow.start_run(nested=True) as _:
                mlflow.set_tags({"training_type": "FeatureImportance"})
                mlflow.log_params({"feature": FEATURES[i]})

                X_test_tr = p1.transform(X_test)

                #  shuffle feature i
                indexes = np.arange(X_test_tr.shape[0])
                np.random.shuffle(indexes)
                X_test_tr[:, i] = X_test_tr[indexes, i]

                X_test_tr = p2.transform(X_test_tr)
                y_pred = clf.predict(X_test_tr)

                precision, recall, fscore, _ = precision_recall_fscore_support(
                    y_test, y_pred, average="binary")

                mlflow.log_metrics({
                    "precision": precision,
                    "precision_penalty": precision - ref_precision,
                    "recall": recall,
                    "recall_penalty": recall - ref_recall,
                    "fscore": fscore,
                    "fscore_penalty": fscore - ref_fscore,
                })
Example #18
def _log_xp(config, name):
    set_experiment(name)
    set_tags(config.get('tags', {}))

    with open("/tmp/config.json", "w") as fp:
        simplejson.dump(config, fp)
    log_artifact("/tmp/config.json")
    os.remove("/tmp/config.json")
    return name
Example #19
    def __init__(self, exp_name: str, tag: Dict):
        """mlflowの初期化

        Args:
            exp_name (str): 実験名
            tag (Dict): タグ情報
        """
        mlflow.set_experiment(exp_name)
        mlflow.set_tags(tag)
Example #20
    def __call__(self, study: optuna.study.Study,
                 trial: optuna.trial.FrozenTrial) -> None:

        # This sets the tracking_uri for MLflow.
        if self._tracking_uri is not None:
            mlflow.set_tracking_uri(self._tracking_uri)

        # This sets the experiment of MLflow.
        mlflow.set_experiment(study.study_name)

        with mlflow.start_run(run_name=str(trial.number),
                              nested=self._nest_trials):

            # This sets the metric for MLflow.
            trial_value = trial.value if trial.value is not None else float(
                "nan")
            mlflow.log_metric(self._metric_name, trial_value)

            # This sets the params for MLflow.
            mlflow.log_params(trial.params)

            # This sets the tags for MLflow.
            tags: Dict[str, str] = {}
            tags["number"] = str(trial.number)
            tags["datetime_start"] = str(trial.datetime_start)
            tags["datetime_complete"] = str(trial.datetime_complete)

            # Set state and convert it to str and remove the common prefix.
            trial_state = trial.state
            if isinstance(trial_state, TrialState):
                tags["state"] = str(trial_state).split(".")[-1]

            # Set direction and convert it to str and remove the common prefix.
            study_direction = study.direction
            if isinstance(study_direction, StudyDirection):
                tags["direction"] = str(study_direction).split(".")[-1]

            tags.update(trial.user_attrs)
            distributions = {(k + "_distribution"): str(v)
                             for (k, v) in trial.distributions.items()}
            tags.update(distributions)

            if self._tag_study_user_attrs:
                tags.update(study.user_attrs)

            # This is a temporary fix on Optuna side. It avoids an error with user
            # attributes that are too long. It should be fixed on MLflow side later.
            # When it is fixed on MLflow side this codeblock can be removed.
            # see https://github.com/optuna/optuna/issues/1340
            # see https://github.com/mlflow/mlflow/issues/2931
            max_mlflow_tag_length = 5000
            for key, value in tags.items():
                value = str(value)  # make sure it is a string
                if len(value) > max_mlflow_tag_length:
                    tags[key] = textwrap.shorten(value, max_mlflow_tag_length)

            mlflow.set_tags(tags)
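This __call__ matches the shape of Optuna's MLflow callback; a hedged usage sketch with optuna.integration.MLflowCallback (assuming that is the surrounding class) is shown below, with a toy objective.

import optuna
from optuna.integration import MLflowCallback

mlflc = MLflowCallback(tracking_uri="sqlite:///mlruns.db",
                       metric_name="objective_value")

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

study = optuna.create_study(study_name="demo-study", direction="minimize")
study.optimize(objective, n_trials=20, callbacks=[mlflc])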
Example #21
def start_run(param_prefix: Optional[str] = None,
              experiment_name: Optional[str] = None,
              run_name: Optional[str] = None,
              artifact_location: Optional[str] = None,
              **args) -> mlf.ActiveRun:
    """Close alias of mlflow.start_run. The only difference is that uv.start_run
  attempts to extract parameters from the environment and log those to the
  bound UV reporter using `report_params`.

  Note that if experiment_name is specified and refers to an existing
  experiment, then the artifact_location will not be honored as this is an
  immutable property of an mlflow experiment. This method will issue a warning
  but proceed.

  Note that the returned value can be used as a context manager:
  https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.start_run
  """
    if experiment_name is None:
        experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")

    if run_name is None:
        run_name = os.environ.get("MLFLOW_RUN_NAME")

    if artifact_location is None:
        artifact_location = os.environ.get("MLFLOW_ARTIFACT_ROOT")

    _ensure_non_null_project(artifact_location)

    # Make sure the experiment exists before the run starts.
    if experiment_name is not None:
        if mlf.get_experiment_by_name(experiment_name) is None:
            mlf.create_experiment(experiment_name, artifact_location)
        mlf.set_experiment(experiment_name)

    ret = mlf.start_run(run_name=run_name, **args)
    env_params = ue.extract_params(prefix=param_prefix)
    mlf.set_tags(env_params)

    # for CAIP jobs, we add the job id as a tag, along with a link to the
    # console page
    cloud_ml_job_id = os.environ.get('CLOUD_ML_JOB_ID')
    if cloud_ml_job_id is not None:
        mlf.set_tag(
            'cloud_ml_job_details',
            f'https://console.cloud.google.com/ai-platform/jobs/{cloud_ml_job_id}'
        )
        mlf.set_tag('cloud_ml_job_id', cloud_ml_job_id)

    mlf_artifact_uri = mlf.get_artifact_uri()
    if mlf_artifact_uri is not None and artifact_location is not None:
        if not mlf_artifact_uri.startswith(artifact_location):
            logging.warning(
                f'requested mlflow artifact location {artifact_location} differs '
                f'from existing experiment artifact uri {mlf_artifact_uri}')

    return ret
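A hedged usage of this start_run alias as a context manager; the environment variables, the logged metric, and the assumption that the surrounding library's project/artifact configuration is already in place are illustrative only.

import os

os.environ["MLFLOW_EXPERIMENT_NAME"] = "uv-demo"   # picked up when experiment_name is None
os.environ["MLFLOW_RUN_NAME"] = "baseline"

with start_run(param_prefix="TRAIN_") as active_run:
    mlf.log_metric("loss", 0.12)
    print("run id:", active_run.info.run_id)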
Example #22
def log_mlflow_results(model, metrics, feat_config, model_config, tags):
    TRACKING_URI = "https://mlflow.caps.dev.dp.elsevier.systems"
    mlflow.set_tracking_uri(TRACKING_URI)
    mlflow.set_experiment("cp-ml-reference-separator-evaluator")
    with mlflow.start_run():
        mlflow.log_metrics(metrics)
        mlflow.keras.log_model(model, "models")
        mlflow.log_params(feat_config)
        mlflow.log_params(model_config)
        mlflow.set_tags(tags)
Example #23
 def mlflow_run(self, df):
     with mlflow.start_run() as run:
         run_id = run.info.run_uuid
         experiment_id = run.info.experiment_id
         # train test split
         train, test = train_test_split(df,
                                        test_size=0.2,
                                        random_state=42,
                                        stratify=df[['is_profit']])
         y = train['is_profit'].copy()
         X = train.drop(columns=['is_profit']).copy()
         y_test = test['is_profit'].copy()
         X_test = test.drop(columns=['is_profit']).copy()
         # pipeline
         float_cols = df.select_dtypes(include='float64').columns
         preprocessor = ColumnTransformer(
             [
                 ('StandardScaler', StandardScaler(), float_cols),
                 #('OneHotEncoder', OneHotEncoder(), cat_cols),
             ],
             remainder='passthrough')
         full_pipe = Pipeline(steps=[
             ('preprocessor', preprocessor),
             ('model', self.model),
         ])
         # fit
         t_start = time.time()
         full_pipe.fit(X, y)
         t_training = time.time() - t_start
         # predict
         t_start = time.time()
         y_test_pred_proba = full_pipe.predict_proba(X_test)
         t_prediction = time.time() - t_start
         # score
         proba_threshold = 0.75
         metrics = {
             'auroc':
             roc_auc_score(y_test, y_test_pred_proba[:, 1]),
             'precision':
             precision_score(y_test,
                             (y_test_pred_proba[:, 1] > proba_threshold)),
             't_training':
             t_training,
             't_prediction':
             t_prediction,
         }
         # log params, metrics, tags
         mlflow.log_params(self.params)
         mlflow.log_metrics(metrics)
         mlflow.set_tags(self.tags)
         # log Model
         #mlflow.sklearn.log_model(full_pipe, artifact_path='model')
         #wrapped_model = SklearnModelWrapper(full_pipe)
         #mlflow.pyfunc.log_model('model', python_model=wrapped_model)
         return full_pipe
Example #24
def log_tag(dry_run, model_name, data_name, suffix):
    if suffix is not None:
        suffix = suffix[1:]

    if not dry_run:
        mlflow.set_tags({
            'mlflow.runName': f'{model_name}-{data_name}-{suffix.upper()}',
            'model': model_name,
            'data': data_name,
            'suffix': suffix,
        })
Example #25
    def _set_all_tags(self):
        """Method collects dagster_run_id plus all env variables/tags that have been
            specified by the user in the config_schema and logs them as tags in mlflow.

        Returns:
            tags [dict]: Dictionary of all the tags
        """
        tags = {tag: environ.get(tag) for tag in self.env_tags_to_log}
        tags["dagster_run_id"] = self.dagster_run_id
        if self.extra_tags:
            tags.update(self.extra_tags)

        mlflow.set_tags(tags)
        return tags
Example #26
def mlflow_client(tmp_path_factory, resnet_model_uri: str,
                  spark: SparkSession) -> MlflowClient:
    tmp_path = tmp_path_factory.mktemp("mlflow")
    tmp_path.mkdir(parents=True, exist_ok=True)
    tracking_uri = "sqlite:///" + str(tmp_path / "tracking.db")
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("rikai-test", str(tmp_path))
    # simplest case
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.log_param("optimizer", "Adam")
        # Fake training loop
        model = torch.load(resnet_model_uri)
        artifact_path = "model"

        schema = ("STRUCT<boxes:ARRAY<ARRAY<float>>,"
                  "scores:ARRAY<float>,labels:ARRAY<int>>")
        pre_processing = ("rikai.contrib.torch.transforms."
                          "fasterrcnn_resnet50_fpn.pre_processing")
        post_processing = ("rikai.contrib.torch.transforms."
                           "fasterrcnn_resnet50_fpn.post_processing")
        rikai.mlflow.pytorch.log_model(
            model,  # same as vanilla mlflow
            artifact_path,  # same as vanilla mlflow
            schema,
            pre_processing,
            post_processing,
            registered_model_name="rikai-test",  # same as vanilla mlflow
        )

    # vanilla mlflow
    with mlflow.start_run():
        mlflow.pytorch.log_model(model,
                                 artifact_path,
                                 registered_model_name="vanilla-mlflow")
        mlflow.set_tags({
            "rikai.model.flavor": "pytorch",
            "rikai.output.schema": schema,
            "rikai.transforms.pre": pre_processing,
            "rikai.transforms.post": post_processing,
        })

    # vanilla mlflow no tags
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            model,
            artifact_path,
            registered_model_name="vanilla-mlflow-no-tags",
        )

    spark.conf.set("rikai.sql.ml.registry.mlflow.tracking_uri", tracking_uri)
    return mlflow.tracking.MlflowClient(tracking_uri)
Example #27
def test_set_tags():
    exact_expected_tags = {"name_1": "c", "name_2": "b", "nested/nested/name": 5}
    approx_expected_tags = set([MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])
    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.set_tags(exact_expected_tags)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate tags
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_val in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert str(exact_expected_tags[tag_key]) == tag_val
Example #28
    def start_callback(self, parameters):
        try:
            mlflow.set_experiment(self._experiment_name)
            if mlflow.active_run() is not None:
                mlflow.end_run()
            mlflow.start_run()
            mlflow.set_tags(self._mlflow_tags)
            mlflow.log_params(parameters)
            mlflow.log_params(self._mlflow_parameters)

        except mlflow.exceptions.MlflowException as msg:
            self._enable_mlflow = False
            print(f"[WARNING][MlFlowHandler] - [StartCallback] {msg}")
            print(f"[WARNING][MlFlowHandler] - [StartCallback] mlflow is disabled")
Example #29
def train_model(
    args_fp: Path = Path(config.CONFIG_DIR, "args.json"),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        args_fp (Path, optional): Location of arguments to use for training. Defaults to `config/args.json`.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Set experiment and start run
    args = Namespace(**utils.load_dict(filepath=args_fp))

    # Start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name) as run:  # NOQA: F841 (assigned to but never used)
        # Train
        artifacts = main.run(args=args)

        # Set tags
        tags = {"data_version": artifacts["data_version"]}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": artifacts["behavioral_report"]["score"],
            "slices_f1": performance["slices"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            utils.save_dict(performance, Path(dp, "performance.json"))
            utils.save_dict(artifacts["behavioral_report"],
                            Path(dp, "behavioral_report.json"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["args"]))
Example #30
def pMSE_test(args):
    """
    Parallelizable
    """
    d1, d2, mlflow_step, name, epsilon, synth_name, dataset_name = args
    pmse = pmse_ratio(d1, d2)
    with mlflow.start_run(nested=True):
        mlflow.set_tags({
            'metric_name': str(name),
            'dataset': dataset_name,
            'epsilon': str(epsilon),
            'synthesizer': str(synth_name),
            'pmse_score': str(pmse),
        })
    return float(pmse)