Example #1
def run_experiment():
    with mlflow.start_run():

        mlflow.set_tag("agent_type", "ppo")
        mlflow.log_param("actor_fc_layers", actor_fc_layers)
        mlflow.log_param("value_fc_layers", value_fc_layers)
        mlflow.log_param("importance_ratio_clipping",
                         importance_ratio_clipping)
        mlflow.log_param("discount", discount)

        while environment_steps_metric.result() < num_environment_steps:
            print("training step {counter} out of {total_steps} \n".format(
                counter=environment_steps_metric.result(),
                total_steps=num_environment_steps))
            global_step_val = global_step.numpy()
            collect_driver.run()

            print("collect time", collect_time)

            total_loss, _ = train_step()
            replay_buffer.clear()

            t, u = calculate_u_metric(eval_tf_env, tf_agent.policy)
            mlflow.log_metric("u_metric", u)
            mlflow.log_metric("t_metric", t)

            for train_metric in train_metrics:
                train_metric.tf_summaries(train_step=global_step,
                                          step_metrics=step_metrics)
    def experiment(pattern, expt_conf):
        for i, param in enumerate(hyperparameters.keys()):
            expt_conf[param] = pattern[i]

        expt_conf['model_path'] = str(
            expt_dir /
            f"{'_'.join([str(p).replace('/', '-') for p in pattern])}.pth")
        expt_conf[
            'log_id'] = f"{'_'.join([str(p).replace('/', '-') for p in pattern])}"
        wav_path = Path(
            expt_conf['manifest_path']).resolve().parents[1] / 'wav'
        load_func = set_load_func(wav_path, expt_conf['sample_rate'],
                                  expt_conf['n_waves'])

        with mlflow.start_run():
            mlflow.set_tag('target', expt_conf['target'])
            result_series, val_pred = typical_train(expt_conf, load_func,
                                                    label_func, dataset_cls,
                                                    groups, val_metrics)

            mlflow.log_params({
                hyperparameter: value
                for hyperparameter, value in zip(hyperparameters.keys(),
                                                 pattern)
            })
            mlflow.log_artifacts(expt_dir)

        return result_series, val_pred
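A likely driver for the helper above, assuming each pattern is one combination drawn from the hyperparameter grid (the grid and base config below are illustrative; other names used by experiment() are taken from its enclosing scope as in the original):

from itertools import product

hyperparameters = {'lr': [1e-3, 1e-4], 'batch_size': [16, 32]}
base_conf = {'manifest_path': 'data/manifests/train.csv', 'sample_rate': 16000,
             'n_waves': 1, 'target': 'label'}

for pattern in product(*hyperparameters.values()):
    result_series, val_pred = experiment(pattern, dict(base_conf))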
Example #3
def train(data_path, ntrees, log_as_onnx):
    train_data, test_data, train_cols = prepare_data(args.data_path)
    with mlflow.start_run() as run:
        exp = client.get_experiment(run.info.experiment_id)
        print("MLflow:")
        print("  run id:", run.info.run_id)
        print("  experiment id:", run.info.experiment_id)
        print("  experiment name:", exp.name)
        print("  experiment artifact_location:", exp.artifact_location)
        rf = H2ORandomForestEstimator(ntrees=ntrees)
        rf.train(train_cols,
                 "quality",
                 training_frame=train_data,
                 validation_frame=test_data)

        mlflow.log_param("ntrees", ntrees)

        mlflow.log_metric("rmse", rf.rmse())
        mlflow.log_metric("r2", rf.r2())
        mlflow.log_metric("mae", rf.mae())

        mlflow.set_tag("mlflow_version", mlflow.__version__)
        mlflow.set_tag("h2o_version", h2o.__version__)

        mlflow.h2o.log_model(rf, "h2o-model")

        if log_as_onnx:
            import onnxmltools
            from onnxmltools.convert import convert_h2o
            print("onnxmltools.version:", onnxmltools.__version__)
            path = f"{exp.artifact_location}/{run.info.run_id}/artifacts/h2o-model/model.h2o"
            onnx_model = convert_h2o(path)
            print("onnx_model.type:", type(onnx_model))
def bagging_rgcn_embeddings(adj_matrix_files, node_labels_file,
                            embeddings_file, use_cuda, params, metadata):
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        n_nodes = labels.size(0)

        embeddings = data_loaders.load_embeddings(embeddings_file,
                                                  use_cuda=use_cuda)

        graph = data_loaders.load_graph(
            adj_matrix_files,
            n_nodes,
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = loocv.run(labels=labels,
                             model_class=Bagging,
                             bagging_model=RGCN,
                             graph=graph,
                             features=embeddings,
                             n_rels=len(adj_matrix_files),
                             **params)

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME, params)
Example #5
def sv_model(params, model):
    mlflow.log_params(params)
    mlflow.set_tag("running_from_jupyter", "True")
    #mlflow.log_artifact("./models")
    #mlflow.sklearn.log_model(model, "model")
    path = f"models/{params['model']+params['label']}_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}"
    save_model(sk_model=model, path=path)
def mlflow_log(history, config, machine_type, out_path, tb_log_dir):
    mlflow.set_tracking_uri(config['IO_OPTION']['MLFLOW_PATH']+'/mlruns')
    run_name = config['IO_OPTION']['model_name']+'_'+machine_type
    with mlflow.start_run(run_name=run_name) as run:
        # Log the IO_OPTION and etc sections into mlflow
        mlflow.set_tags(config['IO_OPTION'])
        mlflow.set_tags(config['etc'])
        mlflow.set_tag('machine_type', machine_type)
        mlflow.set_tag('tb_log_dir', tb_log_dir)
        # Log spectrogram_param into mlflow
        for key, value in config['mel_spectrogram_param'].items():
            mlflow.log_param(key, value)
        # log fit param
        for key, value in config['fit'].items():
            mlflow.log_param(key, value)
        # Log other info
        mlflow.log_param('loss_type', 'MSE')
        
        # Log results into mlflow
        mlflow.log_metric('train_epoch_score', history['epoch_score_lists']['train'][-1])
        mlflow.log_metric('valid_epoch_score', history['epoch_score_lists']['valid'][-1])

        # Log model
        mlflow.log_artifact(out_path)
    mlflow.end_run()
def train_model():
    #Connect to tracking server
    mlflow.set_tracking_uri("http://127.0.0.1:5500")

    #Set experiment
    mlflow.set_experiment("/test-experiment")

    print(f"tracking_uri: {mlflow.get_tracking_uri()}")
    print(f"artifact_uri: {mlflow.get_artifact_uri()}")
    artifact_uri = mlflow.get_artifact_uri()

    # Create directory for artifacts
    if not os.path.exists("tests/data/artifact_folder"):
        os.makedirs("tests/data/artifact_folder")

    #Test parameters
    # with mlflow.start_run() as run:
    mlflow.log_param("param1", randint(0, 100))

    #Test metrics
    mlflow.log_metric("metric1", random())
    mlflow.log_metric("metric2", random())
    mlflow.log_metric("metric3", random())
    mlflow.log_metric("metric4", random())

    #Test tags
    mlflow.set_tag("tag", "version1")
Example #8
def rwr_m(adj_matrix_files, node_labels_file, use_cuda, params, metadata):
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)

        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        n_nodes = labels.size(0)

        print("Loading adjacency matrices")
        adj_matrices = data_loaders.load_adj_matrices(adj_matrix_files,
                                                      normalization=None,
                                                      use_cuda=use_cuda)

        adjacency_matrices = torch.stack(adj_matrices)

        print(RUN_NAME)
        ranks_df = loocv.run(labels=labels,
                             model_class=RwrM,
                             adjacency_matrices=adjacency_matrices,
                             **params)

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME, params)
Example #9
def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    node_labels_file,
    use_cuda,
    params,
    metadata,
):
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=run_name):
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)

        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("merged_layers", True)

        labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        n_nodes = labels.size(0)

        adjacency_matrix = data_loaders.load_adj_matrices(
            [adj_matrix_file], normalization=normalization,
            use_cuda=use_cuda)[0]

        print(run_name)
        ranks_df = loocv.run(labels=labels,
                             model_class=NetPropWithRestart,
                             adjacency_matrix=adjacency_matrix,
                             **params)

        data_savers.save_ranks(ranks_df, n_nodes, run_name, params)
Example #10
def register_model(model_id, host_name, did_pass_acceptance_test):
    logger = logging.getLogger(__name__)
    mlflow.set_tracking_uri(uri=host_name)

    logger.info(f"Reporting data for model {model_id}")

    file_names = get_model_files(model_id)
    specification = get_json(file_names['model_specification'])

    mlflow.set_experiment(specification['problem_name'])

    with mlflow.start_run(run_name=model_id):
        log_param("ProblemName", specification['problem_name'])
        log_param("MLPipelineParamsName",
                  specification['ml_pipeline_params_name'])
        log_param("FeatureSetName", specification['feature_set_name'])
        log_param("AlgorithmName", specification['algorithm_name'])
        log_param("AlgorithmParamsName",
                  specification['algorithm_params_name'])

        set_tag("DidPassAcceptanceTest", did_pass_acceptance_test)
        set_tag("BuildNumber", os.getenv('BUILD_NUMBER'))

        log_model_metrics_file(file_names["model_metrics"])
        log_ml_pipeline_params_file(file_names["ml_pipeline_params"])
        log_artifacts(file_names['results_folder'])
Example #11
    def run(self):
        self.fetch_data()
        self.preprocessing_train()
        data = self.dataset
        model = models.base_model()
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(data.train_data,
                            data.train_label,
                            epochs=self.params.epoch)  # epoch 1
        test_loss, test_acc = model.evaluate(data.test_data,
                                             data.test_label,
                                             verbose=1)
        fname = TASK + ".hdf5"
        model.save(os.path.join(self.catalog.output.model, fname))

        # Save with MLflow
        EXPERIMENT_NAME = 'mnist_exp'
        set_experiment(EXPERIMENT_NAME)
        set_tag("task", TASK)
        log_param("data", self.catalog.mnist.raw_pkl)
        log_param("epoch", self.params.epoch)
        log_param("model name", self.params.model_name)
        log_metric("test loss", test_loss)
        log_metric("test acc", test_acc)
        plot(history)
Example #12
def import_run_data(run_dct):
    for k, v in run_dct['params'].items():
        mlflow.log_param(k, v)
    for k, v in run_dct['metrics'].items():
        mlflow.log_metric(k, v)
    for k, v in run_dct['tags'].items():
        mlflow.set_tag(k, v)
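A minimal usage sketch for the function above; the dictionary layout is an assumption inferred from the loop keys, and the surrounding start_run call is added for illustration:

import mlflow

run_dct = {
    'params': {'learning_rate': '0.01'},
    'metrics': {'accuracy': 0.93},
    'tags': {'stage': 'imported'},
}

with mlflow.start_run():
    import_run_data(run_dct)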
Example #13
 def track_with_mlflow(self, tracking_uri, experiment_name, run_name=None):
     """
     Connect to an MLflow server to log parameters and metrics
     
     :tracking_uri: string; Address of local or remote tracking server.
     :experiment_name: string; name of experiment to log to. if experiment 
             doesn't exist, creates one with this name
     :run_name: string; name of run to log to. if run doesn't exist, 
         one will be created
     """
     # set up a connection to the server, an experiment, and the run
     import mlflow
     mlflow.set_tracking_uri(tracking_uri)
     mlflow.set_experiment(experiment_name)
     run = mlflow.start_run(run_name=run_name)
     
     self._mlflow = {"run":run}
     # log all our parameters for the model, input configuration, and
     # data augmentation
     mlflow.log_params({"model_"+k:self.config[k] for k in self.config})
     mlflow.log_params({"input_"+k:self.input_config[k] for k in 
                        self.input_config})
     if self.augment_config is not False:
         mlflow.log_params({"augment_"+k:self.augment_config[k] for k in 
                        self.augment_config})
     if self._notes is not None:
         mlflow.set_tag("mlflow.note.content", self._notes)
         
     self._log_metrics = mlflow.log_metrics
Example #14
def export_mlflow(model, y_pred):
    try:
        model_save_path = os.path.join(os.sep, 'tmp', 'tmp.obj')
        with open(model_save_path, mode='wb') as f:
            pickle.dump(model, f)

        for k, v in model.get_params().items():
            mlflow.log_param(k, v)

        mlflow.log_metric('accuracy',
                          accuracy_score(y_pred=y_pred, y_true=y_test))
        mlflow.log_metric(
            'precision',
            precision_score(y_pred=y_pred, y_true=y_test, average='micro'))
        mlflow.log_metric(
            'recall',
            recall_score(y_pred=y_pred, y_true=y_test, average='micro'))
        mlflow.log_metric(
            'f1_score', f1_score(y_pred=y_pred, y_true=y_test,
                                 average='micro'))

        mlflow.set_tag('example', 10)
        mlflow.set_tag('example2', 'test')

        mlflow.log_artifact(model_save_path)
    except Exception as e:
        return False

    return True
Example #15
def train_model(params):
    # With MLflow autologging, hyperparameters and the trained model are automatically logged to MLflow.
    mlflow.xgboost.autolog()
    with mlflow.start_run(nested=True):
        train = xgb.DMatrix(data=X_train, label=y_train)
        test = xgb.DMatrix(data=X_test, label=y_test)

        # Train
        booster = xgb.train(params=params, dtrain=train, num_boost_round=20,\
                            evals=[(test, "test")], early_stopping_rounds=10)
        # Evaluate on test set
        predictions_test = booster.predict(test)

        # Calculate AUC, F1 & log values
        auc_score = roc_auc_score(y_test, predictions_test)
        mlflow.log_metric('auc', auc_score)
        f1_score_ = f1_eval(predictions_test, test)
        mlflow.log_metric('F1', f1_score_)

        # Log model signature (for traceability)
        signature = infer_signature(X_train, booster.predict(train))
        mlflow.xgboost.log_model(booster, "model", signature=signature)

        # Add tag for searchability
        mlflow.set_tag("model", model_name)

        # Set the loss to -1*auc_score so fmin maximizes the auc_score
        return {
            'status': STATUS_OK,
            'loss': -1 * auc_score,
            'booster': booster.attributes()
        }
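The dictionary returned above follows the Hyperopt convention (STATUS_OK plus a loss to minimize), so a driver along these lines is the likely caller; the search space and run name below are illustrative assumptions:

import mlflow
from hyperopt import fmin, tpe, hp, Trials

search_space = {
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'min_child_weight': hp.uniform('min_child_weight', 1, 10),
}

with mlflow.start_run(run_name='xgboost_search'):
    # Each trial calls train_model, which logs itself as a nested MLflow run.
    best_params = fmin(fn=train_model,
                       space=search_space,
                       algo=tpe.suggest,
                       max_evals=16,
                       trials=Trials())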
Example #16
def create_runs():
    create_experiment()
    with mlflow.start_run() as run:
        mlflow.log_param("p1", "hi")
        mlflow.log_metric("m1", 0.786)
        mlflow.set_tag("t1", "hi")
    return client.search_runs(run.info.experiment_id, "")
Example #17
 def on_train_end(self, logs=None):
     # serialize weights to HDF5
     self.model.save_weights(self.checkpoint_path + "final_epoch_model_weights.hdf5")
     # log file to mlflow
     mlflow.log_artifact(local_path=self.checkpoint_path)
     mlflow.set_tag("Current epoch", "Done!")
     return
Example #18
def train_population(population, generation, dMatrixTrain, dMatrixtest, y_train, y_test):
    fScore = []
    for i in range(population.shape[0]):
        with mlflow.start_run(run_name='XGBoostGA'):
            param = { 'objective':'binary:logistic', #insert fitness function here
                'booster': 'gbtree',
                'learning_rate': population[i][0],
                'n_estimators': population[i][1], 
                'max_depth': int(population[i][2]), 
                'min_child_weight': population[i][3],
                'gamma': population[i][4], 
                'subsample': population[i][5],
                'colsample_bytree': population[i][6],
                'seed': 24}
            mlflow.log_params(param)
            mlflow.set_tag('type', 'evolution')
            num_round = 100
            mlflow.log_param('num_round', 100)
            xgbT = xgb.train(param, dMatrixTrain, num_round)
            train_preds = xgbT.predict(dMatrixTrain)
            train_preds = train_preds>0.5
            test_preds = xgbT.predict(dMatrixtest)
            test_preds = test_preds>0.5
            mlflow.log_param('generation', generation)
            mlflow.log_param('gen_model_id', i)
            train_f_score = fitness_f1score(y_train, train_preds)
            train_rmse = mean_squared_error(y_train, train_preds)**0.5
            test_f_score = fitness_f1score(y_test, test_preds)
            test_rmse = mean_squared_error(y_test, test_preds)**0.5
            mlflow.log_metric('train_rmse',train_rmse) 
            mlflow.log_metric('train_F1Score',train_f_score)
            mlflow.log_metric('test_rmse',test_rmse) 
            mlflow.log_metric('test_F1Score',test_f_score)
            fScore.append(test_f_score)        
    return fScore
Example #19
def train(run_name, params, X_train, X_test, Y_train, Y_test):

  with mlflow.start_run(run_name=run_name) as run:
  
    rf = RandomForestClassifier(**params)
    rf.fit(X_train, Y_train)
    predictions = rf.predict(X_test)

    # Log params
    mlflow.log_params(params)
    
    # Add source data info
    mlflow.set_tag("source_table", table_name)
    mlflow.set_tag("source_table_history_version", latest_table_version)
    
    # Log metrics
    metrics = eval_metrics(rf, X_test, Y_test)
    mlflow.log_metrics(metrics)

    # Log model
    mlflow.sklearn.log_model(rf, "random-forest-model")
    
    # log some other artifacts
    importance = pd.DataFrame(list(zip(df.columns, rf.feature_importances_)), 
                                columns=["Feature", "Importance"]
                              ).sort_values("Importance", ascending=False)
    print(importance)
    csvPath = "/tmp/feature-importance.csv"
    importance.to_csv(csvPath, index=False)
    
    mlflow.log_artifact(csvPath)

  return run.info.run_uuid
Example #20
    def before_pipeline_run(self, run_params: Dict[str, Any],
                            pipeline: Pipeline, catalog: DataCatalog) -> None:
        """Hook to be invoked before a pipeline runs.
        Args:
            run_params: The params needed for the given run.
                Should be identical to the data logged by Journal.
                # @fixme: this needs to be modelled explicitly as code, instead of comment
                Schema: {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
            pipeline: The ``Pipeline`` that will be run.
            catalog: The ``DataCatalog`` to be used during the run.
        """
        self.context = load_context(
            project_path=run_params["project_path"],
            env=run_params["env"],
            extra_params=run_params["extra_params"],
        )

        mlflow_conf = get_mlflow_config(self.context)
        mlflow_conf.setup(self.context)

        run_name = (mlflow_conf.run_opts["name"]
                    if mlflow_conf.run_opts["name"] is not None else
                    run_params["pipeline_name"])
        mlflow.start_run(
            run_id=mlflow_conf.run_opts["id"],
            experiment_id=mlflow_conf.experiment.experiment_id,
            run_name=run_name,
            nested=mlflow_conf.run_opts["nested"],
        )
        # Set tags only for run parameters that have values.
        mlflow.set_tags({k: v for k, v in run_params.items() if v})
        # add manually git sha for consistency with the journal
        # TODO : this does not take into account not committed files, so it
        # does not ensure reproducibility. Define what to do.
        mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
        mlflow.set_tag(
            "kedro_command",
            _generate_kedro_command(
                tags=run_params["tags"],
                node_names=run_params["node_names"],
                from_nodes=run_params["from_nodes"],
                to_nodes=run_params["to_nodes"],
                from_inputs=run_params["from_inputs"],
                load_versions=run_params["load_versions"],
                pipeline_name=run_params["pipeline_name"],
            ),
        )
Example #21
    def run_experiment(self, library, Model, params, highlighted=False):
        model_name = ""
        if Model:
            model_name = Model.__name__
        else:
            model_name = library

        with mlflow.start_run():
            mlflow.set_tag("highlighted", highlighted)
            if library == 'sklearn':
                model = Model(**params).fit(self.X_train, self.y_train)
                params['model_name'] = model_name
                mlflow.log_params(params)
                mlflow.log_metric('accuracy',
                                  model.score(self.X_test, self.y_test))
            elif library == 'xgboost':
                dtrain = xgb.DMatrix(self.X_train, label=self.y_train)
                dtest = xgb.DMatrix(self.X_test, label=self.y_test)
                model = xgb.train(params, dtrain, evals=[(dtrain, 'train')])
                y_proba = model.predict(dtest)
                y_pred = y_proba.argmax(axis=1)
                loss = log_loss(self.y_test, y_proba)
                acc = accuracy_score(self.y_test, y_pred)
                mlflow.log_metrics({'log_loss': loss, 'accuracy': acc})
                mlflow.log_param('model_name', model_name)
Example #22
def classificationModel(stages, params, train, test):
    pipeline = Pipeline(stages=stages)

    with mlflow.start_run(run_name=mlflow_exp_name) as ml_run:
        for k, v in params.items():
            mlflow.log_param(k, v)

        mlflow.set_tag("state", "dev")

        model = pipeline.fit(train)
        predictions = model.transform(test)

        evaluator = MulticlassClassificationEvaluator(
            labelCol="indexedLabel",
            predictionCol="prediction",
            metricName="accuracy")
        accuracy = evaluator.evaluate(predictions)
        predictions.select("predicted_quality", "quality").groupBy(
            "predicted_quality",
            "quality").count().toPandas().to_pickle("confusion_matrix.pkl")

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_artifact("confusion_matrix.pkl")
        mlflow.spark.log_model(model, "spark-model")

        print("Documented with MLflow Run id %s" % ml_run.info.run_uuid)

    return model, predictions, accuracy, ml_run.info
Example #23
def _upload_logs(local_log_path: str) -> None:
    try:
        mlflow.set_tag("log_url", _log_url())
        mlflow.log_artifact(local_log_path)
    except Exception:
        print("failed to upload logs to mlflow")
        traceback.print_exc()
def train_iobjectspy_voc(train_data_path, train_config_path, weight_path,
                         max_iter, out_dir, register_train_name,
                         ml_set_tracking_path, experiment_id,
                         ml_experiment_tag):
    cfg = get_cfg()
    cfg.merge_from_file(train_config_path)
    cfg.DATASETS.TRAIN = (register_train_name, )
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.MODEL.WEIGHTS = weight_path  # initialize from model zoo
    if max_iter == -1:
        pass
    else:
        cfg.SOLVER.MAX_ITER = max_iter
    num_class = get_class_num(train_data_path)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_class  # get classes from sda
    cfg.OUTPUT_DIR = out_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=True)
    trainer.train()
    try:
        import mlflow as ml
        # Configure MLflow
        ml.set_tracking_uri(ml_set_tracking_path)
        # Manage experiments by using different experiment IDs; naming this level after the project is recommended, e.g. iobjectspy_faster_rcnn_dota
        ml.set_experiment(experiment_id)
        # By setting a tag
        ml.set_tag('experiment_id', ml_experiment_tag)
        ml.log_param('lr', cfg.SOLVER.BASE_LR)
        ml.log_param('max_iter', cfg.SOLVER.MAX_ITER)
        ml.log_param('epoch', cfg.SOLVER.IMS_PER_BATCH)
    except:
        pass
    def mlflow_commands():
        mlflow.log_params(logs["parameters"])

        for ss, vv in zip(logs["metric"]["step"], logs["metric"]["val"]):
            mlflow.log_metric(logs["metric"]["key"], vv, step=ss)

        mlflow.set_tag(logs["tag"]["key"], logs["tag"]["val"])
Example #26
def direct_neighbors(adj_matrix_file, train_node_labels_file,
                     test_node_labels_file, use_cuda, metadata):
    mlflow.set_experiment("Test")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)

        train_labels = data_loaders.load_labels(train_node_labels_file,
                                                use_cuda=use_cuda)
        test_labels = data_loaders.load_labels(test_node_labels_file,
                                               use_cuda=use_cuda)
        labels = (train_labels.byte() | test_labels.byte()).long()
        train_mask = ~test_labels.byte()
        n_nodes = labels.size(0)

        graph = data_loaders.load_graph(
            [adj_matrix_file],
            n_nodes,
            self_loop=False,
            normalization=None,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = test.run(
            labels=labels,
            train_mask=train_mask,
            model_class=DirectNeighbors,
            graph=graph,
        )

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME)
def direct_neighbors(adj_matrix_file, node_labels_file, use_cuda, metadata):
    mlflow.set_experiment("LOOCV")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)
        mlflow.log_param("merged_layers", True)

        labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        n_nodes = labels.size(0)

        graph = data_loaders.load_graph(
            [adj_matrix_file],
            n_nodes,
            self_loop=False,
            normalization=None,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = loocv.run(labels=labels,
                             model_class=DirectNeighbors,
                             graph=graph)

        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME)
def train_keras_model(train_desc,
                      test_desc,
                      train_cat,
                      test_cat,
                      distributed=False,
                      shop="all"):
    with mlflow.start_run(run_name="keras", nested=True):
        if (distributed):
            from hyperopt import SparkTrials
            trials = SparkTrials()
        else:
            trials = Trials()
        run_keras = RunKeras(train_desc, test_desc, train_cat, test_cat)
        argmin = fmin(run_keras.keras_model,
                      get_search_space(),
                      algo=tpe.suggest,
                      max_evals=10,
                      show_progressbar=True,
                      trials=trials)
        best_params = space_eval(get_search_space(), argmin)
        best_model, f1 = run_keras.train_model(best_params)
        #mlflow.keras.log_model(best_model, 'model')
        mlflow.log_metric("f1", f1)
        #mlflow.log_metric("delta_version", delta_version)
        mlflow.set_tag("shop", shop)
        mlflow.set_tag("model", "keras_classifier")
        return argmin
    def eval_and_log_metrics(self, estimator, X, Y):
        predictions = estimator.predict(X)

        # Calc metrics
        acc = accuracy_score(Y, predictions)
        roc = roc_auc_score(Y, predictions)
        mse = mean_squared_error(Y, predictions)
        mae = mean_absolute_error(Y, predictions)
        r2 = r2_score(Y, predictions)

        # Print metrics
        print("  acc: {}".format(acc))
        print("  roc: {}".format(roc))
        print("  mse: {}".format(mse))
        print("  mae: {}".format(mae))
        print("  R2: {}".format(r2))

        # Log metrics
        mlflow.log_metric("acc", acc)
        mlflow.log_metric("roc", roc)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        mlflow.set_tag('candidate', 'true')
Example #30
    def start_run(
        mlflow_config: MlflowConfig,
        exp: str,
        name: str,
        verbose: bool = True,
    ):
        if mlflow.active_run() is not None:
            mlflow.end_run()
        mlflow.set_tracking_uri(mlflow_config.uri)
        exp_name = mlflow_config.exp_name
        if mlflow.get_experiment_by_name(exp_name) is None:
            mlflow.create_experiment(exp_name)
        experiment_id = mlflow.get_experiment_by_name(exp_name).experiment_id

        search_df: Any = mlflow.search_runs(
            [experiment_id], filter_string=f"tags.exp = '{exp}'")
        resume = len(search_df) > 0
        if resume:
            run_id = search_df["run_id"].iloc[0]
            mlflow.start_run(run_id=run_id, experiment_id=experiment_id)
        else:
            mlflow.start_run(run_name=f"{exp}: {name}",
                             experiment_id=experiment_id)
            mlflow.set_tag("exp", exp)

        if verbose:
            print(f"mlflow started. experiment name: {exp_name}")
            print(f"{mlflow_config.uri}/#/experiments/{experiment_id}")