Code example #1
File: test_fluent.py  Project: iPieter/kiwi
def test_get_experiment_id_from_env():
    # When no env variables are set
    HelperEnv.assert_values(None, None)
    assert _get_experiment_id_from_env() is None

    # set only ID
    random_id = random.randint(1, 1e6)
    HelperEnv.set_values(experiment_id=random_id)
    HelperEnv.assert_values(str(random_id), None)
    assert _get_experiment_id_from_env() == str(random_id)

    # set only name
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        HelperEnv.set_values(name=name)
        HelperEnv.assert_values(None, name)
        assert _get_experiment_id_from_env() == exp_id

    # set both: assert that name variable takes precedence
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        random_id = random.randint(1, 1e6)
        HelperEnv.set_values(name=name, experiment_id=random_id)
        HelperEnv.assert_values(str(random_id), name)
        assert _get_experiment_id_from_env() == exp_id
Code example #2
File: test_tracking.py  Project: iPieter/kiwi
def test_create_experiments_with_bad_names():
    # None for name
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment(None)
    assert "Invalid experiment name: 'None'" in str(e.value)

    # empty string name
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment("")
    assert "Invalid experiment name: ''" in str(e.value)
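
A minimal alternative sketch, not taken from the project: pytest's built-in `match=` argument checks the exception message directly, so no assertion after the `with` block is needed. The test name below is hypothetical, and `kiwi` / `MlflowException` are assumed to be imported the same way as in the tests above.

import re

import pytest


def test_create_experiment_rejects_empty_name():
    # match= is applied as a regex search against str(exception),
    # so the literal message fragment is escaped.
    with pytest.raises(MlflowException, match=re.escape("Invalid experiment name: ''")):
        kiwi.create_experiment("")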
Code example #3
File: test_tracking.py  Project: iPieter/kiwi
def test_create_experiment_with_duplicate_name():
    name = "popular_name"
    exp_id = kiwi.create_experiment(name)

    with pytest.raises(MlflowException):
        kiwi.create_experiment(name)

    tracking.MlflowClient().delete_experiment(exp_id)
    with pytest.raises(MlflowException):
        kiwi.create_experiment(name)
Code example #4
File: test_fluent.py  Project: iPieter/kiwi
def test_get_experiment_by_name():
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)

        experiment = kiwi.get_experiment_by_name(name)
        assert experiment.experiment_id == exp_id
Code example #5
File: test_tracking.py  Project: iPieter/kiwi
def test_list_experiments():
    def _assert_exps(ids_to_lifecycle_stage, view_type_arg):
        result = set([
            (exp.experiment_id, exp.lifecycle_stage)
            for exp in client.list_experiments(view_type=view_type_arg)
        ])
        assert result == set([
            (exp_id, stage)
            for exp_id, stage in ids_to_lifecycle_stage.items()
        ])

    experiment_id = kiwi.create_experiment("exp_1")
    assert experiment_id == '1'
    client = tracking.MlflowClient()
    _assert_exps({
        '0': LifecycleStage.ACTIVE,
        '1': LifecycleStage.ACTIVE
    }, ViewType.ACTIVE_ONLY)
    _assert_exps({
        '0': LifecycleStage.ACTIVE,
        '1': LifecycleStage.ACTIVE
    }, ViewType.ALL)
    _assert_exps({}, ViewType.DELETED_ONLY)
    client.delete_experiment(experiment_id)
    _assert_exps({'0': LifecycleStage.ACTIVE}, ViewType.ACTIVE_ONLY)
    _assert_exps({
        '0': LifecycleStage.ACTIVE,
        '1': LifecycleStage.DELETED
    }, ViewType.ALL)
    _assert_exps({'1': LifecycleStage.DELETED}, ViewType.DELETED_ONLY)
Code example #6
File: test_fluent.py  Project: iPieter/kiwi
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id(
):
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        kiwi.set_experiment(name)
        assert _get_experiment_id() == exp_id
Code example #7
def test_runs_artifact_repo_init():
    artifact_location = "s3://blah_bucket/"
    experiment_id = kiwi.create_experiment("expr_abc", artifact_location)
    with kiwi.start_run(experiment_id=experiment_id):
        run_id = kiwi.active_run().info.run_id
    runs_uri = "runs:/%s/path/to/model" % run_id
    runs_repo = RunsArtifactRepository(runs_uri)

    assert runs_repo.artifact_uri == runs_uri
    assert isinstance(runs_repo.repo, S3ArtifactRepository)
    expected_absolute_uri = "%s%s/artifacts/path/to/model" % (
        artifact_location, run_id)
    assert runs_repo.repo.artifact_uri == expected_absolute_uri
Code example #8
def test_runs_artifact_repo_uses_repo_download_artifacts():
    """
    RunsArtifactRepository should delegate `download_artifacts` to its
    `self.repo.download_artifacts` method.
    """
    artifact_location = "s3://blah_bucket/"
    experiment_id = kiwi.create_experiment("expr_abcd", artifact_location)
    with kiwi.start_run(experiment_id=experiment_id):
        run_id = kiwi.active_run().info.run_id
    runs_repo = RunsArtifactRepository('runs:/{}'.format(run_id))
    runs_repo.repo = Mock()
    runs_repo.download_artifacts('artifact_path', 'dst_path')
    runs_repo.repo.download_artifacts.assert_called_once()
Code example #9
File: test_tracking.py  Project: iPieter/kiwi
def test_create_experiment():
    with pytest.raises(TypeError):
        kiwi.create_experiment()  # pylint: disable=no-value-for-parameter

    with pytest.raises(Exception):
        kiwi.create_experiment(None)

    with pytest.raises(Exception):
        kiwi.create_experiment("")

    exp_id = kiwi.create_experiment("Some random experiment name %d" %
                                    random.randint(1, 1e6))
    assert exp_id is not None
Code example #10
File: test_fluent.py  Project: iPieter/kiwi
def test_get_experiment_id_in_databricks_with_experiment_defined_in_env_returns_env_experiment_id(
):
    with TempDir(chdr=True):
        exp_name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)
        HelperEnv.set_values(experiment_id=exp_id)

    with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock,\
            mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
        notebook_detection_mock.side_effect = lambda *args, **kwargs: True
        notebook_id_mock.side_effect = lambda *args, **kwargs: notebook_id

        assert _get_experiment_id() != notebook_id
        assert _get_experiment_id() == exp_id
Code example #11
File: test_fluent.py  Project: iPieter/kiwi
def test_get_experiment_id_in_databricks_with_active_experiment_returns_active_experiment_id(
):
    with TempDir(chdr=True):
        exp_name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(exp_name)
        kiwi.set_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)

    with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock,\
            mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
        notebook_detection_mock.return_value = True
        notebook_id_mock.return_value = notebook_id

        assert _get_experiment_id() != notebook_id
        assert _get_experiment_id() == exp_id
Code example #12
File: test_tracking.py  Project: iPieter/kiwi
def test_search_runs_multiple_experiments():
    experiment_ids = [
        kiwi.create_experiment("exp__{}".format(exp_id))
        for exp_id in range(1, 4)
    ]
    for eid in experiment_ids:
        with kiwi.start_run(experiment_id=eid):
            kiwi.log_metric("m0", 1)
            kiwi.log_metric("m_{}".format(eid), 2)

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m0 > 0",
                                          ViewType.ALL)) == 3

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_1 > 0",
                                          ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_2 = 2",
                                          ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_3 < 4",
                                          ViewType.ALL)) == 1
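
As a hedged extra check, not part of the source test and assuming kiwi keeps MLflow's filter-string syntax where conditions are joined with `and`: only the run in the first experiment logs both `m0` and `m_1`, so the conjunction should match exactly one run. The assertion below could be appended to the test above.

    assert len(MlflowClient().search_runs(experiment_ids,
                                          "metrics.m0 > 0 and metrics.m_1 > 0",
                                          ViewType.ALL)) == 1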
Code example #13
File: test_tracking.py  Project: iPieter/kiwi
def test_set_experiment():
    with pytest.raises(TypeError):
        kiwi.set_experiment()  # pylint: disable=no-value-for-parameter

    with pytest.raises(Exception):
        kiwi.set_experiment(None)

    with pytest.raises(Exception):
        kiwi.set_experiment("")

    name = "random_exp"
    exp_id = kiwi.create_experiment(name)
    kiwi.set_experiment(name)
    with start_run() as run:
        assert run.info.experiment_id == exp_id

    another_name = "another_experiment"
    kiwi.set_experiment(another_name)
    exp_id2 = kiwi.tracking.MlflowClient().get_experiment_by_name(another_name)
    with start_run() as another_run:
        assert another_run.info.experiment_id == exp_id2.experiment_id
Code example #14
File: test_utils.py  Project: iPieter/kiwi
def test_fetch_create_and_log(tmpdir):
    entry_point_name = "entry_point"
    parameters = {
        "method_name": "string",
    }
    entry_point = _project_spec.EntryPoint(entry_point_name, parameters,
                                           "run_model.sh")
    mock_fetched_project = _project_spec.Project(
        None, {entry_point_name: entry_point}, None, "my_project")
    experiment_id = kiwi.create_experiment("test_fetch_project")
    expected_dir = tmpdir
    project_uri = "http://someuri/myproject.git"
    user_param = {"method_name": "newton"}
    with mock.patch("mlflow.projects.utils._fetch_project",
                    return_value=expected_dir):
        with mock.patch("mlflow.projects._project_spec.load_project",
                        return_value=mock_fetched_project):
            work_dir = fetch_and_validate_project("", "", entry_point_name,
                                                  user_param)
            project = load_project(work_dir)
            assert mock_fetched_project == project
            assert expected_dir == work_dir
            # Create a run
            active_run = get_or_create_run(run_id=None,
                                           uri=project_uri,
                                           experiment_id=experiment_id,
                                           work_dir=work_dir,
                                           version=None,
                                           entry_point=entry_point_name,
                                           parameters=user_param)

            # check tags
            run = kiwi.get_run(active_run.info.run_id)
            assert MLFLOW_PROJECT_ENTRY_POINT in run.data.tags
            assert MLFLOW_SOURCE_NAME in run.data.tags
            assert entry_point_name == run.data.tags[
                MLFLOW_PROJECT_ENTRY_POINT]
            assert project_uri == run.data.tags[MLFLOW_SOURCE_NAME]
            assert user_param == run.data.params
Code example #15
def test_model_log():
    with TempDir(chdr=True) as tmp:
        experiment_id = kiwi.create_experiment("test")
        sig = ModelSignature(inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
                             outputs=Schema([ColSpec(name=None, type="double")]))
        input_example = {"x": 1, "y": 2}
        with kiwi.start_run(experiment_id=experiment_id) as r:
            Model.log("some/path", TestFlavor,
                      signature=sig,
                      input_example=input_example)

        local_path = _download_artifact_from_uri("runs:/{}/some/path".format(r.info.run_id),
                                                 output_path=tmp.path(""))
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.signature == sig
        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
Code example #16
def run(original="data/processed/jokes.json",
        replaced="data/processed/dynamic_template_jokes.json"):
    dataset = RankSequenceDataset(
        original=original,
        replaced=replaced,
        embeddings_path="./data/raw/roularta-160.txt")

    # Create splits
    train_size = int(0.7 * len(dataset))
    val_size = int((len(dataset) - train_size) / 2)
    test_size = len(dataset) - train_size - val_size
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(1))

    # Create PyTorch dataloaders
    train_loader = DataLoader(train_dataset, batch_size=64, num_workers=8)
    val_loader = DataLoader(val_dataset, batch_size=64, num_workers=8)
    test_loader = DataLoader(test_dataset, batch_size=64, num_workers=8)

    # Register dataset with kiwi
    current_experiment_id: int = kiwi.create_experiment(
        str(datetime.datetime.now()))
    kiwi.register_training_dataset(dataloader=train_dataset,
                                   dataset_location=original,
                                   experiment_id=current_experiment_id)
    kiwi.register_dev_dataset(dataloader=val_dataset,
                              dataset_location=original,
                              experiment_id=current_experiment_id)
    kiwi.register_test_dataset(dataloader=test_dataset,
                               dataset_location=original,
                               experiment_id=current_experiment_id)

    def objective(args):
        # start a run
        with kiwi.start_run(experiment_id=current_experiment_id):
            # register hyperparams
            for key, value in args.items():
                kiwi.log_param(key, value)

            # Define model
            model = RankedNetworkCNNModule(args['learning_rate'],
                                           dataset.get_embeddings(),
                                           hidden_dim=args['hidden'],
                                           output_labels=2)

            # Train (obviously)
            trainer = pl.Trainer(max_epochs=15, logger=KiwiLogger())
            trainer.fit(model, train_loader, val_loader)

            # Evaluation on held-out test-set
            with torch.no_grad():
                model.eval()
                results = pd.DataFrame(columns=['labels', 'predictions'])
                for batch_idx, batch in enumerate(test_loader):
                    y_hat = model(batch['a'], batch['b'])

                    results = results.append(
                        pd.DataFrame({
                            'labels': batch['label'].flatten(),
                            'predictions': y_hat.detach().argmax(axis=1),
                        }),
                        ignore_index=True)
                results.to_csv()

                # With a nice confusion matrix
                confusion_matrix(y_pred=results['predictions'].values,
                                 y_true=results['labels'].values,
                                 classes=[0, 1])

                cm = ConfusionMatrix(
                    actual_vector=results['labels'].values,
                    predict_vector=results['predictions'].values)

                output_test_results = "cm.txt"
                cm.save_stat(output_test_results)

                output_test_predictions_file = "test_predictions.txt"
                np.savetxt(output_test_predictions_file,
                           results['predictions'].values,
                           delimiter=",")

                kiwi.log_metric(key="test_acc", value=cm.Overall_ACC)
                kiwi.log_metric(key="test_f1_micro", value=cm.F1_Micro)
                kiwi.log_metric(key="test_f1_macro", value=cm.F1_Macro)
                kiwi.log_metric(key="test_ci_pm",
                                value=cm.CI95[1] - cm.Overall_ACC)
                kiwi.log_metric(key="test_ci_pm",
                                value=cm.CI95[1] - cm.Overall_ACC)
                kiwi.log_artifact(output_test_predictions_file)
                kiwi.log_artifact(output_test_results + ".pycm")

            return cm.Overall_ACC

    space = {
        'learning_rate': ("range", [1e-3, 1e-1]),
        # 'batch_size': ("choice", [4, 8, 16, 32, 64, 128]),
        'hidden': ("choice", [16])
    }

    kiwi.start_experiment(current_experiment_id,
                          hp_space=space,
                          objective=objective,
                          max_evals=10,
                          mode="random")
Code example #17
File: test_tracking.py  Project: iPieter/kiwi
def test_create_experiments_with_bad_name_types(name):
    with pytest.raises(MlflowException) as e:
        kiwi.create_experiment(name)
    assert ("Invalid experiment name: %s. Expects a string." % name) in str(e.value)