Exemple #1
0
def test_list_artifact_kind_filter(db: DBInterface, db_session: Session):
    """Store one chart and one plot artifact and check the ``kind`` filter of ``list_artifacts``."""
    shared_uid = "artifact_uid"
    chart_name, chart_kind = "artifact_name_1", ChartArtifact.kind
    plot_name, plot_kind = "artifact_name_2", PlotArtifact.kind
    chart_body = _generate_artifact(chart_name, kind=chart_kind)
    plot_body = _generate_artifact(plot_name, kind=plot_kind)

    db.store_artifact(db_session, chart_name, chart_body, shared_uid)
    db.store_artifact(db_session, plot_name, plot_body, shared_uid)

    # With no filter, both stored artifacts are expected
    assert len(db.list_artifacts(db_session)) == 2

    # Filtering by a kind must yield exactly the artifact stored with that kind
    for kind, expected_name in ((chart_kind, chart_name), (plot_kind, plot_name)):
        matches = db.list_artifacts(db_session, kind=kind)
        assert len(matches) == 1
        assert matches[0]["metadata"]["name"] == expected_name
Exemple #2
0
def test_delete_artifacts_tag_filter(db: DBInterface, db_session: Session):
    """Check that ``del_artifacts`` with a ``tag`` argument removes only the matching artifact."""
    first_key, second_key = "artifact_key_1", "artifact_key_2"
    first_uid, second_uid = "artifact_uid_1", "artifact_uid_2"
    first_body = _generate_artifact(first_key, uid=first_uid)
    second_body = _generate_artifact(second_key, uid=second_uid)
    first_tag, second_tag = "artifact_tag_one", "artifact_tag_two"

    def _count_tagged(tag):
        # Number of artifacts currently listed under the given tag
        return len(db.list_artifacts(db_session, tag=tag))

    stored = (
        (first_key, first_body, first_uid, first_tag),
        (second_key, second_body, second_uid, second_tag),
    )
    for key, body, uid, tag in stored:
        db.store_artifact(db_session, key, body, uid, tag=tag)

    # Deleting by the first tag must leave the second artifact untouched
    db.del_artifacts(db_session, tag=first_tag)
    assert _count_tagged(first_tag) == 0
    assert _count_tagged(second_tag) == 1

    # NOTE(review): the second artifact's uid (not its tag name) is passed as the tag here;
    # presumably the DB layer accepts a uid wherever a tag is expected - confirm
    db.del_artifacts(db_session, tag=second_uid)
    assert _count_tagged(second_tag) == 0
Exemple #3
0
def test_list_artifact_iter_parameter(db: DBInterface, db_session: Session):
    """Exercise the ``iter`` filter of ``list_artifacts``, alone and combined with a name filter."""
    uid = "artifact_uid"
    names = ("artifact_name_1", "artifact_name_2")
    bodies = [_generate_artifact(name) for name in names]

    # Iteration numbers with several digits, to make sure filtering them via regex works
    iterations = [0, 5, 9, 42, 219, 2102]
    for iteration in iterations:
        for body in bodies:
            body["iter"] = iteration
        for name, body in zip(names, bodies):
            db.store_artifact(db_session, name, body, uid, iteration)

    # No iter filter: every stored (artifact, iteration) pair is expected
    everything = db.list_artifacts(db_session)
    assert len(everything) == len(iterations) * 2

    # Query each iteration number. Note that 0 is a special case due to the DB structure
    for iteration in iterations:
        listed = db.list_artifacts(db_session, iter=iteration)
        assert len(listed) == 2
        for item in listed:
            assert item["iter"] == iteration

    # Negative test: an iteration that was never stored
    assert len(db.list_artifacts(db_session, iter=666)) == 0

    # Iter filter together with a name filter, to make sure query composition works
    combined = db.list_artifacts(db_session, name=names[0], iter=2102)
    assert len(combined) == 1
Exemple #4
0
def test_store_artifact_tagging(db: DBInterface, db_session: Session):
    """Store one key under two uids and check reads/lists by the "latest" tag and by uid."""
    key = "artifact_key_1"
    plain_body = _generate_artifact(key)
    chart_kind = ChartArtifact.kind
    chart_body = _generate_artifact(key, kind=chart_kind)
    plain_uid = "artifact_uid"
    chart_uid = "artifact_uid_2"

    # The kind-less artifact goes in first, the chart one second
    db.store_artifact(db_session, key, plain_body, plain_uid)
    db.store_artifact(db_session, key, chart_body, chart_uid)

    # "latest" must resolve to the artifact stored last (the one carrying a kind)
    latest = db.read_artifact(db_session, key, tag="latest")
    assert latest["kind"] == chart_kind

    # Passing the first uid as the tag must resolve to the first (kind-less) artifact
    by_uid = db.read_artifact(db_session, key, tag=plain_uid)
    assert by_uid.get("kind") is None

    for tag in ("latest", plain_uid):
        assert len(db.list_artifacts(db_session, key, tag=tag)) == 1
Exemple #5
0
def test_read_artifact_tag_resolution(db: DBInterface, db_session: Session):
    """
    Regression test: a tag filter given to read/list artifact used to be translated into a list of possible uids.
    That is wrong, since a different artifact might have one of these uids as well and would then be returned
    although it is not really tagged with the given tag.
    """
    shared_uid = "artifact_uid_1"
    key_a, key_b = "artifact_key_1", "artifact_key_2"
    body_a = _generate_artifact(key_a, uid=shared_uid)
    body_b = _generate_artifact(key_b, uid=shared_uid)
    tag_a, tag_b = "artifact_tag_1", "artifact_tag_2"

    # Both artifacts share one uid but carry different tags
    db.store_artifact(db_session, key_a, body_a, shared_uid, tag=tag_a)
    db.store_artifact(db_session, key_b, body_b, shared_uid, tag=tag_b)

    # Reading a key with the other artifact's tag must not find anything
    for key, foreign_tag in ((key_a, tag_b), (key_b, tag_a)):
        with pytest.raises(mlrun.errors.MLRunNotFoundError):
            db.read_artifact(db_session, key, foreign_tag)

    # Reading a key with its own tag - just verifying it's not raising
    for key, own_tag in ((key_a, tag_a), (key_b, tag_b)):
        db.read_artifact(db_session, key, own_tag)

    # Listing by tag must return only the artifact that carries that tag
    for tag in (tag_a, tag_b):
        assert len(db.list_artifacts(db_session, tag=tag)) == 1
Exemple #6
0
def test_store_artifact_restoring_multiple_tags(db: DBInterface, db_session: Session):
    """Store two uids of one key under different tags and check both are listed and readable by tag."""
    key = "artifact_key_1"
    uid_1, uid_2 = "artifact_uid_1", "artifact_uid_2"
    body_1 = _generate_artifact(key, uid=uid_1)
    body_2 = _generate_artifact(key, uid=uid_2)
    tag_1, tag_2 = "artifact_tag_1", "artifact_tag_2"
    stored = ((tag_1, uid_1, body_1), (tag_2, uid_2, body_2))

    for tag, uid, body in stored:
        db.store_artifact(db_session, key, body, uid, tag=tag)

    # A wildcard tag must list both tagged records, in any order
    listed = db.list_artifacts(db_session, key, tag="*")
    assert len(listed) == 2
    listed_uids = [item["metadata"]["uid"] for item in listed]
    assert deepdiff.DeepDiff([uid_1, uid_2], listed_uids, ignore_order=True) == {}
    listed_tags = [item["tag"] for item in listed]
    assert deepdiff.DeepDiff([tag_1, tag_2], listed_tags, ignore_order=True) == {}

    # Reading by each tag must return the uid that was stored under it
    for tag, uid, _ in stored:
        fetched = db.read_artifact(db_session, key, tag=tag)
        assert fetched["metadata"]["uid"] == uid
        assert fetched["tag"] == tag
Exemple #7
0
def test_list_artifact_category_filter(db: DBInterface, db_session: Session):
    """Store chart, plot, model and dataset artifacts and check the ``category`` filter of ``list_artifacts``."""
    uid = "artifact_uid"
    chart_name, plot_name = "artifact_name_1", "artifact_name_2"
    model_name, dataset_name = "artifact_name_3", "artifact_name_4"
    specs = [
        (chart_name, ChartArtifact.kind),
        (plot_name, PlotArtifact.kind),
        (model_name, ModelArtifact.kind),
        (dataset_name, DatasetArtifact.kind),
    ]
    bodies = [_generate_artifact(name, kind=kind) for name, kind in specs]

    for (name, _), body in zip(specs, bodies):
        db.store_artifact(db_session, name, body, uid)

    assert len(db.list_artifacts(db_session)) == 4

    # Expected names per category, in the order the listing is asserted to return them
    expectations = (
        (schemas.ArtifactCategories.model, [model_name]),
        (schemas.ArtifactCategories.dataset, [dataset_name]),
        (schemas.ArtifactCategories.other, [chart_name, plot_name]),
    )
    for category, expected_names in expectations:
        listed = db.list_artifacts(db_session, category=category)
        assert len(listed) == len(expected_names)
        assert [item["metadata"]["name"] for item in listed] == expected_names
Exemple #8
0
def test_list_artifacts_exact_name_match(db: DBInterface, db_session: Session):
    """Check name filtering in ``list_artifacts``: exact names, fuzzy ("~") names and combination with ``iter``."""
    underscore_key = "pre_artifact_key_suffix"
    dash_key = "pre-artifact-key-suffix"
    underscore_uid = "artifact_uid_1"
    dash_uid = "artifact_uid_2"
    underscore_body = _generate_artifact(underscore_key, uid=underscore_uid)
    dash_body = _generate_artifact(dash_key, uid=dash_uid)

    # Each artifact is stored twice - first with no iter, then with an iter
    to_store = (
        (underscore_key, underscore_body, underscore_uid),
        (dash_key, dash_body, dash_uid),
    )
    for key, body, uid in to_store:
        db.store_artifact(db_session, key, body, uid)
        body["iter"] = 42
        db.store_artifact(db_session, key, body, uid, iter=42)

    def _assert_listed(name_filter, expected_count, iteration=None):
        # List by name (and optionally iter), check the result size and hand the results back
        found = db.list_artifacts(db_session, name=name_filter, iter=iteration)
        assert len(found) == expected_count
        return found

    # Ensure the fuzzy query works, and that everything we stored is there
    _assert_listed("~key", 4)

    # Exact match on a name with underscores - the _ must be escaped so it doesn't do a like query
    exact_matches = _assert_listed(underscore_key, 2)
    for item in exact_matches:
        assert item["metadata"]["name"] == underscore_key

    _assert_listed("%key%", 0)
    # Verify we don't get artifacts whose name is "%-suffix" due to the like query used in the DB
    _assert_listed("suffix", 0)
    # This should also be filtered, since the prefix is "pre" which is 3 chars. There's a known caveat if
    # the prefix is 1 or 2 chars long.
    _assert_listed("artifact-key-suffix", 0)

    _assert_listed(underscore_key, 1, iteration=42)
    _assert_listed("~key", 2, iteration=42)
    _assert_listed("~key", 0, iteration=666)
Exemple #9
0
def test_data_migration_fix_artifact_tags_duplications(
    data_migration_db: DBInterface,
    db_session: Session,
):
    """
    Reproduce the artifact tag records written by the old (buggy) tagging code, check the resulting read/list
    behavior, run ``_fix_artifact_tags_duplications`` and check the read/list behavior again.

    The old tagging code is installed on ``data_migration_db`` by overriding its ``tag_artifacts`` attribute, so
    every ``store_artifact`` call in this test tags artifacts the way it was done before the fix.
    """
    def _buggy_tag_artifacts(session, objs, project: str, name: str):
        # This is the function code that was used before we did the fix and added the data migration
        # It creates a new Tag record for every object, ignoring conflicts
        for obj in objs:
            tag = obj.Tag(project=project, name=name, obj_id=obj.id)
            _upsert(session, tag, ignore=True)

    def _upsert(session, obj, ignore=False):
        # Add and commit obj; on an SQLAlchemy error roll back and log a warning, and re-raise it as a
        # DBError unless ignore is set
        try:
            session.add(obj)
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            cls = obj.__class__.__name__
            logger.warning(f"conflict adding {cls}, {err}")
            if not ignore:
                raise DBError(f"duplicate {cls} - {err}") from err

    # Override the tagging function on the DB object with the old buggy implementation
    data_migration_db.tag_artifacts = _buggy_tag_artifacts

    # Artifacts 1 and 2 each get two bodies (without and with a kind, under different uids);
    # artifact 3 gets a single body
    artifact_1_key = "artifact_key_1"
    artifact_1_uid = "artifact_1_uid_1"
    artifact_1_body = _generate_artifact(artifact_1_key, artifact_1_uid)
    artifact_1_kind = ChartArtifact.kind
    artifact_1_with_kind_uid = "artifact_1_uid_2"
    artifact_1_with_kind_body = _generate_artifact(artifact_1_key,
                                                   artifact_1_with_kind_uid,
                                                   kind=artifact_1_kind)
    artifact_2_key = "artifact_key_2"
    artifact_2_uid = "artifact_2_uid_1"
    artifact_2_body = _generate_artifact(artifact_2_key, artifact_2_uid)
    artifact_2_kind = PlotArtifact.kind
    artifact_2_with_kind_uid = "artifact_2_uid_2"
    artifact_2_with_kind_body = _generate_artifact(artifact_2_key,
                                                   artifact_2_with_kind_uid,
                                                   kind=artifact_2_kind)
    artifact_3_key = "artifact_key_3"
    artifact_3_kind = DatasetArtifact.kind
    artifact_3_with_kind_uid = "artifact_3_uid_1"
    artifact_3_with_kind_body = _generate_artifact(artifact_3_key,
                                                   artifact_3_with_kind_uid,
                                                   kind=artifact_3_kind)

    # Artifact 1 is stored twice without an explicit tag, artifact 2 twice with the "not-latest" tag,
    # and artifact 3 once without an explicit tag
    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_body,
        artifact_1_uid,
    )
    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_with_kind_body,
        artifact_1_with_kind_uid,
    )
    data_migration_db.store_artifact(db_session,
                                     artifact_2_key,
                                     artifact_2_body,
                                     artifact_2_uid,
                                     tag="not-latest")
    data_migration_db.store_artifact(
        db_session,
        artifact_2_key,
        artifact_2_with_kind_body,
        artifact_2_with_kind_uid,
        tag="not-latest",
    )
    data_migration_db.store_artifact(db_session, artifact_3_key,
                                     artifact_3_with_kind_body,
                                     artifact_3_with_kind_uid)

    # Before the migration:
    # 1. read artifact would have failed when there's more than one tag record with the same key (happens when you
    # store twice)
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session,
                                        artifact_1_key,
                                        tag="latest")
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session,
                                        artifact_2_key,
                                        tag="not-latest")

    # 2. read artifact would have succeeded when there's only one tag record with the same key (happens when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session,
                                               artifact_3_key,
                                               tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid

    # 3. list artifact without tag would have returned the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len(
        [artifact_1_key, artifact_2_key, artifact_3_key])
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [
            artifact_1_with_kind_uid,
            artifact_2_with_kind_uid,
            artifact_3_with_kind_uid,
        ],
        ignore_order=True,
    ) == {})

    # 4. list artifact with tag would have returned all of the artifacts that at some point were tagged with the
    # given tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert len(artifacts) == len(
        [artifact_1_uid, artifact_1_with_kind_uid, artifact_3_with_kind_uid])

    # perform the migration
    mlrun.api.initial_data._fix_artifact_tags_duplications(
        data_migration_db, db_session)

    # After the migration:
    # 1. read artifact should succeed (fixed) and return the latest updated record that was tagged with the requested
    # tag
    artifact = data_migration_db.read_artifact(db_session,
                                               artifact_1_key,
                                               tag="latest")
    assert artifact["metadata"]["uid"] == artifact_1_with_kind_uid
    artifact = data_migration_db.read_artifact(db_session,
                                               artifact_2_key,
                                               tag="not-latest")
    assert artifact["metadata"]["uid"] == artifact_2_with_kind_uid

    # 2. read artifact should (still) succeed when there's only one tag record with the same key (happens when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session,
                                               artifact_3_key,
                                               tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid

    # 3. list artifact without tag should (still) return the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len(
        [artifact_1_key, artifact_2_key, artifact_3_key])
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [
            artifact_1_with_kind_uid,
            artifact_2_with_kind_uid,
            artifact_3_with_kind_uid,
        ],
        ignore_order=True,
    ) == {})

    # 4. list artifact with tag should (fixed) return all of the artifacts that are tagged with the given tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [artifact_1_with_kind_uid, artifact_3_with_kind_uid],
        ignore_order=True,
    ) == {})
Exemple #10
0
def test_list_artifacts_best_iter(db: DBInterface, db_session: Session):
    """Check the ``best_iteration`` flag of ``list_artifacts`` on artifacts with and without iterations."""
    num_iters = 5
    first_key, first_uid, first_best = "artifact-1", "uid-1", 2
    second_key, second_uid, second_best = "artifact-2", "uid-2", 4
    standalone_key, standalone_uid = "single-artifact", "uid-3"

    _generate_artifact_with_iterations(
        db,
        db_session,
        first_key,
        first_uid,
        num_iters,
        first_best,
        ArtifactCategories.model,
    )
    _generate_artifact_with_iterations(
        db,
        db_session,
        second_key,
        second_uid,
        num_iters,
        second_best,
        ArtifactCategories.dataset,
    )

    # A non-hyper-param artifact: a single object with iter 0, not pointing at anything
    standalone_body = _generate_artifact(standalone_key, standalone_uid)
    standalone_body["iter"] = 0
    db.store_artifact(
        db_session, standalone_key, standalone_body, standalone_uid, iter=0
    )

    listed = db.list_artifacts(db_session, name="~artifact")
    assert len(listed) == num_iters * 2 + 1

    listed = db.list_artifacts(db_session, name=first_key, best_iteration=True)
    assert len(listed) == 1
    assert listed[0]["iter"] == first_best

    best_by_name = {
        first_key: first_best,
        second_key: second_best,
        standalone_key: 0,
    }
    listed = db.list_artifacts(db_session, name="~artifact", best_iteration=True)
    assert len(listed) == 3
    for item in listed:
        item_name = item["metadata"]["name"]
        assert item_name in best_by_name
        assert best_by_name[item_name] == item["iter"]

    listed = db.list_artifacts(
        db_session, best_iteration=True, category=ArtifactCategories.model
    )
    assert len(listed) == 1
    assert listed[0]["iter"] == first_best

    # Should get only artifact-2 (which is of dataset type) and the link artifact
    listed = db.list_artifacts(db_session, category=ArtifactCategories.dataset)
    assert len(listed) == num_iters
    for item in listed:
        assert item["metadata"]["name"] == second_key

    # Negative test - asking for both best_iteration and iter
    with pytest.raises(mlrun.errors.MLRunInvalidArgumentError):
        db.list_artifacts(
            db_session, name="~artifact", best_iteration=True, iter=0
        )
Exemple #11
0
def _create_resources_of_all_kinds(db: DBInterface, db_session: Session,
                                   project: str):
    """
    Fill ``project`` with several functions, artifacts, runs, logs and schedules, plus one feature set.

    :param db: DB object used to store the resources
    :param db_session: session passed to every DB call
    :param project: name of the project the resources are created in
    """
    labels = {"name": "value", "name2": "value2"}
    # The same uid and tag values are used for every resource kind that needs them
    uids = ["some_uid", "some_uid2", "some_uid3"]
    tags = ["some_tag", "some_tag2", "some_tag3"]
    iterations = range(3)

    # Functions: every name is stored under every tag
    function_body = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    for name in ["function_name_1", "function_name_2", "function_name_3"]:
        for tag in tags:
            db.store_function(
                db_session,
                function_body,
                name,
                project,
                tag=tag,
                versioned=True,
            )

    # Artifacts: every key is stored for every uid, tag and iteration
    artifact_body = {
        "bla": "blabla",
        "labels": labels,
        "status": {"bla": "blabla"},
    }
    for key in ["artifact_key_1", "artifact_key_2", "artifact_key_3"]:
        for uid in uids:
            for tag in tags:
                for iteration in iterations:
                    db.store_artifact(
                        db_session,
                        key,
                        artifact_body,
                        uid,
                        iteration,
                        tag,
                        project,
                    )

    # Runs: every uid is stored for every iteration
    run_body = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    for uid in uids:
        for iteration in iterations:
            db.store_run(db_session, run_body, uid, project, iteration)

    # Logs: one log per uid
    log_body = b"some random log"
    for uid in uids:
        db.store_log(db_session, uid, project, log_body)

    # Schedules: several job schedules sharing one cron trigger
    schedule_body = {
        "bla": "blabla",
        "status": {"bla": "blabla"},
    }
    cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    for name in ["schedule_name_1", "schedule_name_2", "schedule_name_3"]:
        db.create_schedule(
            db_session,
            project,
            name,
            schemas.ScheduleKinds.job,
            schedule_body,
            cron_trigger,
            labels,
        )

    # A single feature set with one entity and one feature
    entity = schemas.Entity(name="ent1", value_type="str", labels={"label": "1"})
    feature = schemas.Feature(name="feat1", value_type="str", labels={"label": "1"})
    feature_set = schemas.FeatureSet(
        metadata=schemas.ObjectMetadata(
            name="dummy", tag="latest", labels={"owner": "nobody"}
        ),
        spec=schemas.FeatureSetSpec(entities=[entity], features=[feature]),
        status={},
    )
    db.create_feature_set(db_session, project, feature_set)