def test_list_artifact_kind_filter(db: DBInterface, db_session: Session):
    """Verify that list_artifacts filters correctly by artifact kind."""
    first_name, first_kind = "artifact_name_1", ChartArtifact.kind
    second_name, second_kind = "artifact_name_2", PlotArtifact.kind
    shared_uid = "artifact_uid"

    for name, kind in ((first_name, first_kind), (second_name, second_kind)):
        db.store_artifact(
            db_session, name, _generate_artifact(name, kind=kind), shared_uid,
        )

    # No kind filter - both artifacts are returned
    assert len(db.list_artifacts(db_session)) == 2

    # Each kind filter matches exactly its own artifact
    for name, kind in ((first_name, first_kind), (second_name, second_kind)):
        filtered = db.list_artifacts(db_session, kind=kind)
        assert len(filtered) == 1
        assert filtered[0]["metadata"]["name"] == name
def test_delete_artifacts_tag_filter(db: DBInterface, db_session: Session):
    """Verify that del_artifacts with a tag filter removes only matching artifacts."""
    key_1, key_2 = "artifact_key_1", "artifact_key_2"
    uid_1, uid_2 = "artifact_uid_1", "artifact_uid_2"
    tag_1, tag_2 = "artifact_tag_one", "artifact_tag_two"
    body_1 = _generate_artifact(key_1, uid=uid_1)
    body_2 = _generate_artifact(key_2, uid=uid_2)

    db.store_artifact(db_session, key_1, body_1, uid_1, tag=tag_1)
    db.store_artifact(db_session, key_2, body_2, uid_2, tag=tag_2)

    # Deleting by the first tag must not touch the second artifact
    db.del_artifacts(db_session, tag=tag_1)
    assert len(db.list_artifacts(db_session, tag=tag_1)) == 0
    assert len(db.list_artifacts(db_session, tag=tag_2)) == 1

    # NOTE(review): uids appear to act as tags as well (see the tagging test that
    # reads an artifact with tag=<uid>), so deleting by the uid is expected to work
    db.del_artifacts(db_session, tag=uid_2)
    assert len(db.list_artifacts(db_session, tag=tag_2)) == 0
def test_list_artifact_iter_parameter(db: DBInterface, db_session: Session):
    """Verify that list_artifacts filters correctly by iteration number."""
    name_1, name_2 = "artifact_name_1", "artifact_name_2"
    body_1 = _generate_artifact(name_1)
    body_2 = _generate_artifact(name_2)
    uid = "artifact_uid"

    # Use iters with multiple digits, to make sure filtering them via regex works
    test_iters = [0, 5, 9, 42, 219, 2102]
    for iter_number in test_iters:
        body_1["iter"] = body_2["iter"] = iter_number
        db.store_artifact(db_session, name_1, body_1, uid, iter_number)
        db.store_artifact(db_session, name_2, body_2, uid, iter_number)

    # No filter on iter. All are expected
    assert len(db.list_artifacts(db_session)) == len(test_iters) * 2

    # Look for the various iteration numbers. Note that 0 is a special case due to the DB structure
    for iter_number in test_iters:
        matches = db.list_artifacts(db_session, iter=iter_number)
        assert len(matches) == 2
        for match in matches:
            assert match["iter"] == iter_number

    # Negative test
    assert len(db.list_artifacts(db_session, iter=666)) == 0

    # Iter filter and a name filter, make sure query composition works
    assert len(db.list_artifacts(db_session, name=name_1, iter=2102)) == 1
def test_store_artifact_tagging(db: DBInterface, db_session: Session):
    """Verify that storing twice under one key moves "latest" and keeps the old
    version reachable via its uid used as a tag."""
    key = "artifact_key_1"
    plain_uid = "artifact_uid"
    kinded_uid = "artifact_uid_2"
    kind = ChartArtifact.kind
    plain_body = _generate_artifact(key)
    kinded_body = _generate_artifact(key, kind=kind)

    db.store_artifact(
        db_session, key, plain_body, plain_uid,
    )
    db.store_artifact(
        db_session, key, kinded_body, kinded_uid,
    )

    # "latest" points at the most recently stored version (the one with a kind)
    latest = db.read_artifact(db_session, key, tag="latest")
    assert latest["kind"] == kind

    # The older version is still reachable via its uid used as a tag
    older = db.read_artifact(db_session, key, tag=plain_uid)
    assert older.get("kind") is None

    assert len(db.list_artifacts(db_session, key, tag="latest")) == 1
    assert len(db.list_artifacts(db_session, key, tag=plain_uid)) == 1
def test_read_artifact_tag_resolution(db: DBInterface, db_session: Session):
    """
    We had a bug in which when we got a tag filter for read/list artifact, we were
    transforming this tag to a list of possible uids, which is wrong: a different
    artifact might share that uid, and we would return it although it was never
    tagged with the given tag.
    """
    key_1, key_2 = "artifact_key_1", "artifact_key_2"
    shared_uid = "artifact_uid_1"
    tag_1, tag_2 = "artifact_tag_1", "artifact_tag_2"

    db.store_artifact(
        db_session, key_1, _generate_artifact(key_1, uid=shared_uid), shared_uid, tag=tag_1,
    )
    db.store_artifact(
        db_session, key_2, _generate_artifact(key_2, uid=shared_uid), shared_uid, tag=tag_2,
    )

    # Cross-matching key and tag must not resolve, despite the shared uid
    with pytest.raises(mlrun.errors.MLRunNotFoundError):
        db.read_artifact(db_session, key_1, tag_2)
    with pytest.raises(mlrun.errors.MLRunNotFoundError):
        db.read_artifact(db_session, key_2, tag_1)

    # just verifying it's not raising
    db.read_artifact(db_session, key_1, tag_1)
    db.read_artifact(db_session, key_2, tag_2)

    # check list
    assert len(db.list_artifacts(db_session, tag=tag_1)) == 1
    assert len(db.list_artifacts(db_session, tag=tag_2)) == 1
def test_store_artifact_restoring_multiple_tags(db: DBInterface, db_session: Session):
    """Verify that several tagged versions of one key coexist and stay individually readable."""
    key = "artifact_key_1"
    versions = [
        ("artifact_uid_1", "artifact_tag_1"),
        ("artifact_uid_2", "artifact_tag_2"),
    ]
    for uid, tag in versions:
        db.store_artifact(
            db_session, key, _generate_artifact(key, uid=uid), uid, tag=tag,
        )

    # tag="*" lists every tagged version of the key
    artifacts = db.list_artifacts(db_session, key, tag="*")
    assert len(artifacts) == 2
    listed_uids = [artifact["metadata"]["uid"] for artifact in artifacts]
    assert (
        deepdiff.DeepDiff([uid for uid, _ in versions], listed_uids, ignore_order=True)
        == {}
    )
    listed_tags = [artifact["tag"] for artifact in artifacts]
    assert (
        deepdiff.DeepDiff([tag for _, tag in versions], listed_tags, ignore_order=True)
        == {}
    )

    # Each tag resolves to its own uid
    for uid, tag in versions:
        artifact = db.read_artifact(db_session, key, tag=tag)
        assert artifact["metadata"]["uid"] == uid
        assert artifact["tag"] == tag
def test_list_artifact_category_filter(db: DBInterface, db_session: Session):
    """Verify that list_artifacts filters correctly by artifact category."""
    uid = "artifact_uid"
    names_and_kinds = [
        ("artifact_name_1", ChartArtifact.kind),
        ("artifact_name_2", PlotArtifact.kind),
        ("artifact_name_3", ModelArtifact.kind),
        ("artifact_name_4", DatasetArtifact.kind),
    ]
    for name, kind in names_and_kinds:
        db.store_artifact(
            db_session, name, _generate_artifact(name, kind=kind), uid,
        )

    # No category filter - everything is returned
    assert len(db.list_artifacts(db_session)) == 4

    # model / dataset categories each match exactly one artifact
    artifacts = db.list_artifacts(db_session, category=schemas.ArtifactCategories.model)
    assert len(artifacts) == 1
    assert artifacts[0]["metadata"]["name"] == "artifact_name_3"

    artifacts = db.list_artifacts(db_session, category=schemas.ArtifactCategories.dataset)
    assert len(artifacts) == 1
    assert artifacts[0]["metadata"]["name"] == "artifact_name_4"

    # "other" catches the chart and plot artifacts
    artifacts = db.list_artifacts(db_session, category=schemas.ArtifactCategories.other)
    assert len(artifacts) == 2
    assert artifacts[0]["metadata"]["name"] == "artifact_name_1"
    assert artifacts[1]["metadata"]["name"] == "artifact_name_2"
def test_list_artifacts_exact_name_match(db: DBInterface, db_session: Session):
    """Verify that name filtering does exact matches (underscores must not act as
    SQL LIKE wildcards) while the ~ prefix still does fuzzy matching."""
    underscored_key = "pre_artifact_key_suffix"
    dashed_key = "pre-artifact-key-suffix"
    underscored_uid = "artifact_uid_1"
    dashed_uid = "artifact_uid_2"
    underscored_body = _generate_artifact(underscored_key, uid=underscored_uid)
    dashed_body = _generate_artifact(dashed_key, uid=dashed_uid)

    # Store each twice - once with no iter, and once with an iter
    for key, body, uid in (
        (underscored_key, underscored_body, underscored_uid),
        (dashed_key, dashed_body, dashed_uid),
    ):
        db.store_artifact(
            db_session, key, body, uid,
        )
        body["iter"] = 42
        db.store_artifact(
            db_session, key, body, uid, iter=42,
        )

    def _list_and_assert_count(key, count, iter=None):
        results = db.list_artifacts(db_session, name=key, iter=iter)
        assert len(results) == count
        return results

    # Ensure fuzzy query works, and we have everything we need
    _list_and_assert_count("~key", count=4)

    # Do an exact match with underscores in the name - must escape the _ so it doesn't do a like query
    list_results = _list_and_assert_count(underscored_key, count=2)
    for artifact in list_results:
        assert artifact["metadata"]["name"] == underscored_key

    _list_and_assert_count("%key%", count=0)
    # Verify we don't get artifacts whose name is "%-suffix" due to the like query used in the DB
    _list_and_assert_count("suffix", count=0)
    # This should also be filtered, since the prefix is "pre" which is 3 chars. There's a known caveat if
    # prefix is 1 or 2 chars long.
    _list_and_assert_count("artifact-key-suffix", count=0)

    _list_and_assert_count(underscored_key, iter=42, count=1)
    _list_and_assert_count("~key", iter=42, count=2)
    _list_and_assert_count("~key", iter=666, count=0)
def test_data_migration_fix_artifact_tags_duplications(
    data_migration_db: DBInterface,
    db_session: Session,
):
    """Reproduce the duplicate-tag-records bug with the pre-fix tagging code,
    assert the broken behavior, run the data migration, and assert it is fixed."""

    def _buggy_tag_artifacts(session, objs, project: str, name: str):
        # This is the function code that was used before we did the fix and added the data migration
        for obj in objs:
            tag = obj.Tag(project=project, name=name, obj_id=obj.id)
            _upsert(session, tag, ignore=True)

    def _upsert(session, obj, ignore=False):
        # Pre-fix upsert: swallows conflicts when ignore=True, leaving duplicate tag rows behind
        try:
            session.add(obj)
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            cls = obj.__class__.__name__
            logger.warning(f"conflict adding {cls}, {err}")
            if not ignore:
                raise DBError(f"duplicate {cls} - {err}") from err

    # Monkey-patch the DB so every store below creates tag records with the buggy logic
    data_migration_db.tag_artifacts = _buggy_tag_artifacts

    # artifact 1: stored twice under the implicit "latest" tag -> duplicated "latest" records
    artifact_1_key = "artifact_key_1"
    artifact_1_uid = "artifact_1_uid_1"
    artifact_1_body = _generate_artifact(artifact_1_key, artifact_1_uid)
    artifact_1_kind = ChartArtifact.kind
    artifact_1_with_kind_uid = "artifact_1_uid_2"
    artifact_1_with_kind_body = _generate_artifact(
        artifact_1_key, artifact_1_with_kind_uid, kind=artifact_1_kind
    )
    # artifact 2: stored twice under an explicit tag -> duplicated "not-latest" records
    artifact_2_key = "artifact_key_2"
    artifact_2_uid = "artifact_2_uid_1"
    artifact_2_body = _generate_artifact(artifact_2_key, artifact_2_uid)
    artifact_2_kind = PlotArtifact.kind
    artifact_2_with_kind_uid = "artifact_2_uid_2"
    artifact_2_with_kind_body = _generate_artifact(
        artifact_2_key, artifact_2_with_kind_uid, kind=artifact_2_kind
    )
    # artifact 3: stored only once -> single tag record, unaffected by the bug
    artifact_3_key = "artifact_key_3"
    artifact_3_kind = DatasetArtifact.kind
    artifact_3_with_kind_uid = "artifact_3_uid_1"
    artifact_3_with_kind_body = _generate_artifact(
        artifact_3_key, artifact_3_with_kind_uid, kind=artifact_3_kind
    )

    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_body,
        artifact_1_uid,
    )
    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_with_kind_body,
        artifact_1_with_kind_uid,
    )
    data_migration_db.store_artifact(
        db_session, artifact_2_key, artifact_2_body, artifact_2_uid, tag="not-latest"
    )
    data_migration_db.store_artifact(
        db_session,
        artifact_2_key,
        artifact_2_with_kind_body,
        artifact_2_with_kind_uid,
        tag="not-latest",
    )
    data_migration_db.store_artifact(
        db_session, artifact_3_key, artifact_3_with_kind_body, artifact_3_with_kind_uid
    )

    # Before the migration:
    # 1. read artifact would have failed when there's more than one tag record with the same key (happen when you
    # store twice)
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session, artifact_1_key, tag="latest")
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session, artifact_2_key, tag="not-latest")

    # 2. read artifact would have succeed when there's only one tag record with the same key (happen when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session, artifact_3_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid

    # 3. list artifact without tag would have returned the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len([artifact_1_key, artifact_2_key, artifact_3_key])
    assert (
        deepdiff.DeepDiff(
            [artifact["metadata"]["uid"] for artifact in artifacts],
            [
                artifact_1_with_kind_uid,
                artifact_2_with_kind_uid,
                artifact_3_with_kind_uid,
            ],
            ignore_order=True,
        )
        == {}
    )

    # 4. list artifact with tag would have returned all of the artifact that at some point were tagged with the given
    # tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert len(artifacts) == len(
        [artifact_1_uid, artifact_1_with_kind_uid, artifact_3_with_kind_uid]
    )

    # perform the migration
    mlrun.api.initial_data._fix_artifact_tags_duplications(
        data_migration_db, db_session
    )

    # After the migration:
    # 1. read artifact should succeed (fixed) and return the latest updated record that was tagged with the requested
    # tag
    artifact = data_migration_db.read_artifact(db_session, artifact_1_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_1_with_kind_uid
    artifact = data_migration_db.read_artifact(
        db_session, artifact_2_key, tag="not-latest"
    )
    assert artifact["metadata"]["uid"] == artifact_2_with_kind_uid

    # 2. read artifact should (still) succeed when there's only one tag record with the same key (happen when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session, artifact_3_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid

    # 3. list artifact without tag should (still) return the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len([artifact_1_key, artifact_2_key, artifact_3_key])
    assert (
        deepdiff.DeepDiff(
            [artifact["metadata"]["uid"] for artifact in artifacts],
            [
                artifact_1_with_kind_uid,
                artifact_2_with_kind_uid,
                artifact_3_with_kind_uid,
            ],
            ignore_order=True,
        )
        == {}
    )

    # 4. list artifact with tag should (fixed) return all of the artifact that are tagged with the given tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert (
        deepdiff.DeepDiff(
            [artifact["metadata"]["uid"] for artifact in artifacts],
            [artifact_1_with_kind_uid, artifact_3_with_kind_uid],
            ignore_order=True,
        )
        == {}
    )
def test_list_artifacts_best_iter(db: DBInterface, db_session: Session):
    """Verify that the best_iteration filter returns only the best iteration of
    each hyper-param run, plus non-hyper-param artifacts as-is."""
    hyper_key_1, hyper_uid_1 = "artifact-1", "uid-1"
    hyper_key_2, hyper_uid_2 = "artifact-2", "uid-2"
    single_key, single_uid = "single-artifact", "uid-3"
    num_iters = 5
    best_iter_1 = 2
    best_iter_2 = 4

    _generate_artifact_with_iterations(
        db,
        db_session,
        hyper_key_1,
        hyper_uid_1,
        num_iters,
        best_iter_1,
        ArtifactCategories.model,
    )
    _generate_artifact_with_iterations(
        db,
        db_session,
        hyper_key_2,
        hyper_uid_2,
        num_iters,
        best_iter_2,
        ArtifactCategories.dataset,
    )

    # Add non-hyper-param artifact. Single object with iter 0, not pointing at anything
    single_body = _generate_artifact(single_key, single_uid)
    single_body["iter"] = 0
    db.store_artifact(db_session, single_key, single_body, single_uid, iter=0)

    # Without best_iteration every stored object is returned
    results = db.list_artifacts(db_session, name="~artifact")
    assert len(results) == num_iters * 2 + 1

    results = db.list_artifacts(db_session, name=hyper_key_1, best_iteration=True)
    assert len(results) == 1 and results[0]["iter"] == best_iter_1

    expected_iters = {
        hyper_key_1: best_iter_1,
        hyper_key_2: best_iter_2,
        single_key: 0,
    }
    results = db.list_artifacts(db_session, name="~artifact", best_iteration=True)
    assert len(results) == 3
    for artifact in results:
        artifact_name = artifact["metadata"]["name"]
        assert artifact_name in expected_iters
        assert expected_iters[artifact_name] == artifact["iter"]

    results = db.list_artifacts(
        db_session, best_iteration=True, category=ArtifactCategories.model
    )
    assert len(results) == 1 and results[0]["iter"] == best_iter_1

    # Should get only object-2 (which is of dataset type) and the link artifact
    results = db.list_artifacts(db_session, category=ArtifactCategories.dataset)
    assert len(results) == num_iters
    for artifact in results:
        assert artifact["metadata"]["name"] == hyper_key_2

    # Negative test - asking for both best_iter and iter
    with pytest.raises(mlrun.errors.MLRunInvalidArgumentError):
        db.list_artifacts(
            db_session, name="~artifact", best_iteration=True, iter=0
        )
def _create_resources_of_all_kinds(db: DBInterface, db_session: Session, project: str):
    """Populate the given project with every resource kind the DB supports:
    tagged functions, tagged/iterated artifacts, runs, logs, schedules and a
    feature set - so callers can assert project-wide operations cover them all."""
    # Create several functions with several tags
    labels = {
        "name": "value",
        "name2": "value2",
    }
    function = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    function_names = ["function_name_1", "function_name_2", "function_name_3"]
    function_tags = ["some_tag", "some_tag2", "some_tag3"]
    for function_name in function_names:
        for function_tag in function_tags:
            db.store_function(
                db_session,
                function,
                function_name,
                project,
                tag=function_tag,
                versioned=True,
            )

    # Create several artifacts with several tags
    artifact = {
        "bla": "blabla",
        "labels": labels,
        "status": {"bla": "blabla"},
    }
    artifact_keys = ["artifact_key_1", "artifact_key_2", "artifact_key_3"]
    artifact_uids = ["some_uid", "some_uid2", "some_uid3"]
    artifact_tags = ["some_tag", "some_tag2", "some_tag3"]
    # every key/uid/tag combination is stored with 3 iterations each
    for artifact_key in artifact_keys:
        for artifact_uid in artifact_uids:
            for artifact_tag in artifact_tags:
                for artifact_iter in range(3):
                    db.store_artifact(
                        db_session,
                        artifact_key,
                        artifact,
                        artifact_uid,
                        artifact_iter,
                        artifact_tag,
                        project,
                    )

    # Create several runs
    run = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    run_uids = ["some_uid", "some_uid2", "some_uid3"]
    for run_uid in run_uids:
        for run_iter in range(3):
            db.store_run(db_session, run, run_uid, project, run_iter)

    # Create several logs
    log = b"some random log"
    log_uids = ["some_uid", "some_uid2", "some_uid3"]
    for log_uid in log_uids:
        db.store_log(db_session, log_uid, project, log)

    # Create several schedule
    schedule = {
        "bla": "blabla",
        "status": {"bla": "blabla"},
    }
    # year=1999 is in the past, so the schedule never actually fires during the test
    schedule_cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    schedule_names = ["schedule_name_1", "schedule_name_2", "schedule_name_3"]
    for schedule_name in schedule_names:
        db.create_schedule(
            db_session,
            project,
            schedule_name,
            schemas.ScheduleKinds.job,
            schedule,
            schedule_cron_trigger,
            labels,
        )

    # Create a single feature set with labeled entities and features
    feature_set = schemas.FeatureSet(
        metadata=schemas.ObjectMetadata(
            name="dummy", tag="latest", labels={"owner": "nobody"}
        ),
        spec=schemas.FeatureSetSpec(
            entities=[
                schemas.Entity(name="ent1", value_type="str", labels={"label": "1"})
            ],
            features=[
                schemas.Feature(name="feat1", value_type="str", labels={"label": "1"})
            ],
        ),
        status={},
    )
    db.create_feature_set(db_session, project, feature_set)