def test_store_artifact_tagging(db: DBInterface, db_session: Session):
    """Storing two versions under one key: "latest" tracks the newest, the old uid tag survives."""
    key = "artifact_key_1"
    first_body = _generate_artifact(key)
    chart_kind = ChartArtifact.kind
    chart_body = _generate_artifact(key, kind=chart_kind)
    first_uid = "artifact_uid"
    chart_uid = "artifact_uid_2"

    db.store_artifact(
        db_session,
        key,
        first_body,
        first_uid,
    )
    db.store_artifact(
        db_session,
        key,
        chart_body,
        chart_uid,
    )

    # "latest" must resolve to the most recently stored version (the chart one)
    assert db.read_artifact(db_session, key, tag="latest")["kind"] == chart_kind
    # the first version is still reachable via its uid-named tag, and has no kind
    assert db.read_artifact(db_session, key, tag=first_uid).get("kind") is None
    # each tag filter narrows listing to exactly one version
    assert len(db.list_artifacts(db_session, key, tag="latest")) == 1
    assert len(db.list_artifacts(db_session, key, tag=first_uid)) == 1
def test_store_artifact_restoring_multiple_tags(db: DBInterface, db_session: Session):
    """Two tagged versions of one key are both listable and individually readable by tag."""
    key = "artifact_key_1"
    uids = ["artifact_uid_1", "artifact_uid_2"]
    tags = ["artifact_tag_1", "artifact_tag_2"]
    bodies = [_generate_artifact(key, uid=uid) for uid in uids]

    for body, uid, tag in zip(bodies, uids, tags):
        db.store_artifact(
            db_session,
            key,
            body,
            uid,
            tag=tag,
        )

    # wildcard tag listing returns both versions, with their uids and tags intact
    artifacts = db.list_artifacts(db_session, key, tag="*")
    assert len(artifacts) == 2
    listed_uids = [artifact["metadata"]["uid"] for artifact in artifacts]
    assert deepdiff.DeepDiff(uids, listed_uids, ignore_order=True,) == {}
    listed_tags = [artifact["tag"] for artifact in artifacts]
    assert deepdiff.DeepDiff(tags, listed_tags, ignore_order=True,) == {}

    # each tag resolves a read to exactly the version stored under it
    for uid, tag in zip(uids, tags):
        artifact = db.read_artifact(db_session, key, tag=tag)
        assert artifact["metadata"]["uid"] == uid
        assert artifact["tag"] == tag
def test_read_artifact_tag_resolution(db: DBInterface, db_session: Session):
    """
    Regression test: a tag filter for read/list artifact used to be translated
    into a list of candidate uids, so a *different* artifact that happened to
    share the same uid could be returned even though it was never tagged with
    the requested tag.
    """
    shared_uid = "artifact_uid_1"
    key_1 = "artifact_key_1"
    key_2 = "artifact_key_2"
    tag_1 = "artifact_tag_1"
    tag_2 = "artifact_tag_2"

    # two different keys deliberately share one uid
    db.store_artifact(
        db_session,
        key_1,
        _generate_artifact(key_1, uid=shared_uid),
        shared_uid,
        tag=tag_1,
    )
    db.store_artifact(
        db_session,
        key_2,
        _generate_artifact(key_2, uid=shared_uid),
        shared_uid,
        tag=tag_2,
    )

    # cross-matched key/tag pairs must NOT resolve, despite the shared uid
    for key, wrong_tag in ((key_1, tag_2), (key_2, tag_1)):
        with pytest.raises(mlrun.errors.MLRunNotFoundError):
            db.read_artifact(db_session, key, wrong_tag)

    # correctly matched pairs resolve without raising
    db.read_artifact(db_session, key_1, tag_1)
    db.read_artifact(db_session, key_2, tag_2)

    # listing by tag narrows down to the single properly-tagged artifact
    assert len(db.list_artifacts(db_session, tag=tag_1)) == 1
    assert len(db.list_artifacts(db_session, tag=tag_2)) == 1
def test_data_migration_fix_datasets_large_previews(
    data_migration_db: DBInterface,
    db_session: Session,
):
    """
    Verify the `_fix_datasets_large_previews` data migration: a dataset whose
    preview is within limits is left untouched, while a dataset stored with an
    oversized preview (created via ignore_preview_limits=True) gets its
    header/stats/preview/schema trimmed down to `max_preview_columns`.
    """
    # dataset small enough that its preview is already valid — migration should not change it
    artifact_with_valid_preview_key = "artifact-with-valid-preview-key"
    artifact_with_valid_preview_uid = "artifact-with-valid-preview-uid"
    artifact_with_valid_preview = mlrun.artifacts.DatasetArtifact(
        artifact_with_valid_preview_key,
        df=pandas.DataFrame([{
            "A": 10,
            "B": 100
        }, {
            "A": 11,
            "B": 110
        }, {
            "A": 12,
            "B": 120
        }]),
    )
    # _store_artifact with ensure_project=False bypasses project existence checks
    data_migration_db._store_artifact(
        db_session,
        artifact_with_valid_preview_key,
        artifact_with_valid_preview.to_dict(),
        artifact_with_valid_preview_uid,
        ensure_project=False,
    )

    # dataset with 3x the allowed preview columns — ignore_preview_limits lets the
    # oversized preview be persisted, simulating pre-fix data
    artifact_with_invalid_preview_key = "artifact-with-invalid-preview-key"
    artifact_with_invalid_preview_uid = "artifact-with-invalid-preview-uid"
    artifact_with_invalid_preview = mlrun.artifacts.DatasetArtifact(
        artifact_with_invalid_preview_key,
        df=pandas.DataFrame(
            numpy.random.randint(
                0, 10, size=(10, mlrun.artifacts.dataset.max_preview_columns * 3))),
        ignore_preview_limits=True,
    )
    data_migration_db._store_artifact(
        db_session,
        artifact_with_invalid_preview_key,
        artifact_with_invalid_preview.to_dict(),
        artifact_with_invalid_preview_uid,
        ensure_project=False,
    )

    # perform the migration
    mlrun.api.initial_data._fix_datasets_large_previews(
        data_migration_db, db_session)

    # the valid artifact must be unchanged, except fields the store itself rewrites
    artifact_with_valid_preview_after_migration = data_migration_db.read_artifact(
        db_session, artifact_with_valid_preview_key, artifact_with_valid_preview_uid)
    assert (deepdiff.DeepDiff(
        artifact_with_valid_preview_after_migration,
        artifact_with_valid_preview.to_dict(),
        ignore_order=True,
        exclude_paths=["root['updated']", "root['tag']"],
    ) == {})

    # the invalid artifact must be unchanged except the preview-related fields,
    # which the migration is expected to have trimmed
    artifact_with_invalid_preview_after_migration = data_migration_db.read_artifact(
        db_session, artifact_with_invalid_preview_key, artifact_with_invalid_preview_uid)
    assert (deepdiff.DeepDiff(
        artifact_with_invalid_preview_after_migration,
        artifact_with_invalid_preview.to_dict(),
        ignore_order=True,
        exclude_paths=[
            "root['updated']",
            "root['header']",
            "root['stats']",
            "root['schema']",
            "root['preview']",
            "root['tag']",
        ],
    ) == {})
    # trimmed sizes: off-by-one offsets presumably account for an index/label
    # column in header/schema and its absence in stats — TODO confirm against
    # the migration implementation
    assert (len(artifact_with_invalid_preview_after_migration["header"]) ==
            mlrun.artifacts.dataset.max_preview_columns)
    assert (len(artifact_with_invalid_preview_after_migration["stats"]) ==
            mlrun.artifacts.dataset.max_preview_columns - 1)
    assert (len(artifact_with_invalid_preview_after_migration["preview"][0]) ==
            mlrun.artifacts.dataset.max_preview_columns)
    assert (len(artifact_with_invalid_preview_after_migration["schema"]
                ["fields"]) == mlrun.artifacts.dataset.max_preview_columns + 1)
def test_data_migration_fix_artifact_tags_duplications(
    data_migration_db: DBInterface,
    db_session: Session,
):
    """
    Verify the `_fix_artifact_tags_duplications` data migration.

    The DB's `tag_artifacts` is monkeypatched with the pre-fix (buggy)
    implementation so that storing an artifact twice under the same key
    creates duplicate tag records. The test then asserts the broken
    behavior before the migration and the repaired behavior after it.
    """
    def _buggy_tag_artifacts(session, objs, project: str, name: str):
        # This is the function code that was used before we did the fix and added the data migration
        for obj in objs:
            tag = obj.Tag(project=project, name=name, obj_id=obj.id)
            _upsert(session, tag, ignore=True)

    def _upsert(session, obj, ignore=False):
        # pre-fix upsert helper: swallows conflicts when ignore=True, which is
        # what allowed duplicate tag rows to accumulate
        try:
            session.add(obj)
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            cls = obj.__class__.__name__
            logger.warning(f"conflict adding {cls}, {err}")
            if not ignore:
                raise DBError(f"duplicate {cls} - {err}") from err

    # inject the buggy tagging logic so stores below reproduce the old duplication
    data_migration_db.tag_artifacts = _buggy_tag_artifacts

    # artifact 1: stored twice (two uids) -> duplicate "latest" tag records
    artifact_1_key = "artifact_key_1"
    artifact_1_uid = "artifact_1_uid_1"
    artifact_1_body = _generate_artifact(artifact_1_key, artifact_1_uid)
    artifact_1_kind = ChartArtifact.kind
    artifact_1_with_kind_uid = "artifact_1_uid_2"
    artifact_1_with_kind_body = _generate_artifact(artifact_1_key,
                                                   artifact_1_with_kind_uid,
                                                   kind=artifact_1_kind)
    # artifact 2: stored twice under an explicit "not-latest" tag
    artifact_2_key = "artifact_key_2"
    artifact_2_uid = "artifact_2_uid_1"
    artifact_2_body = _generate_artifact(artifact_2_key, artifact_2_uid)
    artifact_2_kind = PlotArtifact.kind
    artifact_2_with_kind_uid = "artifact_2_uid_2"
    artifact_2_with_kind_body = _generate_artifact(artifact_2_key,
                                                   artifact_2_with_kind_uid,
                                                   kind=artifact_2_kind)
    # artifact 3: stored only once -> no duplication
    artifact_3_key = "artifact_key_3"
    artifact_3_kind = DatasetArtifact.kind
    artifact_3_with_kind_uid = "artifact_3_uid_1"
    artifact_3_with_kind_body = _generate_artifact(artifact_3_key,
                                                   artifact_3_with_kind_uid,
                                                   kind=artifact_3_kind)
    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_body,
        artifact_1_uid,
    )
    data_migration_db.store_artifact(
        db_session,
        artifact_1_key,
        artifact_1_with_kind_body,
        artifact_1_with_kind_uid,
    )
    data_migration_db.store_artifact(db_session,
                                     artifact_2_key,
                                     artifact_2_body,
                                     artifact_2_uid,
                                     tag="not-latest")
    data_migration_db.store_artifact(
        db_session,
        artifact_2_key,
        artifact_2_with_kind_body,
        artifact_2_with_kind_uid,
        tag="not-latest",
    )
    data_migration_db.store_artifact(db_session, artifact_3_key,
                                     artifact_3_with_kind_body,
                                     artifact_3_with_kind_uid)

    # Before the migration:
    # 1. read artifact would have failed when there's more than one tag record with the same key (happen when you
    # store twice)
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session, artifact_1_key, tag="latest")
    with pytest.raises(MultipleResultsFound):
        data_migration_db.read_artifact(db_session, artifact_2_key, tag="not-latest")
    # 2. read artifact would have succeed when there's only one tag record with the same key (happen when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session, artifact_3_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid
    # 3. list artifact without tag would have returned the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len(
        [artifact_1_key, artifact_2_key, artifact_3_key])
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [
            artifact_1_with_kind_uid,
            artifact_2_with_kind_uid,
            artifact_3_with_kind_uid,
        ],
        ignore_order=True,
    ) == {})
    # 4. list artifact with tag would have returned all of the artifact that at some point were tagged with the given
    # tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert len(artifacts) == len(
        [artifact_1_uid, artifact_1_with_kind_uid, artifact_3_with_kind_uid])

    # perform the migration
    mlrun.api.initial_data._fix_artifact_tags_duplications(
        data_migration_db, db_session)

    # After the migration:
    # 1. read artifact should succeed (fixed) and return the latest updated record that was tagged with the requested
    # tag
    artifact = data_migration_db.read_artifact(db_session, artifact_1_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_1_with_kind_uid
    artifact = data_migration_db.read_artifact(db_session, artifact_2_key, tag="not-latest")
    assert artifact["metadata"]["uid"] == artifact_2_with_kind_uid
    # 2. read artifact should (still) succeed when there's only one tag record with the same key (happen when you
    # stored only once)
    artifact = data_migration_db.read_artifact(db_session, artifact_3_key, tag="latest")
    assert artifact["metadata"]["uid"] == artifact_3_with_kind_uid
    # 3. list artifact without tag should (still) return the latest (by update time) of each artifact key
    artifacts = data_migration_db.list_artifacts(db_session)
    assert len(artifacts) == len(
        [artifact_1_key, artifact_2_key, artifact_3_key])
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [
            artifact_1_with_kind_uid,
            artifact_2_with_kind_uid,
            artifact_3_with_kind_uid,
        ],
        ignore_order=True,
    ) == {})
    # 4. list artifact with tag should (fixed) return all of the artifact that are tagged with the given tag
    artifacts = data_migration_db.list_artifacts(db_session, tag="latest")
    assert (deepdiff.DeepDiff(
        [artifact["metadata"]["uid"] for artifact in artifacts],
        [artifact_1_with_kind_uid, artifact_3_with_kind_uid],
        ignore_order=True,
    ) == {})