def test_mismatched_tags_raises_assertion_error():
    """Strict merging must reject two profiles whose tags disagree.

    The lenient ``merge`` is expected to succeed on mismatched tags (that is
    what ``test_mismatched_tags_merge_succeeds`` asserts), so this test has to
    exercise the strict variant instead.
    NOTE(review): assumes ``DatasetProfile.merge_strict`` is the asserting
    counterpart of ``merge`` -- confirm against the DatasetProfile API.

    Raises:
        RuntimeError: if the strict merge completes without an AssertionError.
    """
    now = datetime.datetime.utcnow()
    x1 = DatasetProfile("test", now, tags={"key": "foo"})
    x2 = DatasetProfile("test", now, tags={"key": "bar"})

    try:
        x1.merge_strict(x2)
    except AssertionError:
        # Expected: the strict merge refused the mismatched profiles.
        pass
    else:
        # Keep the try body minimal; only reached when nothing was raised.
        raise RuntimeError("Assertion error not raised")
def test_merge_same_columns():
    """Merging profiles that both tracked ``col1`` adds that column's counts.

    The right-hand profile additionally tracks ``col2``, which must show up
    in the merged result with its own count.
    """
    now = datetime.datetime.utcnow()
    shared_session_id = uuid4().hex

    def new_profile():
        # Fresh tag/metadata dicts on every call so the profiles share no state.
        return DatasetProfile(
            name="test",
            session_id=shared_session_id,
            session_timestamp=now,
            tags={"key": "value"},
            metadata={"key": "value"},
        )

    left = new_profile()
    left.track("col1", "value1")

    right = new_profile()
    right.track("col1", "value1")
    right.track("col2", "value")

    merged = left.merge(right)

    assert merged.name == "test"
    assert merged.session_id == shared_session_id
    assert merged.session_timestamp == now

    expected_counts = {"col1": 2, "col2": 1}
    assert set(merged.columns.keys()) == set(expected_counts)
    for column, count in expected_counts.items():
        assert merged.columns[column].counters.count == count
def test_mismatched_tags_merge_succeeds():
    """Lenient ``merge`` accepts differing tags; the result reports ``key`` as "foo"."""
    timestamp = datetime.datetime.utcnow()
    left = DatasetProfile("test", timestamp, tags={"key": "foo"})
    right = DatasetProfile("test2", timestamp, tags={"key": "bar"})

    merged_tags = left.merge(right).tags

    # The value from the profile that merge() was called on is the one expected.
    assert merged_tags.get("key") == "foo"
def test_merge_lhs_no_profile():
    """Merging a profile without a model profile into one that has it keeps it.

    Only the right-hand side is built with a ``ModelProfile``; the merged
    result must still expose a non-``None`` ``model_profile``.
    """
    now = datetime.datetime.utcnow()
    shared_session_id = uuid4().hex

    def new_profile(**extra):
        # Fresh tag/metadata dicts on every call so the profiles share no state.
        return DatasetProfile(
            name="test",
            session_id=shared_session_id,
            session_timestamp=now,
            tags={"key": "value"},
            metadata={"key": "value"},
            **extra,
        )

    without_model = new_profile()
    with_model = new_profile(model_profile=ModelProfile())

    merged = without_model.merge(with_model)

    assert merged.name == "test"
    assert merged.session_id == shared_session_id
    assert merged.session_timestamp == now
    assert merged.columns == {}
    assert merged.model_profile is not None
def test_merge_different_columns():
    """Merging profiles with disjoint columns keeps both columns.

    Also checks that the merged tags include the profile name and that the
    merged metadata is the left-hand profile's metadata.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    shared_session_id = uuid4().hex

    def tracked_profile(metadata_value, column):
        # Build a profile and record a single "value" entry for one column.
        profile = DatasetProfile(
            name="test",
            session_id=shared_session_id,
            session_timestamp=now,
            tags={"key": "value"},
            metadata={"key": metadata_value},
        )
        profile.track(column, "value")
        return profile

    left = tracked_profile("x1", "col1")
    right = tracked_profile("x2", "col2")

    merged = left.merge(right)

    assert merged.name == "test"
    assert merged.session_id == shared_session_id
    assert merged.session_timestamp == now

    assert set(merged.columns.keys()) == {"col1", "col2"}
    for column in ("col1", "col2"):
        assert merged.columns[column].counters.count == 1

    assert merged.tags == {"name": "test", "key": "value"}
    assert merged.metadata == {"key": "x1"}
def test_empty_valid_datasetprofiles_empty():
    """Merging two profiles that tracked nothing yields a profile with no columns."""
    now = datetime.datetime.utcnow()
    shared_session_id = uuid4().hex

    def empty_profile():
        # Fresh tag/metadata dicts on every call so the profiles share no state.
        return DatasetProfile(
            name="test",
            session_id=shared_session_id,
            session_timestamp=now,
            tags={"key": "value"},
            metadata={"key": "value"},
        )

    left = empty_profile()
    right = empty_profile()

    merged = left.merge(right)

    assert merged.name == "test"
    assert merged.session_id == shared_session_id
    assert merged.session_timestamp == now
    assert merged.columns == {}