def test_parse_delimited_from_java_single():
    """Check that profiles written by the Java library can be parsed.

    Each fixture is a binary file stored next to this test module; the test
    only requires that ``DatasetProfile.parse_delimited_single`` returns
    something other than ``None`` for its contents.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    java_fixtures = (
        "output_from_java_08242020.bin",
        "output_from_java_01212021.bin",
    )

    for fixture_name in java_fixtures:
        fixture_path = os.path.join(here, fixture_name)
        with open(fixture_path, "rb") as fixture:
            payload = fixture.read()
        assert DatasetProfile.parse_delimited_single(payload) is not None
def test_serde_without_dataset_timezone():
    """Round-trip a profile whose dataset timestamp carries no timezone.

    A logger is created with a naive ``datetime`` as its dataset timestamp,
    a small three-column CSV is logged, and the resulting profile is
    serialized and parsed back.  ``profiles_eq`` then compares the original
    profile against the deserialized one.
    """
    session = Session("project", "pipeline", writers=[])
    # tz=None is deliberate: it produces a naive datetime (no tzinfo), which
    # is the case this test is named for.
    dt = datetime.datetime.fromtimestamp(1634939335, tz=None)
    logger = session.logger("", dataset_timestamp=dt)

    # One header row plus three data rows.  The separators are written as
    # explicit "\n" escapes so the fixture cannot silently collapse into a
    # single header-only line when the file is reformatted.
    csv_text = "a,b,c\n1,1,1\n1,1,2\n4,4,3\n"
    logger.log_csv(io.StringIO(csv_text))

    profile = logger.profile
    # parse_delimited_single returns a pair; the profile is the second item.
    _, deserialized_profile = DatasetProfile.parse_delimited_single(
        profile.serialize_delimited()
    )
    profiles_eq(profile, deserialized_profile)
def test_write_delimited_single():
    """Serialize one profile in delimited form and parse it back.

    The round-tripped profile must agree with the original on session id,
    session timestamp (compared in UTC milliseconds), tags and metadata.
    """
    session_started = datetime.datetime.utcnow()
    labels = {"key": "value"}
    extra = {"key": "value"}
    original = DatasetProfile(
        name="test",
        session_id="test.session.id",
        session_timestamp=session_started,
        tags=labels,
        metadata=extra,
    )
    original.track("col1", "value")

    # The first element of the parse result is not needed by this test.
    _, roundtrip = DatasetProfile.parse_delimited_single(
        original.serialize_delimited()
    )

    assert roundtrip.session_id == original.session_id
    # datetime values are finer-grained than a millisecond (microsecond
    # resolution), so both sides are reduced to UTC milliseconds first.
    expected_ms = time.to_utc_ms(original.session_timestamp)
    assert time.to_utc_ms(roundtrip.session_timestamp) == expected_ms
    assert roundtrip.tags == original.tags
    assert roundtrip.metadata == original.metadata