def to_properties(self):
        """
        Build the profile-level metadata message for this dataset profile.

        The schema version is fixed at 1.1. Both timestamps are converted
        with ``to_utc_ms`` before being stored on the message.

        Returns
        -------
        properties : DatasetProperties
            The metadata as a protobuf object.
        """
        # Pass ``None`` rather than an empty container. The truthiness check
        # also tolerates ``self.metadata`` already being ``None``, where
        # ``len()`` would raise a TypeError.
        metadata = self.metadata or None

        session_timestamp = to_utc_ms(self.session_timestamp)
        data_timestamp = to_utc_ms(self.dataset_timestamp)

        return DatasetProperties(
            schema_major_version=1,
            schema_minor_version=1,
            session_id=self.session_id,
            session_timestamp=session_timestamp,
            data_timestamp=data_timestamp,
            tags=self.tags,
            metadata=metadata,
        )
def test_protobuf_round_trip():
    """Check that a profile survives a to_protobuf / from_protobuf round trip."""
    created_at = datetime.datetime.utcnow()
    tags = {"k1": "rock", "k2": "scissors", "k3": "paper"}
    profile = DatasetProfile(name="test", dataset_timestamp=created_at, tags=tags)
    for column_name in ("col1", "col2"):
        profile.track(column_name, "value")

    message = profile.to_protobuf()
    restored = DatasetProfile.from_protobuf(message)

    # Re-serializing the restored profile must reproduce the same message.
    assert restored.to_protobuf() == message
    assert restored.name == "test"
    assert restored.session_id == profile.session_id

    # Timestamps are compared after conversion with to_utc_ms.
    restored_ms = to_utc_ms(restored.session_timestamp)
    original_ms = to_utc_ms(profile.session_timestamp)
    assert restored_ms == original_ms

    # Each tracked column must be present with exactly one recorded value.
    assert set(restored.columns.keys()) == {"col1", "col2"}
    for column_name in ("col1", "col2"):
        assert restored.columns[column_name].counters.count == 1

    # The expected tags gain a "Name" entry before the comparison.
    # NOTE(review): set(tags) holds only the dict keys, so this checks tag
    # names, not tag values.
    tags["Name"] = "test"
    assert set(restored.tags) == set(tags)
    assert restored.metadata == profile.metadata
def test_write_delimited_multiple():
    """Check that several concatenated delimited records parse back individually."""
    copies = 5
    started_at = datetime.datetime.utcnow()

    original = DatasetProfile(
        name="test",
        session_id="test.session.id",
        session_timestamp=started_at,
        tags={"key": "value"},
        metadata={"key": "value"},
    )
    original.track("col1", "value")

    record = original.serialize_delimited()

    # Start from one record and append the rest, giving `copies` back to back.
    stream = record
    for _ in range(copies - 1):
        stream += record

    entries = DatasetProfile.parse_delimited(stream)
    assert len(entries) == copies

    for parsed in entries:
        assert parsed.session_id == original.session_id
        # Timestamps are compared after conversion with to_utc_ms rather
        # than as raw datetime values.
        parsed_ms = time.to_utc_ms(parsed.session_timestamp)
        original_ms = time.to_utc_ms(original.session_timestamp)
        assert parsed_ms == original_ms
        assert parsed.tags == original.tags
        assert parsed.metadata == original.metadata
def test_write_delimited_single():
    """Check that one delimited record parses back to an equivalent profile."""
    started_at = datetime.datetime.utcnow()

    original = DatasetProfile(
        name="test",
        session_id="test.session.id",
        session_timestamp=started_at,
        tags={"key": "value"},
        metadata={"key": "value"},
    )
    original.track("col1", "value")

    record = original.serialize_delimited()
    # parse_delimited_single returns a pair; only the parsed profile is used.
    _, restored = DatasetProfile.parse_delimited_single(record)

    assert restored.session_id == original.session_id
    # Timestamps are compared after conversion with to_utc_ms.
    # NOTE(review): presumably finer precision is not preserved by
    # serialization; confirm.
    restored_ms = time.to_utc_ms(restored.session_timestamp)
    original_ms = time.to_utc_ms(original.session_timestamp)
    assert restored_ms == original_ms
    assert restored.tags == original.tags
    assert restored.metadata == original.metadata
 def session_timestamp_ms(self):
     """
     Session timestamp expressed in epoch milliseconds.

     Converts ``self.session_timestamp`` with ``time.to_utc_ms`` and
     returns the result.
     """
     to_millis = time.to_utc_ms
     return to_millis(self.session_timestamp)