Example 1
def test_basic():
    # needs module-level imports along the lines of:
    #   from six import PY2
    #   from sentry.eventstore.compressor import deduplicate
    # _assert_roundtrip is the helper shown in Example 3
    assert deduplicate({}) == ({}, {})

    _assert_roundtrip({})
    _assert_roundtrip({"debug_meta": {}})
    _assert_roundtrip({"debug_meta": None})
    _assert_roundtrip({"debug_meta": {"images": []}})
    _assert_roundtrip({"debug_meta": {"images": None}})
    _assert_roundtrip({"debug_meta": {"images": [{}]}})

    # expected checksum of the extracted image payload; the value differs
    # between Python 3 and Python 2 because the serialized content differs
    checksum = "1a3e017bec533f3f4e59e44a3f53784e" if not PY2 else "557c616fbeb6324611944ebce945d06e"
    _assert_roundtrip(
        {
            "debug_meta": {
                "images": [
                    {
                        "image_addr": "0xdeadbeef",
                        "debug_file": "C:/Ding/bla.pdb",
                        "code_file": "C:/Ding/bla.exe",
                        "debug_id": "1234abcdef",
                        "code_id": "1234abcdefgggg",
                    }
                ]
            }
        },
        assert_extra_keys={
            checksum: {
                "code_file": ["C:/Ding/bla.exe"],
                "code_id": ["1234abcdefgggg"],
                "debug_file": ["C:/Ding/bla.pdb"],
                "debug_id": ["1234abcdef"],
            }
        },
    )
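
A minimal usage sketch of the API under test, assuming (as the task code in Example 2 shows) that deduplicate is importable from sentry.eventstore.compressor; the exact shape of the checksum references inside new_data is not shown above, so the comments describe it only loosely:

from sentry.eventstore.compressor import deduplicate

event_data = {
    "debug_meta": {
        "images": [{"debug_id": "1234abcdef", "debug_file": "C:/Ding/bla.pdb"}]
    }
}
new_data, extra_keys = deduplicate(event_data)
# new_data keeps the event structure but refers to the extracted image
# fields by checksum; extra_keys maps each checksum to the payload that
# was pulled out (the dict asserted via assert_extra_keys above)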
Example 2
def capture_nodestore_stats(project_id, event_id):
    set_current_project(project_id)

    # set_current_project, _json_size and nodestore_stats_logger are assumed
    # to be module-level helpers that this excerpt does not show
    import six

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event
    from sentry.utils import metrics

    event = Event(project_id=project_id, event_id=event_id)

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    # measure the original, non-deduplicated event payload for comparison
    old_event_size = _json_size(dict(event.data))
    platform = event.platform

    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))

    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # intentionally no continue: the nodestore.set() below should
            # bump the TTL of the existing key
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # the key is the md5sum of the content; store an empty placeholder
        # instead of the actual value to keep the production impact minimal
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)

    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
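
Both stats tasks lean on a module-level _json_size helper that the excerpts never define. A plausible sketch, inferred purely from its call sites (Example 4 passes several dicts at once, so it presumably sums their serialized sizes):

import json

def _json_size(*objs):
    # hypothetical: sum the JSON-serialized length of each argument; the
    # real helper and its serializer are not shown in these excerpts
    return sum(len(json.dumps(obj)) for obj in objs)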
Example 3
def _assert_roundtrip(data, assert_extra_keys=None):
    # checks that splitting an event with deduplicate() and reassembling it
    # with assemble() is lossless; needs module-level `import copy` plus
    # deduplicate/assemble from sentry.eventstore.compressor
    new_data, extra_keys = deduplicate(copy.deepcopy(data))

    if assert_extra_keys is not None:
        assert extra_keys == assert_extra_keys

    def get_extra_keys(checksums):
        assert set(checksums) == set(extra_keys)
        return extra_keys

    new_new_data = assemble(copy.deepcopy(new_data), get_extra_keys)

    assert new_new_data == data
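
Outside the test harness the same roundtrip can be sketched end to end, assuming assemble lives next to deduplicate in sentry.eventstore.compressor (the helper above calls it without showing the import):

from sentry.eventstore.compressor import assemble, deduplicate

def roundtrip(data):
    new_data, extra_keys = deduplicate(dict(data))
    # assemble() takes a callback that resolves checksums to payloads; in
    # production that lookup would hit nodestore, here we echo extra_keys
    return assemble(new_data, lambda checksums: {c: extra_keys[c] for c in checksums})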
Example 4
def capture_nodestore_stats(cache_key, project_id, event_id):
    set_current_project(project_id)

    # _json_size, _get_unprocessed_key, event_processing_store,
    # is_minidump_event and nodestore_stats_logger are assumed to be
    # module-level helpers that this excerpt does not show
    import six

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event
    from sentry.utils import metrics

    node_id = Event.generate_node_id(project_id, event_id)
    data = nodestore.get(node_id)

    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    old_event_size = _json_size(data)

    # fetch and immediately evict the unprocessed payload kept for reprocessing
    unprocessed_key = _get_unprocessed_key(cache_key)
    unprocessed_data = event_processing_store.get(unprocessed_key)
    event_processing_store.delete_by_key(unprocessed_key)

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if unprocessed_data:
        metrics.incr("nodestore_stats.with_reprocessing")

        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated",
                       concatenated_size,
                       tags=tags)
        metrics.timing("events.size.concatenated.ratio",
                       concatenated_size / old_event_size,
                       tags=tags)

        _data = dict(data)
        _data["__nodestore_reprocessing"] = unprocessed_data
        simple_concatenated_size = _json_size(_data)
        metrics.timing("events.size.simple_concatenated",
                       simple_concatenated_size,
                       tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )
    else:
        metrics.incr("nodestore_stats.without_reprocessing")

    new_data, extra_keys = deduplicate(dict(data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits", tags=tags)
            # intentionally no continue: the nodestore.set() below should
            # bump the TTL of the existing key
        else:
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)

        # the key is the md5sum of the content; store an empty placeholder
        # instead of the actual value to keep the production impact minimal
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written",
                   total_size,
                   tags=tags)

    metrics.timing("events.size.deduplicated.ratio",
                   event_size / old_event_size,
                   tags=tags)
    metrics.timing("events.size.deduplicated.total_written.ratio",
                   total_size / old_event_size,
                   tags=tags)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
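
One caveat for the ratio metrics in both tasks: the six usage implies they still run under Python 2, where event_size / old_event_size is floor division and would report 0 for any event that shrank. The module containing them needs true division enabled (a no-op on Python 3):

from __future__ import division  # must appear at the top of the module

# with true division the ratio metrics are meaningful fractions
assert 512 / 2048 == 0.25  # Python 2 floor division would yield 0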