def test_bidirectional_mapping():
    """Check forward/reverse lookups, insertion, deletion and error cases."""
    mapping = BidirectionalMapping({'a': 1, 'b': 2})

    # Forward lookups first, then the matching reverse lookups.
    for key, expected in (('a', 1), ('b', 2)):
        assert mapping[key] == expected
    for stored, expected_key in ((1, 'a'), (2, 'b')):
        assert mapping.get_key(stored) == expected_key

    assert mapping.inverse() == {1: 'a', 2: 'b'}

    # A newly inserted pair must be reachable from both directions.
    mapping['c'] = 3
    assert mapping['c'] == 3
    assert mapping.get_key(3) == 'c'

    # Unknown keys and unknown values both raise KeyError.
    with pytest.raises(KeyError):
        mapping['d']

    with pytest.raises(KeyError):
        mapping.get_key(4)

    with pytest.raises(TypeError):
        mapping['d'] = [1, 2, 3]  # not hashable

    # The rejected insert above must not have changed either side.
    assert (len(mapping), len(mapping.inverse())) == (3, 3)

    del mapping['c']

    assert (len(mapping), len(mapping.inverse())) == (2, 2)
Example #2
0
def test_bidirectional_mapping():
    """Verify that BidirectionalMapping keeps both lookup directions in sync."""
    initial_pairs = {"a": 1, "b": 2}
    bimap = BidirectionalMapping(initial_pairs)

    # Lookup by key.
    assert bimap["a"] == 1
    assert bimap["b"] == 2

    # Lookup by value.
    assert bimap.get_key(1) == "a"
    assert bimap.get_key(2) == "b"

    inverted = bimap.inverse()
    assert inverted == {1: "a", 2: "b"}

    # Insertion is visible from both directions.
    bimap["c"] = 3
    assert bimap["c"] == 3
    assert bimap.get_key(3) == "c"

    # Missing key, then missing value.
    with pytest.raises(KeyError):
        bimap["d"]

    with pytest.raises(KeyError):
        bimap.get_key(4)

    with pytest.raises(TypeError):
        bimap["d"] = [1, 2, 3]  # not hashable

    # Sizes of the mapping and its inverse stay equal: 3 before the delete,
    # 2 after it.
    forward_size = len(bimap)
    reverse_size = len(bimap.inverse())
    assert forward_size == reverse_size == 3

    del bimap["c"]

    forward_size = len(bimap)
    reverse_size = len(bimap.inverse())
    assert forward_size == reverse_size == 2
Example #3
0
 MinHashIndex(
     redis.clusters.get(
         getattr(
             settings,
             'SENTRY_SIMILARITY_INDEX_REDIS_CLUSTER',
             'default',
         ), ),
     0xFFFF,
     8,
     2,
     60 * 60 * 24 * 30,
     3,
 ),
 BidirectionalMapping({
     'exception:message:character-shingles': 'a',
     'exception:stacktrace:application-chunks': 'b',
     'exception:stacktrace:pairs': 'c',
     'message:message:character-shingles': 'd',
 }), {
     'exception:message:character-shingles':
     ExceptionFeature(lambda exception: map(
         serialize_text_shingle,
         shingle(
             13,
             exception.get('value') or '',
         ),
     )),
     'exception:stacktrace:application-chunks':
     ExceptionFeature(
         lambda exception: map(
             lambda frames: FRAME_SEPARATOR.join(
                 map(
Example #4
0
                                       MinHashSignatureBuilder(16, 0xFFFF), 8,
                                       60 * 60 * 24 * 30, 3, 5000),
        scope_tag_name=None,
    )


features = FeatureSet(
    _make_index_backend(
        getattr(settings, "SENTRY_SIMILARITY_INDEX_REDIS_CLUSTER", None)
        or "similarity",
        namespace="sim:1",
    ),
    Encoder({Frame: get_frame_attributes}),
    BidirectionalMapping({
        "exception:message:character-shingles": "a",
        "exception:stacktrace:application-chunks": "b",
        "exception:stacktrace:pairs": "c",
        "message:message:character-shingles": "d",
    }),
    {
        "exception:message:character-shingles":
        ExceptionFeature(lambda exception: text_shingle(5, exception.value)),
        "exception:stacktrace:application-chunks":
        ExceptionFeature(lambda exception: get_application_chunks(exception)),
        "exception:stacktrace:pairs":
        ExceptionFeature(
            lambda exception: shingle(2, exception.stacktrace.frames)),
        "message:message:character-shingles":
        MessageFeature(lambda message: text_shingle(5, message.formatted)),
    },
    expected_extraction_errors=(InterfaceDoesNotExist, ),
    expected_encoding_errors=(FrameEncodingError, ),
Example #5
0
from django.db import models
from django.utils import timezone

from sentry.db.models import BoundedPositiveIntegerField, Model, sane_repr
from sentry.utils.datastructures import BidirectionalMapping
from sentry.utils.hashlib import md5_text
from sentry.constants import MAX_EMAIL_FIELD_LENGTH


# The order of these keys is significant: it also indicates priority
# when used in hashing and determining uniqueness. If you change the order
# you will break stuff.
KEYWORD_MAP = BidirectionalMapping(
    OrderedDict(
        [
            ("ident", "id"),
            ("username", "username"),
            ("email", "email"),
            ("ip_address", "ip"),
        ]
    )
)


class EventUser(Model):
    __core__ = False

    project_id = BoundedPositiveIntegerField(db_index=True)
    hash = models.CharField(max_length=32)
    ident = models.CharField(max_length=128, null=True)
    email = models.EmailField(null=True, max_length=MAX_EMAIL_FIELD_LENGTH)
    username = models.CharField(max_length=128, null=True)
    name = models.CharField(max_length=128, null=True)
    ip_address = models.GenericIPAddressField(null=True)
    date_added = models.DateTimeField(default=timezone.now, db_index=True)
Example #6
0
features = FeatureSet(
    MinHashIndex(
        redis.clusters.get(
            getattr(
                settings,
                'SENTRY_SIMILARITY_INDEX_REDIS_CLUSTER',
                'default',
            ),
        ),
        0xFFFF,
        8,
        2,
    ),
    BidirectionalMapping({
        'exception:message:character-shingles': '\x00',
        'exception:stacktrace:application-chunks': '\x01',
        'exception:stacktrace:pairs': '\x02',
        'message:message:character-shingles': '\x03',
    }),
    {
        'exception:message:character-shingles': ExceptionFeature(
            lambda exception: map(
                serialize_text_shingle,
                shingle(
                    13,
                    exception.get('value') or '',
                ),
            )
        ),
        'exception:stacktrace:application-chunks': ExceptionFeature(
            lambda exception: map(
                lambda frames: FRAME_SEPARATOR.join(
Example #7
0
from django.db import models
from django.utils import timezone

from sentry.db.models import BoundedPositiveIntegerField, Model, sane_repr
from sentry.utils.datastructures import BidirectionalMapping
from sentry.utils.hashlib import md5_text
from sentry.constants import MAX_EMAIL_FIELD_LENGTH

# The order of these keys is significant: it also indicates priority
# when used in hashing and determining uniqueness. If you change the order
# you will break stuff.
KEYWORD_MAP = BidirectionalMapping(
    OrderedDict(
        [
            ('ident', 'id'),
            ('username', 'username'),
            ('email', 'email'),
            ('ip_address', 'ip'),
        ]
    )
)


class EventUser(Model):
    __core__ = False

    project_id = BoundedPositiveIntegerField(db_index=True)
    hash = models.CharField(max_length=32)
    ident = models.CharField(max_length=128, null=True)
    email = models.EmailField(null=True, max_length=MAX_EMAIL_FIELD_LENGTH)
    username = models.CharField(max_length=128, null=True)
    name = models.CharField(max_length=128, null=True)
    ip_address = models.GenericIPAddressField(null=True)
Example #8
0
        eventstream.end_unmerge(eventstream_state)

    def run_postgres_replacement(
        self,
        project: Project,
        destination_id: int,
        locked_primary_hashes: Collection[str],
    ) -> None:
        """Move the locked group hashes of ``project`` to the destination group.

        Selects every ``GroupHash`` row whose ``project_id`` equals
        ``project.id`` and whose ``hash`` is in ``locked_primary_hashes``,
        then sets ``group`` to ``destination_id`` on those rows.
        """
        hashes_to_move = GroupHash.objects.filter(
            project_id=project.id,
            hash__in=locked_primary_hashes,
        )
        hashes_to_move.update(group=destination_id)

    def get_activity_args(self) -> Mapping[str, Any]:
        """Return a mapping that holds this object's ``fingerprints`` under the key ``"fingerprints"``."""
        activity_args = {"fingerprints": self.fingerprints}
        return activity_args


# Pairs each replacement class with its string label.
# NOTE(review): presumably a BidirectionalMapping so the label can also be
# resolved back to its class — confirm against the callers.
_REPLACEMENT_TYPE_LABELS: BidirectionalMapping = BidirectionalMapping(
    {PrimaryHashUnmergeReplacement: "primary_hash"}
)


@dataclass(frozen=True)
class UnmergeArgsBase(abc.ABC):
    """
    Parsed arguments of the Sentry unmerge task. Since events of the source
    issue are processed in batches, one can think of each batch as belonging to
    a state in a statemachine.

    That statemachine has only two states: Processing the first page
    (`InitialUnmergeArgs`), processing second, third, ... page
    (`SuccessiveUnmergeArgs`). On the first page postgres hashes are migrated,
    activity models are created, eventstream and pagination state is
    initialized, and so the successive tasks need to carry significantly more