Example 1
    def try_lock(item):
        """
        Attempt to immediately acquire a lock on the timeline at key,
        returning a ``(lock, item)`` tuple where the lock is ``None`` if
        it could not be acquired.
        """
        key, timestamp = item
        lock = Lock(make_timeline_key(self.namespace, key), timeout=5, nowait=True)
        return (lock if lock.acquire() else None), item
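
Because it returns a ``(lock, item)`` tuple, try_lock is easy to map over a batch of ``(key, timestamp)`` items, keeping only the timelines whose locks were acquired. A minimal usage sketch, where the ``items`` list and the ``process`` function are hypothetical stand-ins for the surrounding maintenance code:

    # Hypothetical driver loop around try_lock; `items` is a list of
    # (key, timestamp) pairs and process(item) stands in for the real work.
    for lock, item in map(try_lock, items):
        if lock is None:
            continue  # another process is already working on this timeline
        try:
            process(item)
        finally:
            lock.release()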
Example 2
    def get_or_create(self, _cache=False, **kwargs):
        """
        A modified version of Django's get_or_create which will create a distributed
        lock (using the cache backend) whenever it hits the create clause.
        """
        defaults = kwargs.pop('defaults', {})

        # before locking, attempt to fetch the instance
        try:
            if _cache:
                return self.get_from_cache(**kwargs), False
            return self.get(**kwargs), False
        except self.model.DoesNotExist:
            pass
        lock_key = self.__make_key('lock', kwargs)

        # instance not found; let's grab a lock and attempt to create it
        with Lock(lock_key):
            # it's important we get() before create() to ensure that if
            # someone beat us to creating it since our very first .get(),
            # we get their result back, as we cannot rely on unique
            # constraints existing
            instance, created = super(BaseManager,
                                      self).get_or_create(defaults=defaults,
                                                          **kwargs)

        return instance, created
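
Call sites use this just like Django's built-in get_or_create; the distributed lock only comes into play on the create path. A hedged usage sketch, where the Project model and its fields are hypothetical:

    # Hypothetical model and fields; returns (instance, created) exactly like
    # Django's get_or_create, but the create path runs under a cache-backed lock.
    project, created = Project.objects.get_or_create(
        organization=org,
        slug='internal',
        defaults={'name': 'Internal'},
    )
    if created:
        print 'created project %s' % project.slug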
Example 3
    def _finish_login_pipeline(self, identity):
        """
        The login flow executes for both anonymous and authenticated users.

        Upon completion a few branches exist:

        If the identity is already linked, the user should be logged in
        and redirected immediately.

        Otherwise, the user is presented with a confirmation window. That
        window shows them the new account that will be created and, if
        they're already authenticated, an optional button to associate the
        identity with their account.
        """
        auth_provider = self.auth_provider
        lock_key = 'sso:auth:{}:{}'.format(
            auth_provider.id,
            md5(unicode(identity['id'])).hexdigest(),
        )
        with Lock(lock_key, timeout=5):
            try:
                auth_identity = AuthIdentity.objects.get(
                    auth_provider=auth_provider,
                    ident=identity['id'],
                )
            except AuthIdentity.DoesNotExist:
                return self._handle_unknown_identity(identity)
            return self._handle_existing_identity(auth_identity, identity)
Example 4
    def ensure_blob(self):
        if self.blob:
            return

        lock_key = 'fileblob:convert:{}'.format(self.checksum)
        with Lock(lock_key, timeout=60):
            blob, created = FileBlob.objects.get_or_create(
                checksum=self.checksum,
                defaults={
                    'storage': self.storage,
                    'storage_options': self.storage_options,
                    'path': self.path,
                    'size': self.size,
                    'timestamp': self.timestamp,
                },
            )

            # if this blob already existed, let's kill the duplicate
            # TODO(dcramer): kill data when fully migrated
            # if self.path != blob.path:
            #     get_storage_class(self.storage)(
            #         **self.storage_options
            #     ).delete(self.path)

            self.update(
                blob=blob,
                # TODO(dcramer): kill data when fully migrated
                # checksum=None,
                # path=None,
                # storage=None,
                # storage_options={},
            )
Example 5
    def create_or_update(self, **kwargs):
        """
        Similar to get_or_create, either updates a row or creates it.

        The result will be ``(rows affected, False)`` if the row was not
        created, or ``(instance, True)`` if the object is new.
        """
        defaults = kwargs.pop('defaults', {})

        # before locking, attempt to update any existing rows in place
        affected = self.filter(**kwargs).update(**defaults)
        if affected:
            return affected, False
        lock_key = self.__make_key('lock', kwargs)

        # instance not found; let's grab a lock and attempt to create it
        with Lock(lock_key) as lock:
            if lock.was_locked:
                # someone else may have created the row while we waited on
                # the lock, so retry the update before creating a duplicate
                affected = self.filter(**kwargs).update(**defaults)
                return affected, False

            # fold the defaults into the row we will create, resolving any
            # database expressions so their values can be used on insert
            for k, v in defaults.iteritems():
                if isinstance(v, ExpressionNode):
                    kwargs[k] = resolve_expression_node(self.model(), v)
                else:
                    kwargs[k] = v
            return self.create(**kwargs), True
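
Note the asymmetric return contract: the first element is a row count on the update path but a model instance on the create path, so callers should branch on the second element. A usage sketch with a hypothetical Counter model:

    # Hypothetical model; `result` is a row count when updated and an
    # instance when created, so branch on the `created` flag.
    from django.db.models import F

    result, created = Counter.objects.create_or_update(
        key='events.seen',
        defaults={'value': F('value') + 1},
    )
    if created:
        print 'created counter %s' % result.key  # instance
    else:
        print 'updated %d row(s)' % result       # row count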
Example 6
    def save(self, *args, **kwargs):
        if not self.slug:
            lock_key = 'slug:project'
            with Lock(lock_key):
                slugify_instance(self, self.name, organization=self.organization)
        super(Project, self).save(*args, **kwargs)
Example 7
    def post_process(self, group, event, is_new, is_sample, **kwargs):
        lock_key = 'lock_mail:%s' % group.id
        try:
            with Lock(lock_key, timeout=0.5):
                # re-fetch the group while holding the lock so we act on
                # fresh state
                group = Group.objects.get(pk=group.pk)
                self._post_process(group, event, is_new, is_sample, **kwargs)
        except UnableToGetLock:
            pass
Example 8
    def test_basic(self):
        timeout = 10
        lock = Lock('basic', timeout=timeout)

        assert lock.held is False
        assert lock.seconds_remaining == 0

        assert lock.acquire() is True
        assert timeout > lock.seconds_remaining > (timeout - 0.1)
        assert lock.held is True

        assert lock.acquire() is True  # ensure reentrancy

        assert lock.release() is True
        assert lock.seconds_remaining == 0
        assert lock.held is False
        assert lock.release() is False
Example 9
    def schedule(self, deadline, chunk=1000):
        # TODO: This doesn't lead to a fair balancing of workers; ideally,
        # each scheduling task would be executed by a different process for
        # each host. There is also no failure isolation here, so a single
        # shard failure will prevent the remaining shards from being
        # scheduled.
        for host in self.cluster.hosts:
            connection = self.cluster.get_local_client(host)

            with Lock('{0}:s:{1}'.format(self.namespace, host), nowait=True, timeout=30):
                # Prevent a runaway loop by setting a maximum number of
                # iterations. Note that this limits the total number of
                # expected items in any specific scheduling interval to chunk *
                # maximum_iterations.
                maximum_iterations = 1000
                for i in xrange(maximum_iterations):
                    items = connection.zrangebyscore(
                        make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING),
                        min=0,
                        max=deadline,
                        withscores=True,
                        start=0,
                        num=chunk,
                    )

                    # XXX: Redis will error if we try to execute an empty
                    # transaction. If there are no items to move between
                    # states, we need to exit the loop now. (This can happen
                    # on the first iteration if there is nothing to do, or on
                    # a subsequent iteration if there were exactly as many
                    # items to change states as the chunk size.)
                    if not items:
                        break

                    with connection.pipeline() as pipeline:
                        pipeline.multi()

                        pipeline.zrem(
                            make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING),
                            *[key for key, timestamp in items]
                        )

                        pipeline.zadd(
                            make_schedule_key(self.namespace, SCHEDULE_STATE_READY),
                            *itertools.chain.from_iterable([(timestamp, key) for (key, timestamp) in items])
                        )

                        for key, timestamp in items:
                            yield ScheduleEntry(key, timestamp)

                        pipeline.execute()

                    # If we retrieved fewer than the chunk size of items, we
                    # don't need to try to retrieve more.
                    if len(items) < chunk:
                        break
                else:
                    raise RuntimeError('loop exceeded maximum iterations (%s)' % (maximum_iterations,))
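
The for ... else construction is what enforces the iteration cap: the else clause runs only when the loop finishes without a break, i.e. when it exhausted maximum_iterations without draining the schedule. The same pattern in isolation, where fetch_batch and handle are hypothetical placeholders:

    # Generic bounded chunked-scan loop; fetch_batch and handle are
    # placeholders for a paginated source (like zrangebyscore above) and
    # the per-batch work.
    chunk = 1000
    maximum_iterations = 1000
    for i in xrange(maximum_iterations):
        items = fetch_batch(start=0, num=chunk)
        if not items:
            break  # nothing left to process
        handle(items)
        if len(items) < chunk:
            break  # a short batch means the source is exhausted
    else:
        # reached only if every iteration completed without a break
        raise RuntimeError('loop exceeded maximum iterations (%s)' % (maximum_iterations,))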
Example 10
    def test_context(self):
        timeout = 10
        lock = Lock('ctx', timeout=timeout)

        with lock as result:
            assert lock is result
            assert lock.held is True

        assert lock.held is False
Example 11
    def save(self, *args, **kwargs):
        if not self.slug:
            lock_key = 'slug:organization'
            with Lock(lock_key):
                slugify_instance(self, self.name,
                                 reserved=RESERVED_ORGANIZATION_SLUGS)
        super(Organization, self).save(*args, **kwargs)
Example 12
def process_pending():
    """
    Process pending buffers.
    """
    from sentry import app
    lock_key = 'buffer:process_pending'
    try:
        with Lock(lock_key, nowait=True, timeout=60):
            app.buffer.process_pending()
    except UnableToGetLock:
        pass
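
Examples 7 and 12 share a best-effort pattern: acquire with nowait=True (or a very short timeout) and treat UnableToGetLock as "another worker is already on it". A hedged sketch of that pattern pulled out into a reusable helper (the helper name is hypothetical):

    # Hypothetical helper: run `func` only if the named lock is free,
    # silently skipping the work when another process holds it.
    def run_if_unlocked(lock_key, func, timeout=60):
        try:
            with Lock(lock_key, nowait=True, timeout=timeout):
                return func()
        except UnableToGetLock:
            return None  # another worker got there first

    # e.g. the body of process_pending above becomes:
    # run_if_unlocked('buffer:process_pending', app.buffer.process_pending)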
Example 13
    def delete(self, key):
        timeline_key = make_timeline_key(self.namespace, key)

        connection = self.cluster.get_local_client_for_key(timeline_key)
        with Lock(timeline_key, nowait=True, timeout=30), \
                connection.pipeline() as pipeline:
            truncate_timeline(pipeline, (timeline_key,), (0, timeline_key))
            truncate_timeline(pipeline, (make_digest_key(timeline_key),), (0, timeline_key))
            pipeline.delete(make_last_processed_timestamp_key(timeline_key))
            pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
            pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), key)
            pipeline.execute()
Example 14
    def test_basic(self):
        timeout = 10
        lock = Lock('basic', timeout=timeout)

        assert lock.held is False
        assert lock.seconds_remaining == 0

        assert lock.acquire() is True
        assert timeout > lock.seconds_remaining > (timeout - 0.1)
        assert lock.held is True

        with pytest.raises(LockAlreadyHeld):
            lock.acquire()

        assert lock.release() is True
        assert lock.seconds_remaining == 0
        assert lock.held is False
        assert lock.release() is False
Example 15
    @classmethod
    def from_file(cls, fileobj):
        """
        Retrieve a FileBlob instance for the given file.

        If not already present, this will cause it to be stored.

        >>> blob = FileBlob.from_file(fileobj)
        """
        size = 0
        checksum = sha1('')
        for chunk in fileobj:
            size += len(chunk)
            checksum.update(chunk)
        checksum = checksum.hexdigest()

        lock_key = 'fileblob:upload:{}'.format(checksum)
        # TODO(dcramer): the database here is safe, but if this lock expires
        # and duplicate files are uploaded then we need to prune one
        with Lock(lock_key, timeout=600):
            # test for presence
            try:
                existing = FileBlob.objects.get(checksum=checksum)
            except FileBlob.DoesNotExist:
                pass
            else:
                return existing

            blob = cls(
                size=size,
                checksum=checksum,
                storage=settings.SENTRY_FILESTORE,
                storage_options=settings.SENTRY_FILESTORE_OPTIONS,
            )

            blob.path = cls.generate_unique_path(blob.timestamp)

            storage = blob.get_storage()
            storage.save(blob.path, fileobj)
            blob.save()

        metrics.timing('filestore.blob-size', blob.size)
        return blob
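
from_file never buffers the whole file: it streams chunks through sha1 while counting bytes, and only then takes the lock keyed on the checksum. The same streaming-digest idea in isolation (a standalone sketch, not Sentry code), reading fixed-size binary chunks instead of iterating the file object:

    # Standalone sketch of the streaming checksum used above; reads
    # fixed-size binary chunks rather than relying on file iteration.
    from hashlib import sha1

    def stream_checksum(fileobj, chunk_size=4096):
        size = 0
        checksum = sha1()
        # read() returns '' at EOF in Python 2, ending the iteration
        for chunk in iter(lambda: fileobj.read(chunk_size), ''):
            size += len(chunk)
            checksum.update(chunk)
        return size, checksum.hexdigest()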
Example 16
    def digest(self, key, minimum_delay=None):
        if minimum_delay is None:
            minimum_delay = self.minimum_delay

        timeline_key = make_timeline_key(self.namespace, key)
        digest_key = make_digest_key(timeline_key)

        connection = self.cluster.get_local_client_for_key(timeline_key)

        with Lock(timeline_key, nowait=True, timeout=30):
            # Check to ensure the timeline is in the correct state ("ready")
            # before sending. This acts as a throttling mechanism to prevent
            # sending a digest before its next scheduled delivery time in a
            # race condition scenario.
            if connection.zscore(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key) is None:
                raise InvalidState('Timeline is not in the ready state.')

            with connection.pipeline() as pipeline:
                pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?

                if pipeline.exists(digest_key):
                    pipeline.multi()
                    pipeline.zunionstore(digest_key, (timeline_key, digest_key), aggregate='max')
                    pipeline.delete(timeline_key)
                    pipeline.expire(digest_key, self.ttl)
                    pipeline.execute()
                else:
                    pipeline.multi()
                    pipeline.rename(timeline_key, digest_key)
                    pipeline.expire(digest_key, self.ttl)
                    try:
                        pipeline.execute()
                    except ResponseError as error:
                        if 'no such key' in str(error):
                            logger.debug('Could not move timeline for digestion (likely has no contents.)')
                        else:
                            raise

            # XXX: This must select all records, even though not all of them will
            # be returned if they exceed the capacity, to ensure that all records
            # will be garbage collected.
            records = connection.zrevrange(digest_key, 0, -1, withscores=True)
            if not records:
                logger.info('Retrieved timeline containing no records.')

            def get_records_for_digest():
                with connection.pipeline(transaction=False) as pipeline:
                    for record_key, timestamp in records:
                        pipeline.get(make_record_key(timeline_key, record_key))

                    for (record_key, timestamp), value in zip(records, pipeline.execute()):
                        # We have to handle failures if the key does not exist --
                        # this could happen due to evictions or race conditions
                        # where the record was added to a timeline while it was
                        # already being digested.
                        if value is None:
                            logger.warning('Could not retrieve event for timeline.')
                        else:
                            yield Record(record_key, self.codec.decode(value), timestamp)

            yield itertools.islice(get_records_for_digest(), self.capacity)

            def cleanup_records(pipeline):
                record_keys = [make_record_key(timeline_key, record_key) for record_key, score in records]
                pipeline.delete(digest_key, *record_keys)

            def reschedule():
                with connection.pipeline() as pipeline:
                    pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?
                    pipeline.multi()

                    cleanup_records(pipeline)
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                    pipeline.zadd(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), time.time() + minimum_delay, key)
                    pipeline.setex(make_last_processed_timestamp_key(timeline_key), self.ttl, int(time.time()))
                    pipeline.execute()

            def unschedule():
                with connection.pipeline() as pipeline:
                    # Watch the timeline to ensure that no other transactions add
                    # events to the timeline while we are trying to delete it.
                    pipeline.watch(timeline_key)
                    pipeline.multi()
                    if connection.zcard(timeline_key) == 0:
                        cleanup_records(pipeline)
                        pipeline.delete(make_last_processed_timestamp_key(timeline_key))
                        pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                        pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), key)
                        pipeline.execute()

            # If there were records in the digest, we need to reschedule it
            # so that any records added during digestion are delivered in a
            # later digest. If there were no items, we can try to remove the
            # timeline from the digestion schedule.
            if records:
                reschedule()
            else:
                try:
                    unschedule()
                except WatchError:
                    logger.debug('Could not remove timeline from schedule, rescheduling instead')
                    reschedule()
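
The merge step at the top of digest is worth isolating: when a digest key already exists (say, a previous delivery attempt failed partway), the new timeline is unioned into it with zunionstore, keeping the max score per member; otherwise the timeline is simply renamed into place. A standalone sketch of that branch using redis-py, with hypothetical key names and without the empty-timeline error handling shown above:

    # Standalone sketch of the merge-or-rename step; `client` is a redis-py
    # client and the key names are hypothetical.
    def merge_timeline_into_digest(client, timeline_key, digest_key, ttl):
        with client.pipeline() as pipeline:
            pipeline.watch(digest_key)  # exists() runs immediately after watch
            if pipeline.exists(digest_key):
                pipeline.multi()
                # keep the highest score when a record appears in both sets
                pipeline.zunionstore(digest_key, (timeline_key, digest_key), aggregate='max')
                pipeline.delete(timeline_key)
            else:
                pipeline.multi()
                pipeline.rename(timeline_key, digest_key)
            pipeline.expire(digest_key, ttl)
            pipeline.execute()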
Example 17
    def delete(self, *args, **kwargs):
        lock_key = 'fileblob:upload:{}'.format(self.checksum)
        with Lock(lock_key, timeout=600):
            if self.path:
                self.deletefile(commit=False)
            super(FileBlob, self).delete(*args, **kwargs)