def try_lock(item):
    """
    Try to take the timeline lock for a schedule entry without waiting.

    ``item`` is a ``(key, timestamp)`` pair. The result is always a
    two-tuple ``(lock, item)``: ``lock`` is the ``Lock`` instance when
    ``acquire()`` reports success and ``None`` otherwise, and ``item`` is
    handed back unchanged.

    Note that ``self`` is not a parameter; it is resolved from the
    enclosing scope.
    """
    # Unpacking enforces that ``item`` is a pair; only the key is needed
    # to build the lock name.
    key, _ = item
    timeline_lock = Lock(
        make_timeline_key(self.namespace, key),
        timeout=5,
        nowait=True,
    )
    if timeline_lock.acquire():
        return timeline_lock, item
    return None, item
def get_or_create(self, _cache=False, **kwargs):
    """
    Variant of Django's ``get_or_create`` that serializes the create step
    behind a ``Lock`` keyed on the lookup arguments.

    ``defaults`` (popped from ``kwargs``) is only forwarded to the parent
    ``get_or_create``. When ``_cache`` is true the initial lookup goes
    through ``get_from_cache`` instead of ``get``.

    Returns ``(instance, created)``.
    """
    defaults = kwargs.pop('defaults', {})

    # Optimistic lookup without taking the lock.
    try:
        fetch = self.get_from_cache if _cache else self.get
        found = fetch(**kwargs)
    except self.model.DoesNotExist:
        pass
    else:
        return found, False

    lock_key = self.__make_key('lock', kwargs)

    # Nothing found: take the lock and let the parent implementation do a
    # get() followed by create(). The second get() matters because another
    # process may have created the row since the lookup above, and unique
    # constraints cannot be relied upon to catch that.
    with Lock(lock_key):
        instance, created = super(BaseManager, self).get_or_create(
            defaults=defaults,
            **kwargs
        )
        return instance, created
def _finish_login_pipeline(self, identity):
    """
    The login flow executes both with anonymous and authenticated users.

    Upon completion a few branches exist:

    If the identity is already linked, the user should be logged in
    and redirected immediately.

    Otherwise, the user is presented with a confirmation window. That window
    will show them the new account that will be created, and if they're
    already authenticated an optional button to associate the identity with
    their account.

    ``identity`` must provide an ``'id'`` entry. The lookup and the
    handler that follows run under a lock keyed on the auth provider and
    a hash of that id. Returns whatever ``_handle_unknown_identity`` or
    ``_handle_existing_identity`` returns.
    """
    auth_provider = self.auth_provider

    # Hash the UTF-8 bytes of the identity id. Handing a text string to
    # md5 makes Python 2 encode it as ASCII implicitly, which raises
    # UnicodeEncodeError for ids containing non-ASCII characters. For
    # ASCII ids the resulting digest (and lock key) is unchanged.
    ident_digest = md5(unicode(identity['id']).encode('utf-8')).hexdigest()
    lock_key = 'sso:auth:{}:{}'.format(auth_provider.id, ident_digest)

    with Lock(lock_key, timeout=5):
        try:
            auth_identity = AuthIdentity.objects.get(
                auth_provider=auth_provider,
                ident=identity['id'],
            )
        except AuthIdentity.DoesNotExist:
            return self._handle_unknown_identity(identity)
        return self._handle_existing_identity(auth_identity, identity)
def ensure_blob(self):
    """
    Make sure this file is linked to a ``FileBlob``.

    Does nothing when ``self.blob`` is already set. Otherwise, under a
    lock keyed on the checksum, a blob with the same checksum is fetched
    or created from this file's storage fields and stored on ``self``
    via ``update``.
    """
    if self.blob:
        return

    with Lock('fileblob:convert:{}'.format(self.checksum), timeout=60):
        blob_defaults = {
            'storage': self.storage,
            'storage_options': self.storage_options,
            'path': self.path,
            'size': self.size,
            'timestamp': self.timestamp,
        }
        blob, _ = FileBlob.objects.get_or_create(
            checksum=self.checksum,
            defaults=blob_defaults,
        )

        # if this blob already existed, lets kill the duplicate
        # TODO(dcramer): kill data when fully migrated
        # if self.path != blob.path:
        #     get_storage_class(self.storage)(
        #         **self.storage_options
        #     ).delete(self.path)

        # TODO(dcramer): kill data when fully migrated, i.e. also pass
        # checksum=None, path=None, storage=None, storage_options={}
        self.update(blob=blob)
def create_or_update(self, **kwargs):
    """
    Similar to get_or_create, either updates a row or creates it.

    The lookup is given by ``kwargs``; the values to write are given by
    the ``defaults`` keyword (popped from ``kwargs``).

    The result will be (rows affected, False), if the row was not created,
    or (instance, True) if the object is new.
    """
    defaults = kwargs.pop('defaults', {})

    # Optimistic path: try to update matching rows without any locking.
    affected = self.filter(**kwargs).update(**defaults)
    if affected:
        return affected, False

    lock_key = self.__make_key('lock', kwargs)

    # Nothing was updated: take the lock and attempt to create the row.
    with Lock(lock_key) as lock:
        if lock.was_locked:
            # The lock reports it was already taken, so retry the update
            # rather than creating a row ourselves.
            affected = self.filter(**kwargs).update(**defaults)
            return affected, False

        # Build the create arguments from the lookup plus *every* default.
        # Expression defaults are resolved against a blank model instance;
        # plain values are passed through as-is. Previously only the
        # expression defaults were copied, so plain defaults were silently
        # dropped from newly created rows.
        create_kwargs = dict(kwargs)
        for name, value in defaults.items():
            if isinstance(value, ExpressionNode):
                value = resolve_expression_node(self.model(), value)
            create_kwargs[name] = value
        return self.create(**create_kwargs), True
def save(self, *args, **kwargs):
    """
    Save the project, generating a slug first when none is set.

    Slug generation and the save that follows it both happen under the
    ``'slug:project'`` lock; a project that already has a slug is saved
    without locking.
    """
    if self.slug:
        super(Project, self).save(*args, **kwargs)
        return

    with Lock('slug:project'):
        slugify_instance(self, self.name, organization=self.organization)
        super(Project, self).save(*args, **kwargs)
def post_process(self, group, event, is_new, is_sample, **kwargs):
    """
    Run ``_post_process`` for the group under a per-group mail lock.

    The group is re-fetched by primary key once the lock is held. If
    ``UnableToGetLock`` is raised, the event is skipped silently.
    """
    mail_lock_key = 'lock_mail:%s' % group.id
    try:
        with Lock(mail_lock_key, timeout=0.5):
            fresh_group = Group.objects.get(pk=group.pk)
            self._post_process(fresh_group, event, is_new, is_sample, **kwargs)
    except UnableToGetLock:
        # Best effort: give up quietly when the lock is unavailable.
        return
def test_basic(self):
    """
    Walk a ``Lock`` through acquire, reentrant acquire, release and a
    redundant release, checking ``held`` and ``seconds_remaining`` at
    each step.
    """
    timeout = 10
    lock = Lock('basic', timeout=timeout)

    # A fresh lock is not held and has no time remaining.
    assert lock.held is False
    # Compare by value: ``is 0`` only works through CPython's small-int
    # caching and triggers a SyntaxWarning on newer interpreters.
    assert lock.seconds_remaining == 0

    assert lock.acquire() is True
    assert timeout > lock.seconds_remaining > (timeout - 0.1)
    assert lock.held is True

    assert lock.acquire() is True  # ensure reentrancy

    assert lock.release() is True
    assert lock.seconds_remaining == 0
    assert lock.held is False

    # Releasing a lock that is no longer held reports failure.
    assert lock.release() is False
def schedule(self, deadline, chunk=1000):
    """
    Move due timelines from the "waiting" to the "ready" schedule on
    every host, yielding a ``ScheduleEntry`` for each one moved.

    For each host a per-host lock is taken (``nowait=True``) and entries
    scored between 0 and ``deadline`` are fetched ``chunk`` at a time.
    Each batch is moved between the two sorted sets in one transaction;
    the entries are yielded before that transaction is executed.

    Raises ``RuntimeError`` if a host still has work left after 1000
    batches.
    """
    # TODO: This doesn't lead to a fair balancing of workers, ideally each
    # scheduling task would be executed by a different process for each
    # host. There is also no failure isolation here, so a single shard
    # failure will cause the remainder of the shards to not be able to be
    # scheduled.
    for host in self.cluster.hosts:
        client = self.cluster.get_local_client(host)

        with Lock('{0}:s:{1}'.format(self.namespace, host), nowait=True, timeout=30):
            # Cap the number of batches to prevent a runaway loop. This
            # limits the number of items handled per scheduling interval
            # to chunk * maximum_iterations.
            maximum_iterations = 1000
            for _ in xrange(maximum_iterations):
                due = client.zrangebyscore(
                    make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING),
                    min=0,
                    max=deadline,
                    withscores=True,
                    start=0,
                    num=chunk,
                )

                # XXX: Redis errors on an empty transaction, so stop as
                # soon as there is nothing to move. (This happens on the
                # first pass when there is no work, or on a later pass
                # when the previous batch was exactly ``chunk`` items.)
                if not due:
                    break

                with client.pipeline() as pipe:
                    pipe.multi()

                    pipe.zrem(
                        make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING),
                        *[entry_key for entry_key, score in due]
                    )

                    # zadd takes a flat score, member, score, member, ...
                    # argument list.
                    ready_args = []
                    for entry_key, score in due:
                        ready_args.extend((score, entry_key))
                    pipe.zadd(
                        make_schedule_key(self.namespace, SCHEDULE_STATE_READY),
                        *ready_args
                    )

                    for entry_key, score in due:
                        yield ScheduleEntry(entry_key, score)

                    pipe.execute()

                # A short batch means the waiting set is drained.
                if len(due) < chunk:
                    break
            else:
                raise RuntimeError('loop exceeded maximum iterations (%s)' % (maximum_iterations,))
def test_context(self):
    """
    Using a ``Lock`` as a context manager yields the lock itself, holds
    it inside the block and releases it on exit.
    """
    lock = Lock('ctx', timeout=10)

    with lock as entered:
        assert lock is entered
        assert lock.held is True

    assert lock.held is False
def save(self, *args, **kwargs):
    """
    Save the organization, generating a slug first when none is set.

    Slug generation (excluding ``RESERVED_ORGANIZATION_SLUGS``) and the
    save that follows it both happen under the ``'slug:organization'``
    lock; an organization that already has a slug is saved without
    locking.
    """
    if self.slug:
        super(Organization, self).save(*args, **kwargs)
        return

    with Lock('slug:organization'):
        slugify_instance(self, self.name, reserved=RESERVED_ORGANIZATION_SLUGS)
        super(Organization, self).save(*args, **kwargs)
def process_pending():
    """
    Process pending buffers.

    Runs ``app.buffer.process_pending()`` under a non-waiting lock; if
    ``UnableToGetLock`` is raised, the call is skipped silently.
    """
    from sentry import app

    try:
        with Lock('buffer:process_pending', nowait=True, timeout=60):
            app.buffer.process_pending()
    except UnableToGetLock:
        # Best effort: give up quietly when the lock is unavailable.
        return
def delete(self, key):
    """
    Remove the timeline for ``key`` together with its digest, its
    last-processed timestamp and its entries in both schedule sets.

    All commands are queued on one pipeline and executed while holding
    the timeline lock (taken with ``nowait=True``).
    """
    timeline_key = make_timeline_key(self.namespace, key)
    client = self.cluster.get_local_client_for_key(timeline_key)

    with Lock(timeline_key, nowait=True, timeout=30):
        with client.pipeline() as pipe:
            # Truncate the timeline first, then its digest.
            for target in (timeline_key, make_digest_key(timeline_key)):
                truncate_timeline(pipe, (target,), (0, timeline_key))

            pipe.delete(make_last_processed_timestamp_key(timeline_key))

            # Drop the key from the ready schedule, then the waiting one.
            for state in (SCHEDULE_STATE_READY, SCHEDULE_STATE_WAITING):
                pipe.zrem(make_schedule_key(self.namespace, state), key)

            pipe.execute()
def test_basic(self):
    """
    Walk a ``Lock`` through acquire, a rejected second acquire, release
    and a redundant release, checking ``held`` and ``seconds_remaining``
    at each step.
    """
    timeout = 10
    lock = Lock('basic', timeout=timeout)

    # A fresh lock is not held and has no time remaining.
    assert lock.held is False
    # Compare by value: ``is 0`` only works through CPython's small-int
    # caching and triggers a SyntaxWarning on newer interpreters.
    assert lock.seconds_remaining == 0

    assert lock.acquire() is True
    assert timeout > lock.seconds_remaining > (timeout - 0.1)
    assert lock.held is True

    # Acquiring a lock that is already held must fail loudly.
    with pytest.raises(LockAlreadyHeld):
        lock.acquire()

    assert lock.release() is True
    assert lock.seconds_remaining == 0
    assert lock.held is False

    # Releasing a lock that is no longer held reports failure.
    assert lock.release() is False
def from_file(cls, fileobj):
    """
    Retrieve a FileBlob instance for the given file.

    If not already present, this will cause it to be stored.

    The file is iterated once to compute its size and SHA-1 checksum.
    Under a lock keyed on that checksum, an existing blob with the same
    checksum is returned if there is one; otherwise a new blob is
    written to the configured storage and saved.

    >>> blob = FileBlob.from_file(fileobj)
    """
    total_size = 0
    hasher = sha1('')
    for chunk in fileobj:
        total_size += len(chunk)
        hasher.update(chunk)
    checksum = hasher.hexdigest()

    # TODO(dcramer): the database here is safe, but if this lock expires
    # and duplicate files are uploaded then we need to prune one
    with Lock('fileblob:upload:{}'.format(checksum), timeout=600):
        # Reuse an already stored blob with identical contents.
        try:
            return FileBlob.objects.get(checksum=checksum)
        except FileBlob.DoesNotExist:
            pass

        blob = cls(
            size=total_size,
            checksum=checksum,
            storage=settings.SENTRY_FILESTORE,
            storage_options=settings.SENTRY_FILESTORE_OPTIONS,
        )
        blob.path = cls.generate_unique_path(blob.timestamp)

        blob.get_storage().save(blob.path, fileobj)
        blob.save()

    metrics.timing('filestore.blob-size', blob.size)
    return blob
def digest(self, key, minimum_delay=None):
    """
    Move the timeline for ``key`` into its digest set, yield its records
    once, then reschedule or unschedule the timeline.

    This is a generator that yields exactly one value: an iterator over at
    most ``self.capacity`` decoded ``Record`` objects. The bookkeeping
    after the ``yield`` only runs when the generator is resumed.
    (Presumably this function is wrapped as a context manager by a
    decorator outside this view -- confirm.)

    :param key: timeline identifier; combined with ``self.namespace`` to
        build the Redis keys used here.
    :param minimum_delay: seconds added to the current time when the
        timeline is put back on the waiting schedule; defaults to
        ``self.minimum_delay``.
    :raises InvalidState: if ``key`` has no score in the ready schedule.
    """
    if minimum_delay is None:
        minimum_delay = self.minimum_delay

    timeline_key = make_timeline_key(self.namespace, key)
    digest_key = make_digest_key(timeline_key)

    connection = self.cluster.get_local_client_for_key(timeline_key)

    with Lock(timeline_key, nowait=True, timeout=30):
        # Check to ensure the timeline is in the correct state ("ready")
        # before sending. This acts as a throttling mechanism to prevent
        # sending a digest before its next scheduled delivery time in a
        # race condition scenario.
        if connection.zscore(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key) is None:
            raise InvalidState('Timeline is not in the ready state.')

        with connection.pipeline() as pipeline:
            pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?

            if pipeline.exists(digest_key):
                # A digest set already exists: merge the timeline into it,
                # keeping the larger score for members present in both,
                # then drop the timeline.
                pipeline.multi()
                pipeline.zunionstore(digest_key, (timeline_key, digest_key), aggregate='max')
                pipeline.delete(timeline_key)
                pipeline.expire(digest_key, self.ttl)
                pipeline.execute()
            else:
                # No digest set yet: the timeline simply becomes the digest.
                pipeline.multi()
                pipeline.rename(timeline_key, digest_key)
                pipeline.expire(digest_key, self.ttl)
                try:
                    pipeline.execute()
                except ResponseError as error:
                    # Only the "no such key" error is tolerated here;
                    # anything else is re-raised.
                    if 'no such key' in str(error):
                        logger.debug('Could not move timeline for digestion (likely has no contents.)')
                    else:
                        raise

        # XXX: This must select all records, even though not all of them will
        # be returned if they exceed the capacity, to ensure that all records
        # will be garbage collected.
        records = connection.zrevrange(digest_key, 0, -1, withscores=True)
        if not records:
            logger.info('Retrieved timeline containing no records.')

        def get_records_for_digest():
            # Fetch every record payload in one non-transactional pipeline
            # round trip, then pair the responses back up with the
            # (record_key, timestamp) entries they belong to.
            with connection.pipeline(transaction=False) as pipeline:
                for record_key, timestamp in records:
                    pipeline.get(make_record_key(timeline_key, record_key))

                for (record_key, timestamp), value in zip(records, pipeline.execute()):
                    # We have to handle failures if the key does not exist --
                    # this could happen due to evictions or race conditions
                    # where the record was added to a timeline while it was
                    # already being digested.
                    if value is None:
                        logger.warning('Could not retrieve event for timeline.')
                    else:
                        yield Record(record_key, self.codec.decode(value), timestamp)

        # Hand the caller a lazy iterator capped at ``self.capacity`` records.
        # NOTE(review): nothing below runs unless the generator is resumed
        # normally; there is no try/finally around this yield -- confirm that
        # is intended.
        yield itertools.islice(get_records_for_digest(), self.capacity)

        def cleanup_records(pipeline):
            # Queue deletion of the digest set and of every record payload
            # that was listed in it (not only those within capacity).
            record_keys = [make_record_key(timeline_key, record_key) for record_key, score in records]
            pipeline.delete(digest_key, *record_keys)

        def reschedule():
            # Clean up, move the timeline from the ready schedule back to
            # the waiting schedule (due ``minimum_delay`` seconds from now)
            # and record the current time as the last processed timestamp.
            with connection.pipeline() as pipeline:
                pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?
                pipeline.multi()

                cleanup_records(pipeline)
                pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                pipeline.zadd(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), time.time() + minimum_delay, key)
                pipeline.setex(make_last_processed_timestamp_key(timeline_key), self.ttl, int(time.time()))
                pipeline.execute()

        def unschedule():
            with connection.pipeline() as pipeline:
                # Watch the timeline to ensure that no other transactions add
                # events to the timeline while we are trying to delete it.
                pipeline.watch(timeline_key)
                pipeline.multi()
                # The cardinality is read through ``connection`` rather than
                # the pipeline, so the result is available immediately
                # instead of being queued in the transaction.
                if connection.zcard(timeline_key) == 0:
                    cleanup_records(pipeline)
                    pipeline.delete(make_last_processed_timestamp_key(timeline_key))
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), key)
                    pipeline.execute()

        # If there were records in the digest, we need to schedule it so
        # that we schedule any records that were added during digestion. If
        # there were no items, we can try to remove the timeline from the
        # digestion schedule.
        if records:
            reschedule()
        else:
            try:
                unschedule()
            except WatchError:
                logger.debug('Could not remove timeline from schedule, rescheduling instead')
                reschedule()
def delete(self, *args, **kwargs):
    """
    Delete this blob, removing its stored file first when it has a path.

    Both steps run under the ``'fileblob:upload:<checksum>'`` lock, the
    same key format ``from_file`` uses for uploads.
    """
    with Lock('fileblob:upload:{}'.format(self.checksum), timeout=600):
        if self.path:
            # commit=False: the row is about to be deleted anyway.
            self.deletefile(commit=False)
        super(FileBlob, self).delete(*args, **kwargs)