def delete_project(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import Project, ProjectStatus

    try:
        instance = Project.objects.get(id=object_id)
    except Project.DoesNotExist:
        return

    if instance.status == ProjectStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=Project,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_project.apply_async(
            kwargs={'object_id': object_id, 'transaction_id': transaction_id},
            countdown=15,
        )
def delete_groups(object_ids, transaction_id=None, eventstream_state=None, **kwargs):
    from sentry import deletions, eventstream
    from sentry.models import Group

    transaction_id = transaction_id or uuid4().hex

    max_batch_size = 100
    current_batch, rest = object_ids[:max_batch_size], object_ids[max_batch_size:]

    task = deletions.get(
        model=Group,
        query={
            'id__in': current_batch,
        },
        transaction_id=transaction_id,
    )
    has_more = task.chunk()
    if has_more or rest:
        delete_groups.apply_async(
            kwargs={
                'object_ids': object_ids if has_more else rest,
                'transaction_id': transaction_id,
                'eventstream_state': eventstream_state,
            },
            countdown=15,
        )
    else:
        # all groups have been deleted
        if eventstream_state:
            eventstream.end_delete_groups(eventstream_state)
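# Illustrative sketch (not Sentry code): the batch-and-requeue pattern used by
# delete_groups above, with a plain loop standing in for Celery's
# apply_async(countdown=15). All names below are hypothetical.
MAX_BATCH_SIZE = 100

def delete_in_batches(object_ids, delete_batch):
    # Process at most MAX_BATCH_SIZE ids per pass and keep going until done.
    remaining = list(object_ids)
    while remaining:
        current_batch, remaining = remaining[:MAX_BATCH_SIZE], remaining[MAX_BATCH_SIZE:]
        delete_batch(current_batch)  # stands in for deletions.get(...).chunk()

# Usage: an in-memory "table" shows the slicing behaviour.
rows = {i: 'row-%d' % i for i in range(250)}
delete_in_batches(list(rows), lambda batch: [rows.pop(i) for i in batch])
assert not rows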
def delete_api_application(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import ApiApplication, ApiApplicationStatus

    try:
        instance = ApiApplication.objects.get(id=object_id)
    except ApiApplication.DoesNotExist:
        return

    if instance.status == ApiApplicationStatus.active:
        raise DeleteAborted

    task = deletions.get(
        model=ApiApplication,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_api_application.apply_async(
            kwargs={'object_id': object_id, 'transaction_id': transaction_id},
            countdown=15,
        )
def generic_delete(app_label, model_name, object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions

    model = get_model(app_label, model_name)

    try:
        instance = model.objects.get(id=object_id)
    except model.DoesNotExist:
        return

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=model,
        actor_id=actor_id,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        generic_delete.apply_async(
            kwargs={
                'app_label': app_label,
                'model_name': model_name,
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def delete_organization(object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import Organization, OrganizationStatus

    try:
        instance = Organization.objects.get(id=object_id)
    except Organization.DoesNotExist:
        return

    if instance.status == OrganizationStatus.VISIBLE:
        raise DeleteAborted

    # compat: can be removed after we switch to scheduled deletions
    if instance.status != OrganizationStatus.DELETION_IN_PROGRESS:
        pending_delete.send(
            sender=type(instance),
            instance=instance,
        )

    task = deletions.get(
        model=Organization,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
        actor_id=actor_id,
    )
    has_more = task.chunk()
    if has_more:
        delete_organization.apply_async(
            kwargs={
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def create_deletion_task(days, project_id, model, dtfield, order_by):
    from sentry import models
    from sentry import deletions
    from sentry import similarity

    query = {
        '{}__lte'.format(dtfield): (timezone.now() - timedelta(days=days)),
    }

    if project_id:
        if 'project' in model._meta.get_all_field_names():
            query['project'] = project_id
        else:
            query['project_id'] = project_id

    skip_models = [
        # Handled by other parts of cleanup
        models.Event,
        models.EventMapping,
        models.Group,
        models.GroupEmailThread,
        models.GroupRuleStatus,
        # Handled by TTL
        similarity.features,
    ] + [b[0] for b in EXTRA_BULK_QUERY_DELETES]

    task = deletions.get(
        model=model,
        query=query,
        order_by=order_by,
        skip_models=skip_models,
        transaction_id=uuid4().hex,
    )

    return task
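# Illustrative sketch (not Sentry code): what the dynamic cutoff filter built by
# create_deletion_task above expands to. The helper name and the project_field
# default are hypothetical; only stdlib datetime is used here.
from datetime import datetime, timedelta as _timedelta, timezone as dt_timezone

def build_cutoff_query(dtfield, days, project_id=None, project_field='project_id'):
    # e.g. dtfield='last_seen', days=30 -> {'last_seen__lte': <30 days ago>}
    query = {
        '{}__lte'.format(dtfield): datetime.now(dt_timezone.utc) - _timedelta(days=days),
    }
    if project_id:
        query[project_field] = project_id
    return query

# Usage:
# build_cutoff_query('last_seen', 30, project_id=1)
# -> {'last_seen__lte': datetime(...), 'project_id': 1}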
def multiprocess_worker(task_queue):
    # Configure within each Process
    import logging
    from sentry.utils.imports import import_string

    logger = logging.getLogger('sentry.cleanup')

    configured = False

    while True:
        j = task_queue.get()
        if j == _STOP_WORKER:
            task_queue.task_done()
            return

        # On first task, configure Sentry environment
        if not configured:
            from sentry.runner import configure
            configure()

            from sentry import models
            from sentry import deletions
            from sentry import similarity

            skip_models = [
                # Handled by other parts of cleanup
                models.Event,
                models.EventMapping,
                models.EventAttachment,
                models.UserReport,
                models.Group,
                models.GroupEmailThread,
                models.GroupRuleStatus,
                models.GroupHashTombstone,
                # Handled by TTL
                similarity.features,
            ] + [b[0] for b in EXTRA_BULK_QUERY_DELETES]

            configured = True

        model, chunk = j
        model = import_string(model)

        try:
            task = deletions.get(
                model=model,
                query={'id__in': chunk},
                skip_models=skip_models,
                transaction_id=uuid4().hex,
            )

            while True:
                if not task.chunk():
                    break
        except Exception as e:
            logger.exception(e)
        finally:
            task_queue.task_done()
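# Illustrative sketch (not Sentry code): the queue protocol multiprocess_worker
# above expects -- (model_path, id_chunk) tuples plus a stop sentinel. The queue
# type, sentinel value, and model path below are assumptions.
import multiprocessing

STOP_WORKER = '__stop__'

def worker(task_queue):
    while True:
        job = task_queue.get()
        if job == STOP_WORKER:
            task_queue.task_done()
            return
        model_path, id_chunk = job
        # a real worker would resolve model_path and delete id_chunk here
        task_queue.task_done()

if __name__ == '__main__':
    queue = multiprocessing.JoinableQueue()
    workers = [multiprocessing.Process(target=worker, args=(queue,)) for _ in range(2)]
    for w in workers:
        w.start()
    for chunk in ([1, 2, 3], [4, 5, 6]):
        queue.put(('sentry.models.Event', chunk))
    for _ in workers:
        queue.put(STOP_WORKER)
    queue.join()  # wait until every task_done() has been called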
def delete_tag_key(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import TagKey

    task = deletions.get(
        model=TagKey,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_tag_key.apply_async(
            kwargs={'object_id': object_id, 'transaction_id': transaction_id},
            countdown=15,
        )
def generic_delete(app_label, model_name, object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import User

    model = get_model(app_label, model_name)

    try:
        instance = model.objects.get(id=object_id)
    except model.DoesNotExist:
        return

    if instance.status != ObjectStatus.DELETION_IN_PROGRESS:
        pending_delete.send(
            sender=type(instance),
            instance=instance,
            actor=User.objects.get(id=actor_id) if actor_id else None,
        )

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=model,
        actor_id=actor_id,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        generic_delete.apply_async(
            kwargs={
                'app_label': app_label,
                'model_name': model_name,
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def run_deletion(deletion_id):
    from sentry import deletions
    from sentry.models import ScheduledDeletion

    try:
        deletion = ScheduledDeletion.objects.get(
            id=deletion_id,
        )
    except ScheduledDeletion.DoesNotExist:
        return

    if deletion.aborted:
        raise DeleteAborted

    if not deletion.in_progress:
        actor = deletion.get_actor()
        instance = deletion.get_instance()
        with transaction.atomic():
            deletion.update(in_progress=True)
            pending_delete.send(
                sender=type(instance),
                instance=instance,
                actor=actor,
            )

    task = deletions.get(
        model=deletion.get_model(),
        query={
            'id': deletion.object_id,
        },
        transaction_id=deletion.guid,
        actor_id=deletion.actor_id,
    )
    has_more = task.chunk()
    if has_more:
        run_deletion.apply_async(
            kwargs={'deletion_id': deletion_id},
            countdown=15,
        )
    deletion.delete()
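# Illustrative sketch (not Sentry code): how a pending_delete-style signal lets
# other code react before run_deletion starts removing rows. The signal
# instance and receiver below are hypothetical; only django.dispatch.Signal,
# connect(), and send() are real Django APIs.
import django.dispatch

pending_delete_example = django.dispatch.Signal()

def log_pending_delete(sender, instance, actor=None, **kwargs):
    print('about to delete %r (requested by %r)' % (instance, actor))

pending_delete_example.connect(log_pending_delete)
pending_delete_example.send(sender=dict, instance={'id': 1}, actor='admin')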
def delete_alert_rule(alert_rule_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.incidents.models import AlertRule, AlertRuleStatus

    try:
        instance = AlertRule.objects_with_deleted.get(id=alert_rule_id)
    except AlertRule.DoesNotExist:
        return

    if instance.status not in (
        AlertRuleStatus.DELETION_IN_PROGRESS.value,
        AlertRuleStatus.PENDING_DELETION.value,
    ):
        raise DeleteAborted

    task = deletions.get(
        model=AlertRule, query={"id": alert_rule_id}, transaction_id=transaction_id or uuid4().hex
    )
    has_more = task.chunk()
    if has_more:
        delete_alert_rule.apply_async(
            kwargs={"alert_rule_id": alert_rule_id, "transaction_id": transaction_id}, countdown=15
        )
def delete_organization_integration(object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import OrganizationIntegration, Repository

    try:
        instance = OrganizationIntegration.objects.get(id=object_id)
    except OrganizationIntegration.DoesNotExist:
        return

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    # dissociate repos from that integration
    Repository.objects.filter(
        organization_id=instance.organization_id,
        integration_id=instance.integration_id,
    ).update(integration_id=None)

    task = deletions.get(
        model=OrganizationIntegration,
        actor_id=actor_id,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_organization_integration.apply_async(
            kwargs={
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def delete_api_application(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import ApiApplication, ApiApplicationStatus

    try:
        instance = ApiApplication.objects.get(id=object_id)
    except ApiApplication.DoesNotExist:
        return

    if instance.status == ApiApplicationStatus.active:
        raise DeleteAborted

    task = deletions.get(
        model=ApiApplication,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_api_application.apply_async(
            kwargs={"object_id": object_id, "transaction_id": transaction_id},
            countdown=15,
        )
def delete_project(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import Project, ProjectStatus

    try:
        instance = Project.objects.get(id=object_id)
    except Project.DoesNotExist:
        return

    if instance.status == ProjectStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=Project,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_project.apply_async(
            kwargs={"object_id": object_id, "transaction_id": transaction_id},
            countdown=15,
        )
def generic_delete(app_label, model_name, object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import User

    model = apps.get_model(app_label, model_name)

    try:
        instance = model.objects.get(id=object_id)
    except model.DoesNotExist:
        return

    if instance.status != ObjectStatus.DELETION_IN_PROGRESS:
        pending_delete.send(
            sender=type(instance),
            instance=instance,
            actor=User.objects.get(id=actor_id) if actor_id else None,
        )

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=model,
        actor_id=actor_id,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        generic_delete.apply_async(
            kwargs={
                "app_label": app_label,
                "model_name": model_name,
                "object_id": object_id,
                "transaction_id": transaction_id,
                "actor_id": actor_id,
            },
            countdown=15,
        )
def delete_repository(object_id, transaction_id=None, actor_id=None, **kwargs):
    # TODO this method is no longer in use and should be removed when jobs are
    # no longer being enqueued for it.
    from sentry import deletions
    from sentry.models import Repository, User

    try:
        instance = Repository.objects.get(id=object_id)
    except Repository.DoesNotExist:
        return

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    # compat: can be removed after we switch to scheduled deletions
    if instance.status != ObjectStatus.DELETION_IN_PROGRESS:
        pending_delete.send(
            sender=type(instance),
            instance=instance,
            actor=User.objects.get(id=actor_id) if actor_id else None,
        )

    task = deletions.get(
        model=Repository,
        actor_id=actor_id,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_repository.apply_async(
            kwargs={
                "object_id": object_id,
                "transaction_id": transaction_id,
                "actor_id": actor_id,
            },
            countdown=15,
        )
def delete_repository(object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import Repository, User

    try:
        instance = Repository.objects.get(id=object_id)
    except Repository.DoesNotExist:
        return

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    # compat: can be removed after we switch to scheduled deletions
    if instance.status != ObjectStatus.DELETION_IN_PROGRESS:
        pending_delete.send(
            sender=type(instance),
            instance=instance,
            actor=User.objects.get(id=actor_id) if actor_id else None,
        )

    task = deletions.get(
        model=Repository,
        actor_id=actor_id,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_repository.apply_async(
            kwargs={
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def run_deletion(deletion_id):
    from sentry import deletions
    from sentry.models import ScheduledDeletion

    try:
        deletion = ScheduledDeletion.objects.get(id=deletion_id)
    except ScheduledDeletion.DoesNotExist:
        return

    if deletion.aborted:
        raise DeleteAborted

    if not deletion.in_progress:
        actor = deletion.get_actor()
        instance = deletion.get_instance()
        with transaction.atomic():
            deletion.update(in_progress=True)
            pending_delete.send(
                sender=type(instance),
                instance=instance,
                actor=actor,
            )

    task = deletions.get(
        model=deletion.get_model(),
        query={
            'id': deletion.object_id,
        },
        transaction_id=deletion.guid,
        actor_id=deletion.actor_id,
    )
    has_more = task.chunk()
    if has_more:
        run_deletion.apply_async(
            kwargs={'deletion_id': deletion_id},
            countdown=15,
        )
    deletion.delete()
def delete_organization_integration(object_id, transaction_id=None, actor_id=None, **kwargs):
    from sentry import deletions
    from sentry.models import OrganizationIntegration, Repository

    try:
        instance = OrganizationIntegration.objects.get(id=object_id)
    except OrganizationIntegration.DoesNotExist:
        return

    if instance.status == ObjectStatus.VISIBLE:
        raise DeleteAborted

    # dissociate repos from that integration
    Repository.objects.filter(
        organization_id=instance.organization_id,
        integration_id=instance.integration_id,
    ).update(
        integration_id=None,
    )

    task = deletions.get(
        model=OrganizationIntegration,
        actor_id=actor_id,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_organization_integration.apply_async(
            kwargs={
                'object_id': object_id,
                'transaction_id': transaction_id,
                'actor_id': actor_id,
            },
            countdown=15,
        )
def delete_tag_key(object_id, model=None, transaction_id=None, **kwargs):
    from sentry import deletions

    # TODO(brett): remove this (and make model a normal arg) after deploy
    if model is None:
        # if the model wasn't sent we can assume it's from legacy code
        from sentry.tagstore.legacy.models import TagKey as model

    task = deletions.get(
        model=model,
        query={
            'id': object_id,
        },
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_tag_key.apply_async(
            kwargs={'object_id': object_id, 'model': model, 'transaction_id': transaction_id},
            countdown=15,
        )
def delete_team(object_id, transaction_id=None, **kwargs):
    from sentry import deletions
    from sentry.incidents.models import AlertRule
    from sentry.models import Rule, Team, TeamStatus

    try:
        instance = Team.objects.get(id=object_id)
    except Team.DoesNotExist:
        return

    if instance.status == TeamStatus.VISIBLE:
        raise DeleteAborted

    task = deletions.get(
        model=Team, query={"id": object_id}, transaction_id=transaction_id or uuid4().hex
    )

    AlertRule.objects.filter(owner_id=instance.actor_id).update(owner=None)
    Rule.objects.filter(owner_id=instance.actor_id).update(owner=None)

    has_more = task.chunk()
    if has_more:
        delete_team.apply_async(
            kwargs={"object_id": object_id, "transaction_id": transaction_id}, countdown=15
        )
def delete_api_application(object_id, transaction_id=None, **kwargs):
    # TODO this method is no longer in use and should be removed when jobs are
    # no longer being enqueued for it.
    from sentry import deletions
    from sentry.models import ApiApplication, ApiApplicationStatus

    try:
        instance = ApiApplication.objects.get(id=object_id)
    except ApiApplication.DoesNotExist:
        return

    if instance.status == ApiApplicationStatus.active:
        raise DeleteAborted

    task = deletions.get(
        model=ApiApplication,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
    )
    has_more = task.chunk()
    if has_more:
        delete_api_application.apply_async(
            kwargs={"object_id": object_id, "transaction_id": transaction_id},
            countdown=15,
        )
def delete_organization(object_id, transaction_id=None, actor_id=None, **kwargs):
    # TODO(mark) remove this task once all in flight jobs have been processed.
    from sentry import deletions
    from sentry.models import Organization, OrganizationStatus

    try:
        instance = Organization.objects.get(id=object_id)
    except Organization.DoesNotExist:
        return

    if instance.status == OrganizationStatus.VISIBLE:
        raise DeleteAborted

    # compat: can be removed after we switch to scheduled deletions
    if instance.status != OrganizationStatus.DELETION_IN_PROGRESS:
        pending_delete.send(sender=type(instance), instance=instance)

    task = deletions.get(
        model=Organization,
        query={"id": object_id},
        transaction_id=transaction_id or uuid4().hex,
        actor_id=actor_id,
    )
    has_more = task.chunk()
    if has_more:
        delete_organization.apply_async(
            kwargs={
                "object_id": object_id,
                "transaction_id": transaction_id,
                "actor_id": actor_id,
            },
            countdown=15,
        )
def cleanup(days, project, concurrency, silent, model, router, timed):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted. The default for
    this is 30 days. In the default setting all projects will be truncated,
    but if you have a specific project you want to limit this to, this can be
    done with the `--project` flag which accepts a project ID or a string
    with the form `org/project` where both are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    from threading import Thread

    from django.db import router as db_router

    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry import deletions
    from sentry import models

    if timed:
        import time
        from sentry.utils import metrics
        start_time = time.time()

    # list of models which this query is restricted to
    model_list = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not model_list:
            return False
        return model.__name__.lower() not in model_list

    # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations)
    # (model, datetime_field, order_by)
    BULK_QUERY_DELETES = (
        (models.GroupEmailThread, 'date', None),
        (models.GroupRuleStatus, 'date_added', None),
        (models.GroupTagValue, 'last_seen', None),
        (models.TagValue, 'last_seen', None),
        (models.EventTag, 'date_added', 'date_added'),
    )

    # Deletions that use the `deletions` code path (which handles their child relations)
    # (model, datetime_field, order_by)
    DELETES = (
        (models.Event, 'datetime', None),
        (models.Group, 'last_seen', 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(models.LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        models.LostPasswordHash.objects.filter(
            date_added__lte=timezone.now() - timedelta(hours=48)
        ).delete()

    for model in [models.ApiGrant, models.ApiToken]:
        if not silent:
            click.echo('Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection", err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")

        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation", err=True)

    for model, dtfield, order_by in BULK_QUERY_DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            ).execute()

    for model, dtfield, order_by in DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            query = {
                '{}__lte'.format(dtfield): (timezone.now() - timedelta(days=days)),
            }

            if project_id:
                if 'project' in model._meta.get_all_field_names():
                    query['project'] = project_id
                else:
                    query['project_id'] = project_id

            task = deletions.get(
                model=model,
                query=query,
                order_by=order_by,
                transaction_id=uuid4().hex,
            )

            def _chunk_until_complete(num_shards=None, shard_id=None):
                has_more = True
                while has_more:
                    has_more = task.chunk(num_shards=num_shards, shard_id=shard_id)

            if concurrency > 1:
                threads = []
                for shard_id in range(concurrency):
                    t = Thread(
                        target=(
                            lambda shard_id=shard_id: _chunk_until_complete(
                                num_shards=concurrency, shard_id=shard_id)
                        )
                    )
                    t.start()
                    threads.append(t)

                for t in threads:
                    t.join()
            else:
                _chunk_until_complete()

    # EventMapping is fairly expensive and is special cased as it's likely you
    # won't need a reference to an event for nearly as long
    if not silent:
        click.echo("Removing expired values for EventMapping")
    if is_filtered(models.EventMapping):
        if not silent:
            click.echo('>> Skipping EventMapping')
    else:
        BulkDeleteQuery(
            model=models.EventMapping,
            dtfield='date_added',
            days=min(days, 7),
            project_id=project_id,
            order_by='-date_added',
        ).execute()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(models.FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    if timed:
        duration = int(time.time() - start_time)
        metrics.timing('cleanup.duration', duration, instance=router)
        click.echo("Clean up took %s second(s)." % duration)
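# Illustrative sketch (not Sentry code): the num_shards / shard_id fan-out that
# cleanup() drives through task.chunk(num_shards=..., shard_id=...), simulated
# by partitioning ids with a modulo and draining each shard in its own thread.
# All names below are hypothetical.
from threading import Thread

def drain_shard(ids, num_shards, shard_id, batch_size=100):
    # Each shard only touches ids assigned to it, so shards never overlap.
    shard = [i for i in ids if i % num_shards == shard_id]
    while shard:
        batch, shard = shard[:batch_size], shard[batch_size:]
        # a real shard would delete `batch` here; "has_more" is bool(shard)

def drain_concurrently(ids, concurrency):
    threads = [
        Thread(target=drain_shard, args=(ids, concurrency, shard_id))
        for shard_id in range(concurrency)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

# Usage:
drain_concurrently(list(range(1000)), concurrency=4)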