Example #1
File: cleanup.py Project: gdyuldin/sentry
    def generic_delete(self, model, dtfield, days=None, chunk_size=1000):
        if days is None:
            days = self.days

        cutoff = timezone.now() - timedelta(days=days)

        qs = model.objects.filter(**{'%s__lte' % (dtfield, ): cutoff})
        if self.project:
            qs = qs.filter(project=self.project)

        # XXX: we step through because the deletion collector will pull all
        # relations into memory
        count = 0
        while qs.exists():
            # TODO(dcramer): change this to delete by chunks of IDs and utilize
            # bulk_delete_objects
            self.stdout.write("Removing {model} chunk {count}\n".format(
                model=model.__name__,
                count=count,
            ))
            if self.concurrency > 1:
                worker_pool = ThreadPool(workers=self.concurrency)
                for obj in qs[:chunk_size].iterator():
                    worker_pool.add(obj.id, delete_object, [obj])
                    count += 1
                worker_pool.join()
                del worker_pool
            else:
                for obj in qs[:chunk_size].iterator():
                    delete_object(obj)
                    count += 1
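The method above deletes matching rows in fixed-size chunks, re-checking qs.exists() after each pass, and fans the per-object deletes out to a worker pool when concurrency > 1. A rough, self-contained sketch of the same chunk-and-fan-out shape, with concurrent.futures standing in for the ThreadPool used above (delete_object and chunked_delete here are illustrative stand-ins, not Sentry code):

# Illustrative sketch only: the chunked fan-out pattern over a plain list,
# with ThreadPoolExecutor standing in for the ThreadPool used above.
from concurrent.futures import ThreadPoolExecutor

def delete_object(obj):
    # stand-in for the real delete_object(), which removes a model instance
    print("deleting %r" % (obj,))

def chunked_delete(items, chunk_size=1000, concurrency=1):
    # take a chunk, delete it, re-check whether anything is left, repeat
    while items:
        chunk, items = items[:chunk_size], items[chunk_size:]
        if concurrency > 1:
            with ThreadPoolExecutor(max_workers=concurrency) as pool:
                for obj in chunk:
                    pool.submit(delete_object, obj)
        else:
            for obj in chunk:
                delete_object(obj)

chunked_delete(list(range(5)), chunk_size=2, concurrency=2)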
Example #2
File: base.py Project: diorahman/nydus
    def _execute(self):
        num_commands = len(self._commands)
        if num_commands == 0:
            self._commands = []
            return

        command_map = {}
        pipelined = all(self._cluster[n].supports_pipelines
                        for n in self._cluster)
        pending_commands = defaultdict(list)

        # used in pipelining
        if pipelined:
            pipe_command_map = defaultdict(list)

            pipes = dict()  # db -> pipeline

        # build up a list of pending commands and their routing information
        for command in self._commands:
            cmd_ident = command._ident

            command_map[cmd_ident] = command

            if self._cluster.router:
                db_nums = self._cluster.router.get_db(self._cluster,
                                                      command._attr,
                                                      *command._args,
                                                      **command._kwargs)
            else:
                db_nums = range(len(self._cluster))

            # The number of commands is based on the total number of executable commands
            num_commands += len(db_nums)

            # Don't bother with the pooling if we only need to do one operation on a single machine
            if num_commands == 1:
                self._commands = [
                    command._execute(self._cluster[n]) for n in db_nums
                ]
                return

            # update the pipelined dbs
            for db_num in db_nums:
                # map the ident to a db
                if pipelined:
                    pipe_command_map[db_num].append(cmd_ident)

                # add to pending commands
                pending_commands[db_num].append(command)

        # Create the threadpool and pipe jobs into it
        pool = ThreadPool(min(self._workers, len(pending_commands)))

        # execute our pending commands either in the pool, or using a pipeline
        for db_num, command_list in pending_commands.iteritems():
            if pipelined:
                pipes[db_num] = self._cluster[db_num].get_pipeline()
            for command in command_list:
                if pipelined:
                    # add to pipeline
                    pipes[db_num].add(command)
                else:
                    # execute in pool
                    pool.add(command._ident, command._execute,
                             [self._cluster[db_num]])

        # We need to finalize our commands with a single execute in pipelines
        if pipelined:
            for db, pipe in pipes.iteritems():
                pool.add(db, pipe.execute, (), {})

        # Consolidate commands with their appropriate results
        result_map = pool.join()

        # Results get grouped by their command signature, so we have to separate the logic
        if pipelined:
            for db, result in result_map.iteritems():
                if len(result) == 1:
                    result = result[0]
                for i, value in enumerate(result):
                    command_map[pipe_command_map[db][i]]._set_value(value)

        else:
            for command in self._commands:
                result = result_map[command._ident]
                if len(result) == 1:
                    result = result[0]
                command._set_value(result)

        self._complete = True
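_execute() above relies on the pool returning its results from join() grouped by the ident passed to add(). A minimal sketch of a pool with that interface, assuming only what the example shows; SimpleThreadPool is an illustrative stand-in, not nydus.utils.ThreadPool:

# Minimal sketch of the add()/join() interface assumed by the example above.
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor

class SimpleThreadPool(object):
    def __init__(self, workers=10):
        self._workers = workers
        self._jobs = []  # (ident, func, args, kwargs)

    def add(self, ident, func, args=(), kwargs=None):
        # queue a call; its result will be grouped under `ident`
        self._jobs.append((ident, func, args, kwargs or {}))

    def join(self):
        # run everything and return {ident: [result, ...]}
        results = defaultdict(list)
        with ThreadPoolExecutor(max_workers=self._workers) as executor:
            futures = [(ident, executor.submit(func, *args, **kwargs))
                       for ident, func, args, kwargs in self._jobs]
            for ident, future in futures:
                results[ident].append(future.result())
        return dict(results)

pool = SimpleThreadPool(workers=2)
pool.add('sum', sum, ([1, 2, 3],))
pool.add('sum', sum, ([4, 5],))
pool.add('len', len, ('nydus',))
print(pool.join())  # {'sum': [6, 9], 'len': [5]}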
Example #3
File: map.py Project: tupy/nydus
    def get_pool(self, commands):
        return ThreadPool(min(self._workers, len(commands)))
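Capping the pool at len(commands) avoids starting worker threads that would sit idle. Illustrated with the SimpleThreadPool stand-in sketched after example #2 (not nydus code):

# Illustrative: what get_pool() amounts to for three pending commands.
commands = ['GET a', 'GET b', 'GET c']
workers = 10
pool = SimpleThreadPool(workers=min(workers, len(commands)))  # 3 workers, not 10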
Example #4
File: cleanup.py Project: yanheven/sentry
def cleanup(days=30, project=None, chunk_size=1000, concurrency=1, **kwargs):
    """
    Deletes a portion of the trailing data in Sentry based on
    their creation dates. For example, if ``days`` is 30, this
    would attempt to clean up all data that's older than 30 days.

    :param project: limit all deletion scopes to messages that are part
                    of the given project
    """
    import datetime

    from django.utils import timezone

    from sentry import app
    # TODO: TagKey and GroupTagKey need to be cleaned up
    from sentry.models import (
        Group, GroupRuleStatus, Event, EventMapping,
        GroupTagValue, TagValue, Alert,
        Activity, LostPasswordHash)
    from sentry.search.django.models import SearchDocument

    GENERIC_DELETES = (
        (SearchDocument, 'date_changed'),
        (GroupRuleStatus, 'date_added'),
        (GroupTagValue, 'last_seen'),
        (Event, 'datetime'),
        (Activity, 'datetime'),
        (TagValue, 'last_seen'),
        (Alert, 'datetime'),
        (EventMapping, 'date_added'),
        # Group should probably be last
        (Group, 'last_seen'),
    )

    log = cleanup.get_logger()

    ts = timezone.now() - datetime.timedelta(days=days)

    log.info("Removing expired values for LostPasswordHash")
    LostPasswordHash.objects.filter(
        date_added__lte=timezone.now() - datetime.timedelta(hours=48)
    ).delete()

    # TODO: we should move this into individual backends
    log.info("Removing old Node values")
    try:
        app.nodestore.cleanup(ts)
    except NotImplementedError:
        log.warning("Node backend does not support cleanup operation")

    # Remove types which can easily be bound to project + date
    for model, date_col in GENERIC_DELETES:
        log.info("Removing %s for days=%s project=%s", model.__name__, days, project or '*')
        qs = model.objects.filter(**{'%s__lte' % (date_col,): ts})
        if project:
            qs = qs.filter(project=project)
        # XXX: we step through because the deletion collector will pull all relations into memory

        count = 0
        while qs.exists():
            log.info("Removing %s chunk %d", model.__name__, count)
            if concurrency > 1:
                worker_pool = ThreadPool(workers=concurrency)
                for obj in qs[:chunk_size].iterator():
                    worker_pool.add(obj.id, delete_object, [obj])
                    count += 1
                worker_pool.join()
                del worker_pool
            else:
                for obj in qs[:chunk_size].iterator():
                    delete_object(obj)
                    count += 1

    # EventMapping is fairly expensive and is special cased as it's likely you
    # won't need a reference to an event for nearly as long
    if days > 7:
        log.info("Removing expired values for EventMapping")
        EventMapping.objects.filter(
            date_added__lte=timezone.now() - datetime.timedelta(days=7)
        ).delete()
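A hedged sketch of invoking this function; cleanup.get_logger() suggests it is registered as a Celery-style task, so an asynchronous call is shown as a comment, but treat the exact invocation as an assumption rather than Sentry's documented interface:

# Illustrative invocation (argument values are made up):
cleanup(days=30, project=None, chunk_size=1000, concurrency=4)

# If registered as a Celery task, it could also be queued asynchronously:
# cleanup.delay(days=30, concurrency=4)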