Example #1
0
    def handle(self, *args, **kwargs):
        """Reindex the AMO Elasticsearch data.

        Creates a TaskTree that builds new time-stamped indexes next to the
        old ones, so the search feature keeps working while the indexation
        occurs. Once every new index is populated, the aliases are switched
        over, the database is unflagged and the old indexes are deleted.

        Keyword options read from ``kwargs``:

        - ``force``: run even if a reindexation is already flagged as
          occurring.
        - ``with_stats``: forwarded to ``get_modules()``.
        - ``wipe``: delete the existing indexes before starting; asks for
          confirmation unless ``noinput`` is set.
        - ``noinput``: skip the ``wipe`` confirmation prompt.

        Raises ``CommandError`` when a reindexation is already occurring and
        ``force`` is not set, or when the ``wipe`` confirmation is declined.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        # Mapping whose keys are the alias names to reindex.
        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_confirmation = kwargs.get('noinput', False)
            confirm = ''
            if not skip_confirmation:
                confirm = raw_input('Are you sure you want to wipe all AMO '
                                    'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if not (skip_confirmation or confirm == 'yes'):
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            # Mapping keys are already unique, no need to build a set.
            for index in modules:
                ES.indices.delete(index, ignore=404)
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            """Queue an alias action, skipping exact duplicates."""
            action = {action: {'index': index, 'alias': alias}}
            if action not in alias_actions:
                alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        # Stays None when ``modules`` is empty, in which case chaining the
        # renaming step below would fail.
        last_action = None

        to_remove = []

        # For each alias, we create a new time-stamped index.
        for alias in modules:
            old_index = None

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None that could mean it's a full index.
            # In that case we want to continue index in it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database, then create and fill the new index.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index, args=[alias, new_index])
            step3 = step2.add_task(index_data, args=[alias, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        eager = getattr(settings, 'CELERY_ALWAYS_EAGER', False)

        # FORCE_INDEXING is set inside the ``try`` so that the ``finally``
        # clears it even when adjusting the time limits fails.
        try:
            os.environ['FORCE_INDEXING'] = '1'

            # This is a bit convoluted, and more complicated than simply
            # providing the soft and hard time limits on the @task decorator.
            # But we're not using the @task decorator here, but a decorator
            # from celery_tasktree.
            if not eager:
                control.time_limit(
                    'olympia.lib.es.management.commands.reindex.index_data',
                    soft=time_limits['soft'],
                    hard=time_limits['hard'])

            tree.apply_async()
            if not eager:
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            # pop() rather than del: a missing key must not raise here and
            # hide the exception that is already propagating.
            os.environ.pop('FORCE_INDEXING', None)

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #2
0
def set_time_limit(task_name, soft=60, hard=120, reply=True):
    """Change the time limits of a task through ``control.time_limit``.

    :param task_name: name of the task whose limits are changed.
    :param soft: value forwarded as the ``soft`` limit.
    :param hard: value forwarded as the ``hard`` limit.
    :param reply: value forwarded as the ``reply`` keyword.
    :return: whatever ``control.time_limit`` returns.
    """
    # Everything after the task name is passed by keyword. Assuming
    # ``control`` is Celery's control API (confirm against the file's
    # imports), ``time_limit`` has no positional ``reply`` parameter, so a
    # fourth positional value is either rejected or bound to the wrong
    # parameter.
    return control.time_limit(task_name, soft=soft, hard=hard, reply=reply)
Example #3
0
    def handle(self, *args, **kwargs):
        """Reindex the AMO Elasticsearch data.

        Creates a TaskTree that builds new time-stamped indexes next to the
        old ones, so the search feature keeps working while the indexation
        occurs. Once every new index is populated, the aliases are switched
        over, the database is unflagged and the old indexes are deleted.

        Keyword options read from ``kwargs``:

        - ``force``: run even if a reindexation is already flagged as
          occurring.
        - ``with_stats``: also reindex the ``"stats"`` module, in addition
          to ``"addons"``.
        - ``wipe``: delete the existing indexes before starting; asks for
          confirmation unless ``noinput`` is set.
        - ``noinput``: skip the ``wipe`` confirmation prompt.

        Raises ``CommandError`` when a reindexation is already occurring and
        ``force`` is not set, or when the ``wipe`` confirmation is declined.

        """
        force = kwargs.get("force", False)

        if is_reindexing_amo() and not force:
            raise CommandError("Indexation already occuring - use --force to bypass")

        log("Starting the reindexation", stdout=self.stdout)

        modules = ["addons"]
        if kwargs.get("with_stats", False):
            modules.append("stats")

        if kwargs.get("wipe", False):
            skip_confirmation = kwargs.get("noinput", False)
            confirm = ""
            if not skip_confirmation:
                confirm = raw_input("Are you sure you want to wipe all AMO Elasticsearch indexes? (yes/no): ")

                while confirm not in ("yes", "no"):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if not (skip_confirmation or confirm == "yes"):
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            # A set, so an alias returned for several modules is only
            # deleted once.
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            """Queue an alias action, skipping exact duplicates."""
            action = {action: {"index": index, "alias": alias}}
            if action not in alias_actions:
                alias_actions.append(action)

        # Creating a task tree.
        log("Building the task tree", stdout=self.stdout)
        tree = TaskTree()
        # Always reassigned below: ``modules`` holds at least "addons".
        last_action = None

        to_remove = []

        # For each index, we create a new time-stamped index.
        for module in modules:
            old_index = None
            alias = MODULES[module].get_alias()

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action("remove", old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None that could mean it's a full index.
            # In that case we want to continue index in it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database, then create and fill the new index.
            step1 = tree.add_task(flag_database, args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index, args=[module, new_index])
            step3 = step2.add_task(index_data, args=[module, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action("add", new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases, args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log("Running all indexation tasks", stdout=self.stdout)

        eager = getattr(settings, "CELERY_ALWAYS_EAGER", False)

        # FORCE_INDEXING is set inside the ``try`` so that the ``finally``
        # clears it even when adjusting the time limits fails.
        try:
            os.environ["FORCE_INDEXING"] = "1"

            # This is a bit convoluted, and more complicated than simply
            # providing the soft and hard time limits on the @task decorator.
            # But we're not using the @task decorator here, but a decorator
            # from celery_tasktree.
            if not eager:
                control.time_limit(
                    "olympia.lib.es.management.commands.reindex.index_data",
                    soft=time_limits["soft"],
                    hard=time_limits["hard"],
                )

            tree.apply_async()
            if not eager:
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
        finally:
            # pop() rather than del: a missing key must not raise here and
            # hide the exception that is already propagating.
            os.environ.pop("FORCE_INDEXING", None)

        sys.stdout.write("\n")

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #4
0
    def handle(self, *args, **kwargs):
        """Reindex the AMO Elasticsearch data.

        Creates a TaskTree that builds new time-stamped indexes next to the
        old ones, so the search feature keeps working while the indexation
        occurs. Once every new index is populated, the aliases are switched
        over, the database is unflagged and the old indexes are deleted.

        Keyword options read from ``kwargs``:

        - ``force``: run even if a reindexation is already flagged as
          occurring.
        - ``with_stats``: forwarded to ``get_modules()``.
        - ``wipe``: delete the existing indexes before starting; asks for
          confirmation unless ``noinput`` is set.
        - ``noinput``: skip the ``wipe`` confirmation prompt.

        Raises ``CommandError`` when a reindexation is already occurring and
        ``force`` is not set, or when the ``wipe`` confirmation is declined.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        # Mapping whose keys are the alias names to reindex.
        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_confirmation = kwargs.get('noinput', False)
            confirm = ''
            if not skip_confirmation:
                confirm = raw_input('Are you sure you want to wipe all AMO '
                                    'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if not (skip_confirmation or confirm == 'yes'):
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            # Mapping keys are already unique, no need to build a set.
            for index in modules:
                ES.indices.delete(index, ignore=404)
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            """Queue an alias action, skipping exact duplicates."""
            action = {action: {'index': index, 'alias': alias}}
            if action not in alias_actions:
                alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        # Stays None when ``modules`` is empty, in which case chaining the
        # renaming step below would fail.
        last_action = None

        to_remove = []

        # For each alias, we create a new time-stamped index.
        for alias in modules:
            old_index = None

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None that could mean it's a full index.
            # In that case we want to continue index in it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database, then create and fill the new index.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index,
                                   args=[alias, new_index])
            step3 = step2.add_task(index_data,
                                   args=[alias, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        eager = getattr(settings, 'CELERY_ALWAYS_EAGER', False)

        # FORCE_INDEXING is set inside the ``try`` so that the ``finally``
        # clears it even when adjusting the time limits fails.
        try:
            os.environ['FORCE_INDEXING'] = '1'

            # This is a bit convoluted, and more complicated than simply
            # providing the soft and hard time limits on the @task decorator.
            # But we're not using the @task decorator here, but a decorator
            # from celery_tasktree.
            if not eager:
                control.time_limit(
                    'olympia.lib.es.management.commands.reindex.index_data',
                    soft=time_limits['soft'],
                    hard=time_limits['hard'])

            tree.apply_async()
            if not eager:
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            # pop() rather than del: a missing key must not raise here and
            # hide the exception that is already propagating.
            os.environ.pop('FORCE_INDEXING', None)

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)