Example #1
    def execute_workflow(self, workflow):
        # Let's do it.
        self.stdout.write('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'

        try:
            workflow.apply_async()

            if not getattr(settings, 'CELERY_TASK_ALWAYS_EAGER', False):
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                self.stdout.write('.')
                self.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        self.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_alias()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % aliases
        self.stdout.write(summary)
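
The snippets in this section lean on a handful of database-flag helpers (is_reindexing_amo, unflag_reindexing_amo, flag_database/unflag_database) whose implementations are not shown. A minimal sketch of how such a flag could be stored, assuming a simple key/value Config model; all names here are illustrative, not the actual helpers:

    from django.db import models

    class Config(models.Model):
        # Hypothetical key/value table shared by all processes.
        key = models.CharField(max_length=255, primary_key=True)
        value = models.TextField(blank=True)

    REINDEX_KEY = 'reindexing_amo'  # assumed flag name

    def flag_reindexing_amo():
        # Create the flag row so other processes know a reindex is running.
        Config.objects.get_or_create(key=REINDEX_KEY, defaults={'value': '1'})

    def unflag_reindexing_amo():
        # Drop the flag once the reindexation is over (or before a forced run).
        Config.objects.filter(key=REINDEX_KEY).delete()

    def is_reindexing_amo():
        # Polled by execute_workflow() above and by the tests that follow.
        return Config.objects.filter(key=REINDEX_KEY).exists()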
Example #2
    def _test_reindexation(self):
        # Current indices with aliases.
        old_indices = self.get_indices_aliases()

        # This is to start a reindexation in the background.
        class ReindexThread(threading.Thread):
            def __init__(self):
                self.stdout = StringIO.StringIO()
                super(ReindexThread, self).__init__()

            def run(self):
                # We need to wait at least a second, to make sure the alias
                # name is going to be different, since we already create an
                # alias in setUpClass.
                time.sleep(1)
                management.call_command('reindex', stdout=self.stdout)

        t = ReindexThread()
        t.start()

        # Wait for the reindex in the thread to flag the database.
        # The database transaction isn't shared with the thread, so force the
        # commit.
        while t.is_alive() and not is_reindexing_amo():
            connection._commit()
            connection.clean_savepoints()

        # We should still be able to search in the foreground while the reindex
        # is being done in the background. We should also be able to index new
        # documents, and they should not be lost.
        old_addons_count = len(self.expected)
        while t.is_alive() and len(self.expected) < old_addons_count + 3:
            self.expected.append(addon_factory())
            connection._commit()
            connection.clean_savepoints()
            self.refresh()
            self.check_results(self.expected)

        if len(self.expected) == old_addons_count:
            raise AssertionError('Could not index objects in foreground while '
                                 'reindexing in the background.')

        t.join()  # Wait for the thread to finish.
        t.stdout.seek(0)
        stdout = t.stdout.read()
        assert 'Reindexation done' in stdout, stdout

        # The reindexation is done, let's double check we have all our docs.
        connection._commit()
        connection.clean_savepoints()
        self.refresh()
        self.check_results(self.expected)

        # New indices have been created, and aliases now point to them.
        new_indices = self.get_indices_aliases()
        assert len(new_indices)
        assert old_indices != new_indices, (stdout, old_indices, new_indices)

        self.check_settings(new_indices)
Example #3
    def _test_reindexation(self):
        # Current indices with aliases.
        old_indices = self.get_indices_aliases()

        # This is to start a reindexation in the background.
        class ReindexThread(threading.Thread):
            def __init__(self):
                self.stdout = StringIO.StringIO()
                super(ReindexThread, self).__init__()

            def run(self):
                # We need to wait at least a second, to make sure the alias
                # name is going to be different, since we already create an
                # alias in setUpClass.
                time.sleep(1)
                management.call_command('reindex', stdout=self.stdout)

        t = ReindexThread()
        t.start()

        # Wait for the reindex in the thread to flag the database.
        # The database transaction isn't shared with the thread, so force the
        # commit.
        while t.is_alive() and not is_reindexing_amo():
            connection._commit()
            connection.clean_savepoints()

        # We should still be able to search in the foreground while the reindex
        # is being done in the background. We should also be able to index new
        # documents, and they should not be lost.
        old_addons_count = len(self.expected)
        while t.is_alive() and len(self.expected) < old_addons_count + 3:
            self.expected.append(addon_factory())
            connection._commit()
            connection.clean_savepoints()
            self.refresh()
            self.check_results(self.expected)

        if len(self.expected) == old_addons_count:
            raise AssertionError('Could not index objects in foreground while '
                                 'reindexing in the background.')

        t.join()  # Wait for the thread to finish.
        t.stdout.seek(0)
        stdout = t.stdout.read()
        assert 'Reindexation done' in stdout, stdout

        # The reindexation is done, let's double check we have all our docs.
        connection._commit()
        connection.clean_savepoints()
        self.refresh()
        self.check_results(self.expected)

        # New indices have been created, and aliases now point to them.
        new_indices = self.get_indices_aliases()
        assert len(new_indices)
        assert old_indices != new_indices, (stdout, old_indices, new_indices)

        self.check_settings(new_indices)
Example #4
    def setUp(self):
        super(TestIndexCommand, self).setUp()
        if is_reindexing_amo():
            unflag_reindexing_amo()

        self.url = reverse('search.search')

        # We store previously existing indices in order to delete the ones
        # created during this test run.
        self.indices = self.es.indices.status()['indices'].keys()
Example #5
    def setUp(self):
        super(TestIndexCommand, self).setUp()
        if is_reindexing_amo():
            unflag_reindexing_amo()

        self.url = reverse('search.search')

        # We store previously existing indices in order to delete the ones
        # created during this test run.
        self.indices = self.es.indices.stats()['indices'].keys()
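
The setUp above records the names of pre-existing indices; the matching cleanup is not shown. A hedged sketch of a tearDown that deletes only the indices created during the test run, assuming the same self.es client and self.indices attribute (the method body is an assumption):

    def tearDown(self):
        # Delete any index that did not exist when setUp ran.
        current = self.es.indices.stats()['indices'].keys()
        for index in set(current) - set(self.indices):
            # ignore=404 in case the test already removed it.
            self.es.indices.delete(index, ignore=404)
        super(TestIndexCommand, self).tearDown()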
Example #6
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a task chain that creates new indexes over the old ones so the
        search feature works while the indexation occurs.

        """
        force = kwargs['force']

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')

        alias = settings.ES_INDEXES.get(kwargs['key'], None)
        if alias is None:
            raise CommandError(
                'Invalid --key parameter. It should be one of: %s.' % (
                    self.accepted_keys()
                )
            )
        self.stdout.write('Starting the reindexation for %s.' % alias)

        if kwargs['wipe']:
            skip_confirmation = kwargs['noinput']
            confirm = ''
            if not skip_confirmation:
                confirm = input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = input('Please enter either "yes" or "no": ')

            if (confirm == 'yes' or skip_confirmation):
                unflag_database()
                # Retrieve the actual index and delete it. That way it doesn't
                # matter whether this was an alias or a plain index (which is
                # wrong, but happens if data was indexed before the first
                # reindex was done).
                try:
                    index = next(iter(ES.indices.get(alias)))
                    ES.indices.delete(index)
                except NotFoundError:
                    pass
            else:
                raise CommandError('Aborted.')
        elif force:
            unflag_database()

        workflow = self.create_workflow(alias)
        self.execute_workflow(workflow)
Example #7
    def setUp(self):
        super(TestIndexCommand, self).setUp()
        if is_reindexing_amo():
            unflag_reindexing_amo()

        self.url = reverse_ns('addon-search')

        # We store previously existing indices in order to delete the ones
        # created during this test run.
        self.indices = self.es.indices.stats()['indices'].keys()

        self.addons = []
        self.expected = self.addons[:]
        # Monkeypatch Celerys ".get()" inside async task error
        # until https://github.com/celery/celery/issues/4661 (which isn't just
        # about retries but a general regression that manifests only in
        # eager-mode) fixed.
        self.patch('celery.app.task.denied_join_result')
Example #8
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a task chain that creates new indexes over the old ones so the
        search feature works while the indexation occurs.

        """
        force = kwargs['force']

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')

        if kwargs['data'] not in ('addons', 'stats'):
            raise CommandError('--data should be "addons" or "stats".')

        alias = get_alias(kwargs['data'])

        self.stdout.write('Starting the reindexation')

        if kwargs['wipe']:
            skip_confirmation = kwargs['noinput']
            confirm = ''
            if not skip_confirmation:
                confirm = input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = input('Please enter either "yes" or "no": ')

            if (confirm == 'yes' or skip_confirmation):
                unflag_database()
                ES.indices.delete(alias, ignore=404)
            else:
                raise CommandError('Aborted.')
        elif force:
            unflag_database()

        workflow = self.create_workflow(alias)
        self.execute_workflow(workflow)
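
The get_alias() helper used above is not shown; a hedged sketch consistent with Example #6, which resolves aliases from settings.ES_INDEXES (the body is an assumption, only the call site comes from the snippet):

    from django.conf import settings

    def get_alias(key):
        # Assumed to mirror Example #6: 'addons' or 'stats' maps to an alias
        # name configured in settings.ES_INDEXES.
        return settings.ES_INDEXES[key]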
Example #9
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_confirmation = kwargs.get('noinput', False)
            confirm = ''
            if not skip_confirmation:
                confirm = raw_input('Are you sure you want to wipe all AMO '
                                    'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if (confirm == 'yes' or skip_confirmation):
                unflag_database(stdout=self.stdout)
                for index in set(modules.keys()):
                    ES.indices.delete(index, ignore=404)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {'index': index, 'alias': alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # For each alias, we create a new time-stamped index.
        for alias, module in modules.items():
            old_index = None

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None, that could mean it's a full index.
            # In that case we want to continue indexing into it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index, args=[alias, new_index])
            step3 = step2.add_task(index_data, args=[alias, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'

        # This is a bit convoluted, and more complicated than simply providing
        # the soft and hard time limits on the @task decorator. But we're not
        # using the @task decorator here, but a decorator from celery_tasktree.
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            control.time_limit(
                'olympia.lib.es.management.commands.reindex.index_data',
                soft=time_limits['soft'],
                hard=time_limits['hard'])

        try:
            tree.apply_async()
            if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #10
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get("force", False)

        if is_reindexing_amo() and not force:
            raise CommandError("Indexation already occuring - use --force to " "bypass")

        log("Starting the reindexation", stdout=self.stdout)

        modules = ["addons"]
        if kwargs.get("with_stats", False):
            modules.append("stats")

        if kwargs.get("wipe", False):
            skip_confirmation = kwargs.get("noinput", False)
            confirm = ""
            if not skip_confirmation:
                confirm = raw_input("Are you sure you want to wipe all AMO " "Elasticsearch indexes? (yes/no): ")

                while confirm not in ("yes", "no"):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == "yes" or skip_confirmation:
                unflag_database(stdout=self.stdout)
                for index in set(MODULES[m].get_alias() for m in modules):
                    ES.indices.delete(index, ignore=404)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {"index": index, "alias": alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task tree.
        log("Building the task tree", stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # For each index, we create a new time-stamped index.
        for module in modules:
            old_index = None
            alias = MODULES[module].get_alias()

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action("remove", old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None, that could mean it's a full index.
            # In that case we want to continue indexing into it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            step1 = tree.add_task(flag_database, args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index, args=[module, new_index])
            step3 = step2.add_task(index_data, args=[module, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action("add", new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases, args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log("Running all indexation tasks", stdout=self.stdout)

        os.environ["FORCE_INDEXING"] = "1"

        # This is a bit convoluted, and more complicated than simply providing
        # the soft and hard time limits on the @task decorator. But we're not
        # using the @task decorator here, but a decorator from celery_tasktree.
        if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
            control.time_limit(
                "olympia.lib.es.management.commands.reindex.index_data",
                soft=time_limits["soft"],
                hard=time_limits["hard"],
            )

        try:
            tree.apply_async()
            if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ["FORCE_INDEXING"]

        sys.stdout.write("\n")

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #11
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a task chain that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')

        self.stdout.write('Starting the reindexation')

        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_confirmation = kwargs.get('noinput', False)
            confirm = ''
            if not skip_confirmation:
                confirm = raw_input('Are you sure you want to wipe all AMO '
                                    'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if (confirm == 'yes' or skip_confirmation):
                unflag_database()
                for index in set(modules.keys()):
                    ES.indices.delete(index, ignore=404)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database()

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {'index': index, 'alias': alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task chain.
        self.stdout.write('Building the task chain')

        to_remove = []
        workflow = []

        # For each alias, we create a new time-stamped index.
        for alias, module in modules.items():
            old_index = None

            try:
                olds = ES.indices.get_alias(alias)
                for old_index in olds:
                    # Mark the index to be removed later.
                    to_remove.append(old_index)
                    # Mark the alias to be removed from that index.
                    add_alias_action('remove', old_index, alias)
            except NotFoundError:
                # If the alias did not exist, ignore it; don't try to remove
                # it.
                pass

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None, that could mean it's a full index.
            # In that case we want to continue indexing into it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            workflow.append(
                flag_database.si(new_index, old_index, alias) |
                create_new_index.si(alias, new_index) |
                index_data.si(alias, new_index)
            )

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        workflow = group(workflow)

        # Alias the new index and remove the old aliases, if any.
        workflow |= update_aliases.si(alias_actions)

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        workflow |= unflag_database.si()

        # Delete the old indexes, if any.
        if to_remove:
            workflow |= delete_indexes.si(to_remove)

        # Let's do it.
        self.stdout.write('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'

        try:
            workflow.apply_async()
            if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                self.stdout.write('.')
                self.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        self.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_alias()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        self.stdout.write(summary)
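
The alias_actions list built above is handed to the update_aliases task, whose body is not shown. A minimal sketch of how those add/remove entries map onto Elasticsearch's /_aliases API, assuming the same module-level ES client these snippets use and an ordinary Celery task (decorator and body are assumptions):

    from celery import shared_task

    @shared_task
    def update_aliases(actions):
        # ES is the module-level Elasticsearch client used throughout these
        # snippets. The accumulated {'add': ...} / {'remove': ...} dicts are
        # exactly the payload the /_aliases endpoint expects, wrapped under
        # an 'actions' key.
        ES.indices.update_aliases(body={'actions': actions})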
Example #12
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_confirmation = kwargs.get('noinput', False)
            confirm = ''
            if not skip_confirmation:
                confirm = raw_input('Are you sure you want to wipe all AMO '
                                    'Elasticsearch indexes? (yes/no): ')

                while confirm not in ('yes', 'no'):
                    confirm = raw_input('Please enter either "yes" or "no": ')

            if (confirm == 'yes' or skip_confirmation):
                unflag_database(stdout=self.stdout)
                for index in set(modules.keys()):
                    ES.indices.delete(index, ignore=404)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {'index': index, 'alias': alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # For each alias, we create a new time-stamped index.
        for alias, module in modules.items():
            old_index = None

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None, that could mean it's a full index.
            # In that case we want to continue indexing into it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index,
                                   args=[alias, new_index])
            step3 = step2.add_task(index_data,
                                   args=[alias, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'

        # This is a bit convoluted, and more complicated than simply providing
        # the soft and hard time limits on the @task decorator. But we're not
        # using the @task decorator here, but a decorator from celery_tasktree.
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            control.time_limit(
                'olympia.lib.es.management.commands.reindex.index_data',
                soft=time_limits['soft'],
                hard=time_limits['hard'])

        try:
            tree.apply_async()
            if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)