Example #1
0
    def test_reindexation(self):
        # Adding an addon.
        addon = amo.tests.addon_factory()
        self.refresh()

        # The search should return the addon.
        wanted = [addon]
        self.check_results(wanted)

        # Current indices with aliases.
        old_indices = self.get_indices_aliases()

        # This is to start a reindexation in the background.
        class ReindexThread(threading.Thread):
            def __init__(self):
                self.stdout = StringIO.StringIO()
                super(ReindexThread, self).__init__()

            def run(self):
                management.call_command('reindex', stdout=self.stdout)
        t = ReindexThread()
        t.start()

        # Wait for the reindex in the thread to flag the database.
        # The database transaction isn't shared with the thread, so force the
        # commit.
        while t.is_alive() and not is_reindexing_amo():
            connection._commit()
            connection.clean_savepoints()

        # We should still be able to search in the foreground while the reindex
        # is being done in the background. We should also be able to index new
        # documents, and they should not be lost.
        old_addons_count = len(wanted)
        while t.is_alive() and len(wanted) < old_addons_count + 3:
            wanted.append(amo.tests.addon_factory())
            connection._commit()
            connection.clean_savepoints()
            amo.search.get_es().refresh()
            self.check_results(wanted)

        if len(wanted) == old_addons_count:
            raise AssertionError('Could not index objects in foreground while '
                                 'reindexing in the background.')

        t.join()  # Wait for the thread to finish.
        t.stdout.seek(0)
        stdout = t.stdout.read()
        assert 'Reindexation done' in stdout, stdout

        # The reindexation is done, let's double check we have all our docs.
        connection._commit()
        connection.clean_savepoints()
        amo.search.get_es().refresh()
        self.check_results(wanted)

        # New indices have been created, and aliases now point to them.
        new_indices = self.get_indices_aliases()
        eq_(len(old_indices), len(new_indices), (old_indices, new_indices))
        assert new_indices != old_indices, stdout
Example #2
0
    def test_reindexation(self):
        # Adding an addon.
        addon = amo.tests.addon_factory()
        self.refresh()

        # The search should return the addon.
        wanted = [addon]
        self.check_results(wanted)

        # Current indices with aliases.
        old_indices = self.get_indices_aliases()

        # This is to start a reindexation in the background.
        class ReindexThread(threading.Thread):
            def __init__(self):
                self.stdout = StringIO.StringIO()
                super(ReindexThread, self).__init__()

            def run(self):
                management.call_command('reindex', stdout=self.stdout)
        t = ReindexThread()
        t.start()

        # Wait for the reindex in the thread to flag the database.
        # The database transaction isn't shared with the thread, so force the
        # commit.
        while t.is_alive() and not is_reindexing_amo():
            connection._commit()
            connection.clean_savepoints()

        # We should still be able to search in the foreground while the reindex
        # is being done in the background. We should also be able to index new
        # documents, and they should not be lost.
        old_addons_count = len(wanted)
        while t.is_alive() and len(wanted) < old_addons_count + 3:
            wanted.append(amo.tests.addon_factory())
            connection._commit()
            connection.clean_savepoints()
            amo.search.get_es().refresh()
            self.check_results(wanted)

        if len(wanted) == old_addons_count:
            raise AssertionError('Could not index objects in foreground while '
                                 'reindexing in the background.')

        t.join()  # Wait for the thread to finish.
        t.stdout.seek(0)
        stdout = t.stdout.read()
        assert 'Reindexation done' in stdout, stdout

        # The reindexation is done, let's double check we have all our docs.
        connection._commit()
        connection.clean_savepoints()
        amo.search.get_es().refresh()
        self.check_results(wanted)

        # New indices have been created, and aliases now point to them.
        new_indices = self.get_indices_aliases()
        eq_(len(old_indices), len(new_indices), (old_indices, new_indices))
        assert new_indices != old_indices
Example #3
0
    def setUp(self):
        super(TestIndexCommand, self).setUp()
        if is_reindexing_amo():
            unflag_reindexing_amo()

        self.url = reverse('search.search')

        # Any index created during the test will be deleted.
        self.indices = call_es('_status').json()['indices'].keys()
Example #4
0
    def setUp(self):
        super(TestIndexCommand, self).setUp()
        if is_reindexing_amo():
            unflag_reindexing_amo()

        self.url = reverse('search.search')

        # Any index created during the test will be deleted.
        self.indices = call_es('_status').json()['indices'].keys()
Example #5
0
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        prefix = kwargs.get('prefix', '')
        log('Starting the reindexation', stdout=self.stdout)

        if kwargs.get('with_stats', False):
            # Add the stats indexes back.
            _ALIASES.update(_STATS_ALIASES)

        if kwargs.get('wipe', False):
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database(stdout=self.stdout)
                for index in set(_ALIASES.values()):
                    requests.delete(url('/%s') % index)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        # Get list current aliases at /_aliases.
        all_aliases = requests.get(url('/_aliases')).json()

        # building the list of indexes
        indexes = set([prefix + index for index in
                       _ALIASES.values()])

        actions = []

        def add_action(*elmt):
            if elmt in actions:
                return
            actions.append(elmt)

        all_aliases = all_aliases.items()

        # creating a task tree
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # for each index, we create a new time-stamped index
        for alias in indexes:
            is_stats = 'stats' in alias
            old_index = None

            for aliased_index, alias_ in all_aliases:
                if alias in alias_['aliases'].keys():
                    # mark the index to be removed later
                    old_index = aliased_index
                    to_remove.append(aliased_index)

                    # mark the alias to be removed as well
                    add_action('remove', aliased_index, alias)

            # create a new index, using the alias name with a timestamp
            new_index = timestamp_index(alias)

            # if old_index is None that could mean it's a full index
            # In that case we want to continue index in it
            future_alias = url('/%s' % alias)
            if requests.head(future_alias).status_code == 200:
                old_index = alias

            # flag the database
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias],
                                  kwargs={'stdout': self.stdout})
            step2 = step1.add_task(create_mapping,
                                   args=[new_index, alias],
                                   kwargs={'stdout': self.stdout})
            step3 = step2.add_task(create_index,
                                   args=[new_index, is_stats],
                                   kwargs={'stdout': self.stdout})
            last_action = step3

            # adding new index to the alias
            add_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(run_aliases_actions,
                                             args=[actions],
                                             kwargs={'stdout': self.stdout})

        # unflag the database - there's no need to duplicate the
        # indexing anymore
        delete = renaming_step.add_task(unflag_database,
                                        kwargs={'stdout': self.stdout})

        # Delete the old indexes, if any
        delete.add_task(delete_indexes,
                        args=[to_remove], kwargs={'stdout': self.stdout})

        # let's do it
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # let's return the /_aliases values
        aliases = call_es('_aliases').json()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(indexes), aliases)
        log(summary, stdout=self.stdout)
Example #6
0
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = ['addons']
        if kwargs.get('with_stats', False):
            modules.append('stats')

        if kwargs.get('wipe', False):
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database(stdout=self.stdout)
                for index in set(MODULES[m].get_alias() for m in modules):
                    ES.indices.delete(index)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {'index': index, 'alias': alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # For each index, we create a new time-stamped index.
        for module in modules:
            old_index = None
            alias = MODULES[module].get_alias()

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None that could mean it's a full index.
            # In that case we want to continue index in it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index, args=[module, new_index])
            step3 = step2.add_task(index_data, args=[module, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #7
0
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = ['addons']
        if kwargs.get('with_stats', False):
            modules.append('stats')

        if kwargs.get('wipe', False):
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database(stdout=self.stdout)
                for index in set(MODULES[m].get_alias() for m in modules):
                    ES.indices.delete(index, ignore=404)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def add_alias_action(action, index, alias):
            action = {action: {'index': index, 'alias': alias}}
            if action in alias_actions:
                return
            alias_actions.append(action)

        # Creating a task tree.
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # For each index, we create a new time-stamped index.
        for module in modules:
            old_index = None
            alias = MODULES[module].get_alias()

            olds = ES.indices.get_aliases(alias, ignore=404)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)

            # Create a new index, using the alias name with a timestamp.
            new_index = timestamp_index(alias)

            # If old_index is None that could mean it's a full index.
            # In that case we want to continue index in it.
            if ES.indices.exists(alias):
                old_index = alias

            # Flag the database.
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_new_index,
                                   args=[module, new_index])
            step3 = step2.add_task(index_data,
                                   args=[module, new_index])
            last_action = step3

            # Adding new index to the alias.
            add_alias_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(update_aliases,
                                             args=[alias_actions])

        # Unflag the database - there's no need to duplicate the
        # indexing anymore.
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if to_remove:
            delete.add_task(delete_indexes, args=[to_remove])

        # Let's do it.
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'

        # This is a bit convoluted, and more complicated than simply providing
        # the soft and hard time limits on the @task decorator. But we're not
        # using the @task decorator here, but a decorator from celery_tasktree.
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            control.time_limit('lib.es.management.commands.reindex.index_data',
                               soft=time_limits['soft'],
                               hard=time_limits['hard'])

        try:
            tree.apply_async()
            if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Let's return the /_aliases values.
        aliases = ES.indices.get_aliases()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(modules), aliases)
        log(summary, stdout=self.stdout)
Example #8
0
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs.

        """
        if django_settings.MARKETPLACE:
            raise CommandError('This command affects the AMO ES indexes and '
                               'can only be run from AMO.')

        force = kwargs.get('force', False)

        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        prefix = kwargs.get('prefix', '')
        log('Starting the reindexation', stdout=self.stdout)

        if kwargs.get('with_stats', False):
            # Add the stats indexes back.
            _ALIASES.update(_STATS_ALIASES)

        if kwargs.get('wipe', False):
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database(stdout=self.stdout)
                for index in set(_ALIASES.values()):
                    requests.delete(url('/%s') % index)
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database(stdout=self.stdout)

        # Get list current aliases at /_aliases.
        all_aliases = requests.get(url('/_aliases')).json()

        # building the list of indexes
        indexes = set([prefix + index for index in _ALIASES.values()])

        actions = []

        def add_action(*elmt):
            if elmt in actions:
                return
            actions.append(elmt)

        all_aliases = all_aliases.items()

        # creating a task tree
        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        last_action = None

        to_remove = []

        # for each index, we create a new time-stamped index
        for alias in indexes:
            is_stats = 'stats' in alias
            old_index = None

            for aliased_index, alias_ in all_aliases:
                if alias in alias_['aliases'].keys():
                    # mark the index to be removed later
                    old_index = aliased_index
                    to_remove.append(aliased_index)

                    # mark the alias to be removed as well
                    add_action('remove', aliased_index, alias)

            # create a new index, using the alias name with a timestamp
            new_index = timestamp_index(alias)

            # if old_index is None that could mean it's a full index
            # In that case we want to continue index in it
            future_alias = url('/%s' % alias)
            if requests.head(future_alias).status_code == 200:
                old_index = alias

            # flag the database
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias],
                                  kwargs={'stdout': self.stdout})
            step2 = step1.add_task(create_mapping,
                                   args=[new_index, alias],
                                   kwargs={'stdout': self.stdout})
            step3 = step2.add_task(create_index,
                                   args=[new_index, is_stats],
                                   kwargs={'stdout': self.stdout})
            last_action = step3

            # adding new index to the alias
            add_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(run_aliases_actions,
                                             args=[actions],
                                             kwargs={'stdout': self.stdout})

        # unflag the database - there's no need to duplicate the
        # indexing anymore
        delete = renaming_step.add_task(unflag_database,
                                        kwargs={'stdout': self.stdout})

        # Delete the old indexes, if any
        delete.add_task(delete_indexes,
                        args=[to_remove],
                        kwargs={'stdout': self.stdout})

        # let's do it
        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # let's return the /_aliases values
        aliases = call_es('_aliases').json()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        summary = _SUMMARY % (len(indexes), aliases)
        log(summary, stdout=self.stdout)