class ESTestCase(TestCase):
    """Base class for tests that require elasticsearch."""
    # ES is slow to set up so this uses class setup/teardown. That happens
    # outside Django transactions so be careful to clean up afterwards.
    mock_es = False

    # We need ES index aliases to match prod behaviour, but the names also
    # need to stay consistent during the whole test run, so we generate
    # them at import time. Note that this works because pytest overrides
    # ES_INDEXES before the test run even begins - if we were using
    # override_settings() on ES_INDEXES we'd be in trouble.
    index_names = {key: timestamp_index(value)
                   for key, value in settings.ES_INDEXES.items()}

    @classmethod
    def setUpClass(cls):
        cls.es = amo_search.get_es(timeout=settings.ES_TIMEOUT)
        cls._SEARCH_ANALYZER_MAP = amo.SEARCH_ANALYZER_MAP
        amo.SEARCH_ANALYZER_MAP = {
            'english': ['en-us'],
            'spanish': ['es'],
        }
        super(ESTestCase, cls).setUpClass()

    @classmethod
    def setUpTestData(cls):
        try:
            cls.es.cluster.health()
        except Exception as e:
            e.args = tuple(
                [u"%s (it looks like ES is not running, try starting it or "
                 u"don't run ES tests: make test_no_es)" % e.args[0]] +
                list(e.args[1:]))
            raise

        aliases_and_indexes = set(settings.ES_INDEXES.values() +
                                  cls.es.indices.get_aliases().keys())
        for key in aliases_and_indexes:
            if key.startswith('test_amo'):
                cls.es.indices.delete(key, ignore=[404])

        # Create new search and stats indexes with the timestamped names.
        # This is crucial to set up the correct mappings before we start
        # indexing things in tests.
        search_indexers.create_new_index(
            index_name=cls.index_names['default'])
        stats_search.create_new_index(index_name=cls.index_names['stats'])

        # Alias them to the names the code is going to use (which are
        # suffixed by pytest to avoid clashing with the real thing).
        actions = [
            {'add': {'index': cls.index_names['default'],
                     'alias': settings.ES_INDEXES['default']}},
            {'add': {'index': cls.index_names['stats'],
                     'alias': settings.ES_INDEXES['stats']}}
        ]
        cls.es.indices.update_aliases({'actions': actions})
        super(ESTestCase, cls).setUpTestData()
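# For reference, a minimal sketch of what timestamp_index() is assumed to do
# in these snippets: suffix the given name with the current timestamp so
# every run gets a unique, sortable index name. This illustrates the idea
# and is not necessarily the exact olympia helper.
from datetime import datetime


def timestamp_index_sketch(index):
    """Return `index` suffixed with a timestamp, e.g. 'addons-20240101120000'."""
    return '{0}-{1}'.format(index, datetime.now().strftime('%Y%m%d%H%M%S'))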
# We might not have gettext available in jinja2.env.globals when running
# tests. It's only added to the globals when activating a language (which is
# usually done in the middlewares). During tests, however, we might not be
# running middlewares, and thus not activating a language, and thus not
# installing gettext in the globals, and thus not have it in the context when
# rendering templates.
translation.activate('en-us')

# We need ES index aliases to match prod behaviour, but the names also need
# to stay consistent during the whole test run, so we generate them at import
# time. Note that this works because pytest overrides ES_INDEXES before the
# test run even begins - if we were using override_settings() on ES_INDEXES
# we'd be in trouble.
ES_INDEX_SUFFIXES = {
    key: timestamp_index('')
    for key in settings.ES_INDEXES.keys()
}


def get_es_index_name(key):
    """Return the name of the actual index used in tests for a given key
    taken from settings.ES_INDEXES.

    Can be used to check whether aliases have been set properly - ES_INDEXES
    will give the aliases, and this method will give the indices the aliases
    point to."""
    value = settings.ES_INDEXES[key]
    return '%s%s' % (value, ES_INDEX_SUFFIXES[key])
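# A hypothetical usage sketch for get_es_index_name(): in a test, check that
# the 'default' alias really points at the timestamped index created for
# this run. `es` is assumed to be an elasticsearch-py client, as used in the
# test cases above; check_default_alias is an illustrative name.
def check_default_alias(es):
    actual_index = get_es_index_name('default')
    alias = settings.ES_INDEXES['default']
    # get_alias() returns {index_name: {...}} for every index carrying the
    # alias, so the timestamped index should be among the keys.
    assert actual_index in es.indices.get_alias(alias)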
def create_workflow(self, alias):
    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task chain.
    self.stdout.write('Building the task chain')

    to_remove = []
    old_index = None

    try:
        olds = ES.indices.get_alias(alias)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)
    except NotFoundError:
        # If the alias did not exist, ignore it, don't try to remove it.
        pass

    # Create a new index, using the alias name with a timestamp.
    new_index = timestamp_index(alias)

    # Mark the alias to be added at the end.
    add_alias_action('add', new_index, alias)

    # If old_index is None, that could mean it's a full index. In that case
    # we want to continue indexing into it.
    if ES.indices.exists(alias):
        old_index = alias

    # Main chain for this alias that:
    # - creates the new index
    # - then, flags the database (which in turn makes every index call
    #   index data on both the old and the new index).
    workflow = (
        create_new_index.si(alias, new_index) |
        flag_database.si(new_index, old_index, alias)
    )
    # ... Then start indexing data. gather_index_data_tasks() is a
    # function returning a group of indexing tasks.
    index_data_tasks = gather_index_data_tasks(alias, new_index)
    if index_data_tasks.tasks:
        # Add the group to the chain, if it's not empty.
        workflow |= index_data_tasks
    # Chain with a task that updates the aliases to point to the new
    # index and removes the old aliases, if any.
    workflow |= update_aliases.si(alias_actions)
    # Chain with a task that unflags the database - there's no need to
    # duplicate the indexing anymore.
    workflow |= unflag_database.si()
    # Finish the chain with a task that deletes the old indexes, if any.
    if to_remove:
        workflow |= delete_indexes.si(to_remove)
    return workflow
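# For context, a minimal self-contained sketch of the celery canvas
# primitives create_workflow() relies on: .si() builds an immutable
# signature (the task ignores the previous task's result), `|` chains
# signatures so they run in order, and a group fans out in parallel. The
# app, broker, and task names below are illustrative stand-ins, not the
# actual olympia tasks.
from celery import Celery, group

app = Celery('sketch', broker='memory://')


@app.task
def step(name):
    print('running %s' % name)


workflow = step.si('create_new_index') | step.si('flag_database')
# A group chained into a chain runs its members in parallel; the chain
# continues once they have all finished.
workflow |= group(step.si('index chunk 1'), step.si('index chunk 2'))
workflow |= step.si('update_aliases')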
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')

    log('Starting the reindexation', stdout=self.stdout)

    modules = get_modules(with_stats=kwargs.get('with_stats', False))

    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes' or skip_confirmation:
            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError('Aborted.')
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each alias, we create a new time-stamped index.
    for alias, module in modules.items():
        old_index = None

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None, that could mean it's a full index. In that
        # case we want to continue indexing into it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[alias, new_index])
        step3 = step2.add_task(index_data, args=[alias, new_index])
        last_action = step3

        # Adding the new index to the alias.
        add_alias_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the indexing
    # anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, we're using a decorator from
    # celery_tasktree.
    if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        control.time_limit(
            'olympia.lib.es.management.commands.reindex.index_data',
            soft=time_limits['soft'],
            hard=time_limits['hard'])

    try:
        tree.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # Give celeryd some time to flag the DB.
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
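# get_modules() is not shown here; from how it's used above (modules.keys()
# are alias names, modules.items() yields (alias, indexer)), it is assumed
# to return a mapping of alias name to the module that knows how to index
# it. A hypothetical sketch, with illustrative keys, not the actual olympia
# code:
def get_modules(with_stats=False):
    modules = {'addons': search_indexers}
    if with_stats:
        modules['stats'] = stats_search
    return modules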
# We might not have gettext available in jinja2.env.globals when running
# tests. It's only added to the globals when activating a language (which is
# usually done in the middlewares). During tests, however, we might not be
# running middlewares, and thus not activating a language, and thus not
# installing gettext in the globals, and thus not have it in the context when
# rendering templates.
translation.activate('en-us')

# We need ES index aliases to match prod behaviour, but the names also need
# to stay consistent during the whole test run, so we generate them at import
# time. Note that this works because pytest overrides ES_INDEXES before the
# test run even begins - if we were using override_settings() on ES_INDEXES
# we'd be in trouble.
ES_INDEX_SUFFIXES = {
    key: timestamp_index('') for key in settings.ES_INDEXES.keys()}


def get_es_index_name(key):
    """Return the name of the actual index used in tests for a given key
    taken from settings.ES_INDEXES.

    Can be used to check whether aliases have been set properly - ES_INDEXES
    will give the aliases, and this method will give the indices the aliases
    point to."""
    value = settings.ES_INDEXES[key]
    return '%s%s' % (value, ES_INDEX_SUFFIXES[key])


def setup_es_test_data(es):
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get("force", False)

    if is_reindexing_amo() and not force:
        raise CommandError("Indexation already occurring - use --force to "
                           "bypass")

    log("Starting the reindexation", stdout=self.stdout)

    modules = ["addons"]
    if kwargs.get("with_stats", False):
        modules.append("stats")

    if kwargs.get("wipe", False):
        skip_confirmation = kwargs.get("noinput", False)
        confirm = ""
        if not skip_confirmation:
            confirm = raw_input("Are you sure you want to wipe all AMO "
                                "Elasticsearch indexes? (yes/no): ")
            while confirm not in ("yes", "no"):
                confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == "yes" or skip_confirmation:
            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {"index": index, "alias": alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log("Building the task tree", stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None
        alias = MODULES[module].get_alias()

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action("remove", old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None, that could mean it's a full index. In that
        # case we want to continue indexing into it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[module, new_index])
        step3 = step2.add_task(index_data, args=[module, new_index])
        last_action = step3

        # Adding the new index to the alias.
        add_alias_action("add", new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the indexing
    # anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log("Running all indexation tasks", stdout=self.stdout)

    os.environ["FORCE_INDEXING"] = "1"

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, we're using a decorator from
    # celery_tasktree.
    if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
        control.time_limit(
            "olympia.lib.es.management.commands.reindex.index_data",
            soft=time_limits["soft"],
            hard=time_limits["hard"],
        )

    try:
        tree.apply_async()
        if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
            time.sleep(10)  # Give celeryd some time to flag the DB.
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ["FORCE_INDEXING"]

    sys.stdout.write("\n")

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
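# `time_limits` is assumed to be a mapping with "soft" and "hard" keys in
# seconds, handed to celery's control.time_limit() above to cap how long a
# single index_data task may run. Illustrative values only:
time_limits = {
    "soft": 60 * 30,  # soft limit: the task receives SoftTimeLimitExceeded
    "hard": 60 * 45,  # hard limit: the worker kills the task outright
}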
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a task chain that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')

    self.stdout.write('Starting the reindexation')

    modules = get_modules(with_stats=kwargs.get('with_stats', False))

    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes' or skip_confirmation:
            unflag_database()
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError('Aborted.')
    elif force:
        unflag_database()

    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task chain.
    self.stdout.write('Building the task chain')

    to_remove = []
    workflow = []

    # For each alias, we create a new time-stamped index.
    for alias, module in modules.items():
        old_index = None

        try:
            olds = ES.indices.get_alias(alias)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)
        except NotFoundError:
            # If the alias did not exist, ignore it, don't try to remove
            # it.
            pass

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None, that could mean it's a full index. In that
        # case we want to continue indexing into it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        workflow.append(
            flag_database.si(new_index, old_index, alias) |
            create_new_index.si(alias, new_index) |
            index_data.si(alias, new_index)
        )

        # Adding the new index to the alias.
        add_alias_action('add', new_index, alias)

    workflow = group(workflow)

    # Alias the new index and remove the old aliases, if any.
    workflow |= update_aliases.si(alias_actions)

    # Unflag the database - there's no need to duplicate the indexing
    # anymore.
    workflow |= unflag_database.si()

    # Delete the old indexes, if any.
    if to_remove:
        workflow |= delete_indexes.si(to_remove)

    # Let's do it.
    self.stdout.write('Running all indexation tasks')

    os.environ['FORCE_INDEXING'] = '1'

    try:
        workflow.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # Give celeryd some time to flag the DB.
            while is_reindexing_amo():
                self.stdout.write('.')
                self.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    self.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_alias()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    self.stdout.write(summary)
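# flag_database/unflag_database and is_reindexing_amo() are assumed to set
# and read a persistent "reindexing in progress" flag; while it is set,
# indexing code writes to both the old and the new index so no updates are
# lost during the switch. A hypothetical sketch of that dual-write pattern
# (get_new_index/get_old_index/addon_to_doc are illustrative names):
def index_addon(addon, es):
    indices = [get_new_index()]
    if is_reindexing_amo():
        old_index = get_old_index()
        if old_index:
            indices.append(old_index)
    for index in indices:
        es.index(index=index, doc_type='addon', id=addon.id,
                 body=addon_to_doc(addon))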
# We might not have gettext available in jinja2.env.globals when running
# tests. It's only added to the globals when activating a language (which is
# usually done in the middlewares). During tests, however, we might not be
# running middlewares, and thus not activating a language, and thus not
# installing gettext in the globals, and thus not have it in the context when
# rendering templates.
translation.activate('en-us')

# We need ES index aliases to match prod behaviour, but the names also need
# to stay consistent during the whole test run, so we generate them at import
# time. Note that this works because pytest overrides ES_INDEXES before the
# test run even begins - if we were using override_settings() on ES_INDEXES
# we'd be in trouble.
ES_INDEX_SUFFIXES = {
    key: timestamp_index('') for key in settings.ES_INDEXES.keys()}

# django2.2 escapes with the decimal character reference; django3.2 with the
# hex one.
SQUOTE_ESCAPED = escape("'")


def get_es_index_name(key):
    """Return the name of the actual index used in tests for a given key
    taken from settings.ES_INDEXES.

    Can be used to check whether aliases have been set properly - ES_INDEXES
    will give the aliases, and this method will give the indices the aliases
    point to."""
    value = settings.ES_INDEXES[key]
    return '%s%s' % (value, ES_INDEX_SUFFIXES[key])
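# For illustration: Django < 3.0 escapes "'" as the decimal reference
# '&#39;', while Django >= 3.0 delegates to Python's html.escape and
# produces the hex form '&#x27;'. Computing SQUOTE_ESCAPED once at import
# time lets assertions work under either version, e.g. (hypothetical test
# snippet):
#
#     expected = 'Don%st panic' % SQUOTE_ESCAPED
#     assert expected in response.content.decode('utf-8')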
class ESTestCase(TestCase):
    # We need ES index aliases to match prod behaviour, but the names also
    # need to stay consistent during the whole test run, so we generate
    # them at import time. Note that this works because pytest overrides
    # ES_INDEXES before the test run even begins - if we were using
    # override_settings() on ES_INDEXES we'd be in trouble.
    index_suffixes = {
        key: timestamp_index('')
        for key in settings.ES_INDEXES.keys()
    }

    @classmethod
    def get_index_name(cls, key):
        """Return the name of the actual index used in tests for a given
        key taken from settings.ES_INDEXES.

        Can be used to check whether aliases have been set properly -
        ES_INDEXES will give the aliases, and this method will give the
        indices the aliases point to."""
        value = settings.ES_INDEXES[key]
        return '%s%s' % (value, cls.index_suffixes[key])

    def setUp(self):
        stop_es_mocks()

    @classmethod
    def setUpClass(cls):
        stop_es_mocks()
        cls.es = amo_search.get_es(timeout=settings.ES_TIMEOUT)
        cls._SEARCH_ANALYZER_MAP = amo.SEARCH_ANALYZER_MAP
        amo.SEARCH_ANALYZER_MAP = {
            'english': ['en-us'],
            'spanish': ['es'],
        }
        super(ESTestCase, cls).setUpClass()

    @classmethod
    def setUpTestData(cls):
        stop_es_mocks()
        try:
            cls.es.cluster.health()
        except Exception as e:
            e.args = tuple([
                u"%s (it looks like ES is not running, try starting it or "
                u"don't run ES tests: make test_no_es)" % e.args[0]
            ] + list(e.args[1:]))
            raise

        aliases_and_indexes = set(settings.ES_INDEXES.values() +
                                  cls.es.indices.get_aliases().keys())
        for key in aliases_and_indexes:
            if key.startswith('test_amo'):
                cls.es.indices.delete(key, ignore=[404])

        # Figure out the names of the indices we're going to create from
        # the suffixes generated at import time. Like the aliases later,
        # the names have been prefixed by pytest; we add a suffix that is
        # unique to this test run.
        actual_indices = {
            key: cls.get_index_name(key)
            for key in settings.ES_INDEXES.keys()
        }

        # Create new search and stats indexes with the timestamped names.
        # This is crucial to set up the correct mappings before we start
        # indexing things in tests.
        search_indexers.create_new_index(index_name=actual_indices['default'])
        stats_search.create_new_index(index_name=actual_indices['stats'])

        # Alias them to the names the code is going to use (which are
        # suffixed by pytest to avoid clashing with the real thing).
        actions = [{
            'add': {
                'index': actual_indices['default'],
                'alias': settings.ES_INDEXES['default'],
            }
        }, {
            'add': {
                'index': actual_indices['stats'],
                'alias': settings.ES_INDEXES['stats'],
            }
        }]
        cls.es.indices.update_aliases({'actions': actions})
        super(ESTestCase, cls).setUpTestData()
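# stop_es_mocks() is assumed to undo ES client mocking that is installed
# globally so non-ES tests never hit a real cluster; ES tests must stop
# those patchers before talking to Elasticsearch. A hypothetical sketch:
def stop_es_mocks_sketch(patchers):
    """Stop every active mock.patch patcher in `patchers` (illustrative)."""
    for patcher in patchers:
        try:
            patcher.stop()
        except RuntimeError:
            # Raised when the patcher was never started or already stopped.
            pass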