def test_reindexation(self):
    """End-to-end test: run the `reindex` command in a background thread
    and verify that search keeps working (and new docs keep getting
    indexed) in the foreground while the reindex is in progress.
    """
    # Adding an addon.
    addon = amo.tests.addon_factory()
    self.refresh()

    # The search should return the addon.
    wanted = [addon]
    self.check_results(wanted)

    # Current indices with aliases.
    old_indices = self.get_indices_aliases()

    # This is to start a reindexation in the background.
    # stdout is captured so the command's output can be asserted on later.
    class ReindexThread(threading.Thread):
        def __init__(self):
            self.stdout = StringIO.StringIO()
            super(ReindexThread, self).__init__()

        def run(self):
            management.call_command('reindex', stdout=self.stdout)
    t = ReindexThread()
    t.start()

    # Wait for the reindex in the thread to flag the database.
    # The database transaction isn't shared with the thread, so force the
    # commit.
    while t.is_alive() and not is_reindexing_amo():
        connection._commit()
        connection.clean_savepoints()

    # We should still be able to search in the foreground while the reindex
    # is being done in the background. We should also be able to index new
    # documents, and they should not be lost.
    old_addons_count = len(wanted)
    while t.is_alive() and len(wanted) < old_addons_count + 3:
        wanted.append(amo.tests.addon_factory())
        connection._commit()
        connection.clean_savepoints()
        amo.search.get_es().refresh()
        self.check_results(wanted)

    # If the loop above never managed to add a document, the reindex
    # finished before we could prove foreground indexing works.
    if len(wanted) == old_addons_count:
        raise AssertionError('Could not index objects in foreground while '
                             'reindexing in the background.')

    t.join()  # Wait for the thread to finish.
    t.stdout.seek(0)
    stdout = t.stdout.read()
    assert 'Reindexation done' in stdout, stdout

    # The reindexation is done, let's double check we have all our docs.
    connection._commit()
    connection.clean_savepoints()
    amo.search.get_es().refresh()
    self.check_results(wanted)

    # New indices have been created, and aliases now point to them.
    # The command's captured stdout is used as the failure message to aid
    # debugging when the aliases were not rotated.
    new_indices = self.get_indices_aliases()
    eq_(len(old_indices), len(new_indices), (old_indices, new_indices))
    assert new_indices != old_indices, stdout
def test_reindexation(self):
    """Run the `reindex` management command in a background thread while
    verifying that foreground search and indexing keep working, then check
    that the aliases were rotated to freshly created indices.
    """
    # Adding an addon.
    addon = amo.tests.addon_factory()
    self.refresh()

    # The search should return the addon.
    wanted = [addon]
    self.check_results(wanted)

    # Current indices with aliases.
    old_indices = self.get_indices_aliases()

    # This is to start a reindexation in the background.
    # The command's stdout is captured for later assertions.
    class ReindexThread(threading.Thread):
        def __init__(self):
            self.stdout = StringIO.StringIO()
            super(ReindexThread, self).__init__()

        def run(self):
            management.call_command('reindex', stdout=self.stdout)
    t = ReindexThread()
    t.start()

    # Wait for the reindex in the thread to flag the database.
    # The database transaction isn't shared with the thread, so force the
    # commit.
    while t.is_alive() and not is_reindexing_amo():
        connection._commit()
        connection.clean_savepoints()

    # We should still be able to search in the foreground while the reindex
    # is being done in the background. We should also be able to index new
    # documents, and they should not be lost.
    old_addons_count = len(wanted)
    while t.is_alive() and len(wanted) < old_addons_count + 3:
        wanted.append(amo.tests.addon_factory())
        connection._commit()
        connection.clean_savepoints()
        amo.search.get_es().refresh()
        self.check_results(wanted)

    # If nothing was appended, the background reindex finished before we
    # could demonstrate foreground indexing.
    if len(wanted) == old_addons_count:
        raise AssertionError('Could not index objects in foreground while '
                             'reindexing in the background.')

    t.join()  # Wait for the thread to finish.
    t.stdout.seek(0)
    stdout = t.stdout.read()
    assert 'Reindexation done' in stdout, stdout

    # The reindexation is done, let's double check we have all our docs.
    connection._commit()
    connection.clean_savepoints()
    amo.search.get_es().refresh()
    self.check_results(wanted)

    # New indices have been created, and aliases now point to them.
    new_indices = self.get_indices_aliases()
    eq_(len(old_indices), len(new_indices), (old_indices, new_indices))
    assert new_indices != old_indices
def setUp(self):
    """Prepare a clean state for each test.

    Clears any leftover reindexing flag, resolves the search URL, and
    records which ES indices already exist so that indices created during
    the test can be deleted afterwards.
    """
    super(TestIndexCommand, self).setUp()
    # A previous (failed) run may have left the reindexing flag set;
    # clear it so this test starts from a known state.
    if is_reindexing_amo():
        unflag_reindexing_amo()
    self.url = reverse('search.search')
    # Any index created during the test will be deleted.
    status = call_es('_status').json()
    self.indices = status['indices'].keys()
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so
    the search feature works while the indexation occurs.

    Flow: optionally wipe, build a celery_tasktree of
    flag_database -> create_mapping -> create_index per alias, then
    atomically swap the aliases, unflag the DB and delete old indexes.
    Blocks until the DB reindexing flag is cleared by the workers.
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation', stdout=self.stdout)

    if kwargs.get('with_stats', False):
        # Add the stats indexes back.
        _ALIASES.update(_STATS_ALIASES)

    if kwargs.get('wipe', False):
        confirm = raw_input('Are you sure you want to wipe all AMO '
                            'Elasticsearch indexes? (yes/no): ')
        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database(stdout=self.stdout)
            for index in set(_ALIASES.values()):
                # NOTE(review): the %-formatting happens on the value
                # returned by url('/%s'); this breaks if the ES base URL
                # itself contains a '%' — elsewhere in this file the
                # pattern url('/%s' % alias) is used instead. Confirm
                # which form is intended.
                requests.delete(url('/%s') % index)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    # Get list current aliases at /_aliases.
    all_aliases = requests.get(url('/_aliases')).json()

    # building the list of indexes
    indexes = set([prefix + index for index in _ALIASES.values()])

    actions = []

    def add_action(*elmt):
        # Deduplicated accumulation of ('add'|'remove', index, alias)
        # tuples for the final _aliases swap.
        if elmt in actions:
            return
        actions.append(elmt)

    all_aliases = all_aliases.items()

    # creating a task tree
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # for each index, we create a new time-stamped index
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, alias_ in all_aliases:
            if alias in alias_['aliases'].keys():
                # mark the index to be removed later
                old_index = aliased_index
                to_remove.append(aliased_index)

                # mark the alias to be removed as well
                add_action('remove', aliased_index, alias)

        # create a new index, using the alias name with a timestamp
        new_index = timestamp_index(alias)

        # if old_index is None that could mean it's a full index
        # In that case we want to continue index in it
        future_alias = url('/%s' % alias)
        if requests.head(future_alias).status_code == 200:
            old_index = alias

        # flag the database
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias],
                              kwargs={'stdout': self.stdout})
        step2 = step1.add_task(create_mapping,
                               args=[new_index, alias],
                               kwargs={'stdout': self.stdout})
        step3 = step2.add_task(create_index,
                               args=[new_index, is_stats],
                               kwargs={'stdout': self.stdout})
        last_action = step3

        # adding new index to the alias
        add_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions],
                                         kwargs={'stdout': self.stdout})

    # unflag the database - there's no need to duplicate the
    # indexing anymore
    delete = renaming_step.add_task(unflag_database,
                                    kwargs={'stdout': self.stdout})

    # Delete the old indexes, if any
    # NOTE(review): scheduled unconditionally even when to_remove is
    # empty, unlike later variants of this command which guard with
    # `if to_remove:` — presumably delete_indexes([]) is a no-op; verify.
    delete.add_task(delete_indexes, args=[to_remove],
                    kwargs={'stdout': self.stdout})

    # let's do it
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        while is_reindexing_amo():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # let's return the /_aliases values
    aliases = call_es('_aliases').json()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(indexes), aliases)
    log(summary, stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so
    the search feature works while the indexation occurs.

    Flow: optionally wipe, build a celery_tasktree chain of
    flag_database -> create_new_index -> index_data per module, then
    atomically update the aliases, unflag the DB and delete old indexes.
    Blocks until the DB reindexing flag is cleared by the workers
    (unless CELERY_ALWAYS_EAGER, in which case tasks ran inline).

    Fix: ES.indices.delete() on the wipe path now passes ignore=404 so
    wiping does not blow up when an index was never created (e.g. on a
    fresh environment) — consistent with the get_aliases() call below.
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    log('Starting the reindexation', stdout=self.stdout)

    modules = ['addons']
    if kwargs.get('with_stats', False):
        modules.append('stats')

    if kwargs.get('wipe', False):
        confirm = raw_input('Are you sure you want to wipe all AMO '
                            'Elasticsearch indexes? (yes/no): ')
        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                # ignore=404: a missing index must not abort the wipe.
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        # Accumulate deduplicated actions for the ES _aliases update.
        entry = {action: {'index': index, 'alias': alias}}
        if entry not in alias_actions:
            alias_actions.append(entry)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None

        alias = MODULES[module].get_alias()
        # NOTE(review): with ignore=404 a missing alias may yield an
        # error body instead of an index mapping — confirm get_aliases()
        # returns an empty dict in that case.
        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index,
                               args=[module, new_index])
        step3 = step2.add_task(index_data,
                               args=[module, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        # In eager mode the tasks already ran synchronously; only poll
        # the reindexing flag when real workers are involved.
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so
    the search feature works while the indexation occurs.

    Flow: optionally wipe, build a celery_tasktree chain of
    flag_database -> create_new_index -> index_data per module, then
    atomically update the aliases, unflag the DB and delete old
    indexes. Blocks until the DB reindexing flag is cleared by the
    workers (unless CELERY_ALWAYS_EAGER).
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    log('Starting the reindexation', stdout=self.stdout)

    modules = ['addons']
    if kwargs.get('with_stats', False):
        modules.append('stats')

    if kwargs.get('wipe', False):
        confirm = raw_input('Are you sure you want to wipe all AMO '
                            'Elasticsearch indexes? (yes/no): ')
        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                # ignore=404: a missing index must not abort the wipe.
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        # Deduplicated accumulation of _aliases update actions.
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None

        alias = MODULES[module].get_alias()
        # NOTE(review): with ignore=404 a missing alias may yield an
        # error body instead of an index mapping — confirm iterating it
        # cannot add bogus names to to_remove.
        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index,
                               args=[module, new_index])
        step3 = step2.add_task(index_data,
                               args=[module, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    # `time_limits` is presumably a module-level mapping with 'soft' and
    # 'hard' keys — defined outside this chunk.
    if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        control.time_limit('lib.es.management.commands.reindex.index_data',
                           soft=time_limits['soft'],
                           hard=time_limits['hard'])

    try:
        tree.apply_async()
        # In eager mode tasks already ran synchronously; only poll the
        # reindexing flag when real workers are involved.
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so
    the search feature works while the indexation occurs.

    Flow: optionally wipe, build a celery_tasktree chain of
    flag_database -> create_mapping -> create_index per alias, then
    atomically swap the aliases, unflag the DB and delete old indexes.
    Blocks until the DB reindexing flag is cleared by the workers.

    Fix: the delete_indexes task is now only scheduled when there are
    old indexes to remove, instead of unconditionally queuing a task
    with an empty list.
    """
    # This command manipulates the AMO aliases; refuse to run on MKT.
    if django_settings.MARKETPLACE:
        raise CommandError('This command affects the AMO ES indexes and '
                           'can only be run from AMO.')

    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation', stdout=self.stdout)

    if kwargs.get('with_stats', False):
        # Add the stats indexes back.
        _ALIASES.update(_STATS_ALIASES)

    if kwargs.get('wipe', False):
        confirm = raw_input('Are you sure you want to wipe all AMO '
                            'Elasticsearch indexes? (yes/no): ')
        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database(stdout=self.stdout)
            for index in set(_ALIASES.values()):
                # NOTE(review): %-formatting is applied to the value
                # returned by url('/%s'); elsewhere in this file the
                # pattern url('/%s' % alias) is used — confirm intent.
                requests.delete(url('/%s') % index)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    # Get list current aliases at /_aliases.
    all_aliases = requests.get(url('/_aliases')).json()

    # building the list of indexes
    indexes = set([prefix + index for index in _ALIASES.values()])

    actions = []

    def add_action(*elmt):
        # Deduplicated accumulation of ('add'|'remove', index, alias)
        # tuples for the final _aliases swap.
        if elmt in actions:
            return
        actions.append(elmt)

    all_aliases = all_aliases.items()

    # creating a task tree
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # for each index, we create a new time-stamped index
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, alias_ in all_aliases:
            if alias in alias_['aliases'].keys():
                # mark the index to be removed later
                old_index = aliased_index
                to_remove.append(aliased_index)

                # mark the alias to be removed as well
                add_action('remove', aliased_index, alias)

        # create a new index, using the alias name with a timestamp
        new_index = timestamp_index(alias)

        # if old_index is None that could mean it's a full index
        # In that case we want to continue index in it
        future_alias = url('/%s' % alias)
        if requests.head(future_alias).status_code == 200:
            old_index = alias

        # flag the database
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias],
                              kwargs={'stdout': self.stdout})
        step2 = step1.add_task(create_mapping,
                               args=[new_index, alias],
                               kwargs={'stdout': self.stdout})
        step3 = step2.add_task(create_index,
                               args=[new_index, is_stats],
                               kwargs={'stdout': self.stdout})
        last_action = step3

        # adding new index to the alias
        add_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions],
                                         kwargs={'stdout': self.stdout})

    # unflag the database - there's no need to duplicate the
    # indexing anymore
    delete = renaming_step.add_task(unflag_database,
                                    kwargs={'stdout': self.stdout})

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove],
                        kwargs={'stdout': self.stdout})

    # let's do it
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        while is_reindexing_amo():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # let's return the /_aliases values
    aliases = call_es('_aliases').json()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(indexes), aliases)
    log(summary, stdout=self.stdout)