def execute_workflow(self, workflow):
    """Kick off the reindexation workflow and wait for it to complete.

    The FORCE_INDEXING environment flag is set for the duration of the
    run so indexing tasks know a reindex is in progress. Unless Celery
    runs eagerly, we poll the database flag until it clears, then print
    a summary of the resulting Elasticsearch aliases.
    """
    self.stdout.write('Running all indexation tasks')
    os.environ['FORCE_INDEXING'] = '1'
    try:
        workflow.apply_async()
        eager = getattr(settings, 'CELERY_TASK_ALWAYS_EAGER', False)
        if not eager:
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                self.stdout.write('.')
                self.stdout.flush()
                time.sleep(5)
    finally:
        # Always drop the env flag, even if the workflow blew up.
        del os.environ['FORCE_INDEXING']
    self.stdout.write('\n')
    # Report the current /_aliases mapping as a pretty-printed summary.
    alias_dump = json.dumps(
        ES.indices.get_alias(), sort_keys=True, indent=4)
    self.stdout.write(_SUMMARY % alias_dump)
def _test_reindexation(self):
    """Exercise a full background reindex while the foreground keeps
    searching and indexing new documents.

    Starts the `reindex` management command in a separate thread, then
    verifies from the main thread that (a) searching still works during
    the reindex, (b) freshly indexed documents are not lost, and (c) new
    time-stamped indices/aliases replace the old ones when it finishes.
    """
    # Current indices with aliases.
    old_indices = self.get_indices_aliases()

    # This is to start a reindexation in the background.
    class ReindexThread(threading.Thread):
        def __init__(self):
            # Capture the command's output so we can assert on it later.
            self.stdout = StringIO.StringIO()
            super(ReindexThread, self).__init__()

        def run(self):
            # We need to wait at least a second, to make sure the alias
            # name is going to be different, since we already create an
            # alias in setUpClass.
            time.sleep(1)
            management.call_command('reindex', stdout=self.stdout)
    t = ReindexThread()
    t.start()

    # Wait for the reindex in the thread to flag the database.
    # The database transaction isn't shared with the thread, so force the
    # commit.
    while t.is_alive() and not is_reindexing_amo():
        connection._commit()
        connection.clean_savepoints()

    # We should still be able to search in the foreground while the reindex
    # is being done in the background. We should also be able to index new
    # documents, and they should not be lost.
    old_addons_count = len(self.expected)
    while t.is_alive() and len(self.expected) < old_addons_count + 3:
        self.expected.append(addon_factory())
        connection._commit()
        connection.clean_savepoints()
        self.refresh()
        self.check_results(self.expected)

    # If the loop above exited without adding anything, the background
    # reindex finished too fast for the foreground-indexing check.
    if len(self.expected) == old_addons_count:
        raise AssertionError('Could not index objects in foreground while '
                             'reindexing in the background.')

    t.join()  # Wait for the thread to finish.
    t.stdout.seek(0)
    stdout = t.stdout.read()
    assert 'Reindexation done' in stdout, stdout

    # The reindexation is done, let's double check we have all our docs.
    connection._commit()
    connection.clean_savepoints()
    self.refresh()
    self.check_results(self.expected)

    # New indices have been created, and aliases now point to them.
    new_indices = self.get_indices_aliases()
    assert len(new_indices)
    assert old_indices != new_indices, (stdout, old_indices, new_indices)
    self.check_settings(new_indices)
def setUp(self):
    """Reset the reindexing flag and record the pre-existing indices."""
    super(TestIndexCommand, self).setUp()
    # A previous run may have left the reindexing flag set; clear it so
    # the command under test can start.
    if is_reindexing_amo():
        unflag_reindexing_amo()
    self.url = reverse('search.search')
    # We store previously existing indices in order to delete the ones
    # created during this test run.
    status = self.es.indices.status()
    self.indices = status['indices'].keys()
def setUp(self):
    """Clear any stale reindexing flag and snapshot existing indices."""
    super(TestIndexCommand, self).setUp()
    if is_reindexing_amo():
        # A leftover flag from an earlier run would block the command.
        unflag_reindexing_amo()
    self.url = reverse('search.search')
    # Remember the indices that already exist so that only the ones
    # created during this test run get cleaned up afterwards.
    self.indices = self.es.indices.stats()['indices'].keys()
def handle(self, *args, **kwargs):
    """Reindexing work.

    Builds a task chain that creates new indexes over the old ones so
    the search feature keeps working while the indexation occurs.
    """
    force = kwargs['force']
    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')

    alias = settings.ES_INDEXES.get(kwargs['key'], None)
    if alias is None:
        raise CommandError(
            'Invalid --key parameter. It should be one of: %s.' % (
                self.accepted_keys()))

    self.stdout.write('Starting the reindexation for %s.' % alias)

    if kwargs['wipe']:
        skip_confirmation = kwargs['noinput']
        answer = ''
        if not skip_confirmation:
            answer = input('Are you sure you want to wipe all AMO '
                           'Elasticsearch indexes? (yes/no): ')
            while answer not in ('yes', 'no'):
                answer = input('Please enter either "yes" or "no": ')
        if not (skip_confirmation or answer == 'yes'):
            raise CommandError('Aborted.')
        unflag_database()
        # Resolve the alias to its concrete index and delete that. This
        # way it does not matter whether `alias` is a real alias or a
        # plain index (which is wrong, but happens if data was indexed
        # before the first reindex was done).
        try:
            ES.indices.delete(next(iter(ES.indices.get(alias))))
        except NotFoundError:
            pass
    elif force:
        unflag_database()

    self.execute_workflow(self.create_workflow(alias))
def setUp(self):
    """Prepare test state: clear flags, snapshot indices, patch Celery."""
    super(TestIndexCommand, self).setUp()
    if is_reindexing_amo():
        unflag_reindexing_amo()
    self.url = reverse_ns('addon-search')
    # Remember pre-existing indices so that only the ones created during
    # this test run are deleted afterwards.
    self.indices = self.es.indices.stats()['indices'].keys()
    self.addons = []
    self.expected = list(self.addons)
    # Monkeypatch Celery's ".get()" guard inside async tasks until
    # https://github.com/celery/celery/issues/4661 (which isn't just
    # about retries but a general regression that manifests only in
    # eager-mode) is fixed.
    self.patch('celery.app.task.denied_join_result')
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a task chain that builds new indexes over the old ones so
    the search feature works while the indexation occurs.
    """
    force = kwargs['force']
    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')

    data_kind = kwargs['data']
    if data_kind not in ('addons', 'stats'):
        raise CommandError('--data should be "addons" or "stats".')
    alias = get_alias(data_kind)

    self.stdout.write('Starting the reindexation')

    if kwargs['wipe']:
        skip_confirmation = kwargs['noinput']
        answer = ''
        if not skip_confirmation:
            answer = input('Are you sure you want to wipe all AMO '
                           'Elasticsearch indexes? (yes/no): ')
            while answer not in ('yes', 'no'):
                answer = input('Please enter either "yes" or "no": ')
        if not (skip_confirmation or answer == 'yes'):
            raise CommandError('Aborted.')
        unflag_database()
        # A missing index is fine here: ignore 404s from the delete.
        ES.indices.delete(alias, ignore=404)
    elif force:
        unflag_database()

    self.execute_workflow(self.create_workflow(alias))
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.

    Optionally wipes existing indexes first (--wipe, with confirmation
    unless --noinput), and bypasses the "already reindexing" guard with
    --force.
    """
    force = kwargs.get('force', False)
    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')
    log('Starting the reindexation', stdout=self.stdout)
    # Mapping of alias name -> indexing module (stats included on demand).
    modules = get_modules(with_stats=kwargs.get('with_stats', False))
    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')
        if (confirm == 'yes' or skip_confirmation):
            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                # Ignore 404s: the index may simply not exist yet.
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    # Accumulated add/remove alias actions, applied in one bulk call later.
    alias_actions = []

    def add_alias_action(action, index, alias):
        # Deduplicate identical actions before queueing them.
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    # Old indexes to delete once the new ones are live.
    to_remove = []

    # For each alias, we create a new time-stamped index.
    for alias, module in modules.items():
        old_index = None

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[alias, new_index])
        step3 = step2.add_task(index_data, args=[alias, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)
    os.environ['FORCE_INDEXING'] = '1'

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        control.time_limit(
            'olympia.lib.es.management.commands.reindex.index_data',
            soft=time_limits['soft'],
            hard=time_limits['hard'])

    try:
        tree.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        # Always clear the env flag, even on failure.
        del os.environ['FORCE_INDEXING']
    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a Tasktree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.

    Handles the "addons" index by default, plus "stats" with
    --with-stats; --wipe deletes existing indexes first (confirmation
    required unless --noinput), and --force bypasses the
    "already reindexing" guard.
    """
    force = kwargs.get("force", False)
    if is_reindexing_amo() and not force:
        raise CommandError("Indexation already occuring - use --force to "
                           "bypass")
    log("Starting the reindexation", stdout=self.stdout)
    modules = ["addons"]
    if kwargs.get("with_stats", False):
        modules.append("stats")
    if kwargs.get("wipe", False):
        skip_confirmation = kwargs.get("noinput", False)
        confirm = ""
        if not skip_confirmation:
            confirm = raw_input("Are you sure you want to wipe all AMO "
                                "Elasticsearch indexes? (yes/no): ")
            while confirm not in ("yes", "no"):
                confirm = raw_input('Please enter either "yes" or "no": ')
        if confirm == "yes" or skip_confirmation:
            unflag_database(stdout=self.stdout)
            # Delete each module's alias/index; 404s are fine (may not
            # exist yet).
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    # Accumulated add/remove alias actions, applied in one bulk call later.
    alias_actions = []

    def add_alias_action(action, index, alias):
        # Deduplicate identical actions before queueing them.
        action = {action: {"index": index, "alias": alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log("Building the task tree", stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    # Old indexes to delete once the new ones are live.
    to_remove = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None
        alias = MODULES[module].get_alias()

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action("remove", old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[module, new_index])
        step3 = step2.add_task(index_data, args=[module, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action("add", new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log("Running all indexation tasks", stdout=self.stdout)
    os.environ["FORCE_INDEXING"] = "1"

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
        control.time_limit(
            "olympia.lib.es.management.commands.reindex.index_data",
            soft=time_limits["soft"],
            hard=time_limits["hard"],
        )

    try:
        tree.apply_async()
        if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
    finally:
        # Always clear the env flag, even on failure.
        del os.environ["FORCE_INDEXING"]
    sys.stdout.write("\n")

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a task chain that creates new indexes over the old ones so
    the search feature works while the indexation occurs.

    --wipe deletes existing indexes first (confirmation required unless
    --noinput); --force bypasses the "already reindexing" guard;
    --with-stats includes the stats index.
    """
    force = kwargs.get('force', False)
    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    self.stdout.write('Starting the reindexation')
    # Mapping of alias name -> indexing module (stats included on demand).
    modules = get_modules(with_stats=kwargs.get('with_stats', False))
    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')
        if (confirm == 'yes' or skip_confirmation):
            unflag_database()
            # Delete each alias/index; 404s are fine (may not exist yet).
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database()

    # Accumulated add/remove alias actions, applied in one bulk call later.
    alias_actions = []

    def add_alias_action(action, index, alias):
        # Deduplicate identical actions before queueing them.
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task chain.
    self.stdout.write('Building the task chain')

    # Old indexes to delete once the new ones are live.
    to_remove = []
    workflow = []

    # For each alias, we create a new time-stamped index.
    for alias, module in modules.items():
        old_index = None

        try:
            olds = ES.indices.get_alias(alias)
            for old_index in olds:
                # Mark the index to be removed later.
                to_remove.append(old_index)
                # Mark the alias to be removed from that index.
                add_alias_action('remove', old_index, alias)
        except NotFoundError:
            # If the alias did not exist, ignore it, don't try to remove
            # it.
            pass

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        workflow.append(
            flag_database.si(new_index, old_index, alias) |
            create_new_index.si(alias, new_index) |
            index_data.si(alias, new_index)
        )

        # Adding new index to the alias.
        add_alias_action('add', new_index, alias)

    # Run the per-alias chains in parallel, then the shared cleanup steps.
    workflow = group(workflow)

    # Alias the new index and remove the old aliases, if any.
    workflow |= update_aliases.si(alias_actions)

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    workflow |= unflag_database.si()

    # Delete the old indexes, if any.
    if to_remove:
        workflow |= delete_indexes.si(to_remove)

    # Let's do it.
    self.stdout.write('Running all indexation tasks')
    os.environ['FORCE_INDEXING'] = '1'
    try:
        workflow.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                self.stdout.write('.')
                self.stdout.flush()
                time.sleep(5)
    finally:
        # Always clear the env flag, even on failure.
        del os.environ['FORCE_INDEXING']
    self.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_alias()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    self.stdout.write(summary)