def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so
    the search feature keeps working while the indexation occurs.

    Raises CommandError when not run under Marketplace settings, or
    when an indexation is already in progress (unless --force is given).
    Returns a summary string describing the resulting aliases.
    """
    if not django_settings.MARKETPLACE:
        raise CommandError('This command affects both the marketplace and '
                           'AMO ES storage. But the command can only be '
                           'run from the Marketplace.')

    force = kwargs.get('force', False)

    if database_flagged() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation')

    if kwargs.get('wipe', False):
        confirm = raw_input("Are you sure you want to wipe all data from "
                            "ES ? (yes/no): ")

        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database()
            requests.delete(url('/'))
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database()

    # Get list current aliases at /_aliases.
    all_aliases = requests.get(url('/_aliases')).json()

    # building the list of indexes
    indexes = set([prefix + index for index in _ALIASES.values()])

    actions = []

    def add_action(*elmt):
        # Record each alias action at most once.
        if elmt in actions:
            return
        actions.append(elmt)

    all_aliases = all_aliases.items()

    # creating a task tree
    log('Building the task tree')
    tree = TaskTree()
    last_action = None

    to_remove = []

    # for each index, we create a new time-stamped index
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, alias_ in all_aliases:
            if alias in alias_['aliases'].keys():
                # mark the index to be removed later
                old_index = aliased_index
                to_remove.append(aliased_index)

                # mark the alias to be removed as well
                add_action('remove', aliased_index, alias)

        # create a new index, using the alias name with a timestamp
        new_index = timestamp_index(alias)

        # if old_index is None that could mean it's a full index
        # In that case we want to continue index in it
        future_alias = url('/%s' % alias)
        if requests.head(future_alias).status_code == 200:
            old_index = alias

        # flag the database
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_mapping, args=[new_index, alias])
        step3 = step2.add_task(create_index, args=[new_index, is_stats])
        last_action = step3

        # adding new index to the alias
        add_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions])

    # unflag the database - there's no need to duplicate the
    # indexing anymore
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    # FIX: only schedule the deletion task when there actually are old
    # indexes to remove (matches the other versions of this command).
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # let's do it
    log('Running all indexation tasks')

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        while database_flagged():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # let's return the /_aliases values
    aliases = call_es('_aliases').json()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    return _SUMMARY % (len(indexes), aliases)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get('force', False)
    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    log('Starting the reindexation', stdout=self.stdout)

    modules = get_modules(with_stats=kwargs.get('with_stats', False))

    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')
        if skip_confirmation or confirm == 'yes':
            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    pending = []

    def remember_action(verb, index, alias):
        # Entries use the payload shape of the ES _aliases endpoint;
        # duplicates are silently ignored.
        entry = {verb: {'index': index, 'alias': alias}}
        if entry not in pending:
            pending.append(entry)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_step = None
    doomed_indexes = []

    # One new time-stamped index per alias.
    for alias, module in modules.items():
        old_index = None

        for stale_index in ES.indices.get_aliases(alias, ignore=404):
            old_index = stale_index
            # Remember the index and its alias so both get removed later.
            doomed_indexes.append(stale_index)
            remember_action('remove', stale_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database, then create and fill the new index.
        flagging = tree.add_task(flag_database,
                                 args=[new_index, old_index, alias])
        creating = flagging.add_task(create_new_index,
                                     args=[alias, new_index])
        indexing = creating.add_task(index_data, args=[alias, new_index])
        last_step = indexing

        # Adding new index to the alias.
        remember_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming = last_step.add_task(update_aliases, args=[pending])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    unflagging = renaming.add_task(unflag_database)

    # Delete the old indexes, if any.
    if doomed_indexes:
        unflagging.add_task(delete_indexes, args=[doomed_indexes])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        control.time_limit(
            'olympia.lib.es.management.commands.reindex.index_data',
            soft=time_limits['soft'], hard=time_limits['hard'])

    try:
        tree.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']
    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    alias_dump = json.dumps(ES.indices.get_aliases(), sort_keys=True,
                            indent=4)
    log(_SUMMARY % (len(modules), alias_dump), stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a Tasktree that creates a new indexes and indexes all
    objects, then points the alias to this new index when finished.
    """
    if not settings.MARKETPLACE:
        raise CommandError('This command affects only marketplace and '
                           'should be run under Marketplace settings.')

    force = kwargs.get('force', False)
    prefix = kwargs.get('prefix', '')

    if database_flagged() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')
    elif force:
        unflag_database()

    # The list of indexes that is currently aliased by `ALIAS`.
    aliased = ES.aliases(ALIAS).keys()
    old_index = aliased[0] if aliased else None

    # Create a new index, using the index name with a timestamp.
    new_index = timestamp_index(prefix + ALIAS)

    # See how the index is currently configured.
    index_settings = {}
    if old_index:
        try:
            index_settings = (ES.get_settings(old_index)
                              .get(old_index, {})
                              .get('settings', {}))
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            index_settings = {}

    num_replicas = index_settings.get('number_of_replicas',
                                      settings.ES_DEFAULT_NUM_REPLICAS)
    num_shards = index_settings.get('number_of_shards',
                                    settings.ES_DEFAULT_NUM_SHARDS)

    # Start our chain of events to re-index.
    tree = TaskTree()

    # Flag the database.
    flagging = tree.add_task(flag_database,
                             args=[new_index, old_index, ALIAS])

    # Create the index and mapping.
    #
    # Note: We set num_replicas=0 here to decrease load while re-indexing.
    # In a later step we increase it which results in a more efficient bulk
    # copy in Elasticsearch.
    # For ES < 0.90 we manually enable compression.
    creation_settings = {
        'number_of_replicas': 0,
        'number_of_shards': num_shards,
        'store.compress.tv': True,
        'store.compress.stored': True,
        'refresh_interval': '-1',
    }
    creating = flagging.add_task(create_index,
                                 args=[new_index, ALIAS, creation_settings])

    # Index all the things!
    indexing = creating.add_task(run_indexing, args=[new_index])

    # After indexing we optimize the index, adjust settings, and point the
    # alias to the new index.
    aliasing = indexing.add_task(
        update_alias,
        args=[new_index, old_index, ALIAS,
              {'number_of_replicas': num_replicas,
               'refresh_interval': '5s'}])

    # Unflag the database.
    final_step = aliasing.add_task(unflag_database)

    # Delete the old index, if any.
    if old_index:
        final_step = final_step.add_task(delete_index, args=[old_index])

    final_step.add_task(output_summary)

    self.stdout.write('\nNew index and indexing tasks all queued up.\n')
    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
    finally:
        del os.environ['FORCE_INDEXING']
def handle(self, *args, **kwargs):
    """Reindexing work.

    Builds a TaskTree which creates fresh time-stamped indexes next to
    the existing ones, so search keeps working while reindexation runs.
    """
    force = kwargs.get("force", False)
    if is_reindexing_amo() and not force:
        raise CommandError("Indexation already occuring - use --force to "
                           "bypass")

    log("Starting the reindexation", stdout=self.stdout)

    modules = ["addons"]
    if kwargs.get("with_stats", False):
        modules.append("stats")

    if kwargs.get("wipe", False):
        skip_confirmation = kwargs.get("noinput", False)
        confirm = ""
        if not skip_confirmation:
            confirm = raw_input("Are you sure you want to wipe all AMO "
                                "Elasticsearch indexes? (yes/no): ")
            while confirm not in ("yes", "no"):
                confirm = raw_input('Please enter either "yes" or "no": ')
        if confirm == "yes" or skip_confirmation:
            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    queued_actions = []

    def queue_alias_action(verb, index, alias):
        # Payload shape matches the Elasticsearch _aliases API;
        # duplicates are dropped.
        entry = {verb: {"index": index, "alias": alias}}
        if entry not in queued_actions:
            queued_actions.append(entry)

    # Creating a task tree.
    log("Building the task tree", stdout=self.stdout)
    work = TaskTree()
    tail = None
    obsolete = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None
        alias = MODULES[module].get_alias()

        for existing in ES.indices.get_aliases(alias, ignore=404):
            old_index = existing
            # Mark both the index and its alias for later removal.
            obsolete.append(existing)
            queue_alias_action("remove", existing, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue index in it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database, then create and fill the new index.
        flag_step = work.add_task(flag_database,
                                  args=[new_index, old_index, alias])
        create_step = flag_step.add_task(create_new_index,
                                         args=[module, new_index])
        index_step = create_step.add_task(index_data,
                                          args=[module, new_index])
        tail = index_step

        # Adding new index to the alias.
        queue_alias_action("add", new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = tail.add_task(update_aliases, args=[queued_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    cleanup_step = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if obsolete:
        cleanup_step.add_task(delete_indexes, args=[obsolete])

    # Let's do it.
    log("Running all indexation tasks", stdout=self.stdout)

    os.environ["FORCE_INDEXING"] = "1"

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    eager = getattr(settings, "CELERY_ALWAYS_EAGER", False)
    if not eager:
        control.time_limit(
            "olympia.lib.es.management.commands.reindex.index_data",
            soft=time_limits["soft"],
            hard=time_limits["hard"],
        )

    try:
        work.apply_async()
        if not eager:
            time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ["FORCE_INDEXING"]
    sys.stdout.write("\n")

    # Let's return the /_aliases values.
    dumped = json.dumps(ES.indices.get_aliases(), sort_keys=True, indent=4)
    log(_SUMMARY % (len(modules), dumped), stdout=self.stdout)
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so
    the search feature keeps working while the indexation occurs.

    Raises CommandError when not run under Marketplace settings, or
    when an indexation is already in progress (unless --force is given).
    Returns a summary string describing the resulting aliases.
    """
    if not django_settings.MARKETPLACE:
        raise CommandError('This command affects both the marketplace and '
                           'AMO ES storage. But the command can only be '
                           'run from the Marketplace.')

    force = kwargs.get('force', False)

    if database_flagged() and not force:
        raise CommandError('Indexation already occuring - use --force to '
                           'bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation')

    if kwargs.get('wipe', False):
        confirm = raw_input("Are you sure you want to wipe all data from "
                            "ES ? (yes/no): ")

        while confirm not in ('yes', 'no'):
            confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes':
            unflag_database()
            requests.delete(url('/'))
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database()

    # Get list current aliases at /_aliases.
    # FIX: Response.json is a method in requests >= 1.0 and must be
    # called; accessing the bare attribute returned the bound method,
    # which broke the .items() call below. The sibling version of this
    # command already uses .json().
    all_aliases = requests.get(url('/_aliases')).json()

    # building the list of indexes
    indexes = set(
        [prefix + index for index in django_settings.ES_INDEXES.values()])

    actions = []

    def add_action(*elmt):
        # Record each alias action at most once.
        if elmt in actions:
            return
        actions.append(elmt)

    all_aliases = all_aliases.items()

    # creating a task tree
    log('Building the task tree')
    tree = TaskTree()
    last_action = None

    to_remove = []

    # for each index, we create a new time-stamped index
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, alias_ in all_aliases:
            if alias in alias_['aliases'].keys():
                # mark the index to be removed later
                old_index = aliased_index
                to_remove.append(aliased_index)

                # mark the alias to be removed as well
                add_action('remove', aliased_index, alias)

        # create a new index, using the alias name with a timestamp
        new_index = timestamp_index(alias)

        # if old_index is None that could mean it's a full index
        # In that case we want to continue index in it
        future_alias = url('/%s' % alias)
        if requests.head(future_alias).status_code == 200:
            old_index = alias

        # flag the database
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_mapping, args=[new_index, alias])
        step3 = step2.add_task(create_index, args=[new_index, is_stats])
        last_action = step3

        # adding new index to the alias
        add_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions])

    # unflag the database - there's no need to duplicate the
    # indexing anymore
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    # FIX: only schedule the deletion task when there actually are old
    # indexes to remove.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # let's do it
    log('Running all indexation tasks')

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        while database_flagged():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # let's return the /_aliases values
    # FIX: same as above - .json must be called, not just referenced.
    aliases = call_es('_aliases').json()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    return _SUMMARY % (len(indexes), aliases)