def post_index(new_index, old_index, alias, index_name, settings):
    """
    Perform post-indexing tasks:

    * Optimize (which also does a refresh and a flush by default).
    * Update settings to reset number of replicas.
    * Point the alias to this new index.
    * Unflag the database.
    * Remove the old index.
    * Output the current alias configuration.
    """
    _print('Optimizing, updating settings and aliases.', alias)

    # Optimize (which also refreshes and flushes by default).
    ES.indices.optimize(index=new_index)

    # Update the replicas.
    ES.indices.put_settings(index=new_index, body=settings)

    # Add and remove aliases.
    actions = [
        {'add': {'index': new_index, 'alias': alias}}
    ]
    if old_index:
        actions.append(
            {'remove': {'index': old_index, 'alias': alias}}
        )
    ES.indices.update_aliases(body=dict(actions=actions))

    _print('Unflagging the database.', alias)
    Reindexing.unflag_reindexing(alias=alias)

    if old_index and ES.indices.exists(index=old_index):
        _print('Removing index {index}.'.format(index=old_index), alias)
        ES.indices.delete(index=old_index)

    # Output the current alias configuration for every indexer.
    alias_output = ''
    for indexer in INDEXERS:
        alias = ES_INDEXES[indexer.get_mapping_type_name()]
        alias_output += unicode(ES.indices.get_aliases(index=alias)) + '\n'

    _print('Reindexation done. Current aliases configuration: '
           '{output}\n'.format(output=alias_output), alias)
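# For context, a minimal sketch of the pre_index() counterpart that handle()
# queues before this task. This is an assumption pieced together from how
# pre_index.si() is called below and from the Reindexing fields used in the
# tests, not the project's actual implementation; flag_reindexing() is the
# assumed companion to unflag_reindexing().
def pre_index(new_index, old_index, alias, index_name, settings):
    """Flag the database, then create the new index with temporary settings."""
    _print('Flagging the database and creating the index.', alias)

    # Flag the database so other processes know a reindex is in progress.
    Reindexing.flag_reindexing(alias=alias, new_index=new_index,
                               old_index=old_index)

    # Create the new index with indexing-friendly settings (no replicas,
    # refresh disabled); post_index() restores them afterwards.
    ES.indices.create(index=new_index, body={'settings': settings})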
def test_unflag_reindexing(self):
    assert Reindexing.objects.filter(alias='foo').count() == 0

    # Unflagging an unflagged database does nothing.
    Reindexing.unflag_reindexing(alias='foo')
    assert Reindexing.objects.filter(alias='foo').count() == 0

    # Flag, then unflag.
    Reindexing.objects.create(alias='foo', new_index='bar', old_index='baz')
    assert Reindexing.objects.filter(alias='foo').count() == 1
    Reindexing.unflag_reindexing(alias='foo')
    assert Reindexing.objects.filter(alias='foo').count() == 0

    # Unflagging another alias doesn't clash.
    Reindexing.objects.create(alias='bar', new_index='bar', old_index='baz')
    Reindexing.unflag_reindexing(alias='foo')
    assert Reindexing.objects.filter(alias='bar').count() == 1
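# A companion test one might add for is_reindexing(), which handle() relies
# on below -- a sketch using the same model fields, assuming is_reindexing()
# simply checks whether any Reindexing row exists.
def test_is_reindexing(self):
    assert not Reindexing.is_reindexing()

    # Flagged: a row exists.
    Reindexing.objects.create(alias='foo', new_index='bar', old_index='baz')
    assert Reindexing.is_reindexing()

    # Unflagged again: no rows left.
    Reindexing.unflag_reindexing(alias='foo')
    assert not Reindexing.is_reindexing()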
def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a chain of tasks that creates a new index for each indexer
    and indexes all objects, then points the alias to the new index
    when finished.
    """
    index_choice = kwargs.get('index', None)
    prefix = kwargs.get('prefix', '')
    force = kwargs.get('force', False)

    if index_choice:
        # If we only want to reindex a subset of indexes.
        INDEXES = INDEX_CHOICES.get(index_choice, None)
        if INDEXES is None:
            raise CommandError(
                'Incorrect index name specified. '
                'Choose one of: %s' % ', '.join(INDEX_CHOICES.keys()))
    else:
        INDEXES = INDEXERS

    if Reindexing.is_reindexing() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    elif force:
        Reindexing.unflag_reindexing()

    for INDEXER in INDEXES:
        index_name = INDEXER.get_mapping_type_name()
        chunk_size = INDEXER.chunk_size
        alias = ES_INDEXES[index_name]

        chunks, total = chunk_indexing(INDEXER, chunk_size)
        if not total:
            _print('No items to queue.', alias)
        else:
            total_chunks = int(ceil(total / float(chunk_size)))
            _print('Indexing {total} items into {n} chunks of size {size}'
                   .format(total=total, n=total_chunks, size=chunk_size),
                   alias)

        # Get the old index if it exists.
        try:
            aliases = ES.indices.get_alias(name=alias).keys()
        except elasticsearch.NotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(prefix + alias)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.indices.get_settings(index=old_index).get(
                    old_index, {}).get('settings', {}))
            except elasticsearch.NotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards',
                           settings.ES_DEFAULT_NUM_SHARDS)

        pre_task = pre_index.si(new_index, old_index, alias, index_name, {
            'analysis': INDEXER.get_analysis(),
            'number_of_replicas': 0,
            'number_of_shards': num_shards,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1'})
        post_task = post_index.si(new_index, old_index, alias, index_name,
                                  {'number_of_replicas': num_replicas,
                                   'refresh_interval': '5s'})

        # Ship it.
        if not total:
            # If there's no data we still create the index and alias.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [run_indexing.si(new_index, index_name, chunk)
                           for chunk in chunks]
            if settings.CELERY_ALWAYS_EAGER:
                # Eager mode and chords don't get along. So we serialize
                # the tasks as a workaround.
                index_tasks.insert(0, pre_task)
                index_tasks.append(post_task)
                chain(*index_tasks).apply_async()
            else:
                chain(pre_task,
                      chord(header=index_tasks,
                            body=post_task)).apply_async()

    _print('New index and indexing tasks all queued up.')
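# A minimal sketch of the timestamp_index() helper used above, assuming it
# simply suffixes the alias with the current time so every reindex run gets
# a unique physical index name behind the stable alias.
import datetime


def timestamp_index(index):
    """Return the index name suffixed with the current timestamp."""
    return '{0}-{1}'.format(
        index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))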
def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a chain of tasks that creates a new index for each indexer
    and indexes all objects, then points the alias to the new index
    when finished.
    """
    global INDEXES

    index_choice = kwargs.get('index', None)
    prefix = kwargs.get('prefix', '')
    force = kwargs.get('force', False)

    if index_choice:
        # If we only want to reindex a subset of indexes.
        INDEXES = INDEX_DICT.get(index_choice, INDEXES)

    if Reindexing.is_reindexing() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    elif force:
        Reindexing.unflag_reindexing()

    for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:
        chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
        if not total:
            _print('No items to queue.', ALIAS)
        else:
            total_chunks = int(ceil(total / float(CHUNK_SIZE)))
            _print('Indexing {total} items into {n} chunks of size {size}'
                   .format(total=total, n=total_chunks, size=CHUNK_SIZE),
                   ALIAS)

        # Get the old index if it exists.
        try:
            aliases = ES.indices.get_alias(name=ALIAS).keys()
        except elasticsearch.NotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.indices.get_settings(index=old_index).get(
                    old_index, {}).get('settings', {}))
            except elasticsearch.NotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards',
                           settings.ES_DEFAULT_NUM_SHARDS)

        pre_task = pre_index.si(new_index, old_index, ALIAS, INDEXER, {
            'analysis': INDEXER.get_analysis(),
            'number_of_replicas': 0,
            'number_of_shards': num_shards,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1'})
        post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'})

        # Ship it.
        if not total:
            # If there's no data we still create the index and alias.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [run_indexing.si(new_index, INDEXER, chunk)
                           for chunk in chunks]
            chain(pre_task,
                  chord(header=index_tasks,
                        body=post_task)).apply_async()

    _print('New index and indexing tasks all queued up.')
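# A minimal sketch of chunk_indexing(), which both handle() variants call.
# It assumes the indexer exposes the ids to index through a get_indexable()
# queryset (that method name is an assumption) and that a chunked() helper
# splits an iterable into fixed-size groups.
def chunk_indexing(indexer, chunk_size):
    """Chunk the items to index, returning the chunks and the total count."""
    ids = list(indexer.get_indexable().values_list('id', flat=True))
    return chunked(ids, chunk_size), len(ids)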
def unflag_database():
    """Unflag the database to indicate that the reindexing is over."""
    sys.stdout.write('Unflagging the database\n')
    Reindexing.unflag_reindexing()