def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a Tasktree that creates new indexes and indexes all objects,
    then points the alias to the new index when finished.
    """
    index_choice = kwargs.get('index', None)
    prefix = kwargs.get('prefix', '')
    force = kwargs.get('force', False)

    if index_choice:
        # If we only want to reindex a subset of indexes.
        INDEXES = INDEX_CHOICES.get(index_choice, None)
        if INDEXES is None:
            raise CommandError(
                'Incorrect index name specified. '
                'Choose one of: %s' % ', '.join(INDEX_CHOICES.keys()))
    else:
        INDEXES = INDEXERS

    if Reindexing.is_reindexing() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    elif force:
        Reindexing.unflag_reindexing()

    for INDEXER in INDEXES:
        index_name = INDEXER.get_mapping_type_name()
        chunk_size = INDEXER.chunk_size
        alias = ES_INDEXES[index_name]

        chunks, total = chunk_indexing(INDEXER, chunk_size)
        if not total:
            _print('No items to queue.', alias)
        else:
            total_chunks = int(ceil(total / float(chunk_size)))
            _print('Indexing {total} items into {n} chunks of size {size}'
                   .format(total=total, n=total_chunks, size=chunk_size),
                   alias)

        # Get the old index if it exists.
        try:
            aliases = ES.indices.get_alias(name=alias).keys()
        except elasticsearch.NotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None

        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + alias)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.indices.get_settings(index=old_index).get(
                    old_index, {}).get('settings', {}))
            except elasticsearch.NotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards',
                           settings.ES_DEFAULT_NUM_SHARDS)

        pre_task = pre_index.si(new_index, old_index, alias, index_name, {
            'analysis': INDEXER.get_analysis(),
            'number_of_replicas': 0,
            'number_of_shards': num_shards,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1'})
        post_task = post_index.si(new_index, old_index, alias, index_name, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'})

        # Ship it.
        if not total:
            # If there's no data we still create the index and alias.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [run_indexing.si(new_index, index_name, chunk)
                           for chunk in chunks]
            if settings.CELERY_ALWAYS_EAGER:
                # Eager mode and chords don't get along. So we serialize
                # the tasks as a workaround.
                index_tasks.insert(0, pre_task)
                index_tasks.append(post_task)
                chain(*index_tasks).apply_async()
            else:
                chain(pre_task,
                      chord(header=index_tasks,
                            body=post_task)).apply_async()

    _print('New index and indexing tasks all queued up.')
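# The handle() above consumes a chunk_indexing(INDEXER, chunk_size) helper
# returning (chunks, total), which is not shown in this section. The sketch
# below is a minimal, hypothetical version: it assumes the indexer exposes a
# get_indexable() queryset of objects to index, and splits the flat ID list
# into lists of at most chunk_size primary keys for run_indexing.
def chunk_indexing(indexer, chunk_size):
    """Split the indexer's indexable IDs into chunks of `chunk_size`."""
    ids = list(indexer.get_indexable().values_list('id', flat=True))
    total = len(ids)
    # Each chunk is a plain list of primary keys handed to a run_indexing task.
    chunks = [ids[i:i + chunk_size] for i in range(0, total, chunk_size)]
    return chunks, total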
def test_is_reindexing(self):
    assert not Reindexing.is_reindexing()
    Reindexing.objects.create(alias='foo', new_index='bar', old_index='baz')
    assert Reindexing.is_reindexing()
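# The test above and the management commands rely on a Reindexing model with
# small class-method helpers. The model itself is not part of this section;
# this is a minimal sketch of what it could look like, assuming a plain
# Django model whose fields and lengths are illustrative only.
from django.db import models


class Reindexing(models.Model):
    alias = models.CharField(max_length=255)
    new_index = models.CharField(max_length=255)
    old_index = models.CharField(max_length=255, null=True)

    @classmethod
    def is_reindexing(cls):
        # A reindex is considered in progress while any row exists.
        return cls.objects.exists()

    @classmethod
    def flag_reindexing(cls, alias, new_index, old_index):
        return cls.objects.create(alias=alias, new_index=new_index,
                                  old_index=old_index)

    @classmethod
    def unflag_reindexing(cls):
        # Clearing the table marks indexing as finished (or force-bypassed).
        cls.objects.all().delete()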
def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a Tasktree that creates new indexes and indexes all objects,
    then points the alias to the new index when finished.
    """
    global INDEXES

    index_choice = kwargs.get('index', None)
    prefix = kwargs.get('prefix', '')
    force = kwargs.get('force', False)

    if index_choice:
        # If we only want to reindex a subset of indexes.
        INDEXES = INDEX_DICT.get(index_choice, INDEXES)

    if Reindexing.is_reindexing() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    elif force:
        Reindexing.unflag_reindexing()

    for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:
        chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
        if not total:
            _print('No items to queue.', ALIAS)
        else:
            total_chunks = int(ceil(total / float(CHUNK_SIZE)))
            _print('Indexing {total} items into {n} chunks of size {size}'
                   .format(total=total, n=total_chunks, size=CHUNK_SIZE),
                   ALIAS)

        # Get the old index if it exists.
        try:
            aliases = ES.indices.get_alias(name=ALIAS).keys()
        except elasticsearch.NotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None

        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.indices.get_settings(index=old_index).get(
                    old_index, {}).get('settings', {}))
            except elasticsearch.NotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards',
                           settings.ES_DEFAULT_NUM_SHARDS)

        pre_task = pre_index.si(new_index, old_index, ALIAS, INDEXER, {
            'analysis': INDEXER.get_analysis(),
            'number_of_replicas': 0,
            'number_of_shards': num_shards,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1'})
        post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'})

        # Ship it.
        if not total:
            # If there's no data we still create the index and alias.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [run_indexing.si(new_index, INDEXER, chunk)
                           for chunk in chunks]
            chain(pre_task,
                  chord(header=index_tasks, body=post_task)).apply_async()

    _print('New index and indexing tasks all queued up.')
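# A small usage sketch for the command variants above. The command name
# ('reindex') and the INDEX_DICT key ('webapp') are assumptions made for
# illustration; only the 'index', 'prefix', and 'force' options are taken
# from the handle() signatures shown in this section.
from django.core.management import call_command

# Rebuild every index listed in INDEXES.
call_command('reindex')

# Rebuild a single index group, with a custom prefix for the new index name.
call_command('reindex', index='webapp', prefix='test_')

# Bypass the "already reindexing" guard.
call_command('reindex', force=True)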
def handle(self, *args, **kwargs):
    """Set up reindexing tasks.

    Creates a Tasktree that creates new indexes and indexes all objects,
    then points the alias to the new index when finished.
    """
    global INDEXES

    index_choice = kwargs.get('index', None)
    prefix = kwargs.get('prefix', '')
    force = kwargs.get('force', False)

    if index_choice:
        # If we only want to reindex a subset of indexes.
        INDEXES = INDEX_DICT.get(index_choice, INDEXES)

    if Reindexing.is_reindexing() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')
    elif force:
        unflag_database()

    chain = None
    old_indexes = []
    for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:
        # Get the old index if it exists.
        try:
            aliases = ES.indices.get_alias(name=ALIAS).keys()
        except elasticsearch.NotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None
        old_indexes.append(old_index)

        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.indices.get_settings(index=old_index).get(
                    old_index, {}).get('settings', {}))
            except elasticsearch.NotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards',
                           settings.ES_DEFAULT_NUM_SHARDS)

        # Flag the database to mark as currently indexing.
        if not chain:
            chain = flag_database.si(new_index, old_index, ALIAS)
        else:
            chain |= flag_database.si(new_index, old_index, ALIAS)

        # Create the indexes and mappings.
        # Note: We set num_replicas=0 here to lower load while re-indexing.
        # In a later step we increase it, which results in a more efficient
        # bulk copy in ES. For ES < 0.90 we manually enable compression.
        chain |= create_index.si(new_index, ALIAS, INDEXER, {
            'analysis': INDEXER.get_analysis(),
            'number_of_replicas': 0,
            'number_of_shards': num_shards,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1'})

        # Index all the things!
        chain |= run_indexing.si(new_index, INDEXER, CHUNK_SIZE)

        # After indexing we optimize the index, adjust settings, and point
        # the alias to the new index.
        chain |= update_alias.si(new_index, old_index, ALIAS, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'})

    # Unflag the database to mark as done indexing.
    chain |= unflag_database.si()

    # Delete the old indexes, if any.
    for old_index in old_indexes:
        if old_index:
            chain |= delete_index.si(old_index)

    # All done!
    chain |= output_summary.si()

    # Ship it.
    self.stdout.write('\nNew index and indexing tasks all queued up.\n')
    os.environ['FORCE_INDEXING'] = '1'
    try:
        chain.apply_async()
    finally:
        del os.environ['FORCE_INDEXING']
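# The variant above builds one long Celery workflow by OR-ing immutable task
# signatures together instead of using a chord. Below is a minimal,
# self-contained illustration of that pattern; the flag/index_chunk/unflag
# tasks and their arguments are made up for the example, while .si(), the |
# operator, and task_always_eager are standard Celery canvas/config APIs.
from celery import Celery

app = Celery('reindex_sketch')
app.conf.task_always_eager = True  # run in-process for the demonstration


@app.task
def flag(alias):
    print('flagging %s' % alias)


@app.task
def index_chunk(index, ids):
    print('indexing %s into %s' % (ids, index))


@app.task
def unflag():
    print('unflagging')


# .si() creates an *immutable* signature: the previous task's result is not
# passed on, so every step must receive its arguments explicitly, exactly as
# flag_database / create_index / run_indexing / unflag_database do above.
workflow = flag.si('webapp')
workflow |= index_chunk.si('webapp-20140101120000', [1, 2, 3])
workflow |= unflag.si()
workflow.apply_async()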