Example #1
def post_index(new_index, old_index, alias, index_name, settings):
    """
    Perform post-indexing tasks:
        * Optimize (which also does a refresh and a flush by default).
        * Update settings to reset number of replicas.
        * Point the alias to this new index.
        * Unflag the database.
        * Remove the old index.
        * Output the current alias configuration.

    """
    _print('Optimizing, updating settings and aliases.', alias)

    # Optimize.
    ES.indices.optimize(index=new_index)

    # Update the replicas.
    ES.indices.put_settings(index=new_index, body=settings)

    # Add and remove aliases.
    actions = [
        {'add': {'index': new_index, 'alias': alias}}
    ]
    if old_index:
        actions.append(
            {'remove': {'index': old_index, 'alias': alias}}
        )
    ES.indices.update_aliases(body=dict(actions=actions))

    _print('Unflagging the database.', alias)
    Reindexing.unflag_reindexing(alias=alias)

    # Only mention (and delete) the old index if there actually was one;
    # otherwise this printed 'Removing index None.'.
    if old_index and ES.indices.exists(index=old_index):
        _print('Removing index {index}.'.format(index=old_index), alias)
        ES.indices.delete(index=old_index)

    alias_output = ''
    for indexer in INDEXERS:
        # Use a separate name so the `alias` argument used by the final
        # _print below isn't clobbered.
        index_alias = ES_INDEXES[indexer.get_mapping_type_name()]
        alias_output += unicode(
            ES.indices.get_aliases(index=index_alias)) + '\n'

    _print('Reindexation done. Current aliases configuration: '
           '{output}\n'.format(output=alias_output), alias)
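
The add and remove actions above are sent in a single update_aliases call,
which Elasticsearch applies atomically, so searches never hit a moment where
the alias points nowhere. A minimal, self-contained sketch of the payload
being built (the index names are hypothetical):

def build_alias_actions(new_index, old_index, alias):
    """Build the atomic alias-swap payload used in post_index above."""
    actions = [{'add': {'index': new_index, 'alias': alias}}]
    if old_index:
        actions.append({'remove': {'index': old_index, 'alias': alias}})
    return {'actions': actions}

# Hypothetical index names, for illustration only.
print(build_alias_actions('addons_20140506', 'addons_20140401', 'addons'))
# {'actions': [{'add': {'index': 'addons_20140506', 'alias': 'addons'}},
#              {'remove': {'index': 'addons_20140401', 'alias': 'addons'}}]}
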
Example #2
def post_index(new_index, old_index, alias, index_name, settings):
    """
    Perform post-indexing tasks:
        * Optimize (which also does a refresh and a flush by default).
        * Update settings to reset number of replicas.
        * Point the alias to this new index.
        * Unflag the database.
        * Remove the old index.
        * Output the current alias configuration.

    """
    _print('Optimizing, updating settings and aliases.', alias)

    # Optimize.
    ES.indices.optimize(index=new_index)

    # Update the replicas.
    ES.indices.put_settings(index=new_index, body=settings)

    # Add and remove aliases.
    actions = [{'add': {'index': new_index, 'alias': alias}}]
    if old_index:
        actions.append({'remove': {'index': old_index, 'alias': alias}})
    ES.indices.update_aliases(body=dict(actions=actions))

    _print('Unflagging the database.', alias)
    Reindexing.unflag_reindexing(alias=alias)

    # Only mention (and delete) the old index if there actually was one;
    # otherwise this printed 'Removing index None.'.
    if old_index and ES.indices.exists(index=old_index):
        _print('Removing index {index}.'.format(index=old_index), alias)
        ES.indices.delete(index=old_index)

    alias_output = ''
    for indexer in INDEXERS:
        # Use a separate name so the `alias` argument used by the final
        # _print below isn't clobbered.
        index_alias = ES_INDEXES[indexer.get_mapping_type_name()]
        alias_output += unicode(
            ES.indices.get_aliases(index=index_alias)) + '\n'

    _print(
        'Reindexation done. Current aliases configuration: '
        '{output}\n'.format(output=alias_output), alias)
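
For reference, get_aliases in the 1.x elasticsearch-py client returns a dict
keyed by concrete index name, which is what the loop above stringifies per
alias. A hedged sample of that shape (both names are made up here):

# Approximate return shape of ES.indices.get_aliases(index='addons') after
# the swap; the timestamped index name and the alias are hypothetical.
sample = {'addons_20140506': {'aliases': {'addons': {}}}}
print(sample)
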
Example #3
    def test_unflag_reindexing(self):
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Unflagging unflagged database does nothing.
        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Flag, then unflag.
        Reindexing.objects.create(alias='foo', new_index='bar',
                                  old_index='baz')
        assert Reindexing.objects.filter(alias='foo').count() == 1

        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Unflagging another alias doesn't clash.
        Reindexing.objects.create(alias='bar', new_index='bar',
                                  old_index='baz')
        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='bar').count() == 1
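
The model method being tested isn't shown on this page. Here is a minimal
sketch consistent with the assertions above, assuming Reindexing is a plain
Django model keyed by alias (the field definitions are guesses, not the
project's actual schema):

from django.db import models


class Reindexing(models.Model):
    # Assumed fields; only `alias` is exercised by the test above.
    alias = models.CharField(max_length=255)
    new_index = models.CharField(max_length=255)
    old_index = models.CharField(max_length=255)

    @classmethod
    def unflag_reindexing(cls, alias=None):
        # Deleting an empty queryset is a no-op, which is why unflagging an
        # unflagged alias "does nothing" in the first assertion.
        qs = cls.objects.all()
        if alias is not None:
            qs = qs.filter(alias=alias)
        qs.delete()
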
Example #4
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a chain of tasks that creates a new index and indexes all
        objects, then points the alias to this new index when finished.
        """
        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_CHOICES.get(index_choice, None)
            if INDEXES is None:
                raise CommandError(
                    'Incorrect index name specified. '
                    'Choose one of: %s' % ', '.join(INDEX_CHOICES.keys()))
        else:
            INDEXES = INDEXERS

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for INDEXER in INDEXES:
            index_name = INDEXER.get_mapping_type_name()
            chunk_size = INDEXER.chunk_size
            alias = ES_INDEXES[index_name]

            chunks, total = chunk_indexing(INDEXER, chunk_size)
            if not total:
                _print('No items to queue.', alias)
            else:
                total_chunks = int(ceil(total / float(chunk_size)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=chunk_size),
                       alias)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=alias).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + alias)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(new_index, old_index, alias, index_name, {
                'analysis': INDEXER.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'})
            post_task = post_index.si(new_index, old_index, alias, index_name,
                                      {'number_of_replicas': num_replicas,
                                       'refresh_interval': '5s'})

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [run_indexing.si(new_index, index_name, chunk)
                               for chunk in chunks]

                if settings.CELERY_ALWAYS_EAGER:
                    # Eager mode and chords don't get along. So we serialize
                    # the tasks as a workaround.
                    index_tasks.insert(0, pre_task)
                    index_tasks.append(post_task)
                    chain(*index_tasks).apply_async()
                else:
                    chain(pre_task, chord(header=index_tasks,
                                          body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
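
The CELERY_ALWAYS_EAGER branch above exists because chords rely on a result
backend to detect when every header task has finished, and eager execution
short-circuits that machinery; flattening everything into one chain keeps
the pre -> chunks -> post ordering without a chord. A self-contained toy
showing the flattened shape (the task bodies are hypothetical stand-ins):

from celery import Celery, chain

app = Celery('sketch')
app.conf.task_always_eager = True  # newer spelling of CELERY_ALWAYS_EAGER


@app.task
def step(label):
    print('running %s' % label)


# The eager workaround from above: pre_index, then each chunk strictly in
# order, then post_index -- one flat chain instead of chain + chord.
tasks = [step.si('pre_index')]
tasks += [step.si('chunk_%d' % i) for i in range(3)]
tasks.append(step.si('post_index'))
chain(*tasks).apply_async()
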
Example #5
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a chain of tasks that creates a new index and indexes all
        objects, then points the alias to this new index when finished.
        """
        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_CHOICES.get(index_choice, None)
            if INDEXES is None:
                raise CommandError('Incorrect index name specified. '
                                   'Choose one of: %s' %
                                   ', '.join(INDEX_CHOICES.keys()))
        else:
            INDEXES = INDEXERS

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for INDEXER in INDEXES:
            index_name = INDEXER.get_mapping_type_name()
            chunk_size = INDEXER.chunk_size
            alias = ES_INDEXES[index_name]

            chunks, total = chunk_indexing(INDEXER, chunk_size)
            if not total:
                _print('No items to queue.', alias)
            else:
                total_chunks = int(ceil(total / float(chunk_size)))
                _print(
                    'Indexing {total} items into {n} chunks of size {size}'.
                    format(total=total, n=total_chunks,
                           size=chunk_size), alias)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=alias).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + alias)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(
                new_index, old_index, alias, index_name, {
                    'analysis': INDEXER.get_analysis(),
                    'number_of_replicas': 0,
                    'number_of_shards': num_shards,
                    'store.compress.tv': True,
                    'store.compress.stored': True,
                    'refresh_interval': '-1'
                })
            post_task = post_index.si(new_index, old_index, alias, index_name,
                                      {
                                          'number_of_replicas': num_replicas,
                                          'refresh_interval': '5s'
                                      })

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [
                    run_indexing.si(new_index, index_name, chunk)
                    for chunk in chunks
                ]

                if settings.CELERY_ALWAYS_EAGER:
                    # Eager mode and chords don't get along. So we serialize
                    # the tasks as a workaround.
                    index_tasks.insert(0, pre_task)
                    index_tasks.append(post_task)
                    chain(*index_tasks).apply_async()
                else:
                    chain(pre_task, chord(header=index_tasks,
                                          body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
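
A detail worth pulling out of both variants: pre_index creates the new index
with zero replicas and refresh disabled so bulk indexing runs fast, and
post_index restores the replica count read from the old index, falling back
to the project defaults when there was no old index. A distilled sketch of
that carry-over (the default values here are hypothetical, not the real
settings.ES_DEFAULT_* values):

def inherit_counts(old_settings, default_replicas=1, default_shards=5):
    """Carry replica/shard counts over from the old index's settings dict."""
    return (old_settings.get('number_of_replicas', default_replicas),
            old_settings.get('number_of_shards', default_shards))

# No old index: defaults apply.
print(inherit_counts({}))                                  # (1, 5)
# Old index existed; Elasticsearch reports settings values as strings.
print(inherit_counts({'number_of_replicas': '2',
                      'number_of_shards': '10'}))          # ('2', '10')
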
Example #6
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a chain of tasks that creates a new index and indexes all
        objects, then points the alias to this new index when finished.
        """
        global INDEXES

        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_DICT.get(index_choice, INDEXES)

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:

            chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
            if not total:
                _print('No items to queue.', ALIAS)
            else:
                total_chunks = int(ceil(total / float(CHUNK_SIZE)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=CHUNK_SIZE),
                       ALIAS)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=ALIAS).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + ALIAS)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(new_index, old_index, ALIAS, INDEXER, {
                'analysis': INDEXER.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'})
            post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
                'number_of_replicas': num_replicas,
                'refresh_interval': '5s'})

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [run_indexing.si(new_index, INDEXER, chunk)
                               for chunk in chunks]
                chain(pre_task,
                      chord(header=index_tasks, body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
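
This variant iterates over (ALIAS, INDEXER, CHUNK_SIZE) tuples instead of
looking up ES_INDEXES per mapping type. A hedged sketch of the shapes that
INDEXES and INDEX_DICT presumably take (class names and chunk sizes are
hypothetical):

# Stand-in indexer classes; the real ones implement get_analysis() and
# friends.
class AddonIndexer(object):
    pass


class UserIndexer(object):
    pass


INDEXES = [
    ('addons', AddonIndexer, 150),
    ('users', UserIndexer, 100),
]
# Maps the --index command option to the subset of tuples to reindex.
INDEX_DICT = {
    'addons': [INDEXES[0]],
    'users': [INDEXES[1]],
}
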
Example #7
def unflag_database():
    """Unflag the database to indicate that the reindexing is over."""
    sys.stdout.write('Unflagging the database\n')
    Reindexing.unflag_reindexing()
Example #8
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a chain of tasks that creates a new index and indexes all
        objects, then points the alias to this new index when finished.
        """
        global INDEXES

        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_DICT.get(index_choice, INDEXES)

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:

            chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
            if not total:
                _print('No items to queue.', ALIAS)
            else:
                total_chunks = int(ceil(total / float(CHUNK_SIZE)))
                _print(
                    'Indexing {total} items into {n} chunks of size {size}'.
                    format(total=total, n=total_chunks,
                           size=CHUNK_SIZE), ALIAS)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=ALIAS).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + ALIAS)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(
                new_index, old_index, ALIAS, INDEXER, {
                    'analysis': INDEXER.get_analysis(),
                    'number_of_replicas': 0,
                    'number_of_shards': num_shards,
                    'store.compress.tv': True,
                    'store.compress.stored': True,
                    'refresh_interval': '-1'
                })
            post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
                'number_of_replicas': num_replicas,
                'refresh_interval': '5s'
            })

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [
                    run_indexing.si(new_index, INDEXER, chunk)
                    for chunk in chunks
                ]
                chain(pre_task, chord(header=index_tasks,
                                      body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')