コード例 #1
0
ファイル: reindex.py プロジェクト: almosteverywhere/zamboni
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs
        """
        if not django_settings.MARKETPLACE:
            raise CommandError('This command affects both the marketplace and '
                               'AMO ES storage. But the command can only be '
                               'run from the Marketplace.')

        force = kwargs.get('force', False)

        if database_flagged() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        prefix = kwargs.get('prefix', '')
        log('Starting the reindexation')

        if kwargs.get('wipe', False):
            confirm = raw_input("Are you sure you want to wipe all data from "
                                "ES ? (yes/no): ")

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database()
                requests.delete(url('/'))
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database()

        # Get list current aliases at /_aliases.
        all_aliases = requests.get(url('/_aliases')).json()

        # building the list of indexes
        indexes = set([prefix + index for index in
                       _ALIASES.values()])

        actions = []

        def add_action(*elmt):
            if elmt in actions:
                return
            actions.append(elmt)

        all_aliases = all_aliases.items()

        # creating a task tree
        log('Building the task tree')
        tree = TaskTree()
        last_action = None

        to_remove = []

        # for each index, we create a new time-stamped index
        for alias in indexes:
            is_stats = 'stats' in alias
            old_index = None

            for aliased_index, alias_ in all_aliases:
                if alias in alias_['aliases'].keys():
                    # mark the index to be removed later
                    old_index = aliased_index
                    to_remove.append(aliased_index)

                    # mark the alias to be removed as well
                    add_action('remove', aliased_index, alias)

            # create a new index, using the alias name with a timestamp
            new_index = timestamp_index(alias)

            # if old_index is None that could mean it's a full index
            # In that case we want to continue index in it
            future_alias = url('/%s' % alias)
            if requests.head(future_alias).status_code == 200:
                old_index = alias

            # flag the database
            step1 = tree.add_task(flag_database, args=[new_index, old_index,
                                                       alias])
            step2 = step1.add_task(create_mapping, args=[new_index, alias])
            step3 = step2.add_task(create_index, args=[new_index, is_stats])
            last_action = step3

            # adding new index to the alias
            add_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(run_aliases_actions,
                                             args=[actions])

        # unflag the database - there's no need to duplicate the
        # indexing anymore
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any
        delete.add_task(delete_indexes, args=[to_remove])

        # let's do it
        log('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)   # give celeryd some time to flag the DB
            while database_flagged():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # let's return the /_aliases values
        aliases = call_es('_aliases').json()
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        return _SUMMARY % (len(indexes), aliases)
コード例 #2
0
ファイル: reindex_mkt.py プロジェクト: sunbiz/zamboni
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates a new indexes and indexes all objects,
        then points the alias to this new index when finished.
        """
        if not settings.MARKETPLACE:
            raise CommandError('This command affects only marketplace and '
                               'should be run under Marketplace settings.')

        force = kwargs.get('force', False)
        prefix = kwargs.get('prefix', '')

        if database_flagged() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')
        elif force:
            unflag_database()

        # The list of indexes that is currently aliased by `ALIAS`.
        try:
            aliases = ES.aliases(ALIAS).keys()
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None
        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.get_settings(old_index).get(old_index,
                                                    {}).get('settings', {}))
            except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards', settings.ES_DEFAULT_NUM_SHARDS)

        # Flag the database.
        chain = flag_database.si(new_index, old_index, ALIAS)

        # Create the index and mapping.
        #
        # Note: We set num_replicas=0 here to decrease load while re-indexing.
        # In a later step we increase it which results in a more efficient bulk
        # copy in Elasticsearch.
        # For ES < 0.90 we manually enable compression.
        chain |= create_index.si(
            new_index, ALIAS, {
                'analysis': WebappIndexer.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'
            })

        # Index all the things!
        chain |= run_indexing.si(new_index)

        # After indexing we optimize the index, adjust settings, and point the
        # alias to the new index.
        chain |= update_alias.si(new_index, old_index, ALIAS, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'
        })

        # Unflag the database.
        chain |= unflag_database.si()

        # Delete the old index, if any.
        if old_index:
            chain |= delete_index.si(old_index)

        chain |= output_summary.si()

        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            chain.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
コード例 #3
0
ファイル: reindex_mkt.py プロジェクト: smillaedler/zamboni
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates a new indexes and indexes all objects,
        then points the alias to this new index when finished.
        """
        if not settings.MARKETPLACE:
            raise CommandError('This command affects only marketplace and '
                               'should be run under Marketplace settings.')

        force = kwargs.get('force', False)
        prefix = kwargs.get('prefix', '')

        if database_flagged() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')
        elif force:
            unflag_database()

        # The list of indexes that is currently aliased by `ALIAS`.
        try:
            aliases = ES.aliases(ALIAS).keys()
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None
        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.get_settings(old_index).get(old_index, {})
                                               .get('settings', {}))
            except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards', settings.ES_DEFAULT_NUM_SHARDS)

        # Flag the database.
        chain = flag_database.si(new_index, old_index, ALIAS)

        # Create the index and mapping.
        #
        # Note: We set num_replicas=0 here to decrease load while re-indexing.
        # In a later step we increase it which results in a more efficient bulk
        # copy in Elasticsearch.
        # For ES < 0.90 we manually enable compression.
        chain |= create_index.si(new_index, ALIAS, {
            'number_of_replicas': 0, 'number_of_shards': num_shards,
            'store.compress.tv': True, 'store.compress.stored': True,
            'refresh_interval': '-1'})

        # Index all the things!
        chain |= run_indexing.si(new_index)

        # After indexing we optimize the index, adjust settings, and point the
        # alias to the new index.
        chain |= update_alias.si(new_index, old_index, ALIAS, {
            'number_of_replicas': num_replicas, 'refresh_interval': '5s'})

        # Unflag the database.
        chain |= unflag_database.si()

        # Delete the old index, if any.
        if old_index:
            chain |= delete_index.si(old_index)

        chain |= output_summary.si()

        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            chain.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
コード例 #4
0
    def handle(self, *args, **kwargs):
        """Reindexing work.

        Creates a Tasktree that creates new indexes
        over the old ones so the search feature
        works while the indexation occurs
        """
        if not django_settings.MARKETPLACE:
            raise CommandError('This command affects both the marketplace and '
                               'AMO ES storage. But the command can only be '
                               'run from the Marketplace.')

        force = kwargs.get('force', False)

        if database_flagged() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        prefix = kwargs.get('prefix', '')
        log('Starting the reindexation')

        if kwargs.get('wipe', False):
            confirm = raw_input("Are you sure you want to wipe all data from "
                                "ES ? (yes/no): ")

            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

            if confirm == 'yes':
                unflag_database()
                requests.delete(url('/'))
            else:
                raise CommandError("Aborted.")
        elif force:
            unflag_database()

        # Get list current aliases at /_aliases.
        all_aliases = requests.get(url('/_aliases')).json

        # building the list of indexes
        indexes = set(
            [prefix + index for index in django_settings.ES_INDEXES.values()])

        actions = []

        def add_action(*elmt):
            if elmt in actions:
                return
            actions.append(elmt)

        all_aliases = all_aliases.items()

        # creating a task tree
        log('Building the task tree')
        tree = TaskTree()
        last_action = None

        to_remove = []

        # for each index, we create a new time-stamped index
        for alias in indexes:
            is_stats = 'stats' in alias
            old_index = None

            for aliased_index, alias_ in all_aliases:
                if alias in alias_['aliases'].keys():
                    # mark the index to be removed later
                    old_index = aliased_index
                    to_remove.append(aliased_index)

                    # mark the alias to be removed as well
                    add_action('remove', aliased_index, alias)

            # create a new index, using the alias name with a timestamp
            new_index = timestamp_index(alias)

            # if old_index is None that could mean it's a full index
            # In that case we want to continue index in it
            future_alias = url('/%s' % alias)
            if requests.head(future_alias).status_code == 200:
                old_index = alias

            # flag the database
            step1 = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
            step2 = step1.add_task(create_mapping, args=[new_index, alias])
            step3 = step2.add_task(create_index, args=[new_index, is_stats])
            last_action = step3

            # adding new index to the alias
            add_action('add', new_index, alias)

        # Alias the new index and remove the old aliases, if any.
        renaming_step = last_action.add_task(run_aliases_actions,
                                             args=[actions])

        # unflag the database - there's no need to duplicate the
        # indexing anymore
        delete = renaming_step.add_task(unflag_database)

        # Delete the old indexes, if any
        delete.add_task(delete_indexes, args=[to_remove])

        # let's do it
        log('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)  # give celeryd some time to flag the DB
            while database_flagged():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # let's return the /_aliases values
        aliases = call_es('_aliases').json
        aliases = json.dumps(aliases, sort_keys=True, indent=4)
        return _SUMMARY % (len(indexes), aliases)