Python create_new_index Examples, searchlight.elasticsearch.plugins.utils.create_new_index Python Examples

Example #1

0

Show file

    def test_index_settings(self, mock_api):
        """Check the settings body passed to ``indices.create``.

        The configured settings are expected to arrive nested under a single
        'index' key, with any leading 'index.' stripped from the option names
        and the configured gc_deletes value added.
        """
        es_engine = mock.Mock()
        mock_api.return_value = es_engine

        # What the engine should receive for the configuration set below.
        expected_body = {
            'index': {
                'key1': 'value1',
                'key2': 'value2',
                'something.key3': 'value3',
                'gc_deletes': '100s'
            }
        }
        configured = {
            'key1': 'value1',
            'index.key2': 'value2',
            'index.something.key3': 'value3'
        }

        with mock.patch.object(CONF, 'elasticsearch') as es_conf:
            es_conf.index_gc_deletes = '100s'
            es_conf.index_settings = configured

            # Freeze the clock so the generated index name is predictable.
            with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
                plugin_utils.create_new_index('test')

        expected_name = 'test-' + now_str
        es_engine.indices.create.assert_called_with(index=expected_name,
                                                    body=expected_body)

Example #2

0

Show file

File: test_plugin_utils.py Project: openstack/searchlight

    def test_index_settings(self, mock_api):
        """Check the settings body passed to ``indices.create``.

        The configured settings are expected to arrive nested under a single
        'index' key, with any leading 'index.' stripped from the option names
        and the configured gc_deletes value added.
        """
        es_engine = mock.Mock()
        mock_api.return_value = es_engine

        # What the engine should receive for the configuration set below.
        expected_body = {
            'index': {
                'key1': 'value1',
                'key2': 'value2',
                'something.key3': 'value3',
                'gc_deletes': '100s'
            }
        }
        configured = {
            'key1': 'value1',
            'index.key2': 'value2',
            'index.something.key3': 'value3'
        }

        with mock.patch.object(CONF, 'elasticsearch') as es_conf:
            es_conf.index_gc_deletes = '100s'
            es_conf.index_settings = configured

            # Freeze the clock so the generated index name is predictable.
            with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
                plugin_utils.create_new_index('test')

        expected_name = 'test-' + now_str
        es_engine.indices.create.assert_called_with(index=expected_name,
                                                    body=expected_body)

Example #3

0

Show file

    def test_no_index_settings(self, mock_api):
        """Check that no 'body' is sent when there are no index settings.

        The settings lookup is patched to return an empty dict; the index is
        then expected to be created with only its name.
        """
        es_engine = mock.Mock()
        mock_api.return_value = es_engine

        settings_target = ('searchlight.elasticsearch.plugins.'
                           'utils._get_index_settings_from_config')
        with mock.patch(settings_target, return_value={}):
            # Freeze the clock so the generated index name is predictable.
            with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
                plugin_utils.create_new_index('test')

        expected_name = 'test-' + now_str
        es_engine.indices.create.assert_called_with(index=expected_name)

Example #4

0

Show file

    def test_create_new_index(self):
        """Verify create_new_index() names the index '<group>-<timestamp>'.

        The Elasticsearch API is replaced by a mock, so this only checks the
        returned index name and the call made to ``indices.create``.
        """
        # Regex for matching the index name. The index name is the group
        # name with a timestamp appended. The format for the timestamp is
        # defined in elasticsearch.plugins.utils and is defined as:
        #     [4 digit Year] [2 digit Month] [2 digit Day] [2 digit Hour]
        #     [2 digit Minutes] [2 digit Seconds]
        # We want to search for this pattern exactly, which is why the
        # full pattern is anchored with "^" and "$". We elected to make the
        # unit test more complicated, rather than artificially wrap datetime
        # functionality in the code just for the tests.
        # NB: This must be a raw string; '\d' in a plain string literal is
        #     an invalid escape sequence.
        TS_FORMAT = r'\d{4}_\d{2}_\d{2}_\d{2}_\d{2}_\d{2}$'
        group = 'searchlight'
        index_pattern = '^' + group + '-' + TS_FORMAT

        # Set up the ES mock.
        mock_engine = mock.Mock()
        with mock.patch('searchlight.elasticsearch.get_api') as mock_api:
            # Plug in the ES mock.
            mock_api.return_value = mock_engine

            # Test #1: Create a new index.
            index_name = plugin_utils.create_new_index(group)

            self.assertRegexpMatches(index_name, index_pattern)
            mock_api.assert_called_with()
            mock_engine.indices.create.assert_called_with(index=index_name)

Example #5

0

Show file

File: test_reindex.py Project: cdvel/ansible-search-container

    def test_reindex_with_plugin_and_es(self):
        """Exercise a reindex that mixes plugin and Elasticsearch sources.

        The role plugin is re-indexed through the plugin itself while the
        non-role document type is re-indexed by Elasticsearch. Checked along
        the way: the documents and aliases while the reindex is in progress,
        and the documents and aliases once it has completed.
        """
        listener_alias = self.role_plugin.alias_name_listener
        search_alias = self.role_plugin.alias_name_search
        group_name = self.role_plugin.resource_group_name
        es_sourced_type = self.non_role_plugin.document_type

        # Seed ElasticSearch with documents and check the starting point.
        self.create_es_documents(listener_alias)
        self.verify_initial_state()

        # Build the new index and let both plugins prepare it.
        fresh_index = es_utils.create_new_index(group_name)
        self.role_plugin.prepare_index(index_name=fresh_index)
        self.non_role_plugin.prepare_index(index_name=fresh_index)

        # Attach the aliases; two aliases are expected at this point.
        es_utils.setup_alias(fresh_index, search_alias, listener_alias)
        self.assertEqual(2, len(self._get_elasticsearch_aliases([])))

        # Role data comes from the plugin, non-role data from ElasticSearch.
        self.role_plugin.index_initial_data()
        es_utils.reindex(src_index=listener_alias,
                         dst_index=fresh_index,
                         type_list=[es_sourced_type])
        self._flush_elasticsearch(listener_alias)

        self.verify_reindex_state(fresh_index)

        # Switch the search alias over and drop the index it used before.
        stale_index = es_utils.alias_search_update(search_alias, fresh_index)
        es_utils.delete_index(stale_index)
        self._flush_elasticsearch(listener_alias)

        self.verify_new_alias_state(new_index=fresh_index,
                                    alias_search=search_alias,
                                    alias_listener=listener_alias)

Example #6

0

Show file

File: test_reindex.py Project: mateusz-blaszkowski/searchlight

    def test_reindex_with_plugin_and_es(self):
        """Exercise a reindex that mixes plugin and Elasticsearch sources.

        The role plugin is re-indexed through the plugin itself while the
        non-role document type is re-indexed by Elasticsearch. Checked along
        the way: the documents and aliases while the reindex is in progress,
        and the documents and aliases once it has completed.
        """
        listener_alias = self.role_plugin.alias_name_listener
        search_alias = self.role_plugin.alias_name_search
        group_name = self.role_plugin.resource_group_name
        es_sourced_type = self.non_role_plugin.document_type

        # Seed ElasticSearch with documents and check the starting point.
        self.create_es_documents(listener_alias)
        self.verify_initial_state()

        # Build the new index and let both plugins prepare it.
        fresh_index = es_utils.create_new_index(group_name)
        self.role_plugin.prepare_index(index_name=fresh_index)
        self.non_role_plugin.prepare_index(index_name=fresh_index)

        # Attach the aliases; two aliases are expected at this point.
        es_utils.setup_alias(fresh_index, search_alias, listener_alias)
        self.assertEqual(2, len(self._get_elasticsearch_aliases([])))

        # Role data comes from the plugin, non-role data from ElasticSearch.
        self.role_plugin.initial_indexing()
        es_utils.reindex(src_index=listener_alias, dst_index=fresh_index,
                         type_list=[es_sourced_type])
        self._flush_elasticsearch(listener_alias)

        self.verify_reindex_state(fresh_index)

        # Switch the search alias over and drop the index it used before.
        stale_index = es_utils.alias_search_update(search_alias, fresh_index)
        es_utils.delete_index(stale_index)
        self._flush_elasticsearch(listener_alias)

        self.verify_new_alias_state(new_index=fresh_index,
                                    alias_search=search_alias,
                                    alias_listener=listener_alias)

Example #7

0

Show file

File: test_load.py Project: kurhula/searchlight

    def test_index_settings(self):
        """Create an index with patched settings and read them back.

        Covers the gc_deletes interval together with two further index
        settings, one given with and one without the 'index.' prefix.
        """
        with mock.patch.object(cfg.CONF, 'elasticsearch') as es_conf:
            es_conf.index_gc_deletes = '100s'
            es_conf.index_settings = {
                'refresh_interval': '2s',
                'index.number_of_replicas': 1
            }

            created = es_utils.create_new_index('test-index-settings')
            try:
                es_indices = self.elastic_connection.indices
                reported = es_indices.get_settings(created)
                applied = reported[created]['settings']['index']

                # Values are compared as the strings read back here.
                for wanted, option in (("100s", 'gc_deletes'),
                                       ("2s", 'refresh_interval'),
                                       ("1", 'number_of_replicas')):
                    self.assertEqual(wanted, applied[option])
            finally:
                # Always remove the index created for this test.
                es_utils.delete_index(created)

Example #8

0

Show file

File: test_load.py Project: openstack/searchlight

    def test_index_settings(self):
        """Create an index with patched settings and read them back.

        Covers the gc_deletes interval together with two further index
        settings, one given with and one without the 'index.' prefix.
        """
        with mock.patch.object(cfg.CONF, 'elasticsearch') as es_conf:
            es_conf.index_gc_deletes = '100s'
            es_conf.index_settings = {
                'refresh_interval': '2s',
                'index.number_of_replicas': 1
            }

            created = es_utils.create_new_index('test-index-settings')
            try:
                es_indices = self.elastic_connection.indices
                reported = es_indices.get_settings(created)
                applied = reported[created]['settings']['index']

                # Values are compared as the strings read back here.
                for wanted, option in (("100s", 'gc_deletes'),
                                       ("2s", 'refresh_interval'),
                                       ("1", 'number_of_replicas')):
                    self.assertEqual(wanted, applied[option])
            finally:
                # Always remove the index created for this test.
                es_utils.delete_index(created)

Example #9

0

Show file

    def configurePlugins(self, include_plugins=None, exclude_plugins=()):
        """Instantiate the selected plugins against a freshly created index.

        Specify 'exclude_plugins' or 'include_plugins' as a list of
        (service, plugin type) tuples, for example ('nova', 'servers').
        When 'include_plugins' is empty or None, every plugin known to this
        method is used. Anything listed in 'exclude_plugins' is dropped from
        the selection.

        The instantiated plugins are stored in self.initialized_plugins,
        keyed by document type.
        """
        plugin_classes = {
            'glance': {
                'images': 'ImageIndex',
                'metadefs': 'MetadefIndex'
            },
            'nova': {
                'servers': 'ServerIndex',
                'hypervisors': 'HypervisorIndex',
                'flavors': 'FlavorIndex',
                'servergroups': 'ServerGroupIndex'
            },
            'cinder': {
                'volumes': 'VolumeIndex',
                'snapshots': 'SnapshotIndex'
            },
            'neutron': {
                'networks': 'NetworkIndex',
                'ports': 'PortIndex',
                'subnets': 'SubnetIndex',
                'routers': 'RouterIndex',
                'floatingips': 'FloatingIPIndex',
                'security_groups': 'SecurityGroupIndex'
            },
            'swift': {
                'accounts': 'AccountIndex',
                'containers': 'ContainerIndex',
                'objects': 'ObjectIndex'
            },
            'designate': {
                'zones': 'ZoneIndex',
                'recordsets': 'RecordSetIndex'
            }
        }

        # Each plugin is listed exactly once so that no plugin module is
        # imported and instantiated twice.
        default_plugins = (
            ('glance', 'images'), ('glance', 'metadefs'), ('nova', 'servers'),
            ('nova', 'hypervisors'), ('nova', 'flavors'),
            ('nova', 'servergroups'), ('cinder', 'volumes'),
            ('cinder', 'snapshots'), ('neutron', 'networks'),
            ('neutron', 'ports'), ('neutron', 'subnets'),
            ('neutron', 'routers'), ('neutron', 'floatingips'),
            ('neutron', 'security_groups'), ('swift', 'accounts'),
            ('swift', 'containers'), ('swift', 'objects'),
            ('designate', 'zones'), ('designate', 'recordsets'))
        plugins = [plugin for plugin in (include_plugins or default_plugins)
                   if plugin not in exclude_plugins]

        # Make sure the plugins instantiated in this process use the same
        # connection as the ones in the API process they'll work with
        es_conn_patcher = mock.patch('searchlight.elasticsearch.get_api',
                                     return_value=self.elastic_connection)
        es_conn_patcher.start()
        self.addCleanup(es_conn_patcher.stop)

        index_name = es_utils.create_new_index('searchlight')

        for service, plugin_type in plugins:
            plugin_mod_name = ("searchlight.elasticsearch.plugins.%s.%s" %
                               (service, plugin_type))
            plugin_cls_name = plugin_classes[service][plugin_type]

            plugin_mod = importlib.import_module(plugin_mod_name)
            plugin_cls = getattr(plugin_mod, plugin_cls_name)

            # This'll call our dummy init (above)
            plugin_instance = plugin_cls()

            self.initialized_plugins[plugin_instance.document_type] = \
                plugin_instance

        # Reproduce the logic from searchlight.common.utils to set up
        # parent/child relationships; the stevedore structure is different
        for instance in self.initialized_plugins.values():
            parent_plugin_name = instance.parent_plugin_type()
            if parent_plugin_name:
                parent_plugin = self.initialized_plugins[parent_plugin_name]
                instance.register_parent(parent_plugin)

        # Reproduce the logic from cmd.manage to prepare the index.
        for instance in self.initialized_plugins.values():
            instance.prepare_index(index_name=index_name)

        # Create the aliases
        es_utils.setup_alias(index_name, 'searchlight-search',
                             'searchlight-listener')

Example #10

0

Show file

File: __init__.py Project: openstack/searchlight

    def configurePlugins(self, include_plugins=None, exclude_plugins=()):
        """Instantiate the selected plugins against a freshly created index.

        Specify 'exclude_plugins' or 'include_plugins' as a list of
        (service, plugin type) tuples, for example ('nova', 'servers').
        When 'include_plugins' is empty or None, every plugin known to this
        method is used. Anything listed in 'exclude_plugins' is dropped from
        the selection.

        The instantiated plugins are stored in self.initialized_plugins,
        keyed by document type.
        """
        plugin_classes = {
            'glance': {'images': 'ImageIndex', 'metadefs': 'MetadefIndex'},
            'nova': {'servers': 'ServerIndex',
                     'hypervisors': 'HypervisorIndex',
                     'flavors': 'FlavorIndex',
                     'servergroups': 'ServerGroupIndex'},
            'cinder': {'volumes': 'VolumeIndex', 'snapshots': 'SnapshotIndex'},
            'neutron': {'networks': 'NetworkIndex', 'ports': 'PortIndex',
                        'subnets': 'SubnetIndex', 'routers': 'RouterIndex',
                        'floatingips': 'FloatingIPIndex',
                        'security_groups': 'SecurityGroupIndex'},
            'swift': {'accounts': 'AccountIndex',
                      'containers': 'ContainerIndex',
                      'objects': 'ObjectIndex'},
            'designate': {'zones': 'ZoneIndex',
                          'recordsets': 'RecordSetIndex'}
        }

        # Each plugin is listed exactly once so that no plugin module is
        # imported and instantiated twice.
        default_plugins = (
            ('glance', 'images'), ('glance', 'metadefs'),
            ('nova', 'servers'), ('nova', 'hypervisors'),
            ('nova', 'flavors'), ('nova', 'servergroups'),
            ('cinder', 'volumes'), ('cinder', 'snapshots'),
            ('neutron', 'networks'), ('neutron', 'ports'),
            ('neutron', 'subnets'), ('neutron', 'routers'),
            ('neutron', 'floatingips'), ('neutron', 'security_groups'),
            ('swift', 'accounts'), ('swift', 'containers'),
            ('swift', 'objects'),
            ('designate', 'zones'), ('designate', 'recordsets')
        )
        plugins = [plugin for plugin in (include_plugins or default_plugins)
                   if plugin not in exclude_plugins]

        # Make sure the plugins instantiated in this process use the same
        # connection as the ones in the API process they'll work with
        es_conn_patcher = mock.patch('searchlight.elasticsearch.get_api',
                                     return_value=self.elastic_connection)
        es_conn_patcher.start()
        self.addCleanup(es_conn_patcher.stop)

        index_name = es_utils.create_new_index('searchlight')

        for service, plugin_type in plugins:
            plugin_mod_name = ("searchlight.elasticsearch.plugins.%s.%s"
                               % (service, plugin_type))
            plugin_cls_name = plugin_classes[service][plugin_type]

            plugin_mod = importlib.import_module(plugin_mod_name)
            plugin_cls = getattr(plugin_mod, plugin_cls_name)

            # This'll call our dummy init (above)
            plugin_instance = plugin_cls()

            self.initialized_plugins[plugin_instance.document_type] = \
                plugin_instance

        # Reproduce the logic from searchlight.common.utils to set up
        # parent/child relationships; the stevedore structure is different
        for instance in self.initialized_plugins.values():
            parent_plugin_name = instance.parent_plugin_type()
            if parent_plugin_name:
                parent_plugin = self.initialized_plugins[parent_plugin_name]
                instance.register_parent(parent_plugin)

        # Reproduce the logic from cmd.manage to prepare the index.
        for instance in self.initialized_plugins.values():
            instance.prepare_index(index_name=index_name)

        # Create the aliases
        es_utils.setup_alias(index_name, 'searchlight-search',
                             'searchlight-listener')

Example #11

0

Show file

File: manage.py Project: hikaru4649/searchlight

    def sync(self, group=None, _type=None, force=False, force_es=False):
        """Re-index resource groups into newly created Elasticsearch indexes.

        A new index is created and prepared for every affected resource
        group and attached to the aliases. The data is then re-indexed
        (through the plugins and/or from existing Elasticsearch data), the
        "search" alias is switched to the new index and the old index is
        deleted.

        :param group: comma-separated resource group names; None or empty
                      re-syncs every resource group.
        :param _type: comma-separated document types. Each requested type
                      pulls its whole resource group into the re-sync.
        :param force: when true, skip the interactive confirmation prompt.
        :param force_es: when true, use existing Elasticsearch data as the
                         source for all document types. Cannot be combined
                         with '_type'.

        Exits the process with status 1 for an invalid option combination
        or for groups/types without a registered plugin, with status 3 for
        an invalid 'manage.workers' value, and with status 0 when the user
        declines the prompt or interrupts the run.
        """
        def wait_for_threads():
            """Patiently wait for all running threads to complete.
            """
            # Poll once a second, but only for as long as something is
            # still running.
            while not all(future.done() for future in futures):
                time.sleep(1)

        # Signal handler to catch interrupts from the user (ctl-c)
        def sig_handler(signum, frame):
            """When rudely interrupted by the user, we will want to clean up
               after ourselves. We have potentially three pieces of unfinished
               business.
                   1. We have running threads. Cancel them.
                   2. Wait for all threads to finish.
                   3. We created new indices in Elasticsearch. Remove them.
            """
            # Cancel any and all threads.
            for future in futures:
                future.cancel()

            # Politely wait for the current threads to finish.
            LOG.warning(
                _LW("Interrupt received, waiting for threads to finish"
                    " before cleaning up"))
            wait_for_threads()

            # Rudely remove any newly created Elasticsearch indices.
            if index_names:
                es_utils.alias_error_cleanup(index_names)

            sys.exit(0)

        if force_es and _type:
            # The user cannot specify both of these options simultaneously.
            print("\nInvalid set of options.")
            print("Cannot specify both '--type' and '--apply-mapping-changes' "
                  "simultaneously.\n")
            sys.exit(1)

        try:
            max_workers = cfg.CONF.manage.workers
        except cfg.ConfigFileValueError:
            LOG.error(
                _LE("Invalid value for config file option "
                    "'manage.workers'. The number of thread workers "
                    "must be greater than 0."))
            sys.exit(3)

        # Grab the list of plugins registered as entry points through stevedore
        search_plugins = utils.get_search_plugins()

        # Verify all indices and types have registered plugins.
        # group and _type are given as comma-separated strings; turn them
        # into lists.
        group = group.split(',') if group else []
        _type = _type.split(',') if _type else []

        _type = utils.expand_type_matches(_type, six.viewkeys(search_plugins))
        LOG.debug("After expansion, 'type' argument: %s", ", ".join(_type))

        # Whatever is left in these sets after both passes below was
        # requested by the caller but has no registered plugin.
        group_set = set(group)
        type_set = set(_type)

        # The caller can specify a sync based on either the Document Type or
        # the Resource Group. With the Zero Downtime functionality, we are
        # using aliases to index into ElasticSearch. We now have multiple
        # Document Types sharing a single alias. If any member of a Resource
        # Group (an ES alias) is re-syncing *all* members of that Resource
        # Group need to re-sync.
        #
        # The final list of plugins to use for re-syncing *must* come only
        # from the Resource Group specifications. The "type" list is used
        # only to make the "group" list complete. We need a two pass
        # algorithm for this.
        #
        # First pass: Analyze the plugins according to the "type" list. This
        #   turns a type in the "type" list to a group in the "group" list.
        #
        # Second pass: Analyze the plugins according to the "group" list.
        #   Create the plugin list that will be used for re-syncing.
        #
        # Note: We cannot call any plugin's sync() during these two passes.
        # The sync needs to be a separate step. The API states that if any
        # invalid plugin was specified by the caller, the entire operation
        # fails.

        # First Pass: Document Types.
        if _type:
            for res_type, ext in six.iteritems(search_plugins):
                plugin_obj = ext.obj
                type_set.discard(plugin_obj.get_document_type())
                if plugin_obj.get_document_type() in _type:
                    group.append(plugin_obj.resource_group_name)

        # Second Pass: Resource Groups (including those from types).
        # This pass is a little tricky. If "group" is empty, it implies every
        # resource gets re-synced. The command group_set.discard() is a no-op
        # when "group" is empty.
        resource_groups = []
        plugin_objs = {}
        plugins_list = []
        for res_type, ext in six.iteritems(search_plugins):
            plugin_obj = ext.obj
            group_set.discard(plugin_obj.resource_group_name)
            if (not group) or (plugin_obj.resource_group_name in group):
                plugins_list.append((res_type, ext))
                plugin_objs[plugin_obj.resource_group_name] = plugin_obj
                group_info = (plugin_obj.resource_group_name,
                              plugin_obj.alias_name_search,
                              plugin_obj.alias_name_listener)
                # Several plugins share one resource group; record each
                # (group, search alias, listener alias) triple only once.
                if group_info not in resource_groups:
                    resource_groups.append(group_info)

        if group_set or type_set:
            print("Some index names or types do not have plugins "
                  "registered. Index names: %s. Types: %s" %
                  (",".join(group_set) or "<None>", ",".join(type_set)
                   or "<None>"))
            print("Aborting.")
            sys.exit(1)

        # As an optimization, if any types are explicitly requested, we
        # will index them from their service APIs. The rest will be
        # indexed from an existing ES index, if one exists.
        #
        # Also, if force_es is set the user wishes to use ES exclusively
        # as the source for all data. This implies everything in the
        # es_reindex list and nothing in the plugins_to_index list.
        es_reindex = []
        plugins_to_index = copy.copy(plugins_list)
        if _type or force_es:
            for resource_type, ext in plugins_list:
                doc_type = ext.obj.get_document_type()

                # If force_es is set, then "_type" is empty. Always do this.
                # If force_es is not set, then "_type" is set. Adjust as
                # needed.
                if doc_type not in _type:
                    es_reindex.append(doc_type)
                    # Don't reindex this type
                    plugins_to_index.remove((resource_type, ext))

        if not force:
            # For display purpose, we want to iterate on only parthenogenetic
            # plugins that are not the children of another plugin. If there
            # are children plugins they will be displayed when we call
            # get_index_display_name(). Therefore any child plugins in the
            # display list, will be listed twice.
            display_plugins = [(res, ext) for res, ext in plugins_list
                               if not ext.obj.parent_plugin]
            plugins_without_notifications = []

            def format_selection(selection):
                def _format_plugin(plugin, indent=0):
                    plugin_doc_type = plugin.get_document_type()
                    handler = plugin.get_notification_handler()
                    event_list = handler.get_notification_supported_events()

                    display = '\n' + '    ' * indent + '--> ' if indent else ''
                    display += '%s (%s)' % (plugin_doc_type,
                                            plugin.resource_group_name)
                    # '*' marks types re-indexed from Elasticsearch, '!!'
                    # marks types without notification support; both are
                    # explained in the messages printed below.
                    if plugin_doc_type in es_reindex:
                        display += ' *'
                    if not event_list:
                        display += ' !!'
                        plugins_without_notifications.append(plugin)
                    return display + ''.join(
                        _format_plugin(c, indent + 1)
                        for c in plugin.child_plugins)

                return _format_plugin(selection[1].obj)

            all_res_groups = set(grp[0] for grp in resource_groups)
            print("\nResources in these groups must be re-indexed: %s." %
                  ", ".join(all_res_groups))

            print("Resource types (and aliases) matching selection:\n\n%s\n" %
                  '\n'.join(map(format_selection, sorted(display_plugins))))

            if es_reindex:
                print("Any types marked with * will be reindexed from "
                      "existing Elasticsearch data.\n")

            if plugins_without_notifications:
                print("Any types marked with !! do not support incremental "
                      "updates via the listener.")
                print("These types must be fully re-indexed periodically or "
                      "should be disabled.\n")

            ans = six.moves.input("\nUse '--force' to suppress this message.\n"
                                  "OK to continue? [y/n]: ")
            if ans.lower() != 'y':
                print("Aborting.")
                sys.exit(0)

        # Start the re-indexing process.
        # Now we are starting to change Elasticsearch. Let's clean up
        # if interrupted. Set index_names/futures here for cleaner code
        # in the signal handler.
        index_names = {}
        futures = []
        signal.signal(signal.SIGINT, sig_handler)

        # Step #1: Create new indexes for each Resource Group Type.
        #   The index needs to be fully functional before it gets
        #   added to any aliases. This includes all settings and
        #   mappings. Only then can we add it to the aliases. We first
        #   need to create all indexes. This is done by resource group.
        #   We cache and turn off new indexes' refresh intervals,
        #   this will improve the performance of data re-syncing.
        #   After data get re-synced, set the refresh interval back.
        #   Once all indexes are created, we need to initialize the
        #   indexes. This is done by document type.
        #   NB: The aliases remain unchanged for this step.
        refresh_intervals = {}
        try:
            for group, search, listen in resource_groups:
                index_name = es_utils.create_new_index(group)
                index_names[group] = index_name

                refresh_intervals[index_name] = \
                    es_utils.get_index_refresh_interval(index_name)
                # Disable refresh interval by setting its value to -1
                es_utils.set_index_refresh_interval(index_name, -1)
            for resource_type, ext in plugins_list:
                plugin_obj = ext.obj
                group_name = plugin_obj.resource_group_name
                plugin_obj.prepare_index(index_name=index_names[group_name])
        except Exception:
            LOG.error(
                _LE("Error creating index or mapping, aborting "
                    "without indexing"))
            es_utils.alias_error_cleanup(index_names)
            raise

        # Step #2: Modify new index to play well with multiple indices.
        #   There is a "feature" of Elasticsearch where some types of
        #   queries do not work across multiple indices if there are no
        #   mappings for the specified document types. This is an issue we
        #   run into with our RBAC functionality. We need to modify the new
        #   index to work for these cases. We will grab all document types
        #   from the plugins and add a mapping for them as needed to the newly
        #   created indices.
        doc_type_info = []
        for res_type, ext in six.iteritems(search_plugins):
            doc_type_info.append(
                (ext.obj.get_document_type(), ext.obj.parent_plugin_type))
        for index in index_names.values():
            es_utils.add_extra_mappings(index_name=index,
                                        doc_type_info=doc_type_info)

        # Step #3: Set up the aliases for all Resource Type Group.
        #   These actions need to happen outside of the plugins. Now that
        #   the indexes are created and fully functional we can associate
        #   them with the aliases.
        #   NB: The indexes remain unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                es_utils.setup_alias(index_names[group], search, listen)
            except Exception as e:
                # Hand the values to the logger instead of formatting the
                # message eagerly.
                LOG.exception(
                    _LE("Failed to setup alias for resource group "
                        "%(g)s: %(e)s"), {
                            'g': group,
                            'e': e
                        })
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #4: Re-index all resource types in this Resource Type Group.
        #   NB: The "search" and "listener" aliases remain unchanged for this
        #       step.
        #   NB: We will be spinning off this working into separate threads.
        #       We will limit each thread to a single resource type. For
        #       more information, please refer to the spec:
        #           searchlight-specs/specs/newton/
        #             index-performance-enhancement.rst
        ThreadPoolExec = concurrent.futures.ThreadPoolExecutor
        with ThreadPoolExec(max_workers=max_workers) as executor:
            try:
                # Start threads for plugin API.
                for res, ext in plugins_to_index:
                    # Throw the plugin into the thread pool.
                    plugin_obj = ext.obj
                    futures.append(
                        executor.submit(self._plugin_api, plugin_obj,
                                        index_names))

                # Start the single thread for ES re-index.
                if es_reindex:
                    futures.append(
                        executor.submit(self._es_reindex_worker, es_reindex,
                                        resource_groups, index_names))

                # Sit back, relax and wait for the threads to complete.
                wait_for_threads()
            except Exception:
                # An exception occurred. Start cleaning up ElasticSearch and
                # inform the user.
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #5: Update the "search" alias.
        #   All re-indexing has occurred. The index/alias is the same for
        #   all resource types within this Resource Group. These actions need
        #   to happen outside of the plugins. Also restore refresh interval
        #   for indexes, this will make data in the indexes become searchable.
        #   NB: The "listener" alias remains unchanged for this step.
        for index_name, interval in refresh_intervals.items():
            es_utils.set_index_refresh_interval(index_name, interval)

        old_index = {}
        for group, search, listen in resource_groups:
            old_index[group] = \
                es_utils.alias_search_update(search, index_names[group])

        # Step #6: Update the "listener" alias.
        #   The "search" alias has been updated. This involves both removing
        #   the old index from the alias as well as deleting the old index.
        #   These actions need to happen outside of the plugins.
        #   NB: The "search" alias remains unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                # If any exception raises, ignore and continue to delete
                # any other old indexes.
                es_utils.delete_index(old_index[group])
            except Exception as e:
                LOG.error(encodeutils.exception_to_unicode(e))

Example #12

0

Show file

File: manage.py Project: mateusz-blaszkowski/searchlight

    def sync(self, group=None, _type=None, force=False):
        """Re-index the selected resource groups into freshly created indexes.

        The caller can specify a sync based on either the Document Type or
        the Resource Group. With the Zero Downtime functionality, we are
        using aliases to index into ElasticSearch. We now have multiple
        Document Types sharing a single alias. If any member of a Resource
        Group (an ES alias) is re-syncing, *all* members of that Resource
        Group need to re-sync.

        The final list of plugins to use for re-syncing *must* come only
        from the Resource Group specifications. The "type" list is used only
        to make the "group" list complete. We need a two pass algorithm for
        this.

        First pass: Analyze the plugins according to the "type" list. This
          turns a type in the "type" list to a group in the "group" list.

        Second pass: Analyze the plugins according to the "group" list.
          Create the plugin list that will be used for re-syncing.

        Note: We cannot call any plugin's sync() during these two passes.
        The sync needs to be a separate step. The API states that if any
        invalid plugin was specified by the caller, the entire operation
        fails.

        :param group: comma-separated resource group names, or None/empty
                      to select every registered plugin.
        :param _type: comma-separated document types, or None/empty. Types
                      listed here are indexed through their plugin; the
                      remaining types of the affected groups are copied from
                      the existing search alias.
        :param force: when True, skip the interactive confirmation prompt.
        :raises Exception: any failure while creating indexes, setting up
                           aliases or indexing is re-raised after
                           es_utils.alias_error_cleanup() has been called
                           for the new indexes.

        Exits the process with status 1 when a requested group or type has
        no registered plugin, and with status 0 when the user declines the
        confirmation prompt.
        """
        # Both arguments arrive as comma-separated strings.
        group = group.split(',') if group else []
        _type = _type.split(',') if _type else []

        # Requested names that have not (yet) been matched to a plugin.
        # Anything left in these sets after both passes is invalid input.
        group_set = set(group)
        type_set = set(_type)

        # First Pass: Document Types.
        if _type:
            for res_type, ext in six.iteritems(utils.get_search_plugins()):
                plugin_obj = ext.obj
                doc_type = plugin_obj.get_document_type()
                type_set.discard(doc_type)
                if doc_type in _type:
                    group.append(plugin_obj.resource_group_name)

        # Second Pass: Resource Groups (including those from types).
        # This pass is a little tricky. If "group" is empty, it implies every
        # resource gets re-synced. The command group_set.discard() is a no-op
        # when "group" is empty.
        resource_groups = []
        plugins_list = []
        for res_type, ext in six.iteritems(utils.get_search_plugins()):
            plugin_obj = ext.obj
            group_set.discard(plugin_obj.resource_group_name)
            if (not group) or (plugin_obj.resource_group_name in group):
                plugins_list.append((res_type, ext))
                group_info = (plugin_obj.resource_group_name,
                              plugin_obj.alias_name_search,
                              plugin_obj.alias_name_listener)
                if group_info not in resource_groups:
                    resource_groups.append(group_info)

        if group_set or type_set:
            print("Some index names or types do not have plugins "
                  "registered. Index names: %s. Types: %s" %
                  (",".join(group_set) or "<None>", ",".join(type_set)
                   or "<None>"))
            print("Aborting.")
            sys.exit(1)

        if not force:
            # For display purpose, we want to iterate on only parthenogenetic
            # plugins that are not the children of another plugin. If there
            # are children plugins they will be displayed when we call
            # get_index_display_name(). Therefore any child plugins in the
            # display list, will be listed twice.
            display_plugins = []
            for res, ext in plugins_list:
                if not ext.obj.parent_plugin:
                    display_plugins.append((res, ext))

            def format_selection(selection):
                resource_type, ext = selection
                return '  ' + ext.obj.get_index_display_name()

            # Name every selected resource group (more than one may be
            # selected, and possibly none). With exactly one group the
            # output is the same as naming that group alone.
            group_names = []
            for name, _search, _listen in resource_groups:
                if name not in group_names:
                    group_names.append(name)
            print("\nAll resource types within Resource Group \"%(group)s\""
                  " must be re-indexed" % {'group': ", ".join(group_names)})
            print("\nResource types (and aliases) matching selection:\n%s\n" %
                  '\n'.join(map(format_selection, sorted(display_plugins))))

            ans = six.moves.input(
                "Indexing will NOT delete existing data or mapping(s). It "
                "will reindex all resources. \nUse '--force' to suppress "
                "this message.\nOK to continue? [y/n]: ")
            if ans.lower() != 'y':
                print("Aborting.")
                sys.exit(0)

        # Start the re-indexing process

        # Step #1: Create new indexes for each Resource Group Type.
        #   The index needs to be fully functional before it gets
        #   added to any aliases. This includes all settings and
        #   mappings. Only then can we add it to the aliases. We first
        #   need to create all indexes. This is done by resource group.
        #   We cache and turn off new indexes' refresh intervals,
        #   this will improve the performance of data re-syncing.
        #   After data get re-synced, set the refresh interval back.
        #   Once all indexes are created, we need to initialize the
        #   indexes. This is done by document type.
        #   NB: The aliases remain unchanged for this step.
        index_names = {}
        refresh_intervals = {}
        try:
            for group, search, listen in resource_groups:
                index_name = es_utils.create_new_index(group)
                index_names[group] = index_name
                refresh_intervals[index_name] = \
                    es_utils.get_index_refresh_interval(index_name)
                # Disable refresh interval by setting its value to -1
                es_utils.set_index_refresh_interval(index_name, -1)
            for resource_type, ext in plugins_list:
                plugin_obj = ext.obj
                group_name = plugin_obj.resource_group_name
                plugin_obj.prepare_index(index_name=index_names[group_name])
        except Exception:
            LOG.error(
                _LE("Error creating index or mapping, aborting "
                    "without indexing"))
            es_utils.alias_error_cleanup(index_names)
            raise

        # Step #2: Modify new index to play well with multiple indices.
        #   There is a "feature" of Elasticsearch where some types of
        #   queries do not work across multiple indices if there are no
        #   mappings for the specified document types. This is an issue we
        #   run into with our RBAC functionality. We need to modify the new
        #   index to work for these cases. We will grab all document types
        #   from the plugins and add a mapping for them as needed to the newly
        #   created indices.
        doc_type_info = []
        for res_type, ext in six.iteritems(utils.get_search_plugins()):
            doc_type_info.append(
                (ext.obj.get_document_type(), ext.obj.parent_plugin_type))
        for index in index_names.values():
            es_utils.add_extra_mappings(index_name=index,
                                        doc_type_info=doc_type_info)

        # Step #3: Set up the aliases for all Resource Type Group.
        #   These actions need to happen outside of the plugins. Now that
        #   the indexes are created and fully functional we can associate
        #   them with the aliases.
        #   NB: The indexes remain unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                es_utils.setup_alias(index_names[group], search, listen)
            except Exception as e:
                LOG.error(
                    _LE("Failed to setup alias for resource group "
                        "%(g)s: %(e)s") % {
                            'g': group,
                            'e': e
                        })
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #4: Re-index all resource types in this Resource Type Group.
        #   As an optimization, if any types are explicitly requested, we
        #   will index them from their service APIs. The rest will be
        #   indexed from an existing ES index, if one exists.
        #   NB: The "search" and "listener" aliases remain unchanged for this
        #       step.
        es_reindex = []
        plugins_to_index = copy.copy(plugins_list)
        if _type:
            for resource_type, ext in plugins_list:
                doc_type = ext.obj.get_document_type()
                if doc_type not in _type:
                    es_reindex.append(doc_type)
                    plugins_to_index.remove((resource_type, ext))

        # Call plugin API as needed.
        for res, ext in plugins_to_index:
            plugin_obj = ext.obj
            gname = plugin_obj.resource_group_name
            try:
                plugin_obj.initial_indexing(index_name=index_names[gname])
                es_utils.refresh_index(index_names[gname])
            except exceptions.EndpointNotFound:
                # The service is not reachable; skip this plugin and keep
                # going with the others.
                LOG.warning(
                    _LW("Service is not available for plugin: "
                        "%(ext)s") % {"ext": ext.name})
            except Exception as e:
                LOG.error(
                    _LE("Failed to setup index extension "
                        "%(ex)s: %(e)s") % {
                            'ex': ext.name,
                            'e': e
                        })
                es_utils.alias_error_cleanup(index_names)
                raise

        # Call ElasticSearch for the rest, if needed.
        if es_reindex:
            for group in six.iterkeys(index_names):
                # The search alias of the first resource group tuple whose
                # name matches this group. Every key of index_names was
                # taken from resource_groups, so a match always exists.
                alias_search = next(
                    rg_search for rg_name, rg_search, _rg_listen
                    in resource_groups if rg_name == group)
                try:
                    es_utils.reindex(src_index=alias_search,
                                     dst_index=index_names[group],
                                     type_list=es_reindex)
                    es_utils.refresh_index(index_names[group])
                except Exception as e:
                    # Report the group/alias that actually failed. No plugin
                    # is involved in this step, so there is no extension
                    # name to report.
                    LOG.error(
                        _LE("Failed to reindex resource group %(g)s from "
                            "alias %(a)s: %(e)s") % {
                                'g': group,
                                'a': alias_search,
                                'e': e
                            })
                    es_utils.alias_error_cleanup(index_names)
                    raise

        # Step #5: Update the "search" alias.
        #   All re-indexing has occurred. The index/alias is the same for
        #   all resource types within this Resource Group. These actions need
        #   to happen outside of the plugins. Also restore refresh interval
        #   for indexes, this will make data in the indexes become searchable.
        #   NB: The "listener" alias remains unchanged for this step.
        for index_name, interval in refresh_intervals.items():
            es_utils.set_index_refresh_interval(index_name, interval)

        old_index = {}
        for group, search, listen in resource_groups:
            old_index[group] = \
                es_utils.alias_search_update(search, index_names[group])

        # Step #6: Update the "listener" alias.
        #   The "search" alias has been updated. This involves both removing
        #   the old index from the alias as well as deleting the old index.
        #   These actions need to happen outside of the plugins.
        #   NB: The "search" alias remains unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                # If any exception raises, ignore and continue to delete
                # any other old indexes.
                es_utils.delete_index(old_index[group])
            except Exception as e:
                LOG.error(encodeutils.exception_to_unicode(e))