예제 #1
0
    def sync(self, group=None, _type=None, force=False, force_es=False):
        def wait_for_threads():
            """Patiently wait for all running threads to complete.
            """
            threads_running = True
            while threads_running:
                # Are any threads still running?
                threads_running = False
                for future in futures:
                    if not future.done():
                        threads_running = True
                        break
                time.sleep(1)

        # Signal handler to catch interrupts from the user (ctl-c)
        def sig_handler(signum, frame):
            """When rudely interrupted by the user, we will want to clean up
               after ourselves. We have potentially three pieces of unfinished
               business.
                   1. We have running threads. Cancel them.
                   2. Wait for all threads to finish.
                   3. We created new indices in Elasticsearch. Remove them.
            """
            # Cancel any and all threads.
            for future in futures:
                future.cancel()

            # Politely wait for the current threads to finish.
            LOG.warning(
                _LW("Interrupt received, waiting for threads to finish"
                    " before cleaning up"))
            wait_for_threads()

            # Rudely remove any newly created Elasticsearch indices.
            if index_names:
                es_utils.alias_error_cleanup(index_names)

            sys.exit(0)

        if force_es and _type:
            # The user cannot specify both of these options simultaneously.
            print("\nInvalid set of options.")
            print("Cannot specify both '--type' and '--apply-mapping-changes "
                  "simultaneously.\n")
            sys.exit(1)

        try:
            max_workers = cfg.CONF.manage.workers
        except cfg.ConfigFileValueError as e:
            LOG.error(
                _LE("Invalid value for config file option "
                    "'manage.workers'. The number of thread workers "
                    "must be greater than 0."))
            sys.exit(3)

        # Grab the list of plugins registered as entry points through stevedore
        search_plugins = utils.get_search_plugins()

        # Verify all indices and types have registered plugins.
        # index and _type are lists because of nargs='*'
        group = group.split(',') if group else []
        _type = _type.split(',') if _type else []

        _type = utils.expand_type_matches(_type, six.viewkeys(search_plugins))
        LOG.debug("After expansion, 'type' argument: %s", ", ".join(_type))

        group_set = set(group)
        type_set = set(_type)
        """
        The caller can specify a sync based on either the Document Type or the
        Resource Group. With the Zero Downtime functionality, we are using
        aliases to index into ElasticSearch. We now have multiple Document
        Types sharing a single alias. If any member of a Resource Group (an
        ES alias) is re-syncing *all* members of that Resource Group needs
        to re-sync.

        The final list of plugins to use for re-syncing *must* come only from
        the Resource Group specifications. The "type" list is used only to make
        the "group" list complete. We need a two pass algorithm for this.

        First pass: Analyze the plugins according to the "type" list. This
          turns a type in the "type" list to a group in the "group" list.

        Second pass: Analyze the plugins according to the "group" list. Create
          the plugin list that will be used for re-syncing.

        Note: We cannot call any plugin's sync() during these two passes. The
        sync needs to be a separate step. The API states that if any invalid
        plugin was specified by the caller, the entire operation fails.
        """

        # First Pass: Document Types.
        if _type:
            for res_type, ext in six.iteritems(search_plugins):
                plugin_obj = ext.obj
                type_set.discard(plugin_obj.get_document_type())
                if plugin_obj.get_document_type() in _type:
                    group.append(plugin_obj.resource_group_name)

        # Second Pass: Resource Groups (including those from types).
        # This pass is a little tricky. If "group" is empty, it implies every
        # resource gets re-synced. The command group_set.discard() is a no-op
        # when "group" is empty.
        resource_groups = []
        plugin_objs = {}
        plugins_list = []
        for res_type, ext in six.iteritems(search_plugins):
            plugin_obj = ext.obj
            group_set.discard(plugin_obj.resource_group_name)
            if (not group) or (plugin_obj.resource_group_name in group):
                plugins_list.append((res_type, ext))
                plugin_objs[plugin_obj.resource_group_name] = plugin_obj
                if not (plugin_obj.resource_group_name,
                        plugin_obj.alias_name_search,
                        plugin_obj.alias_name_listener) in resource_groups:
                    resource_groups.append((plugin_obj.resource_group_name,
                                            plugin_obj.alias_name_search,
                                            plugin_obj.alias_name_listener))

        if group_set or type_set:
            print("Some index names or types do not have plugins "
                  "registered. Index names: %s. Types: %s" %
                  (",".join(group_set) or "<None>", ",".join(type_set)
                   or "<None>"))
            print("Aborting.")
            sys.exit(1)

        # As an optimization, if any types are explicitly requested, we
        # will index them from their service APIs. The rest will be
        # indexed from an existing ES index, if one exists.
        #
        # Also, if force_es is set the user wishes to use ES exclusively
        # as the source for all data. This implies everything in the
        # es_reindex list and nothing in the plugins_to_index list.
        es_reindex = []
        plugins_to_index = copy.copy(plugins_list)
        if _type or force_es:
            for resource_type, ext in plugins_list:
                doc_type = ext.obj.get_document_type()

                # If force_es is set, then "_type" is None. Always do this.
                # If force_es is None, then "_type" is set. Adjust as needed.
                if doc_type not in _type:
                    es_reindex.append(doc_type)
                    # Don't reindex this type
                    plugins_to_index.remove((resource_type, ext))

        if not force:
            # For display purpose, we want to iterate on only parthenogenetic
            # plugins that are not the children of another plugin. If there
            # are children plugins they will be displayed when we call
            # get_index_display_name(). Therefore any child plugins in the
            # display list, will be listed twice.
            display_plugins = []
            plugins_without_notifications = []
            for res, ext in plugins_list:
                if not ext.obj.parent_plugin:
                    display_plugins.append((res, ext))

            def format_selection(selection):
                def _format_plugin(plugin, indent=0):
                    plugin_doc_type = plugin.get_document_type()
                    handler = plugin.get_notification_handler()
                    event_list = handler.get_notification_supported_events()

                    display = '\n' + '    ' * indent + '--> ' if indent else ''
                    display += '%s (%s)' % (plugin_doc_type,
                                            plugin.resource_group_name)
                    if plugin_doc_type in es_reindex:
                        display += ' *'
                    if not event_list:
                        display += ' !!'
                        plugins_without_notifications.append(plugin)
                    return display + ''.join(
                        _format_plugin(c, indent + 1)
                        for c in plugin.child_plugins)

                return _format_plugin(selection[1].obj)

            all_res_groups = set(grp[0] for grp in resource_groups)
            print("\nResources in these groups must be re-indexed: %s." %
                  ", ".join(all_res_groups))

            print("Resource types (and aliases) matching selection:\n\n%s\n" %
                  '\n'.join(map(format_selection, sorted(display_plugins))))

            if es_reindex:
                print("Any types marked with * will be reindexed from "
                      "existing Elasticsearch data.\n")

            if plugins_without_notifications:
                print("Any types marked with !! do not support incremental "
                      "updates via the listener.")
                print("These types must be fully re-indexed periodically or "
                      "should be disabled.\n")

            ans = six.moves.input("\nUse '--force' to suppress this message.\n"
                                  "OK to continue? [y/n]: ")
            if ans.lower() != 'y':
                print("Aborting.")
                sys.exit(0)

        # Start the re-indexing process.
        # Now we are starting to change Elasticsearch. Let's clean up
        # if interrupted. Set index_names/futures here for cleaner code
        # in the signal handler.
        index_names = {}
        futures = []
        signal.signal(signal.SIGINT, sig_handler)

        # Step #1: Create new indexes for each Resource Group Type.
        #   The index needs to be fully functional before it gets
        #   added to any aliases. This includes all settings and
        #   mappings. Only then can we add it to the aliases. We first
        #   need to create all indexes. This is done by resource group.
        #   We cache and turn off new indexes' refresh intervals,
        #   this will improve the the performance of data re-syncing.
        #   After data get re-synced, set the refresh interval back.
        #   Once all indexes are created, we need to initialize the
        #   indexes. This is done by document type.
        #   NB: The aliases remain unchanged for this step.
        refresh_intervals = {}
        try:
            for group, search, listen in resource_groups:
                index_name = es_utils.create_new_index(group)
                index_names[group] = index_name

                refresh_intervals[index_name] = \
                    es_utils.get_index_refresh_interval(index_name)
                # Disable refresh interval by setting its value to -1
                es_utils.set_index_refresh_interval(index_name, -1)
            for resource_type, ext in plugins_list:
                plugin_obj = ext.obj
                group_name = plugin_obj.resource_group_name
                plugin_obj.prepare_index(index_name=index_names[group_name])
        except Exception:
            LOG.error(
                _LE("Error creating index or mapping, aborting "
                    "without indexing"))
            es_utils.alias_error_cleanup(index_names)
            raise

        # Step #2: Modify new index to play well with multiple indices.
        #   There is a "feature" of Elasticsearch where some types of
        #   queries do not work across multiple indices if there are no
        #   mappings for the specified document types. This is an issue we
        #   run into with our RBAC functionality. We need to modify the new
        #   index to work for these cases. We will grab all document types
        #   from the plugins and add a mapping for them as needed to the newly
        #   created indices.
        doc_type_info = []
        for res_type, ext in six.iteritems(search_plugins):
            doc_type_info.append(
                (ext.obj.get_document_type(), ext.obj.parent_plugin_type))
        for index in list(index_names.values()):
            es_utils.add_extra_mappings(index_name=index,
                                        doc_type_info=doc_type_info)

        # Step #3: Set up the aliases for all Resource Type Group.
        #   These actions need to happen outside of the plugins. Now that
        #   the indexes are created and fully functional we can associate
        #   them with the aliases.
        #   NB: The indexes remain unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                es_utils.setup_alias(index_names[group], search, listen)
            except Exception as e:
                LOG.exception(
                    _LE("Failed to setup alias for resource group "
                        "%(g)s: %(e)s") % {
                            'g': group,
                            'e': e
                        })
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #4: Re-index all resource types in this Resource Type Group.
        #   NB: The "search" and "listener" aliases remain unchanged for this
        #       step.
        #   NB: We will be spinning off this working into separate threads.
        #       We will limit each thread to a single resource type. For
        #       more information, please refer to the spec:
        #           searchlight-specs/specs/newton/
        #             index-performance-enhancement.rst
        ThreadPoolExec = concurrent.futures.ThreadPoolExecutor
        with ThreadPoolExec(max_workers=max_workers) as executor:
            try:
                futures = []
                # Start threads for plugin API.
                for res, ext in plugins_to_index:
                    # Throw the plugin into the thread pool.
                    plugin_obj = ext.obj
                    futures.append(
                        executor.submit(self._plugin_api, plugin_obj,
                                        index_names))

                # Start the single thread for ES re-index.
                if es_reindex:
                    futures.append(
                        executor.submit(self._es_reindex_worker, es_reindex,
                                        resource_groups, index_names))

                # Sit back, relax and wait for the threads to complete.
                wait_for_threads()
            except Exception as e:
                # An exception occurred. Start cleaning up ElasticSearch and
                # inform the user.
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #5: Update the "search" alias.
        #   All re-indexing has occurred. The index/alias is the same for
        #   all resource types within this Resource Group. These actions need
        #   to happen outside of the plugins. Also restore refresh interval
        #   for indexes, this will make data in the indexes become searchable.
        #   NB: The "listener" alias remains unchanged for this step.
        for index_name, interval in refresh_intervals.items():
            es_utils.set_index_refresh_interval(index_name, interval)

        old_index = {}
        for group, search, listen in resource_groups:
            old_index[group] = \
                es_utils.alias_search_update(search, index_names[group])

        # Step #6: Update the "listener" alias.
        #   The "search" alias has been updated. This involves both removing
        #   the old index from the alias as well as deleting the old index.
        #   These actions need to happen outside of the plugins.
        #   NB: The "search" alias remains unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                # If any exception raises, ignore and continue to delete
                # any other old indexes.
                es_utils.delete_index(old_index[group])
            except Exception as e:
                LOG.error(encodeutils.exception_to_unicode(e))
예제 #2
0
    def test_add_extra_mappings(self):
        # Set up the ES mock.
        mock_engine = mock.Mock()
        index = 'fake'
        b_parent = {'dynamic': 'strict',
                    '_parent': {
                        'type': 'never_used_parent'},
                    'properties': {
                    }}
        b_orphan = {'dynamic': 'strict',
                    'properties': {
                    }}
        with mock.patch('searchlight.elasticsearch.get_api') as mock_api:
            # Plug in the ES mock.
            mock_api.return_value = mock_engine

            # Test #1: One doc_type that already exists. With parent.
            mock_engine.indices.exists_type.return_value = True
            docs = [('doc1', True)]

            plugin_utils.add_extra_mappings(index_name=index,
                                            doc_type_info=docs)
            mock_engine.indices.put_mapping.assert_not_called()

            # Test #2: One doc_type that already exists. Without parent.
            mock_engine.indices.exists_type.return_value = True
            docs = [('doc1', False)]

            plugin_utils.add_extra_mappings(index_name=index,
                                            doc_type_info=docs)
            mock_engine.indices.put_mapping.assert_not_called()

            # Test #3: One doc_type that does not exist. With parent.
            mock_engine.indices.exists_type.return_value = False
            docs = [('doc1', True)]

            plugin_utils.add_extra_mappings(index_name=index,
                                            doc_type_info=docs)
            mock_engine.indices.put_mapping.assert_called_with(index=index,
                                                               doc_type='doc1',
                                                               body=b_parent)

            # Test #4: One doc_type that does not exist. Without parent.
            mock_engine.indices.exists_type.return_value = False
            mock_engine.indices.put_mapping.reset_mock()
            docs = [('doc1', False)]

            plugin_utils.add_extra_mappings(index_name=index,
                                            doc_type_info=docs)
            mock_engine.indices.put_mapping.assert_called_with(index=index,
                                                               doc_type='doc1',
                                                               body=b_orphan)

            # Test #5: Two doc_type's that do not exist. Mixed parents.
            mock_engine.indices.exists_type.return_value = False
            mock_engine.indices.put_mapping.reset_mock()
            docs = [('doc1', True), ('doc2', False)]
            calls = [mock.call(index=index, doc_type='doc1', body=b_parent),
                     mock.call(index=index, doc_type='doc2', body=b_orphan)]

            plugin_utils.add_extra_mappings(index_name=index,
                                            doc_type_info=docs)
            mock_engine.indices.put_mapping.assert_has_calls(calls)
예제 #3
0
    def sync(self, group=None, _type=None, force=False):
        # Verify all indices and types have registered plugins.
        # index and _type are lists because of nargs='*'
        group = group.split(',') if group else []
        _type = _type.split(',') if _type else []

        group_set = set(group)
        type_set = set(_type)
        """
        The caller can specify a sync based on either the Document Type or the
        Resource Group. With the Zero Downtime functionality, we are using
        aliases to index into ElasticSearch. We now have multiple Document
        Types sharing a single alias. If any member of a Resource Group (an
        ES alias) is re-syncing *all* members of that Resoruce Group needs
        to re-sync.

        The final list of plugins to use for re-syncing *must* come only from
        the Resource Group specifications. The "type" list is used only to make
        the "group" list complete. We need a two pass algorithm for this.

        First pass: Analyze the plugins according to the "type" list. This
          turns a type in the "type" list to a group in the "group" list.

        Second pass: Analyze the plugins according to the "group" list. Create
          the plugin list that will be used for re-syncing.

        Note: We cannot call any plugin's sync() during these two passes. The
        sync needs to be a separate step. The API states that if any invalid
        plugin was specified by the caller, the entire operation fails.
        """

        # First Pass: Document Types.
        if _type:
            for res_type, ext in six.iteritems(utils.get_search_plugins()):
                plugin_obj = ext.obj
                type_set.discard(plugin_obj.get_document_type())
                if plugin_obj.get_document_type() in _type:
                    group.append(plugin_obj.resource_group_name)

        # Second Pass: Resource Groups (including those from types).
        # This pass is a little tricky. If "group" is empty, it implies every
        # resource gets re-synced. The command group_set.discard() is a no-op
        # when "group" is empty.
        resource_groups = []
        plugin_objs = {}
        plugins_list = []
        for res_type, ext in six.iteritems(utils.get_search_plugins()):
            plugin_obj = ext.obj
            group_set.discard(plugin_obj.resource_group_name)
            if (not group) or (plugin_obj.resource_group_name in group):
                plugins_list.append((res_type, ext))
                plugin_objs[plugin_obj.resource_group_name] = plugin_obj
                if not (plugin_obj.resource_group_name,
                        plugin_obj.alias_name_search,
                        plugin_obj.alias_name_listener) in resource_groups:
                    resource_groups.append((plugin_obj.resource_group_name,
                                            plugin_obj.alias_name_search,
                                            plugin_obj.alias_name_listener))

        if group_set or type_set:
            print("Some index names or types do not have plugins "
                  "registered. Index names: %s. Types: %s" %
                  (",".join(group_set) or "<None>", ",".join(type_set)
                   or "<None>"))
            print("Aborting.")
            sys.exit(1)

        if not force:
            # For display purpose, we want to iterate on only parthenogenetic
            # plugins that are not the children of another plugin. If there
            # are children plugins they will be displayed when we call
            # get_index_display_name(). Therefore any child plugins in the
            # display list, will be listed twice.
            display_plugins = []
            for res, ext in plugins_list:
                if not ext.obj.parent_plugin:
                    display_plugins.append((res, ext))

            def format_selection(selection):
                resource_type, ext = selection
                return '  ' + ext.obj.get_index_display_name()

            # Grab the first element in the first (and only) tuple.
            group = resource_groups[0][0]
            print("\nAll resource types within Resource Group \"%(group)s\""
                  " must be re-indexed" % {'group': group})
            print("\nResource types (and aliases) matching selection:\n%s\n" %
                  '\n'.join(map(format_selection, sorted(display_plugins))))

            ans = six.moves.input(
                "Indexing will NOT delete existing data or mapping(s). It "
                "will reindex all resources. \nUse '--force' to suppress "
                "this message.\nOK to continue? [y/n]: ")
            if ans.lower() != 'y':
                print("Aborting.")
                sys.exit(0)

        # Start the re-indexing process

        # Step #1: Create new indexes for each Resource Group Type.
        #   The index needs to be fully functional before it gets
        #   added to any aliases. This inclues all settings and
        #   mappings. Only then can we add it to the aliases. We first
        #   need to create all indexes. This is done by resource group.
        #   We cache and turn off new indexes' refresh intervals,
        #   this will improve the the performance of data re-syncing.
        #   After data get re-synced, set the refresh interval back.
        #   Once all indexes are created, we need to initialize the
        #   indexes. This is done by document type.
        #   NB: The aliases remain unchanged for this step.
        index_names = {}
        refresh_intervals = {}
        try:
            for group, search, listen in resource_groups:
                index_name = es_utils.create_new_index(group)
                index_names[group] = index_name
                refresh_intervals[index_name] = \
                    es_utils.get_index_refresh_interval(index_name)
                # Disable refresh interval by setting its value to -1
                es_utils.set_index_refresh_interval(index_name, -1)
            for resource_type, ext in plugins_list:
                plugin_obj = ext.obj
                group_name = plugin_obj.resource_group_name
                plugin_obj.prepare_index(index_name=index_names[group_name])
        except Exception:
            LOG.error(
                _LE("Error creating index or mapping, aborting "
                    "without indexing"))
            es_utils.alias_error_cleanup(index_names)
            raise

        # Step #2: Modify new index to play well with multiple indices.
        #   There is a "feature" of Elasticsearch where some types of
        #   queries do not work across multiple indices if there are no
        #   mappings for the specified document types. This is an issue we
        #   run into with our RBAC functionality. We need to modify the new
        #   index to work for these cases. We will grab all document types
        #   from the plugins and add a mapping for them as needed to the newly
        #   created indices.
        doc_type_info = []
        for res_type, ext in six.iteritems(utils.get_search_plugins()):
            doc_type_info.append(
                (ext.obj.get_document_type(), ext.obj.parent_plugin_type))
        for index in list(index_names.values()):
            es_utils.add_extra_mappings(index_name=index,
                                        doc_type_info=doc_type_info)

        # Step #3: Set up the aliases for all Resource Type Group.
        #   These actions need to happen outside of the plugins. Now that
        #   the indexes are created and fully functional we can associate
        #   them with the aliases.
        #   NB: The indexes remain unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                es_utils.setup_alias(index_names[group], search, listen)
            except Exception as e:
                LOG.error(
                    _LE("Failed to setup alias for resource group "
                        "%(g)s: %(e)s") % {
                            'g': group,
                            'e': e
                        })
                es_utils.alias_error_cleanup(index_names)
                raise

        # Step #4: Re-index all resource types in this Resource Type Group.
        #   As an optimization, if any types are explicitly requested, we
        #   will index them from their service APIs. The rest will be
        #   indexed from an existing ES index, if one exists.
        #   NB: The "search" and "listener" aliases remain unchanged for this
        #       step.
        es_reindex = []
        plugins_to_index = copy.copy(plugins_list)
        if _type:
            for resource_type, ext in plugins_list:
                doc_type = ext.obj.get_document_type()
                if doc_type not in _type:
                    es_reindex.append(doc_type)
                    plugins_to_index.remove((resource_type, ext))

        # Call plugin API as needed.
        if plugins_to_index:
            for res, ext in plugins_to_index:
                plugin_obj = ext.obj
                gname = plugin_obj.resource_group_name
                try:
                    plugin_obj.initial_indexing(index_name=index_names[gname])
                    es_utils.refresh_index(index_names[gname])
                except exceptions.EndpointNotFound:
                    LOG.warning(
                        _LW("Service is not available for plugin: "
                            "%(ext)s") % {"ext": ext.name})
                except Exception as e:
                    LOG.error(
                        _LE("Failed to setup index extension "
                            "%(ex)s: %(e)s") % {
                                'ex': ext.name,
                                'e': e
                            })
                    es_utils.alias_error_cleanup(index_names)
                    raise

        # Call ElasticSearch for the rest, if needed.
        if es_reindex:
            for group in six.iterkeys(index_names):
                # Grab the correct tuple as a list, convert list to a single
                # tuple, extract second member (the search alias) of tuple.
                alias_search = \
                    [a for a in resource_groups if a[0] == group][0][1]
                try:
                    es_utils.reindex(src_index=alias_search,
                                     dst_index=index_names[group],
                                     type_list=es_reindex)
                    es_utils.refresh_index(index_names[group])
                except Exception as e:
                    LOG.error(
                        _LE("Failed to setup index extension "
                            "%(ex)s: %(e)s") % {
                                'ex': ext.name,
                                'e': e
                            })
                    es_utils.alias_error_cleanup(index_names)
                    raise

        # Step #5: Update the "search" alias.
        #   All re-indexing has occurred. The index/alias is the same for
        #   all resource types within this Resource Group. These actions need
        #   to happen outside of the plugins. Also restore refresh interval
        #   for indexes, this will make data in the indexes become searchable.
        #   NB: The "listener" alias remains unchanged for this step.
        for index_name, interval in refresh_intervals.items():
            es_utils.set_index_refresh_interval(index_name, interval)

        old_index = {}
        for group, search, listen in resource_groups:
            old_index[group] = \
                es_utils.alias_search_update(search, index_names[group])

        # Step #6: Update the "listener" alias.
        #   The "search" alias has been updated. This involves both removing
        #   the old index from the alias as well as deleting the old index.
        #   These actions need to happen outside of the plugins.
        #   NB: The "search" alias remains unchanged for this step.
        for group, search, listen in resource_groups:
            try:
                # If any exception raises, ignore and continue to delete
                # any other old indexes.
                es_utils.delete_index(old_index[group])
            except Exception as e:
                LOG.error(encodeutils.exception_to_unicode(e))