Ejemplo n.º 1
0
    def __init_gdd(self, path):
        """Make sure the global data dictionary has an entry for "path".

        Nothing is done when "path" already has an entry.  Otherwise
        qp.TermQuery.__init_gdd creates the default entry and the
        client-only index stores are then added to it."""

        all_paths = self._global_data_dict
        if path not in all_paths:
            # Have qp.TermQuery set up the default global dictionary
            # for this index path first.
            qp.TermQuery.__init_gdd(self, path)

            # Client search must also account for the packages which
            # have been installed or removed since the indexes were
            # last rebuilt, so attach the client-specific stores to
            # the entry for this index path.
            path_stores = all_paths[path]
            path_stores["fast_add"] = ss.IndexStoreSet(ss.FAST_ADD)
            path_stores["fast_remove"] = ss.IndexStoreSet(ss.FAST_REMOVE)
            path_stores["fmri_hash"] = ss.IndexStoreSetHash(
                ss.FULL_FMRI_HASH_FILE)
Ejemplo n.º 2
0
        def check_for_updates(index_root, fmri_set):
                """Checks fmri_set to see which members have not been indexed.

                The "index_root" parameter is the directory which contains the
                index.

                The "fmri_set" parameter is the set of fmris to check.

                If the full fmri list file does not exist under index_root,
                fmri_set is returned as it was passed in.  Any other IOError
                raised while opening that file is propagated."""

                data = ss.IndexStoreSet("full_fmri_list")
                try:
                        data.open(index_root)
                except IOError:
                        # A missing index file means that nothing has been
                        # indexed yet, so every member of fmri_set is still
                        # unindexed.  Any other failure is a real error.
                        if not os.path.exists(os.path.join(
                            index_root, data.get_file_name())):
                                return fmri_set
                        raise
                # NOTE(review): nothing visible here uses or closes "data"
                # after a successful open -- confirm against the rest of
                # this function.
Ejemplo n.º 3
0
        def check_for_updates(index_root, cat):
                """Check to see whether the catalog has fmris which have not
                been indexed.

                'index_root' is the path to the index to check against.

                'cat' is the catalog to check for new fmris.

                If the full fmri list file does not exist under index_root,
                the set of all fmris in 'cat' (with the publisher removed) is
                returned.  Any other IOError raised while opening that file
                is propagated."""

                fmri_set = set(f.remove_publisher() for f in cat.fmris())

                data = ss.IndexStoreSet("full_fmri_list")
                try:
                        data.open(index_root)
                except IOError:
                        # A missing index file means that nothing has been
                        # indexed yet, so every fmri in the catalog is new.
                        # Any other failure is a real error.
                        if not os.path.exists(os.path.join(
                            index_root, data.get_file_name())):
                                return fmri_set
                        raise
                # NOTE(review): nothing visible here uses or closes "data"
                # after a successful open -- confirm against the rest of
                # this function.
Ejemplo n.º 4
0
        def __init__(self, index_dir, get_manifest_func, get_manifest_path_func,
            progtrack=None, excludes=EmptyI, log=None,
            sort_file_max_size=SORT_FILE_MAX_SIZE):
                """Set up the indexer state for the index kept in 'index_dir'.

                'index_dir' is the directory of the index; the paths of a
                "TMP" subdirectory and of a "lock" file are derived from it.

                'get_manifest_func', 'get_manifest_path_func', 'excludes' and
                'log' are stored as given.

                'progtrack' is the progress tracker to use; when it is None a
                progress.NullProgressTracker is created instead.

                'sort_file_max_size' must be greater than 0, otherwise a
                search_errors.IndexingException is raised."""

                self._num_keys = self._num_manifests = self._num_entries = 0

                self.get_manifest_func = get_manifest_func
                self.get_manifest_path_func = get_manifest_path_func
                self.excludes = excludes
                self.__log = log
                self.sort_file_max_size = sort_file_max_size
                if sort_file_max_size <= 0:
                        raise search_errors.IndexingException(
                            _("sort_file_max_size must be greater than 0"))

                # One storage object per index file.
                fast_add = ss.IndexStoreSet(ss.FAST_ADD)
                fast_remove = ss.IndexStoreSet(ss.FAST_REMOVE)
                manf = ss.IndexStoreListDict(ss.MANIFEST_LIST,
                    build_function=self.__build_fmri,
                    decode_function=self.__decode_fmri)
                full_fmri = ss.IndexStoreSet(ss.FULL_FMRI_FILE)
                main_dict = ss.IndexStoreMainDict(ss.MAIN_FILE)
                token_offset = ss.IndexStoreDictMutable(ss.BYTE_OFFSET_FILE)

                # All index files are gathered in this one dictionary so
                # that they can easily be opened or closed together.  If
                # the index structure changes, add or remove files here.
                self._data_dict = {
                        "fast_add": fast_add,
                        "fast_remove": fast_remove,
                        "manf": manf,
                        "full_fmri": full_fmri,
                        "main_dict": main_dict,
                        "token_byte_offset": token_offset
                        }

                # The inverted dictionary goes in after the others because
                # it needs the manifest list mapping as an input.
                fmri_offsets = ss.InvertedDict(ss.FMRI_OFFSETS_FILE, manf)
                self._data_dict["fmri_offsets"] = fmri_offsets

                # Direct handles on the individual index files.
                self._data_fast_add = fast_add
                self._data_fast_remove = fast_remove
                self._data_manf = manf
                self._data_full_fmri = full_fmri
                self._data_main_dict = main_dict
                self._data_token_offset = token_offset
                self._data_fmri_offsets = fmri_offsets

                self._index_dir = index_dir
                self._tmp_dir = os.path.join(index_dir, "TMP")

                lock_path = os.path.join(index_dir, "lock")
                self.__lockfile = lockfile.LockFile(lock_path,
                    set_lockstr=lockfile.generic_lock_set_str,
                    get_lockstr=lockfile.generic_lock_get_str,
                    failure_exc=search_errors.IndexLockedException)

                self._indexed_manifests = 0
                self.server_repo = True
                self.empty_index = False
                self.file_version_number = None

                # Fall back to a tracker that reports nothing when the
                # caller did not supply one.
                if progtrack is None:
                        progtrack = progress.NullProgressTracker()
                self._progtrack = progtrack

                self._file_timeout_secs = FILE_OPEN_TIMEOUT_SECS

                # State of the sort file currently being written.
                self._sort_fh = None
                self._sort_file_num = 0
                self._sort_file_bytes = 0

                # The action type and key indexes, which are necessary for
                # efficient searches by type or key, keep their file handles
                # in dictionaries: at_fh holds the handles for action types
                # and st_fh holds the handles for keys.
                self.at_fh = {}
                self.st_fh = {}

                self.old_out_token = None
Ejemplo n.º 5
0
class TermQuery(qp.TermQuery):
    """This class handles the client specific search logic for searching
    for a base query term.

    On top of qp.TermQuery it searches the packages recorded as added
    since the index was built, drops results from the packages recorded
    as removed, and can search the manifests directly when no index is
    available."""

    # This lock is used so that only one instance of a term query object
    # is ever modifying the class wide variable, _global_data_dict.
    client_dict_lock = threading.Lock()

    # Client search needs to account for the packages which have been
    # installed or removed since the last time the indexes were rebuilt.
    qp.TermQuery._global_data_dict["fast_add"] = \
        ss.IndexStoreSet(ss.FAST_ADD)
    qp.TermQuery._global_data_dict["fast_remove"] = \
        ss.IndexStoreSet(ss.FAST_REMOVE)
    qp.TermQuery._global_data_dict["fmri_hash"] = \
        ss.IndexStoreSetHash(ss.FULL_FMRI_HASH_FILE)

    def __init__(self, term):
        """Create a client term query for "term".

        The client-only attributes start out as None; _efn, the fast
        add/remove stores and the fmri hash are filled in by
        set_info()."""

        qp.TermQuery.__init__(self, term)
        self._impl_fmri_to_path = None
        self._efn = None
        self._data_fast_remove = None
        self.full_fmri_hash = None
        self._data_fast_add = None

    def set_info(self, gen_installed_pkg_names, get_use_slow_search,
                 set_use_slow_search, **kwargs):
        """This function provides the necessary information to the AST
        so that a search can be performed.

        The "gen_installed_pkg_names" parameter is a function which
        returns a generator function which iterates over the names of
        the installed packages in the image.

        The "get_use_slow_search" parameter is a function that returns
        whether slow search has been used.

        The "set_use_slow_search" parameter is a function that sets
        whether slow search was used.

        Any additional keyword arguments are passed through to
        qp.TermQuery.set_info."""

        self.get_use_slow_search = get_use_slow_search
        self._efn = gen_installed_pkg_names()
        # The lock is held for the whole setup and is released by the
        # "with" statement even when an unexpected exception escapes.
        with TermQuery.client_dict_lock:
            try:
                qp.TermQuery.set_info(self,
                    gen_installed_pkg_names=gen_installed_pkg_names,
                    get_use_slow_search=get_use_slow_search,
                    set_use_slow_search=set_use_slow_search,
                    **kwargs)
                # Keep this instance's own references to the
                # client-only stores so that rebinding the entries of
                # the shared dictionary later does not change which
                # objects this instance uses.
                self._data_fast_add = \
                    TermQuery._global_data_dict["fast_add"]
                self._data_fast_remove = \
                    TermQuery._global_data_dict["fast_remove"]
                self.full_fmri_hash = \
                    self._global_data_dict["fmri_hash"]
                set_use_slow_search(False)
            except se.NoIndexException:
                # If no index was found, the slower version of
                # search will be used.
                set_use_slow_search(True)

    def search(self, restriction, fmris, manifest_func, excludes):
        """This function performs local client side search.

        The "restriction" parameter is a generator over the results that
        another branch of the AST has already found.  If it exists,
        those results are treated as the domain for search.  If it does
        not exist, search uses the set of actions from installed
        packages as the domain.

        The "fmris" parameter is a function which produces an object
        which iterates over the names of installed fmris.

        The "manifest_func" parameter is a function which takes a fmri
        and returns a path to the manifest for that fmri.

        The "excludes" parameter is a list of the variants defined for
        this image.

        Raises api_errors.IncorrectIndexFileHash when the stored fmri
        hash does not match the installed packages."""

        if restriction:
            return self._restricted_search_internal(restriction)
        if self.get_use_slow_search():
            return self.slow_search(fmris, manifest_func, excludes)

        try:
            self.full_fmri_hash.check_against_file(self._efn)
        except se.IncorrectIndexFileHash:
            # Translate the search error into the api level error.
            raise api_errors.IncorrectIndexFileHash()

        base_res = self._search_internal(fmris)
        client_res = self._search_fast_update(manifest_func, excludes)
        base_res = self._check_fast_remove(base_res)
        # Results from the index come first, followed by the results
        # from the packages added since the index was built.
        return itertools.chain(self._get_results(base_res),
                               self._get_fast_results(client_res))

    def _check_fast_remove(self, res):
        """This function removes any results from the generator "res"
        (the search results) that are actions from packages known to
        have been removed from the image since the last time the index
        was built."""

        return ((p_str, o, a, s, f) for p_str, o, a, s, f in res
                if not self._data_fast_remove.has_entity(p_str))

    def _collect_manifest_tokens(self, path, fmri_str, excludes, tok_dict):
        """Add the tokens of the manifest at "path" to "tok_dict".

        Only entries whose action type and key pass this query's
        action type and key filters are added.  "tok_dict" maps each
        token to a list of (action type, key, value, fmri_str, lines)
        tuples and is modified in place."""

        search_dict = manifest.Manifest.search_dict(path,
                                                    return_line=True,
                                                    excludes=excludes)
        for entry in search_dict:
            tok, at, st, fv = entry
            if not (self.action_type_wildcard or at == self.action_type):
                continue
            if not (self.key_wildcard or st == self.key):
                continue
            tok_dict.setdefault(tok, []).append(
                (at, st, fv, fmri_str, search_dict[entry]))

    def _select_token_matches(self, tok_dict):
        """Return the lists from "tok_dict" whose token matches the term.

        A case insensitive query is always matched as a glob; otherwise
        the token must be exactly equal to the term unless the query
        itself is a glob."""

        case_sensitive = self._case_sensitive
        if self._glob or not case_sensitive:
            matches = choose(list(tok_dict), self._term, case_sensitive)
            return [tok_dict[m] for m in matches]
        if self._term in tok_dict:
            return [tok_dict[self._term]]
        return []

    def _search_fast_update(self, manifest_func, excludes):
        """This function searches the packages which have been
        installed since the last time the index was rebuilt.

        The "manifest_func" parameter is a function which maps fmris to
        the path to their manifests.

        The "excludes" parameter is a list of variants defined in the
        image.

        Returns a list of lists of (action type, key, value, fmri
        string, lines) tuples, one inner list per matching token."""

        assert self._data_main_dict.get_file_handle() is not None

        tok_dict = {}

        # self._data_fast_add holds the names of the fmris added
        # since the last time the index was rebuilt.
        for fmri_str in self._data_fast_add._set:
            if not (self.pkg_name_wildcard or self.pkg_name_match(fmri_str)):
                continue
            path = manifest_func(fmri.PkgFmri(fmri_str))
            self._collect_manifest_tokens(path, fmri_str, excludes, tok_dict)
        return self._select_token_matches(tok_dict)

    def _get_fast_results(self, fast_update_res):
        """This function transforms the output of _search_fast_update
        to match that of _search_internal."""

        for sub_list in fast_update_res:
            for at, st, fv, fmri_str, line_list in sub_list:
                for l in line_list:
                    yield at, st, fmri_str, fv, l

    def slow_search(self, fmris, manifest_func, excludes):
        """This function performs search when no prebuilt index is
        available.

        The "fmris" parameter is a generator function which iterates
        over the packages to be searched.

        The "manifest_func" parameter is a function which maps fmris to
        the path to their manifests.

        The "excludes" parameter is a list of variants defined in the
        image.

        Yields (action type, key, fmri string, value, line) tuples,
        package by package."""

        # The package list is materialized before any manifest is read,
        # as the original implementation did.
        for pfmri in list(fmris()):
            fmri_str = pfmri.get_fmri(anarchy=True, include_scheme=False)
            if not (self.pkg_name_wildcard or self.pkg_name_match(fmri_str)):
                continue
            # Tokens are collected and matched separately for every
            # package so that results are produced one package at a time.
            tok_dict = {}
            self._collect_manifest_tokens(manifest_func(pfmri), fmri_str,
                                          excludes, tok_dict)
            matched = self._select_token_matches(tok_dict)
            for res in self._get_fast_results(matched):
                yield res

    def _read_pkg_dirs(self, fmris):
        """Legacy function used to search indexes which have a pkg
        directory with fmri offset information instead of the
        fmri_offsets.v1 file.  This function is in this subclass to
        translate the error from a search_error to an api_error."""

        try:
            return qp.TermQuery._read_pkg_dirs(self, fmris)
        except se.InconsistentIndexException as e:
            raise api_errors.InconsistentIndexException(e)