Example #1
    def index_files_of_interest(self, doc, package_dict):
        log.info("index_files_of_interest start")
        name = package_dict['name']
        branch = package_dict['branch']

        if branch == 'master':
            branch = 'rawhide'

        #url = "/".join([self.mdapi_url, branch, "files", name])
        #data = self._call_api(url)
        data = package_dict['file_data']
        if data.get('files') is not None:
            for entry in data['files']:
                filenames = entry['filenames'].split('/')
                for filename in filenames:
                    if filename.startswith('/usr/bin'):
                        # index executables
                        log.info("indexing exe file %s" %
                                 os.path.basename(filename))
                        exe_name = filter_search_string(
                            os.path.basename(filename))
                        self.indexer.index_text_without_positions(
                            "EX__%s__EX" % exe_name)
        else:
            log.warn("Failed to get file list for %r" % name)
            return
        log.info("index_files_of_interest end")
Example #2
    def _get_xapian_data(self):
        xapian_dir = '/var/cache/fedoracommunity/packages/xapian/search'
        if not os.path.exists(xapian_dir):
            NO_XAP = '__no_xapian_available__'
            keys = ['icon', 'summary']
            dumb_data = dict([(key, NO_XAP) for key in keys])
            return dumb_data

        import xapian
        from fedoracommunity.search.utils import filter_search_string
        package_name = filter_search_string(self.name)
        search_db = xapian.Database(xapian_dir)
        enquire = xapian.Enquire(search_db)
        qp = xapian.QueryParser()
        qp.set_database(search_db)
        search_string = "Ex__%s__EX" % package_name
        query = qp.parse_query(search_string)
        enquire.set_query(query)
        matches = enquire.get_mset(0, 1)

        if len(matches) == 0:
            return None

        result = json.loads(matches[0].document.get_data())
        return result
Example #5
    def _get_old_document(self, package_name):
        search_name = utils.filter_search_string(package_name)
        search_string = "%s EX__%s__EX" % (search_name, search_name)
        matches = self._xapian_connector().do_search(search_string, 0, 10)

        for match in matches:
            result = json.loads(match.document.get_data())
            if result['name'] == package_name:
                return match.document

        return None
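
The query pairs the plain filtered name with the boosted exact-match term written by the indexer, so either a free-text hit or the EX__ term can match. For a hypothetical package name the query string looks like this:

    search_name = 'python_requests'   # e.g. filter_search_string('python-requests')
    print("%s EX__%s__EX" % (search_name, search_name))
    # python_requests EX__python_requests__EX
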
Example #6
    def search_packages(self,
                        start_row=None,
                        rows_per_page=None,
                        order=-1,
                        sort_col=None,
                        filters={},
                        **params):

        search_string = filters.get('search')
        # short circuit for an empty search string
        if not search_string:
            return (0, [])

        search_string = urllib.unquote_plus(search_string)

        unfiltered_search_terms = [
            t.strip() for t in search_string.split(' ') if t.strip()
        ]

        search_string = utils.filter_search_string(search_string)
        phrase = '"%s"' % search_string

        # add exact matches
        search_terms = search_string.split(' ')
        search_terms = [t.strip() for t in search_terms if t.strip()]
        for term in search_terms:
            search_string += " EX__%s__EX" % term

        # add phrase match
        search_string += " OR %s" % phrase

        if len(search_terms) > 1:
            # add a proximity match (terms appearing near each other)
            search_string += " OR (%s)" % ' NEAR '.join(search_terms)

        # Add partial/wildcard matches
        search_string += " OR (%s)" % ' OR '.join(
            ["*%s*" % term for term in search_terms])

        matches = self.do_search(search_string, start_row, rows_per_page,
                                 order, sort_col)

        count = matches.get_matches_estimated()
        rows = []
        for m in matches:
            result = json.loads(m.document.get_data())

            # mark matches in <span class="match">
            self._highlight_matches(result, unfiltered_search_terms)

            rows.append(result)

        return (count, rows)
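
To see what search_packages hands to do_search, the query assembly can be reproduced on its own. The input below is made up, and filter_search_string is the stand-in sketched after Example #1.

    raw = "python requests"
    search_string = filter_search_string(raw)
    phrase = '"%s"' % search_string

    terms = [t.strip() for t in search_string.split(' ') if t.strip()]
    for term in terms:
        search_string += " EX__%s__EX" % term                # exact matches
    search_string += " OR %s" % phrase                       # phrase match
    if len(terms) > 1:
        search_string += " OR (%s)" % ' NEAR '.join(terms)   # proximity match
    search_string += " OR (%s)" % ' OR '.join(
        "*%s*" % t for t in terms)                           # wildcard matches

    print(search_string)
    # python requests EX__python__EX EX__requests__EX OR "python requests"
    # OR (python NEAR requests) OR (*python* OR *requests*)
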
Example #7
    def get_package_info(self, package_name):
        search_name = utils.filter_search_string(package_name)
        search_string = "%s EX__%s__EX" % (search_name, search_name)

        matches = self.do_search(search_string, 0, 10)
        if len(matches) == 0:
            return None

        # Sometimes (rarely), the first match is not the one we actually want.
        for match in matches:
            result = json.loads(match.document.get_data())
            if result['name'] == package_name:
                return result
            if any([sp['name'] == package_name for sp in result['sub_pkgs']]):
                return result

        return None
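
The loop also checks sub_pkgs because searching for a sub-package name returns the parent package's document. The data below is made up to show that check in isolation:

    result = {
        'name': 'python-requests',
        'sub_pkgs': [{'name': 'python3-requests'},
                     {'name': 'python-requests-doc'}],
    }
    package_name = 'python3-requests'

    hit = (result['name'] == package_name or
           any(sp['name'] == package_name for sp in result['sub_pkgs']))
    print(hit)   # True: the parent's document is returned for the sub-package
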
Example #10
    def get_latest_builds(self, package_name):
        enquire = xapian.Enquire(self._versionmap_db)
        qp = xapian.QueryParser()
        qp.set_database(self._versionmap_db)
        qp.add_boolean_prefix('key', 'XA')
        query = qp.parse_query('key:%s' % utils.filter_search_string(package_name))

        enquire.set_query(query)
        matches = enquire.get_mset(0, 1)
        if len(matches) == 0:
            return None
        results = json.loads(matches[0].document.get_data())

        latest_builds = OrderedDict()
        lastdistname = ""

        for dist in distmappings.tags:
            distname = dist['name']
            if lastdistname != distname and distname in results:
                latest_builds[distname] = results[distname]
                lastdistname = distname

        return latest_builds
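
The lastdistname check collapses consecutive tags that belong to the same distribution, so each release appears at most once in the result. The versionmap document and tag list below are made-up stand-ins for the real data and for distmappings.tags:

    from collections import OrderedDict

    results = {'Rawhide': '2.31.0-1.fc40', 'Fedora 39': '2.28.2-1.fc39'}
    tags = [{'name': 'Rawhide'}, {'name': 'Rawhide'},
            {'name': 'Fedora 39'}, {'name': 'Fedora 38'}]

    latest_builds = OrderedDict()
    lastdistname = ""
    for dist in tags:
        distname = dist['name']
        if lastdistname != distname and distname in results:
            latest_builds[distname] = results[distname]
            lastdistname = distname

    print(latest_builds)
    # OrderedDict([('Rawhide', '2.31.0-1.fc40'), ('Fedora 39', '2.28.2-1.fc39')])
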
Example #11
    def _create_document(self, package, old_doc=None):
        log.info("_create_document start")
        doc = xapian.Document()
        self.indexer.set_document(doc)
        filtered_name = filter_search_string(package['name'])
        filtered_summary = filter_search_string(package['summary'])
        filtered_description = filter_search_string(package['description'])
        filtered_owner = filter_search_string(package['devel_owner'])

        self.indexer.index_text_without_positions(
            'EX__' + filtered_name + '__EX', 10, '')
        self.indexer.index_text_without_positions(
            'EX__' + filtered_owner + '__EX', 10, '')

        name_parts = filtered_name.split('_')
        for i in range(20):
            if len(name_parts) > 1:
                for part in name_parts:
                    self.indexer.index_text_without_positions(part)
            self.indexer.index_text_without_positions(filtered_name, 10, '')

        for i in range(4):
            self.indexer.index_text_without_positions(filtered_summary)
        self.indexer.index_text_without_positions(filtered_description)

        self.index_files_of_interest(doc, package)

        for sub_package in package['sub_pkgs']:
            filtered_sub_package_name = filter_search_string(
                sub_package['name'])
            log.info("       indexing subpackage %s" % sub_package['name'])

            self.indexer.index_text_without_positions(
                filtered_sub_package_name)
            self.indexer.index_text_without_positions(
                'EX__' + filtered_sub_package_name + '__EX', 10, '')

            self.index_files_of_interest(doc, sub_package)

            # Set special sub-package icon if appstream has one
            sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                      self.default_icon)

            # If the parent has a dull icon, give it ours!
            if sub_package['icon'] != self.default_icon \
                    and package['icon'] == self.default_icon:
                package['icon'] = sub_package['icon']

            # remove anything we don't want to store
            del sub_package['package']

        # @@: Right now we're only indexing the first part of the
        # provides/requires, and not boolean comparison or version
        # for requires in package.requires:
        #    print requires[0]
        #    doc.fields.append(xappy.Field('requires', requires[0]))
        # for provides in package.provides:
        #    doc.fields.append(xappy.Field('provides', provides[0]))

        # remove anything we don't want to store and then store data in
        # json format
        del package['package']

        doc.set_data(json.dumps(package))

        # It seems that xapian's db.replace_document still creates a new
        # document. To avoid duplicating the document, we use add_document
        # and then delete the old document.
        self.db.add_document(doc)
        if old_doc is not None:
            self.db.delete_document(old_doc.get_docid())
        self.db.commit()
        log.info("_create_document end")