def index_files(self, doc, pkg_dict):
        yum_pkg = pkg_dict['pkg']
        if yum_pkg != None:
            desktop_file_cache = RPMCache(yum_pkg, self.yum_base,
                                          self.cache_path)
            desktop_file_cache.open()
            for filename in yum_pkg.filelist:
                if filename.endswith('.desktop'):
                    # index apps
                    print "        indexing desktop file %s" % os.path.basename(
                        filename)
                    f = desktop_file_cache.open_file(
                        filename, decompress_filter='*.desktop')
                    if f == None:
                        print "could not open desktop file"
                        continue

                    self.index_desktop_file(doc, f, pkg_dict,
                                            desktop_file_cache)
                    f.close()
                if filename.startswith('/usr/bin'):
                    # index executables
                    print("        indexing exe file %s" %
                          os.path.basename(filename))
                    exe_name = filter_search_string(os.path.basename(filename))
                    doc.fields.append(
                        xappy.Field('cmd', "EX__%s__EX" % exe_name))

            desktop_file_cache.close()
    def search(self, key):
        """Look up a single document whose 'key' field matches *key*."""
        # pick up any index writes that happened since the connection opened
        if self.sconn_needs_reload:
            self.sconn.reopen()
        query = self.sconn.query_parse('key:%s' % filter_search_string(key))
        # only the best hit is needed, so request exactly one result
        return self.sconn.search(query, 0, 1)
    def search(self, key):
        """Return the top search hit for *key* (at most one result)."""
        if self.sconn_needs_reload:
            # stale read connection: refresh before querying
            self.sconn.reopen()
        filtered = filter_search_string(key)
        parsed = self.sconn.query_parse('key:%s' % filtered)
        hits = self.sconn.search(parsed, 0, 1)
        return hits
# Example #4
# 0
    def index_desktop_file(self, doc, desktop_file, pkg_dict, desktop_file_cache):
        doc.fields.append(xappy.Field('tag', 'desktop'))

        dp = DesktopParser(desktop_file)
        category = dp.get('Categories', '')

        for c in category.split(';'):
            if c:
                c = filter_search_string(c)
                doc.fields.append(xappy.Field('category_tags', c))
                # add exact match also
                doc.fields.append(xappy.Field('category_tags', "EX__%s__EX" % c))

        icon = dp.get('Icon', '')
        if icon:
            print "Icon %s" % icon
            generated_icon = self.icon_cache.generate_icon(icon, desktop_file_cache)
            if generated_icon != None:
                pkg_dict['icon'] = icon
    def index_files_of_interest(self, doc, package_dict):
        """Feed executable-name tokens for *package_dict* into the indexer.

        Queries mdapi for the package's file list and indexes an
        exact-match ``EX__<exe>__EX`` token for each /usr/bin entry.
        """
        name = package_dict['name']
        branch = package_dict['branch']

        # mdapi exposes the development branch under the name 'rawhide'
        if branch == 'master':
            branch = 'rawhide'

        url = "/".join([self.mdapi_url, branch, "files", name])
        data = self._call_api(url)
        if data.get('files') is None:
            log.warn("Failed to get file list for %r, %r" % (name, url))
            return
        for entry in data['files']:
            # NOTE(review): split('/') strips the slashes, so the
            # startswith('/usr/bin') test below looks like it can never
            # match a split component -- confirm mdapi's 'filenames'
            # separator before relying on this.
            for filename in entry['filenames'].split('/'):
                if filename.startswith('/usr/bin'):
                    # index executables
                    log.info("indexing exe file %s" % os.path.basename(filename))
                    exe_name = filter_search_string(os.path.basename(filename))
                    self.indexer.index_text_without_positions("EX__%s__EX" % exe_name)
    def index_desktop_file(self, doc, desktop_file, pkg_dict,
                           desktop_file_cache):
        doc.fields.append(xappy.Field('tag', 'desktop'))

        dp = DesktopParser(desktop_file)
        category = dp.get('Categories', '')

        for c in category.split(';'):
            if c:
                c = filter_search_string(c)
                doc.fields.append(xappy.Field('category_tags', c))
                # add exact match also
                doc.fields.append(
                    xappy.Field('category_tags', "EX__%s__EX" % c))

        icon = dp.get('Icon', '')
        if icon:
            print "Icon %s" % icon
            generated_icon = self.icon_cache.generate_icon(
                icon, desktop_file_cache)
            if generated_icon != None:
                pkg_dict['icon'] = icon
    def index_files_of_interest(self, doc, package_dict):
        """Add an exact-match 'cmd' field to *doc* for each /usr/bin file.

        Fetches the package's file list from mdapi over HTTP; warns and
        returns early when the request fails.
        """
        name = package_dict['name']
        branch = package_dict['branch']

        # mdapi knows the development branch as 'rawhide'
        if branch == 'master':
            branch = 'rawhide'

        url = "/".join([self.mdapi_url, branch, "files", name])
        response = local.http.get(url)
        if not bool(response):
            log.warn("Failed to get file list for %r, %r" % (name, response))
            return
        data = response.json()
        for entry in data['files']:
            # NOTE(review): split('/') removes the slashes, so components
            # seemingly cannot start with '/usr/bin' -- verify the mdapi
            # 'filenames' separator.
            parts = entry['filenames'].split('/')
            for filename in parts:
                if not filename.startswith('/usr/bin'):
                    continue
                # index executables
                basename = os.path.basename(filename)
                log.info("        indexing exe file %s" % basename)
                exe_name = filter_search_string(basename)
                doc.fields.append(
                    xappy.Field('cmd', "EX__%s__EX" % exe_name))
# Example #8
# 0
    def index_files(self, doc, pkg_dict):
        yum_pkg = pkg_dict['pkg']
        if yum_pkg != None:
            desktop_file_cache = RPMCache(yum_pkg, self.yum_base, self.cache_path)
            desktop_file_cache.open()
            for filename in yum_pkg.filelist:
                if filename.endswith('.desktop'):
                    # index apps
                    print "        indexing desktop file %s" % os.path.basename(filename)
                    f = desktop_file_cache.open_file(filename, decompress_filter='*.desktop')
                    if f == None:
                        print "could not open desktop file"
                        continue

                    self.index_desktop_file(doc, f, pkg_dict, desktop_file_cache)
                    f.close()
                if filename.startswith('/usr/bin'):
                    # index executables
                    print ("        indexing exe file %s" % os.path.basename(filename))
                    exe_name = filter_search_string(os.path.basename(filename))
                    doc.fields.append(xappy.Field('cmd', "EX__%s__EX" % exe_name))

            desktop_file_cache.close()
# Example #9
# 0
    def index_files_of_interest(self, doc, package_dict):
        """Index executables of *package_dict* using the mdapi file list.

        Feeds an exact-match ``EX__<exe>__EX`` token into the indexer for
        every filename component starting with /usr/bin.  Logs a warning
        and gives up when mdapi returns no 'files' key.  *doc* is accepted
        for interface parity but not used by this variant.
        """
        name = package_dict['name']
        branch = package_dict['branch']

        # mdapi exposes the development branch under the name 'rawhide'
        if branch == 'master':
            branch = 'rawhide'

        url = "/".join([self.mdapi_url, branch, "files", name])
        data = self._call_api(url)
        if data.get('files') is not None:
            for entry in data['files']:
                # NOTE(review): split('/') drops the slashes, so the
                # startswith('/usr/bin') check below seems unable to match
                # a split component -- confirm mdapi's separator.
                filenames = entry['filenames'].split('/')
                for filename in filenames:
                    if filename.startswith('/usr/bin'):
                        # index executables
                        log.info("indexing exe file %s" %
                                 os.path.basename(filename))
                        exe_name = filter_search_string(
                            os.path.basename(filename))
                        self.indexer.index_text_without_positions(
                            "EX__%s__EX" % exe_name)
        else:
            log.warn("Failed to get file list for %r, %r" % (name, url))
            return
    def index_pkgs(self):
        """Index every yum package (and its sub-packages) into xappy.

        Builds one xappy document per source package, indexes name,
        summary, description, files and tags, folds sub-package data into
        the parent document and stores the (stripped) package dict as the
        document's JSON payload.  Returns the total count of packages and
        sub-packages processed.
        """
        yum_pkgs = self.index_yum_pkgs()
        pkg_count = 0

        for pkg in yum_pkgs.values():
            pkg_count += 1

            doc = xappy.UnprocessedDocument()
            filtered_name = filter_search_string(pkg['name'])
            filtered_summary = filter_search_string(pkg['summary'])
            filtered_description = filter_search_string(pkg['description'])

            if pkg['name'] != filtered_name:
                print("%d: indexing %s as %s" %
                      (pkg_count, pkg['name'], filtered_name))
            else:
                print("%d: indexing %s" % (pkg_count, pkg['name']))

            # heavy weight so exact literal-name hits rank first
            doc.fields.append(
                xappy.Field('exact_name',
                            'EX__' + filtered_name + '__EX',
                            weight=10.0))

            name_parts = filtered_name.split('_')
            # NOTE(review): appending the same fields 20 times looks like a
            # term-frequency boosting hack -- confirm before simplifying.
            for i in range(20):
                if len(name_parts) > 1:
                    for part in name_parts:
                        doc.fields.append(xappy.Field('name', part,
                                                      weight=1.0))
                doc.fields.append(
                    xappy.Field('name', filtered_name, weight=10.0))

            # summary repeated 4x so it outranks the description
            for i in range(4):
                doc.fields.append(
                    xappy.Field('summary', filtered_summary, weight=1.0))
            doc.fields.append(
                xappy.Field('description', filtered_description, weight=0.2))

            self.index_files(doc, pkg)
            self.index_tags(doc, pkg)

            for sub_pkg in pkg['sub_pkgs']:
                pkg_count += 1
                filtered_sub_pkg_name = filter_search_string(sub_pkg['name'])
                if filtered_sub_pkg_name != sub_pkg['name']:
                    print("%d:    indexing subpkg %s as %s" %
                          (pkg_count, sub_pkg['name'], filtered_sub_pkg_name))
                else:
                    print("%d:    indexing subpkg %s" %
                          (pkg_count, sub_pkg['name']))

                doc.fields.append(
                    xappy.Field('subpackages',
                                filtered_sub_pkg_name,
                                weight=1.0))
                doc.fields.append(
                    xappy.Field('exact_name',
                                'EX__' + filtered_sub_pkg_name + '__EX',
                                weight=10.0))

                self.index_files(doc, sub_pkg)
                self.index_tags(doc, sub_pkg)
                # promote a sub-package icon when the parent only has the
                # default one
                if sub_pkg['icon'] != self.default_icon and pkg[
                        'icon'] == self.default_icon:
                    pkg['icon'] = sub_pkg['icon']

                # remove anything we don't want to store
                del sub_pkg['pkg']

            # @@: Right now we're only indexing the first part of the
            # provides/requires, and not boolean comparison or version
            #for requires in pkg.requires:
            #    print requires[0]
            #    doc.fields.append(xappy.Field('requires', requires[0]))
            #for provides in pkg.provides:
            #    doc.fields.append(xappy.Field('provides', provides[0]))

            # remove anything we don't want to store and then store data in
            # json format
            del pkg['pkg']
            del pkg['src_pkg']

            processed_doc = self.iconn.process(doc, False)
            processed_doc._doc.set_data(json.dumps(pkg))
            # preempt xappy's processing of data
            processed_doc._data = None
            self.iconn.add(processed_doc)

        self.icon_cache.close()

        return pkg_count
    def _create_document(self, package):
        """Build and return a xappy UnprocessedDocument for *package*.

        Indexes the name (exact-match token plus underscore-separated
        parts), summary, description, files of interest and sub-packages.
        Mutates *package*: assigns sub-package icons from the icon cache,
        may promote a sub-package icon to the parent, and deletes the raw
        'package' entries so the dict can later be stored as JSON.
        """
        doc = xappy.UnprocessedDocument()
        filtered_name = filter_search_string(package['name'])
        filtered_summary = filter_search_string(package['summary'])
        filtered_description = filter_search_string(package['description'])

        # heavy weight so exact literal-name hits rank first
        doc.fields.append(
            xappy.Field('exact_name',
                        'EX__' + filtered_name + '__EX',
                        weight=10.0))

        name_parts = filtered_name.split('_')
        # NOTE(review): fields are appended 20x, apparently as a
        # term-frequency boost -- confirm before simplifying.
        for i in range(20):
            if len(name_parts) > 1:
                for part in name_parts:
                    doc.fields.append(xappy.Field('name', part, weight=1.0))
            doc.fields.append(xappy.Field('name', filtered_name, weight=10.0))

        # summary repeated 4x so it outranks the description
        for i in range(4):
            doc.fields.append(
                xappy.Field('summary', filtered_summary, weight=1.0))
        doc.fields.append(
            xappy.Field('description', filtered_description, weight=0.2))

        self.index_files_of_interest(doc, package)
        self.index_tags(doc, package)

        for sub_package in package['sub_pkgs']:
            filtered_sub_package_name = filter_search_string(
                sub_package['name'])
            log.info("       indexing subpackage %s" % sub_package['name'])

            doc.fields.append(
                xappy.Field('subpackages',
                            filtered_sub_package_name,
                            weight=1.0))
            doc.fields.append(
                xappy.Field('exact_name',
                            'EX__' + filtered_sub_package_name + '__EX',
                            weight=10.0))

            self.index_files_of_interest(doc, sub_package)

            # fedora-tagger does not provide special tags for sub-packages...
            #self.index_tags(doc, sub_package)

            # Set special sub-package icon if appstream has one
            sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                      self.default_icon)

            # If the parent has a dull icon, give it ours!
            if sub_package['icon'] != self.default_icon \
                and package['icon'] == self.default_icon:
                package['icon'] = sub_package['icon']

            # remove anything we don't want to store
            del sub_package['package']

        # @@: Right now we're only indexing the first part of the
        # provides/requires, and not boolean comparison or version
        #for requires in package.requires:
        #    print requires[0]
        #    doc.fields.append(xappy.Field('requires', requires[0]))
        #for provides in package.provides:
        #    doc.fields.append(xappy.Field('provides', provides[0]))

        # remove anything we don't want to store and then store data in
        # json format
        del package['package']

        return doc
# Example #12
# 0
    def _create_document(self, package, old_doc=None):
        """Index *package* into a new xapian document and commit it.

        Indexes exact-match tokens for name and owner, name parts,
        summary, description, files of interest and sub-packages, then
        stores the (stripped) package dict as the document's JSON data.
        Adds the document to the database and, when *old_doc* is given,
        deletes it afterwards to emulate an in-place replace.
        Mutates *package* (icons, 'package' keys removed).
        """
        doc = xapian.Document()
        self.indexer.set_document(doc)
        filtered_name = filter_search_string(package['name'])
        filtered_summary = filter_search_string(package['summary'])
        filtered_description = filter_search_string(package['description'])
        filtered_owner = filter_search_string(package['devel_owner'])

        # exact-match tokens get a 10x within-document frequency boost
        self.indexer.index_text_without_positions(
            'EX__' + filtered_name + '__EX', 10, '')
        self.indexer.index_text_without_positions(
            'EX__' + filtered_owner + '__EX', 10, '')

        name_parts = filtered_name.split('_')
        # NOTE(review): repeated 20x, apparently a term-frequency boost --
        # confirm before simplifying.
        for i in range(20):
            if len(name_parts) > 1:
                for part in name_parts:
                    self.indexer.index_text_without_positions(part)
            self.indexer.index_text_without_positions(filtered_name, 10, '')

        # summary indexed 4x so it outranks the description
        for i in range(4):
            self.indexer.index_text_without_positions(filtered_summary)
        self.indexer.index_text_without_positions(filtered_description)

        self.index_files_of_interest(doc, package)

        for sub_package in package['sub_pkgs']:
            filtered_sub_package_name = filter_search_string(
                sub_package['name'])
            log.info("       indexing subpackage %s" % sub_package['name'])

            self.indexer.index_text_without_positions(
                filtered_sub_package_name)
            self.indexer.index_text_without_positions(
                'EX__' + filtered_sub_package_name + '__EX', 10, '')

            self.index_files_of_interest(doc, sub_package)

            # Set special sub-package icon if appstream has one
            sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                      self.default_icon)

            # If the parent has a dull icon, give it ours!
            if sub_package['icon'] != self.default_icon \
                    and package['icon'] == self.default_icon:
                package['icon'] = sub_package['icon']

            # remove anything we don't want to store
            del sub_package['package']

        # @@: Right now we're only indexing the first part of the
        # provides/requires, and not boolean comparison or version
        # for requires in package.requires:
        #    print requires[0]
        #    doc.fields.append(xappy.Field('requires', requires[0]))
        # for provides in package.provides:
        #    doc.fields.append(xappy.Field('provides', provides[0]))

        # remove anything we don't want to store and then store data in
        # json format
        del package['package']

        doc.set_data(json.dumps(package))

        # It seems that xapian db.replace_document still creates a new
        # document. In order to avoid duplicating the document we are
        # using add_document and then delete the old document.
        self.db.add_document(doc)
        if old_doc is not None:
            self.db.delete_document(old_doc.get_docid())
        self.db.commit()
    def update_db(self, timestamp=None):
        """ ask koji for any changes after we last ran the mapper
            if a timestamp is provided in ISO format ('YYYY-MM-DD HH:MI:SS')
            use that instead

            For every completed tagBuild task since *timestamp*, maps the
            build's koji tag to a dist name and either creates a new index
            document for the package or updates the stored version data
            when the build is newer (RPM EVR comparison).  Finally records
            the new run timestamp.
        """

        # a numeric string means seconds-since-epoch; ISO strings fall
        # through unchanged
        try:
            timestamp = float(timestamp)
        except (ValueError, TypeError):
            pass

        if not timestamp:
            # no explicit timestamp: resume from the stored one
            timestamp = self.get_current_timestamp()
            try:
                timestamp = float(timestamp)
            except (ValueError, TypeError):
                pass

            if not timestamp:
                print "Error: you need to specify a time to update from in ISO format ('YYYY-MM-DD HH:MI:SS') or run init"
                exit(-1)

        # back-date by a minute to tolerate clock skew between servers
        self.new_timestamp = time.time() - 60
        print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))

        opts = {
            'completedAfter': timestamp,
            'method': 'tagBuild',
            'decode': True
        }

        if isinstance(timestamp, float):
            display_timestamp = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(timestamp))
        else:
            display_timestamp = timestamp
        print "Getting Task List since %s" % display_timestamp
        task_list = self.koji_client.listTasks(opts=opts)
        print "Updating Index"
        for task in task_list:
            parent_id = task['parent']
            if parent_id:
                builds = self.koji_client.listBuilds(taskID=parent_id)

                if len(builds) < 1:
                    continue

                build = builds[0]

                # map the build's koji tags to a dist name we track; skip
                # builds for tags we don't care about
                pkg_tags = self.koji_client.listTags(build['build_id'])
                dist_name = None
                for t in pkg_tags:
                    dist_name = tags_to_name_map.get(t['name'], None)
                    if dist_name:
                        break

                if not dist_name:
                    continue

                # find the existing document: per-run cache first, then
                # the search index
                pkg_doc = None
                if build['name'] in self.found_packages:
                    pkg_doc = self.found_packages[build['name']]
                else:
                    results = self.search(build['name'])

                    if results:
                        pkg_doc = results[0]

                # normalize epoch to a string for rpm.labelCompare
                build_epoch = build.get('epoch', None)
                if build_epoch is not None:
                    build_epoch = str(build_epoch)

                if not pkg_doc:
                    # TODO create new document
                    print "ran into new package %s" % build['name']
                    self.new_packages[build['name']] = True
                    doc = xappy.UnprocessedDocument()
                    filtered_name = filter_search_string(build['name'])
                    doc.fields.append(xappy.Field('key', filtered_name))

                    latest_builds = {'name': build['name']}
                    data = {}
                    if build_epoch is not None:
                        data['epoch'] = build_epoch
                    data['version'] = build['version']
                    data['release'] = build['release']
                    data['build_id'] = build['build_id']
                    latest_builds[dist_name] = data

                    processed_doc = self.iconn.process(doc, False)
                    processed_doc._doc.set_data(json.dumps(latest_builds))
                    # preempt xappy's processing of data
                    processed_doc._data = None
                    self.iconn.add(processed_doc)
                    self.sconn_needs_reload = True
                    self.iconn.flush()
                else:
                    latest_builds = json.loads(pkg_doc._doc.get_data())
                    data = latest_builds.get(dist_name, {
                        'version': '0',
                        'release': '0',
                        'build_id': 0
                    })
                    data_epoch = None
                    do_update = False
                    if 'release' not in data:
                        # do the update because we have old data
                        do_update = True
                    else:
                        data_epoch = data.get('epoch', None)
                        if data_epoch is not None:
                            data_epoch = str(data_epoch)

                        # RPM EVR comparison: 1 means the new build is newer
                        if rpm.labelCompare(
                            (build_epoch, build['version'], build['release']),
                            (data_epoch, data['version'],
                             data['release'])) == 1:
                            do_update = True

                    if do_update:
                        self.updated_packages[build['name']] = True
                        # human-readable [epoch:]version.release strings for
                        # the progress output below
                        build_vr = ''
                        if build_epoch is not None:
                            build_vr = "%s:%s.%s" % (build_epoch,
                                                     build['version'],
                                                     build['release'])
                        else:
                            build_vr = "%s.%s" % (build['version'],
                                                  build['release'])

                        data_vr = ''
                        if data_epoch is not None:
                            data_vr = "%s:%s.%s" % (data_epoch,
                                                    data['version'],
                                                    data.get('release', ''))
                        else:
                            data_vr = "%s.%s" % (data['version'],
                                                 data.get('release', ''))

                        print "Updating package %s in dist %s to version %s (from %s)" % (
                            build['name'], dist_name, build_vr, data_vr)

                        if build_epoch is not None:
                            data['epoch'] = build_epoch
                        data['version'] = build['version']
                        data['release'] = build['release']
                        data['build_id'] = build['build_id']
                        latest_builds[dist_name] = data

                        pkg_doc._doc.set_data(json.dumps(latest_builds))
                        # preempt xappy's processing of data
                        pkg_doc._data = None
                        self.iconn.replace(pkg_doc)
                        self.sconn_needs_reload = True
                        self.found_packages[build['name']] = pkg_doc
                        self.iconn.flush()

        updated_count = len(self.updated_packages)
        new_count = len(self.new_packages)
        print "Updated: %d packages" % updated_count
        print "  Added: %d packages" % new_count
        print "========================="
        print "  Total: %s" % (updated_count + new_count)

        self.update_timestamp(self.new_timestamp)
    def init_db(self, *args):
        """
        loop through all packages and get the latest builds for koji tags
        listed in distmappings

        One xappy document is created per package with a 'key' field and a
        JSON payload mapping each tracked dist name to its latest
        completed build.  Packages with no build data are skipped.
        """
        # back-date by a minute to tolerate clock skew between servers
        self.new_timestamp = time.time() - 60
        print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))

        print "Initializing Index"
        package_list = self.koji_client.listPackages()
        i = 0
        for pkg in package_list:
            i += 1
            pkg_name = pkg['package_name']
            print "%d: Processing package %s" % (i, pkg_name)
            # NOTE(review): name_len is computed but never used in this block
            name_len = len(pkg_name)

            doc = xappy.UnprocessedDocument()
            filtered_name = filter_search_string(pkg_name)
            doc.fields.append(xappy.Field('key', filtered_name))

            latest_builds = {'name': pkg_name}
            for t in tags:
                tag = t['tag']
                if t['name'] in latest_builds:
                    # short circuit optimization
                    continue

                builds = self.koji_client.getLatestBuilds(tag,
                                                          package=pkg_name)
                if builds:
                    build = None
                    for b in builds:
                        # only get builds which completed
                        if b['state'] == koji.BUILD_STATES['COMPLETE']:
                            build = b
                            break

                    if build:
                        data = {
                            'version': build['version'],
                            'release': build['release'],
                            'build_id': build['build_id']
                        }

                        # epoch is optional; include it in the display
                        # string only when present
                        if build.get('epoch', None) != None:
                            data['epoch'] = str(build['epoch'])
                            version_display = "%s:%s.%s" % (data['epoch'],
                                                            data['version'],
                                                            data['release'])
                        else:
                            version_display = "%s.%s" % (data['version'],
                                                         data['release'])

                        latest_builds[t['name']] = data
                        print "    %s: %s" % (t['name'], version_display)

            if len(latest_builds) < 2:
                # don't process doc if there is no real data
                # most likely this is an outdated package
                continue

            processed_doc = self.iconn.process(doc, False)
            processed_doc._doc.set_data(json.dumps(latest_builds))
            # preempt xappy's processing of data
            processed_doc._data = None
            self.iconn.add(processed_doc)

        print "Finished updating timestamp"
        self.update_timestamp(self.new_timestamp)
    def init_db(self, *args):
        """
        loop through all packages and get the latest builds for koji tags
        listed in distmappings

        Creates one xappy document per package, keyed on the filtered
        package name, whose JSON payload maps each tracked dist name to
        its latest completed koji build.  Packages with no build data are
        skipped.
        """
        # back-date by a minute to tolerate clock skew between servers
        self.new_timestamp = time.time() - 60
        print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))

        print "Initializing Index"
        package_list = self.koji_client.listPackages()
        i = 0
        for pkg in package_list:
            i += 1
            pkg_name = pkg['package_name']
            print "%d: Processing package %s" % (i, pkg_name)
            # NOTE(review): name_len is computed but never used in this block
            name_len = len(pkg_name)

            doc = xappy.UnprocessedDocument()
            filtered_name = filter_search_string(pkg_name)
            doc.fields.append(xappy.Field('key', filtered_name))

            latest_builds = {'name': pkg_name}
            for t in tags:
                tag = t['tag']
                if t['name'] in latest_builds:
                    # short circuit optimization
                    continue

                builds = self.koji_client.getLatestBuilds(tag, package=pkg_name)
                if builds:
                    build = None
                    for b in builds:
                        # only get builds which completed
                        if b['state'] == koji.BUILD_STATES['COMPLETE']:
                            build = b
                            break

                    if build:
                        data = {'version': build['version'],
                                'release': build['release'],
                                'build_id': build['build_id']}

                        # epoch is optional; include it in the display
                        # string only when present
                        if build.get('epoch', None) != None:
                            data['epoch'] = str(build['epoch'])
                            version_display = "%s:%s.%s" % (data['epoch'], data['version'], data['release'])
                        else:
                            version_display = "%s.%s" % (data['version'], data['release'])

                        latest_builds[t['name']] = data
                        print "    %s: %s" % (t['name'], version_display)

            if len(latest_builds) < 2:
                # don't process doc if there is no real data
                # most likely this is an outdated package
                continue

            processed_doc = self.iconn.process(doc, False)
            processed_doc._doc.set_data(json.dumps(latest_builds))
            # preempt xappy's processing of data
            processed_doc._data = None
            self.iconn.add(processed_doc)

        print "Finished updating timestamp"
        self.update_timestamp(self.new_timestamp)
    def update_db(self, timestamp=None):
        """ ask koji for any changes after we last ran the mapper
            if a timestamp is provided in ISO format ('YYYY-MM-DD HH:MI:SS')
            use that instead; a numeric string/float is treated as an epoch
            timestamp.  New packages are added to the index, known packages
            have their per-dist version data bumped when koji has a newer
            build.
        """

        # Numeric timestamps (epoch seconds) may arrive as strings; coerce
        # them to float.  ISO strings raise ValueError and pass through
        # unchanged — koji accepts both forms in 'completedAfter'.
        try:
            timestamp = float(timestamp)
        except (ValueError, TypeError):
            pass

        if not timestamp:
            # No caller-supplied time: fall back to the watermark stored by
            # the previous run.
            timestamp = self.get_current_timestamp()
            try:
                timestamp = float(timestamp)
            except (ValueError, TypeError):
                pass

            if not timestamp:
                print "Error: you need to specify a time to update from in ISO format ('YYYY-MM-DD HH:MI:SS') or run init"
                exit(-1)

        # Watermark for the NEXT run: "now minus 60s" to absorb clock skew
        # between this host and the koji servers (saved at the very end).
        self.new_timestamp = time.time() - 60
        print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))


        # Only tagBuild tasks completed after the watermark are relevant.
        opts = {'completedAfter': timestamp,
                'method': 'tagBuild',
                'decode': True}

        # Pretty-print epoch timestamps for the log line; ISO strings are
        # already human-readable.
        if isinstance(timestamp, float):
            display_timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
        else:
            display_timestamp = timestamp
        print "Getting Task List since %s" % display_timestamp
        task_list = self.koji_client.listTasks(opts=opts)
        print "Updating Index"
        for task in task_list:
            # The build is attached to the parent task, not the tagBuild
            # subtask itself; skip tasks without a parent.
            parent_id = task['parent']
            if parent_id:
                builds = self.koji_client.listBuilds(taskID=parent_id)

                if len(builds) < 1:
                    continue

                build = builds[0]

                # Map the build's koji tags to one of our known dist names;
                # builds tagged outside the dists we track are ignored.
                pkg_tags = self.koji_client.listTags(build['build_id'])
                dist_name = None
                for t in pkg_tags:
                    dist_name = tags_to_name_map.get(t['name'], None)
                    if dist_name:
                        break

                if not dist_name:
                    continue

                # Look the package up: first in the in-memory cache of
                # documents touched this run, then in the search index.
                pkg_doc = None
                if build['name'] in self.found_packages:
                    pkg_doc = self.found_packages[build['name']]
                else:
                    results = self.search(build['name'])

                    if results:
                        pkg_doc = results[0]

                # Normalize epoch to a string (or None) so it compares
                # consistently in labelCompare and serializes to JSON.
                build_epoch = build.get('epoch', None)
                if build_epoch is not None:
                    build_epoch = str(build_epoch)

                if not pkg_doc:
                    # TODO create new document
                    # Package not in the index yet: create a minimal document
                    # keyed on the filtered name, carrying this build's data.
                    print "ran into new package %s" % build['name']
                    self.new_packages[build['name']] = True
                    doc = xappy.UnprocessedDocument()
                    filtered_name = filter_search_string(build['name'])
                    doc.fields.append(xappy.Field('key', filtered_name))

                    latest_builds = {'name': build['name']}
                    data = {}
                    if build_epoch is not None:
                        data['epoch'] = build_epoch
                    data['version'] = build['version']
                    data['release'] = build['release']
                    data['build_id'] = build['build_id']
                    latest_builds[dist_name] = data

                    processed_doc = self.iconn.process(doc, False)
                    processed_doc._doc.set_data(json.dumps(latest_builds))
                    # preempt xappy's processing of data
                    processed_doc._data = None
                    self.iconn.add(processed_doc)
                    # index changed on disk — reopen the search connection
                    # before the next search() call
                    self.sconn_needs_reload = True
                    self.iconn.flush()
                else:
                    # Known package: compare the koji build against the
                    # version data stored in the document's JSON payload.
                    latest_builds = json.loads(pkg_doc._doc.get_data())
                    data = latest_builds.get(dist_name, {'version': '0',
                                                         'release': '0',
                                                         'build_id': 0})
                    data_epoch = None
                    do_update = False
                    if 'release' not in data:
                        # do the update because we have old data
                        do_update = True
                    else:
                        data_epoch = data.get('epoch', None)
                        if data_epoch is not None:
                            data_epoch = str(data_epoch)

                        # rpm.labelCompare returns 1 when the first
                        # (epoch, version, release) tuple is newer
                        if rpm.labelCompare(
                            (build_epoch, build['version'], build['release']),
                            (data_epoch, data['version'], data['release'])) == 1:
                            do_update = True

                    if do_update:
                        self.updated_packages[build['name']] = True
                        # Human-readable [epoch:]version.release strings for
                        # the log line below.
                        build_vr = ''
                        if build_epoch is not None:
                            build_vr = "%s:%s.%s" % (build_epoch, build['version'], build['release'])
                        else:
                            build_vr = "%s.%s" % (build['version'], build['release'])

                        data_vr = ''
                        if data_epoch is not None:
                            data_vr = "%s:%s.%s" % (data_epoch, data['version'], data.get('release',''))
                        else:
                            data_vr = "%s.%s" % (data['version'], data.get('release', ''))

                        print "Updating package %s in dist %s to version %s (from %s)" % (
                                build['name'], dist_name, build_vr, data_vr)

                        # Overwrite the per-dist entry with the newer build.
                        if build_epoch is not None:
                            data['epoch'] = build_epoch
                        data['version'] = build['version']
                        data['release'] = build['release']
                        data['build_id'] = build['build_id']
                        latest_builds[dist_name] = data

                        pkg_doc._doc.set_data(json.dumps(latest_builds))
                        # preempt xappy's processing of data
                        pkg_doc._data = None
                        self.iconn.replace(pkg_doc)
                        self.sconn_needs_reload = True
                        self.found_packages[build['name']] = pkg_doc
                        self.iconn.flush()

        # Summary and persist the new watermark for the next run.
        updated_count = len(self.updated_packages)
        new_count = len(self.new_packages)
        print "Updated: %d packages" % updated_count
        print "  Added: %d packages" % new_count
        print "========================="
        print "  Total: %s" % (updated_count + new_count)

        self.update_timestamp(self.new_timestamp)
# --- Exemple #17 ---
    def index_pkgs(self):
        """Index every yum package (and all of its subpackages) into xappy.

        Builds one UnprocessedDocument per source package, attaches name,
        summary, description, file and tag fields (plus subpackage fields),
        stores the package dict as the document's JSON payload, and returns
        the total number of packages processed (subpackages included).
        """
        packages = self.index_yum_pkgs()
        count = 0

        for pkg in packages.values():
            count += 1

            document = xappy.UnprocessedDocument()
            name_term = filter_search_string(pkg['name'])
            summary_term = filter_search_string(pkg['summary'])
            description_term = filter_search_string(pkg['description'])

            if pkg['name'] == name_term:
                print("%d: indexing %s" % (count, pkg['name']))
            else:
                print("%d: indexing %s as %s" % (count, pkg['name'], name_term))

            document.fields.append(
                xappy.Field('exact_name', 'EX__' + name_term + '__EX',
                            weight=10.0))

            # Repeat the name fields 20 times over to inflate their weight
            # relative to summary/description.
            name_pieces = name_term.split('_')
            for _ in range(20):
                if len(name_pieces) > 1:
                    for piece in name_pieces:
                        document.fields.append(
                            xappy.Field('name', piece, weight=1.0))
                document.fields.append(
                    xappy.Field('name', name_term, weight=10.0))

            # Summary is repeated 4x; description carries the lowest weight.
            for _ in range(4):
                document.fields.append(
                    xappy.Field('summary', summary_term, weight=1.0))
            document.fields.append(
                xappy.Field('description', description_term, weight=0.2))

            self.index_files(document, pkg)
            self.index_tags(document, pkg)

            for sub in pkg['sub_pkgs']:
                count += 1
                sub_name_term = filter_search_string(sub['name'])
                if sub['name'] == sub_name_term:
                    print("%d:    indexing subpkg %s" % (count, sub['name']))
                else:
                    print("%d:    indexing subpkg %s as %s"
                          % (count, sub['name'], sub_name_term))

                document.fields.append(
                    xappy.Field('subpackages', sub_name_term, weight=1.0))
                document.fields.append(
                    xappy.Field('exact_name',
                                'EX__' + sub_name_term + '__EX', weight=10.0))

                self.index_files(document, sub)
                self.index_tags(document, sub)
                # Promote a non-default subpackage icon to a parent that is
                # still using the default icon.
                if sub['icon'] != self.default_icon \
                        and pkg['icon'] == self.default_icon:
                    pkg['icon'] = sub['icon']

                # raw yum package objects are not JSON-serializable
                del sub['pkg']

            # @@: Right now we're only indexing the first part of the
            # provides/requires, and not boolean comparison or version

            # Drop the remaining non-serializable objects, then store the
            # package dict as the document's JSON payload.
            del pkg['pkg']
            del pkg['src_pkg']

            processed = self.iconn.process(document, False)
            processed._doc.set_data(json.dumps(pkg))
            # preempt xappy's processing of data
            processed._data = None
            self.iconn.add(processed)

        self.icon_cache.close()

        return count
# --- Exemple #18 ---
    def _create_document(self, package, old_doc=None):
        """Build a xapian document for *package* and commit it to the index.

        Indexes name/owner/summary/description terms (with repetition-based
        weighting), files of interest and subpackage terms, then stores the
        package dict (minus raw package objects) as the document's JSON
        payload.  When *old_doc* is given, that stale document is deleted
        after the replacement has been added.
        """
        document = xapian.Document()
        self.indexer.set_document(document)

        name_term = filter_search_string(package['name'])
        summary_term = filter_search_string(package['summary'])
        description_term = filter_search_string(package['description'])
        owner_term = filter_search_string(package['devel_owner'])

        # Heavily weighted exact-match markers for name and owner.
        self.indexer.index_text_without_positions(
            'EX__' + name_term + '__EX', 10, '')
        self.indexer.index_text_without_positions(
            'EX__' + owner_term + '__EX', 10, '')

        # Repeat the name terms 20x to inflate their weight relative to
        # summary/description.
        name_pieces = name_term.split('_')
        for _ in range(20):
            if len(name_pieces) > 1:
                for piece in name_pieces:
                    self.indexer.index_text_without_positions(piece)
            self.indexer.index_text_without_positions(name_term, 10, '')

        # Summary is repeated 4x; description is indexed once.
        for _ in range(4):
            self.indexer.index_text_without_positions(summary_term)
        self.indexer.index_text_without_positions(description_term)

        self.index_files_of_interest(document, package)

        for sub in package['sub_pkgs']:
            sub_name_term = filter_search_string(sub['name'])
            log.info("       indexing subpackage %s" % sub['name'])

            self.indexer.index_text_without_positions(sub_name_term)
            self.indexer.index_text_without_positions(
                'EX__' + sub_name_term + '__EX', 10, '')

            self.index_files_of_interest(document, sub)

            # Set special sub-package icon if appstream has one
            sub['icon'] = self.icon_cache.get(sub['name'], self.default_icon)

            # If the parent has a dull icon, give it ours!
            if sub['icon'] != self.default_icon \
                    and package['icon'] == self.default_icon:
                package['icon'] = sub['icon']

            # raw package objects are not JSON-serializable
            del sub['package']

        # @@: Right now we're only indexing the first part of the
        # provides/requires, and not boolean comparison or version

        # Drop the remaining non-serializable object, then store the package
        # dict as the document's JSON payload.
        del package['package']

        document.set_data(json.dumps(package))

        # It seems that xapian db.replace_document still creates a new
        # document. In order to avoid duplicating the document we are
        # using add_document and then delete the old document.
        self.db.add_document(document)
        if old_doc is not None:
            self.db.delete_document(old_doc.get_docid())
        self.db.commit()