def index_files(self, doc, pkg_dict):
    yum_pkg = pkg_dict['pkg']
    if yum_pkg is not None:
        desktop_file_cache = RPMCache(yum_pkg, self.yum_base, self.cache_path)
        desktop_file_cache.open()
        for filename in yum_pkg.filelist:
            if filename.endswith('.desktop'):
                # index apps
                print " indexing desktop file %s" % os.path.basename(filename)
                f = desktop_file_cache.open_file(
                    filename, decompress_filter='*.desktop')
                if f is None:
                    print "could not open desktop file %s" % filename
                    continue
                self.index_desktop_file(doc, f, pkg_dict, desktop_file_cache)
                f.close()
            if filename.startswith('/usr/bin'):
                # index executables
                print " indexing exe file %s" % os.path.basename(filename)
                exe_name = filter_search_string(os.path.basename(filename))
                doc.fields.append(xappy.Field('cmd', "EX__%s__EX" % exe_name))
        desktop_file_cache.close()
def search(self, key):
    if self.sconn_needs_reload:
        self.sconn.reopen()
    q = self.sconn.query_parse('key:%s' % filter_search_string(key))
    results = self.sconn.search(q, 0, 1)
    return results
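# filter_search_string() is used throughout this module but defined elsewhere.
# A minimal sketch of what it plausibly does, judging from the call sites
# (filtered names are later split on '_', and Fedora package names contain
# hyphens and dots): fold token separators so Xapian sees a single term.
# This is an assumption, not the project's actual implementation.
def filter_search_string(s):
    # e.g. 'python-requests' -> 'python_requests'
    return s.replace('-', '_').replace('.', '_')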
def index_desktop_file(self, doc, desktop_file, pkg_dict, desktop_file_cache):
    doc.fields.append(xappy.Field('tag', 'desktop'))
    dp = DesktopParser(desktop_file)
    category = dp.get('Categories', '')
    for c in category.split(';'):
        if c:
            c = filter_search_string(c)
            doc.fields.append(xappy.Field('category_tags', c))
            # add exact match also
            doc.fields.append(xappy.Field('category_tags', "EX__%s__EX" % c))
    icon = dp.get('Icon', '')
    if icon:
        print "Icon %s" % icon
        generated_icon = self.icon_cache.generate_icon(icon, desktop_file_cache)
        if generated_icon is not None:
            pkg_dict['icon'] = icon
def index_files_of_interest(self, doc, package_dict):
    name = package_dict['name']
    branch = package_dict['branch']
    if branch == 'master':
        branch = 'rawhide'
    url = "/".join([self.mdapi_url, branch, "files", name])
    data = self._call_api(url)
    if data.get('files') is not None:
        for entry in data['files']:
            filenames = entry['filenames'].split('/')
            for filename in filenames:
                if filename.startswith('/usr/bin'):
                    # index executables
                    log.info("indexing exe file %s" % os.path.basename(filename))
                    exe_name = filter_search_string(os.path.basename(filename))
                    self.indexer.index_text_without_positions(
                        "EX__%s__EX" % exe_name)
    else:
        log.warn("Failed to get file list for %r, %r" % (name, url))
        return
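# _call_api() is referenced above but not part of this excerpt. A minimal
# sketch, assuming it wraps an HTTP GET against mdapi and returns the decoded
# JSON payload (or an empty dict on failure); the helper below is
# hypothetical, not the project's actual implementation.
import requests

def _call_api(self, url):
    response = requests.get(url, timeout=30)
    if not response.ok:
        return {}
    return response.json()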
def index_files_of_interest(self, doc, package_dict):
    name = package_dict['name']
    branch = package_dict['branch']
    if branch == 'master':
        branch = 'rawhide'
    url = "/".join([self.mdapi_url, branch, "files", name])
    response = local.http.get(url)
    if not bool(response):
        log.warn("Failed to get file list for %r, %r" % (name, response))
        return
    data = response.json()
    for entry in data['files']:
        filenames = entry['filenames'].split('/')
        for filename in filenames:
            if filename.startswith('/usr/bin'):
                # index executables
                log.info(" indexing exe file %s" % os.path.basename(filename))
                exe_name = filter_search_string(os.path.basename(filename))
                doc.fields.append(xappy.Field('cmd', "EX__%s__EX" % exe_name))
def index_pkgs(self):
    yum_pkgs = self.index_yum_pkgs()
    pkg_count = 0

    for pkg in yum_pkgs.values():
        pkg_count += 1

        doc = xappy.UnprocessedDocument()
        filtered_name = filter_search_string(pkg['name'])
        filtered_summary = filter_search_string(pkg['summary'])
        filtered_description = filter_search_string(pkg['description'])

        if pkg['name'] != filtered_name:
            print("%d: indexing %s as %s" % (pkg_count, pkg['name'],
                                             filtered_name))
        else:
            print("%d: indexing %s" % (pkg_count, pkg['name']))

        doc.fields.append(
            xappy.Field('exact_name', 'EX__' + filtered_name + '__EX',
                        weight=10.0))

        name_parts = filtered_name.split('_')
        for i in range(20):
            if len(name_parts) > 1:
                for part in name_parts:
                    doc.fields.append(xappy.Field('name', part, weight=1.0))
            doc.fields.append(xappy.Field('name', filtered_name, weight=10.0))

        for i in range(4):
            doc.fields.append(
                xappy.Field('summary', filtered_summary, weight=1.0))

        doc.fields.append(
            xappy.Field('description', filtered_description, weight=0.2))

        self.index_files(doc, pkg)
        self.index_tags(doc, pkg)

        for sub_pkg in pkg['sub_pkgs']:
            pkg_count += 1

            filtered_sub_pkg_name = filter_search_string(sub_pkg['name'])
            if filtered_sub_pkg_name != sub_pkg['name']:
                print("%d: indexing subpkg %s as %s" % (
                    pkg_count, sub_pkg['name'], filtered_sub_pkg_name))
            else:
                print("%d: indexing subpkg %s" % (pkg_count, sub_pkg['name']))

            doc.fields.append(
                xappy.Field('subpackages', filtered_sub_pkg_name, weight=1.0))
            doc.fields.append(
                xappy.Field('exact_name',
                            'EX__' + filtered_sub_pkg_name + '__EX',
                            weight=10.0))

            self.index_files(doc, sub_pkg)
            self.index_tags(doc, sub_pkg)

            if sub_pkg['icon'] != self.default_icon \
                    and pkg['icon'] == self.default_icon:
                pkg['icon'] = sub_pkg['icon']

            # remove anything we don't want to store
            del sub_pkg['pkg']

        # @@: Right now we're only indexing the first part of the
        #     provides/requires, and not boolean comparison or version
        #for requires in pkg.requires:
        #    print requires[0]
        #    doc.fields.append(xappy.Field('requires', requires[0]))
        #for provides in pkg.provides:
        #    doc.fields.append(xappy.Field('provides', provides[0]))

        # remove anything we don't want to store and then store data in
        # json format
        del pkg['pkg']
        del pkg['src_pkg']

        processed_doc = self.iconn.process(doc, False)
        processed_doc._doc.set_data(json.dumps(pkg))
        # preempt xappy's processing of data
        processed_doc._data = None
        self.iconn.add(processed_doc)

    self.icon_cache.close()

    return pkg_count
def _create_document(self, package):
    doc = xappy.UnprocessedDocument()
    filtered_name = filter_search_string(package['name'])
    filtered_summary = filter_search_string(package['summary'])
    filtered_description = filter_search_string(package['description'])

    doc.fields.append(
        xappy.Field('exact_name', 'EX__' + filtered_name + '__EX',
                    weight=10.0))

    name_parts = filtered_name.split('_')
    for i in range(20):
        if len(name_parts) > 1:
            for part in name_parts:
                doc.fields.append(xappy.Field('name', part, weight=1.0))
        doc.fields.append(xappy.Field('name', filtered_name, weight=10.0))

    for i in range(4):
        doc.fields.append(
            xappy.Field('summary', filtered_summary, weight=1.0))

    doc.fields.append(
        xappy.Field('description', filtered_description, weight=0.2))

    self.index_files_of_interest(doc, package)
    self.index_tags(doc, package)

    for sub_package in package['sub_pkgs']:
        filtered_sub_package_name = filter_search_string(sub_package['name'])
        log.info(" indexing subpackage %s" % sub_package['name'])

        doc.fields.append(
            xappy.Field('subpackages', filtered_sub_package_name, weight=1.0))
        doc.fields.append(
            xappy.Field('exact_name',
                        'EX__' + filtered_sub_package_name + '__EX',
                        weight=10.0))

        self.index_files_of_interest(doc, sub_package)

        # fedora-tagger does not provide special tags for sub-packages...
        #self.index_tags(doc, sub_package)

        # Set special sub-package icon if appstream has one
        sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                  self.default_icon)

        # If the parent has a dull icon, give it ours!
        if sub_package['icon'] != self.default_icon \
                and package['icon'] == self.default_icon:
            package['icon'] = sub_package['icon']

        # remove anything we don't want to store
        del sub_package['package']

    # @@: Right now we're only indexing the first part of the
    #     provides/requires, and not boolean comparison or version
    #for requires in package.requires:
    #    print requires[0]
    #    doc.fields.append(xappy.Field('requires', requires[0]))
    #for provides in package.provides:
    #    doc.fields.append(xappy.Field('provides', provides[0]))

    # remove anything we don't want to store and then store data in
    # json format
    del package['package']

    return doc
def _create_document(self, package, old_doc=None):
    doc = xapian.Document()
    self.indexer.set_document(doc)

    filtered_name = filter_search_string(package['name'])
    filtered_summary = filter_search_string(package['summary'])
    filtered_description = filter_search_string(package['description'])
    filtered_owner = filter_search_string(package['devel_owner'])

    self.indexer.index_text_without_positions(
        'EX__' + filtered_name + '__EX', 10, '')
    self.indexer.index_text_without_positions(
        'EX__' + filtered_owner + '__EX', 10, '')

    name_parts = filtered_name.split('_')
    for i in range(20):
        if len(name_parts) > 1:
            for part in name_parts:
                self.indexer.index_text_without_positions(part)
        self.indexer.index_text_without_positions(filtered_name, 10, '')

    for i in range(4):
        self.indexer.index_text_without_positions(filtered_summary)

    self.indexer.index_text_without_positions(filtered_description)

    self.index_files_of_interest(doc, package)

    for sub_package in package['sub_pkgs']:
        filtered_sub_package_name = filter_search_string(sub_package['name'])
        log.info(" indexing subpackage %s" % sub_package['name'])

        self.indexer.index_text_without_positions(filtered_sub_package_name)
        self.indexer.index_text_without_positions(
            'EX__' + filtered_sub_package_name + '__EX', 10, '')

        self.index_files_of_interest(doc, sub_package)

        # Set special sub-package icon if appstream has one
        sub_package['icon'] = self.icon_cache.get(sub_package['name'],
                                                  self.default_icon)

        # If the parent has a dull icon, give it ours!
        if sub_package['icon'] != self.default_icon \
                and package['icon'] == self.default_icon:
            package['icon'] = sub_package['icon']

        # remove anything we don't want to store
        del sub_package['package']

    # @@: Right now we're only indexing the first part of the
    #     provides/requires, and not boolean comparison or version
    # for requires in package.requires:
    #     print requires[0]
    #     doc.fields.append(xappy.Field('requires', requires[0]))
    # for provides in package.provides:
    #     doc.fields.append(xappy.Field('provides', provides[0]))

    # remove anything we don't want to store and then store data in
    # json format
    del package['package']
    doc.set_data(json.dumps(package))

    # It seems that xapian db.replace_document still creates a new
    # document. In order to avoid duplicating the document we are
    # using add_document and then delete the old document.
    self.db.add_document(doc)
    if old_doc is not None:
        self.db.delete_document(old_doc.get_docid())
    self.db.commit()
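# A sketch of how old_doc might be located before calling _create_document(),
# assuming the exact-name marker term identifies an existing package record.
# The query shape (and the lowercasing, since xapian's TermGenerator lowercases
# indexed terms) is an assumption, not the project's actual lookup code.
def _find_old_doc(self, name):
    enquire = xapian.Enquire(self.db)
    enquire.set_query(
        xapian.Query(('EX__%s__EX' % filter_search_string(name)).lower()))
    matches = enquire.get_mset(0, 1)
    for match in matches:
        return match.document
    return None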
def update_db(self, timestamp=None):
    """ ask koji for any changes after we last ran the mapper
        if a timestamp is provided in ISO format ('YYYY-MM-DD HH:MI:SS')
        use that instead
    """
    try:
        timestamp = float(timestamp)
    except (ValueError, TypeError):
        pass

    if not timestamp:
        timestamp = self.get_current_timestamp()
        try:
            timestamp = float(timestamp)
        except (ValueError, TypeError):
            pass

    if not timestamp:
        print "Error: you need to specify a time to update from in ISO format ('YYYY-MM-DD HH:MI:SS') or run init"
        exit(-1)

    self.new_timestamp = time.time() - 60
    print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))

    opts = {'completedAfter': timestamp,
            'method': 'tagBuild',
            'decode': True}

    if isinstance(timestamp, float):
        display_timestamp = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(timestamp))
    else:
        display_timestamp = timestamp
    print "Getting Task List since %s" % display_timestamp

    task_list = self.koji_client.listTasks(opts=opts)

    print "Updating Index"
    for task in task_list:
        parent_id = task['parent']
        if parent_id:
            builds = self.koji_client.listBuilds(taskID=parent_id)
            if len(builds) < 1:
                continue

            build = builds[0]
            pkg_tags = self.koji_client.listTags(build['build_id'])
            dist_name = None
            for t in pkg_tags:
                dist_name = tags_to_name_map.get(t['name'], None)
                if dist_name:
                    break

            if not dist_name:
                continue

            pkg_doc = None
            if build['name'] in self.found_packages:
                pkg_doc = self.found_packages[build['name']]
            else:
                results = self.search(build['name'])
                if results:
                    pkg_doc = results[0]

            build_epoch = build.get('epoch', None)
            if build_epoch is not None:
                build_epoch = str(build_epoch)

            if not pkg_doc:
                # TODO create new document
                print "ran into new package %s" % build['name']
                self.new_packages[build['name']] = True

                doc = xappy.UnprocessedDocument()
                filtered_name = filter_search_string(build['name'])
                doc.fields.append(xappy.Field('key', filtered_name))

                latest_builds = {'name': build['name']}
                data = {}
                if build_epoch is not None:
                    data['epoch'] = build_epoch
                data['version'] = build['version']
                data['release'] = build['release']
                data['build_id'] = build['build_id']
                latest_builds[dist_name] = data

                processed_doc = self.iconn.process(doc, False)
                processed_doc._doc.set_data(json.dumps(latest_builds))
                # preempt xappy's processing of data
                processed_doc._data = None
                self.iconn.add(processed_doc)
                self.sconn_needs_reload = True
                self.iconn.flush()
            else:
                latest_builds = json.loads(pkg_doc._doc.get_data())
                data = latest_builds.get(dist_name, {'version': '0',
                                                     'release': '0',
                                                     'build_id': 0})
                data_epoch = None
                do_update = False
                if 'release' not in data:
                    # do the update because we have old data
                    do_update = True
                else:
                    data_epoch = data.get('epoch', None)
                    if data_epoch is not None:
                        data_epoch = str(data_epoch)
                    if rpm.labelCompare(
                            (build_epoch, build['version'], build['release']),
                            (data_epoch, data['version'], data['release'])) == 1:
                        do_update = True

                if do_update:
                    self.updated_packages[build['name']] = True

                    if build_epoch is not None:
                        build_vr = "%s:%s.%s" % (build_epoch, build['version'],
                                                 build['release'])
                    else:
                        build_vr = "%s.%s" % (build['version'],
                                              build['release'])
                    if data_epoch is not None:
                        data_vr = "%s:%s.%s" % (data_epoch, data['version'],
                                                data.get('release', ''))
                    else:
                        data_vr = "%s.%s" % (data['version'],
                                             data.get('release', ''))
                    print "Updating package %s in dist %s to version %s (from %s)" % (
                        build['name'], dist_name, build_vr, data_vr)

                    if build_epoch is not None:
                        data['epoch'] = build_epoch
                    data['version'] = build['version']
                    data['release'] = build['release']
                    data['build_id'] = build['build_id']
                    latest_builds[dist_name] = data

                    pkg_doc._doc.set_data(json.dumps(latest_builds))
                    # preempt xappy's processing of data
                    pkg_doc._data = None
                    self.iconn.replace(pkg_doc)
                    self.sconn_needs_reload = True
                    self.found_packages[build['name']] = pkg_doc
                    self.iconn.flush()

    updated_count = len(self.updated_packages)
    new_count = len(self.new_packages)
    print "Updated: %d packages" % updated_count
    print "  Added: %d packages" % new_count
    print "========================="
    print "  Total: %s" % (updated_count + new_count)

    self.update_timestamp(self.new_timestamp)
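# rpm.labelCompare() drives the do_update decision above: it compares two
# (epoch, version, release) triples and returns 1, 0 or -1, cmp()-style.
# Illustrative values only:
import rpm

print rpm.labelCompare(('0', '1.2', '3'), ('0', '1.2', '2'))  # 1: left is newer
print rpm.labelCompare(('0', '1.2', '3'), ('0', '1.2', '3'))  # 0: identical
print rpm.labelCompare(('0', '1.2', '3'), ('1', '1.0', '1'))  # -1: higher epoch wins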
def init_db(self, *args):
    """ loop through all packages and get the latest builds
        for koji tags listed in distmappings
    """
    self.new_timestamp = time.time() - 60
    print "Calculating timestamp minus 1 minute to account for any skew between the servers (%s)" % time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(self.new_timestamp))

    print "Initializing Index"
    package_list = self.koji_client.listPackages()

    i = 0
    for pkg in package_list:
        i += 1
        pkg_name = pkg['package_name']
        print "%d: Processing package %s" % (i, pkg_name)

        doc = xappy.UnprocessedDocument()
        filtered_name = filter_search_string(pkg_name)
        doc.fields.append(xappy.Field('key', filtered_name))

        latest_builds = {'name': pkg_name}
        for t in tags:
            tag = t['tag']
            if t['name'] in latest_builds:
                # short circuit optimization
                continue
            builds = self.koji_client.getLatestBuilds(tag, package=pkg_name)
            if builds:
                build = None
                for b in builds:
                    # only get builds which completed
                    if b['state'] == koji.BUILD_STATES['COMPLETE']:
                        build = b
                        break
                if build:
                    data = {'version': build['version'],
                            'release': build['release'],
                            'build_id': build['build_id']}
                    if build.get('epoch', None) is not None:
                        data['epoch'] = str(build['epoch'])
                        version_display = "%s:%s.%s" % (
                            data['epoch'], data['version'], data['release'])
                    else:
                        version_display = "%s.%s" % (data['version'],
                                                     data['release'])
                    latest_builds[t['name']] = data
                    print " %s: %s" % (t['name'], version_display)

        if len(latest_builds) < 2:
            # don't process doc if there is no real data
            # most likely this is an outdated package
            continue

        processed_doc = self.iconn.process(doc, False)
        processed_doc._doc.set_data(json.dumps(latest_builds))
        # preempt xappy's processing of data
        processed_doc._data = None
        self.iconn.add(processed_doc)

    print "Finished updating timestamp"
    self.update_timestamp(self.new_timestamp)
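# Shapes of the distmappings structures consumed by init_db() and update_db(),
# inferred from usage: t['tag'] is the koji tag queried, t['name'] is the dist
# key stored in the document, and tags_to_name_map translates a koji tag name
# back to that dist key. The values below are illustrative assumptions.
tags = [
    {'name': 'Rawhide', 'tag': 'rawhide'},
    {'name': 'Fedora 20', 'tag': 'f20-updates'},
]
tags_to_name_map = dict((t['tag'], t['name']) for t in tags)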