def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    format_package_field(field, getattr(package, field))
                    for field in options.fields
                )
            )

    return 0
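# The 'all' branch above enumerates fields from a freshly constructed Package's
# instance __dict__.  That only works while Package keeps its attributes in a
# per-instance dict; the revision below switches to Package.__slots__,
# presumably because a __slots__-based class exposes no instance __dict__ to
# enumerate.  A minimal, self-contained illustration of that difference
# (the two toy classes are hypothetical and unrelated to the real Package):

class _DictBased:
    def __init__(self) -> None:
        self.name = ''
        self.version = ''


class _SlotsBased:
    __slots__ = ['name', 'version']


assert sorted(_DictBased().__dict__.keys()) == ['name', 'version']
assert _SlotsBased.__slots__ == ['name', 'version']
assert not hasattr(_SlotsBased(), '__dict__')  # nothing to enumerate per instance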
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        # dump effname, repo and version first, then the remaining Package slots
        options.fields = ['effname', 'repo', 'version'] + [
            slot for slot in Package.__slots__
            if slot not in ['effname', 'repo', 'version']
        ]
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames, logger=logger):
        # honor optional effname range limits: skip until from_, stop after to
        if options.from_ is not None and packageset[0].effname < options.from_:
            continue
        if options.to is not None and packageset[0].effname > options.to:
            break

        fill_packageset_versions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    format_package_field(field, getattr(package, field, None))
                    for field in options.fields
                )
            )

    return 0
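# Both dump loops above format each column through a format_package_field()
# helper that is defined elsewhere and not shown in this section.  The sketch
# below is only an assumption of what such a helper might do (empty string for
# missing values, flattening of multi-valued fields); it is not the project's
# actual implementation and is therefore given a distinct name.

def format_package_field_sketch(field: str, value) -> str:
    # the field name is accepted only to mirror the call site above
    if value is None:
        return ''  # assumed: missing fields dump as an empty column
    if isinstance(value, (list, tuple, set)):
        return ','.join(str(item) for item in value)  # assumed: join multi-valued fields
    return str(value)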
def main() -> int:
    options = ParseArguments()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()

    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn, querymgr, readonly=True, autocommit=True, application_name='repology-linkchecker/reader')

    readqueue: multiprocessing.Queue = multiprocessing.Queue(10)
    writequeue: multiprocessing.Queue = multiprocessing.Queue(10)

    writer = multiprocessing.Process(target=LinkUpdatingWorker, args=(writequeue, options, querymgr, logger))
    writer.start()

    processpool = [
        multiprocessing.Process(target=LinkProcessingWorker, args=(readqueue, writequeue, i, options, logger))
        for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # base logger already passed to workers, may append prefix here
    logger = logger.get_prefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.log('Requesting pack of urls')
        urls = database.get_links_for_check(
            after=prev_url,
            prefix=options.prefix,  # no limit by default
            limit=options.packsize,
            recheck_age=datetime.timedelta(seconds=options.age * 60 * 60 * 24),
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded
        )
        if not urls:
            logger.log(' No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname get into the same large pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.get_links_for_check(
                after=urls[-1],
                prefix=match.group(1),
                recheck_age=datetime.timedelta(seconds=options.age * 60 * 60 * 24),
                unchecked_only=options.unchecked,
                checked_only=options.checked,
                failed_only=options.failed,
                succeeded_only=options.succeeded
            )

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.log('Skipping {} urls ({}..{}), exceeds max pack size'.format(len(urls), urls[0], urls[-1]))
        else:
            readqueue.put(urls)
            logger.log('Enqueued {} urls ({}..{})'.format(len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.log('Waiting for child processes to exit')

    # close workers: one None sentinel per worker process
    for process in processpool:
        readqueue.put(None)
    for process in processpool:
        process.join()

    # close writer
    writequeue.put(None)
    writer.join()

    logger.log('Done')

    return 0
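# The master loop above communicates with its children purely through the two
# queues: packs of urls flow into readqueue, and shutdown is signalled by
# putting one None sentinel per worker (and a single None for the writer).
# The real LinkProcessingWorker and LinkUpdatingWorker are defined elsewhere;
# the sketch below only illustrates the sentinel protocol the master relies
# on, with a trivial stand-in for the actual link checking.

def link_processing_worker_sketch(readqueue: multiprocessing.Queue, writequeue: multiprocessing.Queue) -> None:
    while True:
        urls = readqueue.get()
        if urls is None:
            break  # sentinel from the master: stop consuming so join() can return
        # stand-in for real per-url checking; results are forwarded to the writer
        writequeue.put([(url, 'checked') for url in urls])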