def main():
    options = parse_arguments()

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    # Set up filters
    filters = []
    if options.maintainer:
        filters.append(MaintainerFilter(options.maintainer))
    if options.category:
        filters.append(CategoryFilter(options.category))
    if options.more_repos is not None or options.less_repos is not None:
        filters.append(FamilyCountFilter(more=options.more_repos, less=options.less_repos))
    if options.in_repository:
        filters.append(InRepoFilter(options.in_repository))
    if options.not_in_repository:
        filters.append(NotInRepoFilter(options.not_in_repository))
    if options.outdated_in_repository:
        filters.append(OutdatedInRepoFilter(options.outdated_in_repository))
    if not options.no_shadow:
        filters.append(ShadowFilter())

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir)

    logger.Log('dumping...')
    for packageset in repoproc.StreamDeserializeMulti(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not PackagesetCheckFilters(packageset, *filters):
            continue

        if options.dump == 'packages':
            for package in packageset:
                print(
                    options.field_separator.join(
                        (
                            format_package_field(field, getattr(package, field))
                            for field in options.fields
                        )
                    )
                )

        if options.dump == 'summaries':
            print(packageset[0].effname)
            best_pkg_by_repo = PackagesetToBestByRepo(packageset)
            for reponame in repomgr.GetNames(options.reponames):
                if reponame in best_pkg_by_repo:
                    print(' {}: {} ({})'.format(
                        reponame,
                        best_pkg_by_repo[reponame].version,
                        VersionClass.ToString(best_pkg_by_repo[reponame].versionclass)
                    ))

    return 0
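# Illustrative sketch only: the filter classes chained above are assumed to
# share a Check(packageset) predicate that PackagesetCheckFilters() evaluates
# for each filter in turn. The class and function names below are hypothetical
# stand-ins for the real implementations, which are not part of this excerpt.
class ExampleMaintainerFilter:
    def __init__(self, maintainer):
        self.maintainer = maintainer.lower()

    def Check(self, packageset):
        # keep the packageset if any of its packages lists the wanted maintainer
        return any(
            self.maintainer in map(str.lower, package.maintainers or [])
            for package in packageset
        )


def example_check_filters(packageset, *filters):
    # conjunction of all filters, mirroring how PackagesetCheckFilters() is used above
    return all(f.Check(packageset) for f in filters)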
def Main():
    options = ParseArguments()

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, safety_checks=not options.no_safety_checks)

    if options.list:
        print('\n'.join(repomgr.GetNames(reponames=options.reponames)))
        return 0

    transformer = PackageTransformer(repomgr, options.rules_dir)

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    repositories_updated = []
    repositories_not_updated = []

    start = timer()

    if options.fetch or options.parse or options.reprocess:
        repositories_updated, repositories_not_updated = ProcessRepositories(
            options=options,
            logger=logger,
            repoproc=repoproc,
            transformer=transformer,
            reponames=repomgr.GetNames(reponames=options.reponames)
        )

    if options.initdb or options.database or options.postupdate:
        ProcessDatabase(
            options=options,
            logger=logger,
            repomgr=repomgr,
            repoproc=repoproc,
            repositories_updated=repositories_updated,
            reponames=repomgr.GetNames(reponames=options.reponames)
        )

    if (options.parse or options.reprocess) and options.show_unmatched_rules:
        ShowUnmatchedRules(
            options=options,
            logger=logger,
            transformer=transformer,
            reliable=repositories_not_updated == []
        )

    logger.Log('total time taken: {:.2f} seconds'.format(timer() - start))

    return 1 if repositories_not_updated else 0
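# Minimal wiring sketch, not part of this excerpt: `timer` above is assumed to
# be timeit.default_timer, and the 0/1 result of Main() is assumed to become
# the process exit status so callers (e.g. cron jobs) can detect repositories
# that failed to update.
import sys
from timeit import default_timer as timer

if __name__ == '__main__':
    sys.exit(Main())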
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field))
                     for field in options.fields)))

    return 0
def main():
    options = parse_arguments()

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.Log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        if options.dump == 'packages':
            for package in packageset:
                print(
                    options.field_separator.join(
                        (format_package_field(field, getattr(package, field))
                         for field in options.fields)))

        if options.dump == 'summaries':
            print(packageset[0].effname)
            best_pkg_by_repo = PackagesetToBestByRepo(packageset)
            for reponame in repomgr.GetNames(options.reponames):
                if reponame in best_pkg_by_repo:
                    print(' {}: {} ({})'.format(
                        reponame,
                        best_pkg_by_repo[reponame].version,
                        VersionClass.ToString(best_pkg_by_repo[reponame].versionclass)))

    return 0
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = ['effname', 'repo', 'version'] + [
            slot for slot in Package.__slots__
            if slot not in ['effname', 'repo', 'version']
        ]
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames, logger=logger):
        if options.from_ is not None and packageset[0].effname < options.from_:
            continue
        if options.to is not None and packageset[0].effname > options.to:
            break

        fill_packageset_versions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field, None))
                     for field in options.fields)))

    return 0
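# Illustrative sketch of the --from/--to range logic above, assuming
# iter_parsed() yields packagesets sorted by effname (otherwise the early
# `break` on options.to would skip valid entries). The function name below is
# a hypothetical stand-in, not part of the real codebase.
def example_select_range(effnames, from_=None, to=None):
    for effname in effnames:
        if from_ is not None and effname < from_:
            continue  # not yet in range
        if to is not None and effname > to:
            break     # past the range; relies on sorted input
        yield effname


assert list(example_select_range(['a', 'b', 'c', 'd'], from_='b', to='c')) == ['b', 'c']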
def get_main_logger(self) -> Logger:
    return FileLogger(self.options.logfile) if self.options.logfile else StderrLogger()
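# Usage sketch (hypothetical class, assuming Logger/FileLogger/StderrLogger
# come from the surrounding codebase): get_main_logger() condenses the
# "logger = StderrLogger(); if options.logfile: logger = FileLogger(...)"
# pattern repeated in the main() functions above into a single expression on
# an object that keeps the parsed arguments in self.options.
class ExampleApplication:
    def __init__(self, options) -> None:
        self.options = options

    def get_main_logger(self) -> Logger:
        return FileLogger(self.options.logfile) if self.options.logfile else StderrLogger()

    def run(self) -> int:
        self.get_main_logger().log('starting')
        return 0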
def Main():
    options = ParseArguments()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()

    database = Database(options.dsn, readonly=True, autocommit=True)

    readqueue = multiprocessing.Queue(10)
    writequeue = multiprocessing.Queue(10)

    writer = multiprocessing.Process(target=LinkUpdatingWorker, args=(writequeue, options, logger))
    writer.start()

    processpool = [
        multiprocessing.Process(target=LinkProcessingWorker, args=(readqueue, writequeue, i, options, logger))
        for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # base logger already passed to workers, may append prefix here
    logger = logger.GetPrefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.Log('Requesting pack of urls')
        urls = database.GetLinksForCheck(
            after=prev_url,
            prefix=options.prefix,
            # no limit by default
            limit=options.packsize,
            recheck_age=options.age * 60 * 60 * 24,
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded
        )
        if not urls:
            logger.Log(' No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname get into a same large pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.GetLinksForCheck(
                after=urls[-1],
                prefix=match.group(1),
                recheck_age=options.age * 60 * 60 * 24,
                unchecked_only=options.unchecked,
                checked_only=options.checked,
                failed_only=options.failed,
                succeeded_only=options.succeeded
            )

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.Log('Skipping {} urls ({}..{}), exceeds max pack size'.format(len(urls), urls[0], urls[-1]))
        else:
            readqueue.put(urls)
            logger.Log('Enqueued {} urls ({}..{})'.format(len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.Log('Waiting for child processes to exit')

    # close workers
    for process in processpool:
        readqueue.put(None)
    for process in processpool:
        process.join()

    # close writer
    writequeue.put(None)
    writer.join()

    logger.Log('Done')

    return 0
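# Illustrative sketch only: the LinkProcessingWorker / LinkUpdatingWorker
# bodies are not part of this excerpt. The shutdown protocol above (one None
# sentinel pushed per worker, then join()) implies a consumer loop of roughly
# this shape; the name and signature below are hypothetical.
def example_queue_worker(queue):
    while True:
        pack = queue.get()
        if pack is None:  # sentinel from the master: stop cleanly
            break
        for url in pack:
            pass  # process one url (real work omitted)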
def Main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dsn', default=repology.config.DSN, help='database connection params')
    parser.add_argument('--logfile', help='path to log file (log to stderr by default)')

    parser.add_argument('--timeout', type=float, default=60.0, help='timeout for link requests in seconds')
    parser.add_argument('--delay', type=float, default=3.0, help='delay between requests to one host')
    parser.add_argument('--age', type=int, default=365, help='min age for recheck in days')
    parser.add_argument('--packsize', type=int, default=128, help='pack size for link processing')
    parser.add_argument('--maxpacksize', type=int, help='max pack size for link processing (useful to skip large hosts)')
    parser.add_argument('--jobs', type=int, default=1, help='number of parallel jobs')

    parser.add_argument('--unchecked', action='store_true', help='only process unchecked (newly discovered) links')
    parser.add_argument('--checked', action='store_true', help='only process old (already checked) links')
    parser.add_argument('--failed', action='store_true', help='only process links that were checked and failed')
    parser.add_argument('--succeeded', action='store_true', help='only process links that were checked and succeeded')
    parser.add_argument('--prefix', help='only process links with specified prefix')

    options = parser.parse_args()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()

    database = Database(options.dsn, readonly=True, autocommit=True)

    queue = multiprocessing.Queue(1)
    processpool = [
        multiprocessing.Process(target=LinkProcessorWorker, args=(queue, i, options, logger))
        for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # base logger already passed to workers, may append prefix here
    logger = logger.GetPrefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.Log('Requesting pack of urls')
        urls = database.GetLinksForCheck(
            after=prev_url,
            prefix=options.prefix,
            # no limit by default
            limit=options.packsize,
            recheck_age=options.age * 60 * 60 * 24,
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded
        )
        if not urls:
            logger.Log(' No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname get into a same large pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.GetLinksForCheck(
                after=urls[-1],
                prefix=match.group(1),
                recheck_age=options.age * 60 * 60 * 24,
                unchecked_only=options.unchecked,
                checked_only=options.checked,
                failed_only=options.failed,
                succeeded_only=options.succeeded
            )

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.Log('Skipping {} urls ({}..{}), exceeds max pack size'.format(len(urls), urls[0], urls[-1]))
        else:
            queue.put(urls)
            logger.Log('Enqueued {} urls ({}..{})'.format(len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.Log('Waiting for child processes to exit')

    for process in processpool:
        queue.put(None)
    for process in processpool:
        process.join()

    logger.Log('Done')

    return 0
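# Quick illustration of the hostname-grouping regex used by both Main()
# variants above: matching '([a-z]+://[^/]+/)' against the last url of a pack
# yields the scheme-plus-host prefix, which is then used to pull the rest of
# that host's urls into the same pack so per-host delays stay effective.
# The url below is an illustrative example, not taken from real data.
import re

match = re.match('([a-z]+://[^/]+/)', 'https://example.org/some/page')
assert match and match.group(1) == 'https://example.org/'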