Example #1
def main() -> int:
    options = parse_arguments()

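    # log to stderr by default, or to a file when a logfile path was given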
    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

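    # 'all' expands to every attribute of a freshly constructed Package, sorted by name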
    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

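    # set up repository configuration and access to previously parsed package data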
    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir,
                                   options.parseddir)

    logger.log('dumping...')
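    # iterate over parsed packagesets, fill in version information and dump the requested fields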
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

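        # one line per package: the selected fields joined with the field separator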
        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field))
                     for field in options.fields)))

    return 0
Example #2
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

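    # 'all' puts effname, repo and version first, followed by the remaining Package slots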
    if options.fields == 'all':
        options.fields = ['effname', 'repo', 'version'] + [
            slot for slot in Package.__slots__
            if slot not in ['effname', 'repo', 'version']
        ]
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir,
                                   options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames,
                                           logger=logger):
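        # skip packagesets before the requested starting effname and stop once past the end
        # (relies on effname-ordered iteration)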
        if options.from_ is not None and packageset[0].effname < options.from_:
            continue
        if options.to is not None and packageset[0].effname > options.to:
            break

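        # fill in version information; unless everything was requested, skip shadow-only packagesets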
        fill_packageset_versions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field, None))
                     for field in options.fields)))

    return 0
Example #3
def main() -> int:
    options = ParseArguments()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()
    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn,
                        querymgr,
                        readonly=True,
                        autocommit=True,
                        application_name='repology-linkchecker/reader')

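    # bounded queues: readqueue feeds url packs to the workers, writequeue feeds their results to the writer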
    readqueue: multiprocessing.Queue = multiprocessing.Queue(10)
    writequeue: multiprocessing.Queue = multiprocessing.Queue(10)

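    # a single writer process consumes items from writequeue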
    writer = multiprocessing.Process(target=LinkUpdatingWorker,
                                     args=(writequeue, options, querymgr,
                                           logger))
    writer.start()

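    # one link processing worker per requested job, all reading from readqueue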
    processpool = [
        multiprocessing.Process(target=LinkProcessingWorker,
                                args=(readqueue, writequeue, i, options,
                                      logger)) for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # the base logger was already passed to the workers, so we can safely add a prefix here
    logger = logger.get_prefixed('master: ')

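    # main loop: fetch packs of urls from the database and hand them to the workers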
    prev_url = None
    while True:
        # Get pack of links
        logger.log('Requesting pack of urls')
        urls = database.get_links_for_check(
            after=prev_url,
            prefix=options.prefix,  # no prefix restriction by default
            limit=options.packsize,
            recheck_age=datetime.timedelta(seconds=options.age * 60 * 60 * 24),
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded)
        if not urls:
            logger.log('  No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname end up in the same pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.get_links_for_check(
                after=urls[-1],
                prefix=match.group(1),
                recheck_age=datetime.timedelta(
                    seconds=options.age * 60 * 60 * 24),
                unchecked_only=options.unchecked,
                checked_only=options.checked,
                failed_only=options.failed,
                succeeded_only=options.succeeded)

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.log(
                'Skipping {} urls ({}..{}), exceeds max pack size'.format(
                    len(urls), urls[0], urls[-1]))
        else:
            readqueue.put(urls)
            logger.log('Enqueued {} urls ({}..{})'.format(
                len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.log('Waiting for child processes to exit')

    # close workers
    for process in processpool:
        readqueue.put(None)
    for process in processpool:
        process.join()

    # close writer
    writequeue.put(None)
    writer.join()

    logger.log('Done')

    return 0