Example #1
def main():
    options = parse_arguments()

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    # Set up filters
    filters = []
    if options.maintainer:
        filters.append(MaintainerFilter(options.maintainer))
    if options.category:
        filters.append(CategoryFilter(options.category))
    if options.more_repos is not None or options.less_repos is not None:
        filters.append(FamilyCountFilter(more=options.more_repos, less=options.less_repos))
    if options.in_repository:
        filters.append(InRepoFilter(options.in_repository))
    if options.not_in_repository:
        filters.append(NotInRepoFilter(options.not_in_repository))
    if options.outdated_in_repository:
        filters.append(OutdatedInRepoFilter(options.outdated_in_repository))
    if not options.no_shadow:
        filters.append(ShadowFilter())

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir)

    logger.Log('dumping...')
    for packageset in repoproc.StreamDeserializeMulti(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not PackagesetCheckFilters(packageset, *filters):
            continue

        if options.dump == 'packages':
            for package in packageset:
                print(
                    options.field_separator.join(
                        (
                            format_package_field(field, getattr(package, field)) for field in options.fields
                        )
                    )
                )
        if options.dump == 'summaries':
            print(packageset[0].effname)
            best_pkg_by_repo = PackagesetToBestByRepo(packageset)
            for reponame in repomgr.GetNames(options.reponames):
                if reponame in best_pkg_by_repo:
                    print('  {}: {} ({})'.format(
                        reponame,
                        best_pkg_by_repo[reponame].version,
                        VersionClass.ToString(best_pkg_by_repo[reponame].versionclass)
                    ))

    return 0
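
Note: Example #1 reads many attributes from the object returned by parse_arguments(), but the parser itself is not shown on this page. The sketch below is a hypothetical reconstruction covering only the attributes the example accesses; option names, defaults and help texts are assumptions, not the project's real CLI.

import argparse


def parse_arguments():
    # Hypothetical parser: covers only the options used in Example #1.
    parser = argparse.ArgumentParser()
    parser.add_argument('--logfile', help='log to this file instead of stderr')
    parser.add_argument('--fields', default='all', help='comma-separated list of package fields to dump')
    parser.add_argument('--field-separator', default='|', help='separator between dumped fields')
    parser.add_argument('--maintainer', help='only dump packages by this maintainer')
    parser.add_argument('--category', help='only dump packages in this category')
    parser.add_argument('--more-repos', type=int, help='only dump packages present in more than this many repository families')
    parser.add_argument('--less-repos', type=int, help='only dump packages present in less than this many repository families')
    parser.add_argument('--in-repository', help='only dump packages present in this repository')
    parser.add_argument('--not-in-repository', help='only dump packages absent from this repository')
    parser.add_argument('--outdated-in-repository', help='only dump packages outdated in this repository')
    parser.add_argument('--no-shadow', action='store_true', help='do not filter out shadow-only packagesets')
    parser.add_argument('--dump', choices=['packages', 'summaries'], default='packages', help='dump mode')
    parser.add_argument('--repos-dir', help='path to repository configuration directory')
    parser.add_argument('--statedir', help='path to state directory')
    parser.add_argument('reponames', nargs='*', metavar='repository', help='repositories to dump')
    return parser.parse_args()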
Example #2
def Main():
    options = ParseArguments()

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr,
                                   options.statedir,
                                   safety_checks=not options.no_safety_checks)

    if options.list:
        print('\n'.join(repomgr.GetNames(reponames=options.reponames)))
        return 0

    transformer = PackageTransformer(repomgr, options.rules_dir)

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    repositories_updated = []
    repositories_not_updated = []

    start = timer()
    if options.fetch or options.parse or options.reprocess:
        repositories_updated, repositories_not_updated = ProcessRepositories(
            options=options,
            logger=logger,
            repoproc=repoproc,
            transformer=transformer,
            reponames=repomgr.GetNames(reponames=options.reponames))

    if options.initdb or options.database or options.postupdate:
        ProcessDatabase(
            options=options,
            logger=logger,
            repomgr=repomgr,
            repoproc=repoproc,
            repositories_updated=repositories_updated,
            reponames=repomgr.GetNames(reponames=options.reponames))

    if (options.parse or options.reprocess) and (options.show_unmatched_rules):
        ShowUnmatchedRules(options=options,
                           logger=logger,
                           transformer=transformer,
                           reliable=repositories_not_updated == [])

    logger.Log('total time taken: {:.2f} seconds'.format((timer() - start)))

    return 1 if repositories_not_updated else 0
Example #3
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir,
                                   options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field))
                     for field in options.fields)))

    return 0
Example #4
def main():
    options = parse_arguments()

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = sorted(Package().__dict__.keys())
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir,
                                   options.parseddir)

    logger.Log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames):
        FillPackagesetVersions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        if options.dump == 'packages':
            for package in packageset:
                print(
                    options.field_separator.join(
                        (format_package_field(field, getattr(package, field))
                         for field in options.fields)))
        if options.dump == 'summaries':
            print(packageset[0].effname)
            best_pkg_by_repo = PackagesetToBestByRepo(packageset)
            for reponame in repomgr.GetNames(options.reponames):
                if reponame in best_pkg_by_repo:
                    print('  {}: {} ({})'.format(
                        reponame, best_pkg_by_repo[reponame].version,
                        VersionClass.ToString(
                            best_pkg_by_repo[reponame].versionclass)))

    return 0
Example #5
def main() -> int:
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        options.fields = ['effname', 'repo', 'version'] + [
            slot for slot in Package.__slots__
            if slot not in ['effname', 'repo', 'version']
        ]
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir,
                                   options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames,
                                           logger=logger):
        if options.from_ is not None and packageset[0].effname < options.from_:
            continue
        if options.to is not None and packageset[0].effname > options.to:
            break

        fill_packageset_versions(packageset)

        if not options.all and packageset_is_shadow_only(packageset):
            continue

        for package in packageset:
            print(
                options.field_separator.join(
                    (format_package_field(field, getattr(package, field, None))
                     for field in options.fields)))

    return 0
def get_main_logger(self) -> Logger:
    return FileLogger(
        self.options.logfile) if self.options.logfile else StderrLogger()
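
Note: every example constructs StderrLogger or FileLogger and calls Log()/log() and GetPrefixed() on the result, but the logger classes themselves are not shown on this page. The following is a minimal sketch of compatible classes, inferred only from how the examples call them; the real repology implementation may differ.

import sys


class Logger:
    # Minimal interface inferred from the calls made in the examples above.
    def Log(self, message):
        raise NotImplementedError()

    # newer examples use a lowercase method name
    def log(self, message):
        self.Log(message)

    def GetPrefixed(self, prefix):
        return PrefixedLogger(self, prefix)


class StderrLogger(Logger):
    def Log(self, message):
        print(message, file=sys.stderr)


class FileLogger(Logger):
    def __init__(self, path):
        self._path = path

    def Log(self, message):
        # append each message to the given log file
        with open(self._path, 'a', encoding='utf-8') as logfile:
            print(message, file=logfile)


class PrefixedLogger(Logger):
    # wraps another logger and prepends a fixed prefix to every message
    def __init__(self, parent, prefix):
        self._parent = parent
        self._prefix = prefix

    def Log(self, message):
        self._parent.Log(self._prefix + message)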
def Main():
    options = ParseArguments()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()
    database = Database(options.dsn, readonly=True, autocommit=True)

    readqueue = multiprocessing.Queue(10)
    writequeue = multiprocessing.Queue(10)

    writer = multiprocessing.Process(target=LinkUpdatingWorker,
                                     args=(writequeue, options, logger))
    writer.start()

    processpool = [
        multiprocessing.Process(target=LinkProcessingWorker,
                                args=(readqueue, writequeue, i, options,
                                      logger)) for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # base logger already passed to workers, may append prefix here
    logger = logger.GetPrefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.Log('Requesting pack of urls')
        urls = database.GetLinksForCheck(
            after=prev_url,
            prefix=options.prefix,  # no limit by default
            limit=options.packsize,
            recheck_age=options.age * 60 * 60 * 24,
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded)
        if not urls:
            logger.Log('  No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname get into a same large pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.GetLinksForCheck(after=urls[-1],
                                              prefix=match.group(1),
                                              recheck_age=options.age * 60 *
                                              60 * 24,
                                              unchecked_only=options.unchecked,
                                              checked_only=options.checked,
                                              failed_only=options.failed,
                                              succeeded_only=options.succeeded)

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.Log(
                'Skipping {} urls ({}..{}), exceeds max pack size'.format(
                    len(urls), urls[0], urls[-1]))
        else:
            readqueue.put(urls)
            logger.Log('Enqueued {} urls ({}..{})'.format(
                len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.Log('Waiting for child processes to exit')

    # close workers
    for process in processpool:
        readqueue.put(None)
    for process in processpool:
        process.join()

    # close writer
    writequeue.put(None)
    writer.join()

    logger.Log('Done')

    return 0
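
Note: the worker functions referenced above (LinkProcessingWorker, LinkUpdatingWorker) are not shown on this page. The master loop enqueues packs of urls and finally puts one None per worker as a shutdown sentinel, so a compatible worker would follow roughly the skeleton below; this is a hypothetical sketch, and check_url stands in for the actual link-checking logic, which is omitted.

def LinkProcessingWorker(readqueue, writequeue, worker_id, options, logger):
    # Hypothetical worker skeleton matching the queue protocol used above:
    # consume packs of urls until the None sentinel arrives, then exit.
    logger = logger.GetPrefixed('worker {}: '.format(worker_id))

    while True:
        urls = readqueue.get()
        if urls is None:  # shutdown sentinel enqueued by the master
            break

        logger.Log('processing {} urls'.format(len(urls)))
        # check_url is a placeholder for the real per-link check
        results = [check_url(url, timeout=options.timeout) for url in urls]
        writequeue.put(results)

    logger.Log('worker done')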
def Main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dsn',
                        default=repology.config.DSN,
                        help='database connection params')
    parser.add_argument('--logfile',
                        help='path to log file (log to stderr by default)')

    parser.add_argument('--timeout',
                        type=float,
                        default=60.0,
                        help='timeout for link requests in seconds')
    parser.add_argument('--delay',
                        type=float,
                        default=3.0,
                        help='delay between requests to one host')
    parser.add_argument('--age',
                        type=int,
                        default=365,
                        help='min age for recheck in days')
    parser.add_argument('--packsize',
                        type=int,
                        default=128,
                        help='pack size for link processing')
    parser.add_argument(
        '--maxpacksize',
        type=int,
        help='max pack size for link processing (useful to skip large hosts)')
    parser.add_argument('--jobs',
                        type=int,
                        default=1,
                        help='number of parallel jobs')

    parser.add_argument('--unchecked',
                        action='store_true',
                        help='only process unchecked (newly discovered) links')
    parser.add_argument('--checked',
                        action='store_true',
                        help='only process old (already checked) links')
    parser.add_argument('--failed',
                        action='store_true',
                        help='only process links that were checked and failed')
    parser.add_argument('--succeeded',
                        action='store_true',
                        help='only process links that were checked and succeeded')
    parser.add_argument('--prefix',
                        help='only process links with specified prefix')
    options = parser.parse_args()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()
    database = Database(options.dsn, readonly=True, autocommit=True)

    queue = multiprocessing.Queue(1)
    processpool = [
        multiprocessing.Process(target=LinkProcessorWorker,
                                args=(queue, i, options, logger))
        for i in range(options.jobs)
    ]
    for process in processpool:
        process.start()

    # base logger already passed to workers, may append prefix here
    logger = logger.GetPrefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.Log('Requesting pack of urls')
        urls = database.GetLinksForCheck(
            after=prev_url,
            prefix=options.prefix,  # no limit by default
            limit=options.packsize,
            recheck_age=options.age * 60 * 60 * 24,
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded)
        if not urls:
            logger.Log('  No more urls to process')
            break

        # Get another pack of urls with the last hostname to ensure
        # that all urls for one hostname get into a same large pack
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.GetLinksForCheck(after=urls[-1],
                                              prefix=match.group(1),
                                              recheck_age=options.age * 60 *
                                              60 * 24,
                                              unchecked_only=options.unchecked,
                                              checked_only=options.checked,
                                              failed_only=options.failed,
                                              succeeded_only=options.succeeded)

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.Log(
                'Skipping {} urls ({}..{}), exceeds max pack size'.format(
                    len(urls), urls[0], urls[-1]))
        else:
            queue.put(urls)
            logger.Log('Enqueued {} urls ({}..{})'.format(
                len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.Log('Waiting for child processes to exit')

    for process in processpool:
        queue.put(None)

    for process in processpool:
        process.join()

    logger.Log('Done')

    return 0