Esempio n. 1
0
def main() -> int:
    """Print the requested fields of every parsed package, one package per line.

    Logging uses ``StderrLogger`` unless ``options.logfile`` is set, in which
    case ``FileLogger`` is used instead. A field spec of ``'all'`` expands to
    the sorted instance attribute names of a default-constructed ``Package``;
    any other value is split on commas.

    Returns:
        Always ``0``.
    """
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        requested_fields = sorted(Package().__dict__)
    else:
        requested_fields = options.fields.split(',')
    options.fields = requested_fields

    repoproc = RepositoryProcessor(
        RepositoryManager(options.repos_dir),
        options.statedir,
        options.parseddir,
    )

    logger.log('dumping...')
    parsed_sets = repoproc.iter_parsed(reponames=options.reponames, logger=logger)
    for packageset in parsed_sets:
        fill_packageset_versions(packageset)

        # Shadow-only package sets are dumped only when options.all is set.
        if options.all or not packageset_is_shadow_only(packageset):
            for package in packageset:
                values = [
                    format_package_field(name, getattr(package, name))
                    for name in options.fields
                ]
                print(options.field_separator.join(values))

    return 0
Esempio n. 2
0
def database_update(env: Environment) -> None:
    """Refill the main database with packages from all enabled repositories.

    Package sets are read from the repository processor, have their versions
    filled in, and are pushed to the database in batches of at least 10000
    packages. Afterwards the update is finished and the changes committed.
    """
    logger = env.get_main_logger()
    database = env.get_main_database_connection()

    logger.log('clearing the database')
    database.update_start()

    pending = []
    pushed_total = 0
    started_at = timer()

    logger.log('pushing packages to database')
    parsed_sets = env.get_repo_processor().iter_parsed(
        reponames=env.get_enabled_repo_names(), logger=logger)
    for packageset in parsed_sets:
        fill_packageset_versions(packageset)
        pending.extend(packageset)

        # Keep accumulating until the batch reaches the push threshold.
        if len(pending) < 10000:
            continue

        database.add_packages(pending)
        pushed_total += len(pending)
        pending = []

        progress_logger = logger.get_indented()
        rate = pushed_total / (timer() - started_at)
        progress_logger.log(
            'pushed {} packages, {:.2f} packages/second'.format(pushed_total, rate))

    # Push whatever did not fill a complete batch.
    database.add_packages(pending)

    logger.log('updating views')
    database.update_finish()

    logger.log('committing changes')
    database.commit()
Esempio n. 3
0
    def test_suppress_ignored(self) -> None:
        """Check classification when every package carries the IGNORE flag.

        Expects 2.0 in repo '1' to be UNIQUE and 1.0 in repo '2' to be
        OUTDATED, i.e. neither ends up IGNORED.
        """
        cases = [
            (
                Package(repo='1', family='1', name='a', version='2.0', flags=PackageFlags.IGNORE),
                PackageStatus.UNIQUE,
            ),
            (
                Package(repo='2', family='1', name='a', version='1.0', flags=PackageFlags.IGNORE),
                PackageStatus.OUTDATED,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 4
0
    def test_versionclass_unique(self) -> None:
        """Check the UNIQUE classification.

        Expects the devel 2.0alpha1 in repo '1' and 1.2 in repo '2' to be
        UNIQUE, while repo '3' gets OUTDATED for 1.1 and LEGACY for 1.0.
        All three repos share family '1'.
        """
        cases = [
            (
                Package(repo='1', family='1', name='a', version='2.0alpha1', flags=PackageFlags.DEVEL),
                PackageStatus.UNIQUE,
            ),
            (
                Package(repo='2', family='1', name='a', version='1.2'),
                PackageStatus.UNIQUE,
            ),
            (
                Package(repo='3', family='1', name='a', version='1.1'),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='3', family='1', name='a', version='1.0'),
                PackageStatus.LEGACY,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 5
0
    def test_versionclass_sameversionsamestatus(self) -> None:
        """Check that two packages with the same version in one repo share a status.

        Repo '1' carries 2.1 twice while repo '2' has the newer 2.2; both
        2.1 packages are expected to be OUTDATED.
        """
        cases = [
            (
                Package(repo='2', family='2', name='a', version='2.2'),
                PackageStatus.NEWEST,
            ),
            # neither duplicate may turn the other one legacy instead of outdated
            (
                Package(repo='1', family='1', name='a', version='2.1'),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.1'),
                PackageStatus.OUTDATED,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 6
0
    def test_versionclass_unignored_really_unignored(self) -> None:
        """Check that an ignored version backed by a non-ignored one is fully unignored.

        Repo '1' has 2.1 flagged IGNORE and repo '2' has 2.1 unflagged; both
        repos are expected to end up with NEWEST for 2.1 and LEGACY for 2.0.
        """
        cases = [
            # an ignored package should be fully unignored when the same
            # version exists non-ignored in another repo
            (
                Package(repo='1', family='1', name='a', version='2.1', flags=PackageFlags.IGNORE),
                PackageStatus.NEWEST,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.1'),
                PackageStatus.NEWEST,
            ),
            (
                Package(repo='2', family='2', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 7
0
    def test_versionclass_devel_lower_than_default(self) -> None:
        """Check a DEVEL-flagged version that is lower than a normal version.

        In both repos 2.1 (no flags) is expected to be NEWEST and the
        DEVEL-flagged 2.0 is expected to be LEGACY.
        """
        cases = [
            # devel package < normal package
            (
                Package(repo='1', family='1', name='a', version='2.1'),
                PackageStatus.NEWEST,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.0', flags=PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.1'),
                PackageStatus.NEWEST,
            ),
            (
                Package(repo='2', family='2', name='a', version='2.0', flags=PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 8
0
    def test_versionclass_branch_confusion(self) -> None:
        """Check versions that are DEVEL-flagged in one repo and unflagged in another.

        Repo '1' flags 2.1 and 2.0 as DEVEL, repo '2' carries the same
        versions without flags; both repos are expected to get DEVEL for
        2.1 and LEGACY for 2.0.
        """
        cases = [
            # the same version is both devel and default in different
            # packages; this should be aggregated consistently
            (
                Package(repo='1', family='1', name='a', version='2.1', flags=PackageFlags.DEVEL),
                PackageStatus.DEVEL,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.0', flags=PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.1'),
                PackageStatus.DEVEL,
            ),
            (
                Package(repo='2', family='2', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 9
0
    def _push_packages(self, projects: Iterable[List[Package]]) -> None:
        """Apply project changes to the database and record update statistics.

        For every updated project the package versions are filled in, the
        packages are added to the database and the project hash is updated;
        for every removed project its hash is removed. Per-repository field
        usage is collected from the updated packages and stored afterwards.
        Finally ``self._enable_partial_update`` and
        ``self._enable_explicit_analyze`` are set from the change fraction.

        Args:
            projects: Package lists handed to ``iter_changed_projects``.
        """
        self._logger.log('updating projects')

        used_fields_by_repo: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)
        stats = ProjectsChangeStatistics()

        last_logged_total = 0

        changed_projects = ChangedProjectsAccumulator(self._database)

        changes = iter_changed_projects(
            iter_project_hashes(self._database), projects, stats)
        for change in changes:
            if isinstance(change, UpdatedProject):
                fill_packageset_versions(change.packages)
                self._database.add_packages(change.packages)
                self._database.update_project_hash(change.effname, change.hash_)

                for pkg in change.packages:
                    used_fields_by_repo[pkg.repo].add(pkg)

            elif isinstance(change, RemovedProject):
                self._database.remove_project_hash(change.effname)

            changed_projects.add(change.effname)

            # Report progress roughly every 10000 processed projects, and
            # for as long as nothing has been reported yet.
            processed_since_log = stats.total - last_logged_total
            if processed_since_log >= 10000 or last_logged_total == 0:
                self._logger.log(f'  at "{change.effname}": {stats}')
                last_logged_total = stats.total

        changed_projects.flush()
        self._logger.log(f'  done: {stats}')

        self._logger.log('updating field statistics')
        for repo_name, repo_field_stats in used_fields_by_repo.items():
            used_fields = repo_field_stats.get_used_fields()
            self._database.update_repository_used_package_fields(repo_name, used_fields)

        # The fraction was picked experimentally: at a change size of around
        # 100k of 400k projects, a partial update of most binding tables takes
        # about as long as, or longer than, a full update. In practice this
        # matters little: a general update is around 0.001 (0.1%), a few cases
        # above 0.01 (1%) occur when new repositories are added, and otherwise
        # it is 1 (100%) when the Package format changes or the database is
        # filled for the first time.
        self._enable_partial_update = stats.change_fraction < 0.25

        # This threshold was picked randomly.
        self._enable_explicit_analyze = stats.change_fraction > 0.05
Esempio n. 10
0
    def _push_packages(self, projects: Iterable[List[Package]]) -> None:
        """Apply project changes to the database and record update statistics.

        For every updated project the package versions are filled in, the
        adapted packages are added to the database and the project hash is
        updated; for every removed project its hash is removed.
        Per-repository field and link type usage is collected from the
        updated packages and stored afterwards. Finally
        ``self._enable_explicit_analyze`` is set from the change fraction.

        Args:
            projects: Package lists handed to ``iter_changed_projects``.

        Raises:
            RuntimeError: If a single updated project has 20000 or more
                packages.
        """
        self._logger.log('updating projects')

        usage_by_repo: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)
        stats = ProjectsChangeStatistics()

        last_logged_total = 0

        changed_projects = ChangedProjectsAccumulator(self._database)

        changes = iter_changed_projects(
            iter_project_hashes(self._database), projects, stats)
        for change in changes:
            if isinstance(change, UpdatedProject):
                # Refuse implausibly large projects before touching the database.
                if len(change.packages) >= 20000:
                    raise RuntimeError(
                        'sanity check failed, more than 20k packages for a single project'
                    )

                fill_packageset_versions(change.packages)
                self._database.add_packages(map(adapt_package, change.packages))
                self._database.update_project_hash(change.effname, change.hash_)

                for pkg in change.packages:
                    usage_by_repo[pkg.repo].add(pkg)

            elif isinstance(change, RemovedProject):
                self._database.remove_project_hash(change.effname)

            changed_projects.add(change.effname)

            # Report progress roughly every 10000 processed projects, and
            # for as long as nothing has been reported yet.
            processed_since_log = stats.total - last_logged_total
            if processed_since_log >= 10000 or last_logged_total == 0:
                self._logger.log(f'  at "{change.effname}": {stats}')
                last_logged_total = stats.total

        changed_projects.flush()
        self._logger.log(f'  done: {stats}')

        self._logger.log('updating field statistics')
        for repo_name, repo_usage in usage_by_repo.items():
            self._database.update_repository_used_package_fields(
                repo_name,
                repo_usage.get_used_fields(),
                repo_usage.get_used_link_types(),
            )

        # This threshold was picked randomly.
        self._enable_explicit_analyze = stats.change_fraction > 0.05
Esempio n. 11
0
    def test_versionclass_branch_bounds(self) -> None:
        """Check IGNORE-flagged versions that sit between devel and stable versions.

        Repo '1' mixes devel, ignored and plain versions; repo '2' only has
        the plain 2.1. The expected status of each package is listed next
        to it.
        """
        cases = [
            (
                Package(repo='1', family='1', name='a', version='2.2beta1', flags=PackageFlags.DEVEL),
                PackageStatus.DEVEL,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.2alpha1.9999', flags=PackageFlags.IGNORE | PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),
            # see #338. There are multiple possible ways to classify an
            # ignored version between branches; we go with ignored for now
            (
                Package(repo='1', family='1', name='a', version='2.1.9999', flags=PackageFlags.IGNORE),
                PackageStatus.IGNORED,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.1'),
                PackageStatus.NEWEST,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.1'),
                PackageStatus.NEWEST,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 12
0
    def test_versionclass_ignoredignored(self) -> None:
        """Check that several ignored versions do not affect the first real package.

        Repo '1' has two IGNORE-flagged versions above 2.1 and 2.0, while
        repo '2' has 2.2. Both ignored packages are expected to be IGNORED,
        2.1 OUTDATED and 2.0 LEGACY.
        """
        cases = [
            (
                Package(repo='1', family='1', name='a', version='2.2.99999999', flags=PackageFlags.IGNORE),
                PackageStatus.IGNORED,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.2.9999', flags=PackageFlags.IGNORE),
                PackageStatus.IGNORED,
            ),
            # this one should be outdated, not legacy, i.e. ignored packages
            # should not be counted as the first packages in the branch
            (
                Package(repo='1', family='1', name='a', version='2.1'),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.2'),
                PackageStatus.NEWEST,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 13
0
    def test_versionclass_single_branch2(self) -> None:
        """Check classification when every package is DEVEL-flagged.

        Three repos carry only devel versions, some additionally flagged
        IGNORE. The expected status of each package is listed next to it.
        """
        cases = [
            # here we only have the devel branch
            (
                Package(repo='1', family='1', name='a', version='2.2rc1.20990101', flags=PackageFlags.IGNORE | PackageFlags.DEVEL),
                PackageStatus.IGNORED,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.2beta1', flags=PackageFlags.DEVEL),
                PackageStatus.DEVEL,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.2alpha1.20990101', flags=PackageFlags.IGNORE | PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),
            (
                Package(repo='1', family='1', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.2beta1', flags=PackageFlags.DEVEL),
                PackageStatus.DEVEL,
            ),
            (
                Package(repo='2', family='2', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='3', family='3', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL),
                PackageStatus.OUTDATED,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 14
0
def database_update(env: Environment) -> None:
    """Refill the main database with packages from all enabled repositories.

    Package sets are read from the repository processor, have their versions
    filled in, and are pushed to the database in batches of at least 10000
    packages. Per-repository field usage is collected along the way and
    stored once all packages are pushed. Afterwards the update is finished
    and the changes committed.
    """
    logger = env.get_main_logger()
    database = env.get_main_database_connection()

    logger.log('clearing the database')
    database.update_start()

    pending = []
    pushed_total = 0
    started_at = timer()

    logger.log('pushing packages to database')

    used_fields_by_repo: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)

    parsed_sets = env.get_repo_processor().iter_parsed(
        reponames=env.get_enabled_repo_names(), logger=logger)
    for packageset in parsed_sets:
        fill_packageset_versions(packageset)
        pending.extend(packageset)

        for pkg in packageset:
            used_fields_by_repo[pkg.repo].add(pkg)

        # Keep accumulating until the batch reaches the push threshold.
        if len(pending) < 10000:
            continue

        database.add_packages(pending)
        pushed_total += len(pending)
        pending = []

        progress_logger = logger.get_indented()
        rate = pushed_total / (timer() - started_at)
        progress_logger.log(
            'pushed {} packages, {:.2f} packages/second'.format(pushed_total, rate))

    # Push whatever did not fill a complete batch.
    database.add_packages(pending)

    for repo_name, repo_field_stats in used_fields_by_repo.items():
        used_fields = repo_field_stats.get_used_fields()
        database.update_repository_used_package_fields(repo_name, used_fields)

    logger.log('updating views')
    database.update_finish()

    logger.log('committing changes')
    database.commit()
Esempio n. 15
0
    def test_versionclass_flavors(self) -> None:
        """Check how package flavors affect the LEGACY classification.

        Repo '1' has the newest version 2.2. In repos '2' to '5' the lower
        version 2.0 is expected to be LEGACY only when it has the same
        flavors as the 2.1 package of that repo; when their flavors differ,
        both are expected to be OUTDATED.
        """
        cases = [
            (
                Package(repo='1', family='1', name='a', version='2.2'),
                PackageStatus.NEWEST,
            ),

            (
                Package(repo='2', family='2', name='a', version='2.1'),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='2', family='2', name='a', version='2.0'),
                PackageStatus.LEGACY,
            ),

            (
                Package(repo='3', family='3', name='a', version='2.1'),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='3', family='3', name='a', version='2.0', flavors=['foo']),
                PackageStatus.OUTDATED,
            ),

            (
                Package(repo='4', family='4', name='a', version='2.1', flavors=['foo']),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='4', family='4', name='a', version='2.0'),
                PackageStatus.OUTDATED,
            ),

            (
                Package(repo='5', family='5', name='a', version='2.1', flavors=['foo']),
                PackageStatus.OUTDATED,
            ),
            (
                Package(repo='5', family='5', name='a', version='2.0', flavors=['foo']),
                PackageStatus.LEGACY,
            ),
        ]

        fill_packageset_versions([pkg for pkg, _ in cases])

        for pkg, expected in cases:
            self.assertEqual(
                pkg.versionclass,
                expected,
                msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
            )
Esempio n. 16
0
def main() -> int:
    """Print the requested fields of parsed packages, one package per line.

    Logging uses ``StderrLogger`` unless ``options.logfile`` is set, in which
    case ``FileLogger`` is used instead. A field spec of ``'all'`` expands to
    ``effname``, ``repo`` and ``version`` followed by the remaining
    ``Package.__slots__``; any other value is split on commas. Attributes a
    package does not have are formatted as ``None``.

    Package sets whose first package has an effname below ``options.from_``
    are skipped, and iteration stops at the first set whose effname is above
    ``options.to``.

    Returns:
        Always ``0``.
    """
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    if options.fields == 'all':
        # Put the identifying fields first, then every other slot.
        leading_fields = ['effname', 'repo', 'version']
        remaining_fields = [
            slot for slot in Package.__slots__ if slot not in leading_fields
        ]
        options.fields = leading_fields + remaining_fields
    else:
        options.fields = options.fields.split(',')

    repoproc = RepositoryProcessor(
        RepositoryManager(options.repos_dir),
        options.statedir,
        options.parseddir,
    )

    logger.log('dumping...')
    parsed_sets = repoproc.iter_parsed(reponames=options.reponames, logger=logger)
    for packageset in parsed_sets:
        if options.from_ is not None and packageset[0].effname < options.from_:
            continue
        # NOTE(review): stopping here presumably relies on iter_parsed
        # yielding sets ordered by effname - confirm.
        if options.to is not None and packageset[0].effname > options.to:
            break

        fill_packageset_versions(packageset)

        # Shadow-only package sets are dumped only when options.all is set.
        if options.all or not packageset_is_shadow_only(packageset):
            for package in packageset:
                values = [
                    format_package_field(name, getattr(package, name, None))
                    for name in options.fields
                ]
                print(options.field_separator.join(values))

    return 0
Esempio n. 17
0
def update_project(database: Database, change: UpdatedProject) -> None:
    """Store one updated project in the database.

    Fills in the versions of the project's packages, adds the packages to
    the database and then records the project's hash under its effname.

    Args:
        database: Database that receives the packages and the hash.
        change: Updated project providing ``packages``, ``effname`` and
            ``hash``.
    """
    # Versions are filled in before the packages are handed to the database.
    fill_packageset_versions(change.packages)

    database.add_packages(change.packages)

    database.update_project_hash(change.effname, change.hash)
Esempio n. 18
0
    def test_versionclasses_big(self) -> None:
        """Check version classification on a large multi-repo package set.

        Repo '1' acts as the reference repo with devel, ignored and plain
        versions across two major series; the other repos each exercise one
        combination described by the comment above them. All packages are
        classified in a single fill_packageset_versions() call and every
        package's versionclass is compared with the status paired with it.
        """
        packages = [
            # Reference repo
            (Package(repo='1', family='1', name='a', version='2.2.20990101', flags=PackageFlags.IGNORE), PackageStatus.IGNORED),
            (Package(repo='1', family='1', name='a', version='2.2beta1', flags=PackageFlags.DEVEL), PackageStatus.DEVEL),
            (Package(repo='1', family='1', name='a', version='2.2alpha1.20990101', flags=PackageFlags.DEVEL | PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL), PackageStatus.LEGACY),
            # see #338. There are multiple possible ways to ignored version between branches,
            # we go with ignored for now
            (Package(repo='1', family='1', name='a', version='2.1.20990101', flags=PackageFlags.IGNORE), PackageStatus.IGNORED),
            (Package(repo='1', family='1', name='a', version='2.1'), PackageStatus.NEWEST),
            (Package(repo='1', family='1', name='a', version='2.0.20990101', flags=PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='2.0'), PackageStatus.LEGACY),

            (Package(repo='1', family='1', name='a', version='1.2.20990101', flags=PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.2beta1', flags=PackageFlags.DEVEL), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.2alpha1.20990101', flags=PackageFlags.DEVEL | PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.2alpha1', flags=PackageFlags.DEVEL), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.1.20990101', flags=PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.1'), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.0.20990101', flags=PackageFlags.IGNORE), PackageStatus.LEGACY),
            (Package(repo='1', family='1', name='a', version='1.0'), PackageStatus.LEGACY),

            # devel + legacy
            (Package(repo='2', family='2', name='a', version='2.2beta1', flags=PackageFlags.DEVEL), PackageStatus.DEVEL),
            (Package(repo='2', family='2', name='a', version='2.0'), PackageStatus.OUTDATED),

            # devel + newest + legacy
            (Package(repo='3', family='3', name='a', version='2.2beta1', flags=PackageFlags.DEVEL), PackageStatus.DEVEL),
            (Package(repo='3', family='3', name='a', version='2.1'), PackageStatus.NEWEST),
            (Package(repo='3', family='3', name='a', version='2.0'), PackageStatus.LEGACY),

            # newest + legacy
            (Package(repo='4', family='4', name='a', version='2.1'), PackageStatus.NEWEST),
            (Package(repo='4', family='4', name='a', version='2.0'), PackageStatus.LEGACY),

            # outdated + legacy
            (Package(repo='5', family='5', name='a', version='1.1'), PackageStatus.OUTDATED),
            (Package(repo='5', family='5', name='a', version='1.0'), PackageStatus.LEGACY),

            # outdated outdated/ignored + legacy
            (Package(repo='6', family='6', name='a', version='1.1.20990101', flags=PackageFlags.IGNORE), PackageStatus.OUTDATED),
            (Package(repo='6', family='6', name='a', version='1.1'), PackageStatus.LEGACY),
            (Package(repo='6', family='6', name='a', version='1.0'), PackageStatus.LEGACY),

            # ignored classes are unignored when they are backed with real classes
            (Package(repo='8', family='8', name='a', version='2.2beta1', flags=PackageFlags.DEVEL | PackageFlags.IGNORE), PackageStatus.DEVEL),

            (Package(repo='9', family='9', name='a', version='2.1', flags=PackageFlags.IGNORE), PackageStatus.NEWEST),

            (Package(repo='10', family='10', name='a', version='2.0', flags=PackageFlags.IGNORE), PackageStatus.OUTDATED),
            (Package(repo='10', family='10', name='a', version='1.9', flags=PackageFlags.IGNORE), PackageStatus.LEGACY),

            # version between newest and devel should be outdated when there's no devel
            (Package(repo='11', family='11', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL), PackageStatus.OUTDATED),

            # outdated in devel and normal at the same time
            (Package(repo='12', family='12', name='a', version='2.2alpha1', flags=PackageFlags.DEVEL), PackageStatus.OUTDATED),
            (Package(repo='12', family='12', name='a', version='2.0'), PackageStatus.OUTDATED),
        ]

        # Classify all packages together; statuses depend on the whole set.
        fill_packageset_versions([package for package, _ in packages])

        for package, expectedclass in packages:
            self.assertEqual(package.versionclass, expectedclass, msg='repo {}, pkg {}, ver {}'.format(package.repo, package.name, package.version))
Esempio n. 19
0
    def _check_fill_versions(self, *samples: PackageSample) -> None:
        """Fill versions for the packages of all samples, then check each sample.

        Args:
            *samples: Samples whose ``package`` is passed to
                ``fill_packageset_versions`` and whose ``check`` method is
                then called with this test case.
        """
        sample_packages = [item.package for item in samples]
        fill_packageset_versions(sample_packages)

        for item in samples:
            item.check(self)