def main() -> int:
    """Dump selected fields of parsed packages to stdout, one package per line.

    Log messages go to stderr unless ``options.logfile`` is set, in which
    case a file logger is used. ``options.fields`` is either the literal
    ``'all'`` (every instance attribute of a default-constructed ``Package``,
    sorted by name) or a comma-separated list of attribute names.

    Returns:
        Always 0.
    """
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    # Normalize the field selection into a list of attribute names.
    if options.fields == 'all':
        options.fields = sorted(Package().__dict__)
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames, logger=logger):
        fill_packageset_versions(packageset)

        # Shadow-only package sets are printed only when explicitly requested.
        if options.all or not packageset_is_shadow_only(packageset):
            for package in packageset:
                columns = [
                    format_package_field(name, getattr(package, name))
                    for name in options.fields
                ]
                print(options.field_separator.join(columns))

    return 0
def database_update(env: Environment) -> None:
    """Refill the main database from all parsed package sets.

    Package sets from the enabled repositories are version-classified and
    pushed to the database in batches of at least 10000 packages, then the
    update is finished and committed.

    Args:
        env: Provides the main logger, the main database connection, the
            repository processor and the list of enabled repository names.
    """
    logger = env.get_main_logger()
    database = env.get_main_database_connection()

    logger.log('clearing the database')
    database.update_start()

    pending = []
    pushed_total = 0
    started_at = timer()

    logger.log('pushing packages to database')
    processor = env.get_repo_processor()
    for packageset in processor.iter_parsed(reponames=env.get_enabled_repo_names(), logger=logger):
        fill_packageset_versions(packageset)
        pending.extend(packageset)

        if len(pending) < 10000:
            continue

        # Batch is large enough: flush it and report throughput so far.
        database.add_packages(pending)
        pushed_total += len(pending)
        pending = []
        indented = logger.get_indented()
        rate = pushed_total / (timer() - started_at)
        indented.log('pushed {} packages, {:.2f} packages/second'.format(pushed_total, rate))

    # Flush whatever did not fill a complete batch.
    database.add_packages(pending)

    logger.log('updating views')
    database.update_finish()

    logger.log('committing changes')
    database.commit()
def test_suppress_ignored(self) -> None:
    """Two IGNORE-flagged packages of one family: expect UNIQUE and OUTDATED."""
    ignore = PackageFlags.IGNORE
    cases = [
        (
            Package(repo='1', family='1', name='a', version='2.0', flags=ignore),
            PackageStatus.UNIQUE,
        ),
        (
            Package(repo='2', family='1', name='a', version='1.0', flags=ignore),
            PackageStatus.OUTDATED,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_unique(self) -> None:
    """Three repos sharing one family: the two highest versions are expected UNIQUE."""
    cases = [
        (
            Package(repo='1', family='1', name='a', version='2.0alpha1', flags=PackageFlags.DEVEL),
            PackageStatus.UNIQUE,
        ),
        (
            Package(repo='2', family='1', name='a', version='1.2'),
            PackageStatus.UNIQUE,
        ),
        (
            Package(repo='3', family='1', name='a', version='1.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='3', family='1', name='a', version='1.0'),
            PackageStatus.LEGACY,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_sameversionsamestatus(self) -> None:
    """Two identical-version packages in one repo must receive the same status."""
    cases = [
        (
            Package(repo='2', family='2', name='a', version='2.2'),
            PackageStatus.NEWEST,
        ),
        # Neither of these duplicates may push the other one down to legacy;
        # both are expected to be outdated.
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.OUTDATED,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_unignored_really_unignored(self) -> None:
    """An IGNORE-flagged version that another repo has unflagged is expected NEWEST."""
    cases = [
        # The ignored package must be fully unignored because repo 2 carries
        # the very same version without the ignore flag.
        (
            Package(repo='1', family='1', name='a', version='2.1', flags=PackageFlags.IGNORE),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_devel_lower_than_default(self) -> None:
    """A DEVEL-flagged version below the regular version is expected LEGACY."""
    devel = PackageFlags.DEVEL
    cases = [
        # In both repos the devel package is older than the normal one.
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.0', flags=devel),
            PackageStatus.LEGACY,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_branch_confusion(self) -> None:
    """The same version flagged DEVEL in one repo and unflagged in another."""
    devel = PackageFlags.DEVEL
    cases = [
        # Version 2.1 is devel in repo 1 but default in repo 2; the result
        # must be aggregated consistently across both repos.
        (
            Package(repo='1', family='1', name='a', version='2.1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.1'),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def _push_packages(self, projects: Iterable[List[Package]]) -> None:
    """Apply project changes to the database and record per-repo field usage.

    Changes are produced by ``iter_changed_projects`` from the project hashes
    read from the database and the incoming ``projects``. Updated projects
    are version-classified and stored together with their new hash; removed
    projects have their hash dropped. Afterwards the used package fields of
    each touched repository are written, and the partial-update and
    explicit-analyze switches are derived from the fraction of changed
    projects.

    Args:
        projects: Iterable of package lists passed on to
            ``iter_changed_projects``.
    """
    self._logger.log('updating projects')

    repo_field_stats: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)
    stats = ProjectsChangeStatistics()
    last_logged_total = 0
    accumulator = ChangedProjectsAccumulator(self._database)

    stored_hashes = iter_project_hashes(self._database)
    for change in iter_changed_projects(stored_hashes, projects, stats):
        if isinstance(change, UpdatedProject):
            packages = change.packages
            fill_packageset_versions(packages)

            self._database.add_packages(packages)
            self._database.update_project_hash(change.effname, change.hash_)

            for package in packages:
                repo_field_stats[package.repo].add(package)
        elif isinstance(change, RemovedProject):
            self._database.remove_project_hash(change.effname)

        accumulator.add(change.effname)

        # Report progress while nothing has been reported yet, and then
        # every 10000 processed projects.
        progress_due = stats.total - last_logged_total >= 10000 or last_logged_total == 0
        if progress_due:
            self._logger.log(f' at "{change.effname}": {stats}')
            last_logged_total = stats.total

    accumulator.flush()
    self._logger.log(f' done: {stats}')

    self._logger.log('updating field statistics')
    for repo in repo_field_stats:
        used_fields = repo_field_stats[repo].get_used_fields()
        self._database.update_repository_used_package_fields(repo, used_fields)

    fraction = stats.change_fraction

    # The threshold was picked experimentally: with roughly 100k of 400k
    # projects changed, a partial update of most binding tables takes about
    # as long as (or longer than) a full update. In practice this rarely
    # matters: a regular update changes around 0.001 (0.1%), a few cases
    # above 0.01 (1%) happen when new repositories are added, and otherwise
    # the fraction is 1 (100%) when the Package format changes or the
    # database is filled for the first time.
    self._enable_partial_update = fraction < 0.25

    # This threshold was picked randomly.
    self._enable_explicit_analyze = fraction > 0.05
def _push_packages(self, projects: Iterable[List[Package]]) -> None:
    """Apply project changes to the database and record per-repo field usage.

    Changes are produced by ``iter_changed_projects`` from the project hashes
    read from the database and the incoming ``projects``. Updated projects
    are version-classified, adapted with ``adapt_package`` and stored
    together with their new hash; removed projects have their hash dropped.
    Afterwards the used package fields and link types of each touched
    repository are written, and the explicit-analyze switch is derived from
    the fraction of changed projects.

    Args:
        projects: Iterable of package lists passed on to
            ``iter_changed_projects``.

    Raises:
        RuntimeError: If a single updated project has 20000 or more packages.
    """
    self._logger.log('updating projects')

    repo_field_stats: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)
    stats = ProjectsChangeStatistics()
    last_logged_total = 0
    accumulator = ChangedProjectsAccumulator(self._database)

    stored_hashes = iter_project_hashes(self._database)
    for change in iter_changed_projects(stored_hashes, projects, stats):
        if isinstance(change, UpdatedProject):
            packages = change.packages

            # Guard against an absurdly large project before doing any work on it.
            if len(packages) >= 20000:
                raise RuntimeError(
                    'sanity check failed, more than 20k packages for a single project'
                )

            fill_packageset_versions(packages)

            adapted = map(adapt_package, packages)
            self._database.add_packages(adapted)
            self._database.update_project_hash(change.effname, change.hash_)

            for package in packages:
                repo_field_stats[package.repo].add(package)
        elif isinstance(change, RemovedProject):
            self._database.remove_project_hash(change.effname)

        accumulator.add(change.effname)

        # Report progress while nothing has been reported yet, and then
        # every 10000 processed projects.
        progress_due = stats.total - last_logged_total >= 10000 or last_logged_total == 0
        if progress_due:
            self._logger.log(f' at "{change.effname}": {stats}')
            last_logged_total = stats.total

    accumulator.flush()
    self._logger.log(f' done: {stats}')

    self._logger.log('updating field statistics')
    for repo in repo_field_stats:
        repo_stats = repo_field_stats[repo]
        self._database.update_repository_used_package_fields(
            repo,
            repo_stats.get_used_fields(),
            repo_stats.get_used_link_types(),
        )

    # This threshold was picked randomly.
    self._enable_explicit_analyze = stats.change_fraction > 0.05
def test_versionclass_branch_bounds(self) -> None:
    """IGNORE-flagged versions lying between and within the devel/default branches."""
    devel = PackageFlags.DEVEL
    ignore = PackageFlags.IGNORE
    cases = [
        (
            Package(repo='1', family='1', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2alpha1.9999', flags=ignore | devel),
            PackageStatus.LEGACY,
        ),
        # See #338: an ignored version sitting between branches could be
        # classified in several ways; for now it is expected to stay ignored.
        (
            Package(repo='1', family='1', name='a', version='2.1.9999', flags=ignore),
            PackageStatus.IGNORED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_ignoredignored(self) -> None:
    """Two stacked IGNORE-flagged versions above the real versions of a repo."""
    ignore = PackageFlags.IGNORE
    cases = [
        (
            Package(repo='1', family='1', name='a', version='2.2.99999999', flags=ignore),
            PackageStatus.IGNORED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2.9999', flags=ignore),
            PackageStatus.IGNORED,
        ),
        # Expected outdated rather than legacy: ignored packages must not
        # count as the first packages in the branch.
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.2'),
            PackageStatus.NEWEST,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def test_versionclass_single_branch2(self) -> None:
    """Package set in which every package carries the DEVEL flag."""
    devel = PackageFlags.DEVEL
    ignore = PackageFlags.IGNORE
    cases = [
        # Only the devel branch is present in this set.
        (
            Package(repo='1', family='1', name='a', version='2.2rc1.20990101', flags=ignore | devel),
            PackageStatus.IGNORED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2alpha1.20990101', flags=ignore | devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='3', family='3', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.OUTDATED,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def database_update(env: Environment) -> None:
    """Refill the main database from all parsed package sets.

    Package sets from the enabled repositories are version-classified and
    pushed to the database in batches of at least 10000 packages. Field
    usage is accumulated per repository along the way and written once all
    packages are pushed; then the update is finished and committed.

    Args:
        env: Provides the main logger, the main database connection, the
            repository processor and the list of enabled repository names.
    """
    logger = env.get_main_logger()
    database = env.get_main_database_connection()

    logger.log('clearing the database')
    database.update_start()

    pending = []
    pushed_total = 0
    started_at = timer()

    logger.log('pushing packages to database')

    repo_field_stats: Dict[str, FieldStatistics] = defaultdict(FieldStatistics)

    processor = env.get_repo_processor()
    for packageset in processor.iter_parsed(reponames=env.get_enabled_repo_names(), logger=logger):
        fill_packageset_versions(packageset)
        pending.extend(packageset)

        for package in packageset:
            repo_field_stats[package.repo].add(package)

        if len(pending) < 10000:
            continue

        # Batch is large enough: flush it and report throughput so far.
        database.add_packages(pending)
        pushed_total += len(pending)
        pending = []
        indented = logger.get_indented()
        rate = pushed_total / (timer() - started_at)
        indented.log('pushed {} packages, {:.2f} packages/second'.format(pushed_total, rate))

    # Flush whatever did not fill a complete batch.
    database.add_packages(pending)

    for repo in repo_field_stats:
        used_fields = repo_field_stats[repo].get_used_fields()
        database.update_repository_used_package_fields(repo, used_fields)

    logger.log('updating views')
    database.update_finish()

    logger.log('committing changes')
    database.commit()
def test_versionclass_flavors(self) -> None:
    """Expected statuses for repos whose older and newer versions differ in flavors."""
    cases = [
        (
            Package(repo='1', family='1', name='a', version='2.2'),
            PackageStatus.NEWEST,
        ),
        # Repo 2: no flavors at all.
        (
            Package(repo='2', family='2', name='a', version='2.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        # Repo 3: only the older version has a flavor.
        (
            Package(repo='3', family='3', name='a', version='2.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='3', family='3', name='a', version='2.0', flavors=['foo']),
            PackageStatus.OUTDATED,
        ),
        # Repo 4: only the newer version has a flavor.
        (
            Package(repo='4', family='4', name='a', version='2.1', flavors=['foo']),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='4', family='4', name='a', version='2.0'),
            PackageStatus.OUTDATED,
        ),
        # Repo 5: both versions share the same flavor.
        (
            Package(repo='5', family='5', name='a', version='2.1', flavors=['foo']),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='5', family='5', name='a', version='2.0', flavors=['foo']),
            PackageStatus.LEGACY,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def main() -> int:
    """Dump selected fields of parsed packages to stdout, one package per line.

    Log messages go to stderr unless ``options.logfile`` is set, in which
    case a file logger is used. ``options.fields`` is either the literal
    ``'all'`` (``effname``, ``repo``, ``version`` first, followed by the
    remaining ``Package.__slots__`` in declaration order) or a
    comma-separated list of attribute names. Attributes missing on a package
    are formatted as ``None``.

    Package sets whose first package's ``effname`` is below ``options.from_``
    are skipped; iteration stops at the first one above ``options.to``.

    Returns:
        Always 0.
    """
    options = parse_arguments()

    logger: Logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    # Normalize the field selection into a list of attribute names.
    if options.fields == 'all':
        leading = ['effname', 'repo', 'version']
        remaining = [slot for slot in Package.__slots__ if slot not in leading]
        options.fields = leading + remaining
    else:
        options.fields = options.fields.split(',')

    repomgr = RepositoryManager(options.repos_dir)
    repoproc = RepositoryProcessor(repomgr, options.statedir, options.parseddir)

    lower_bound = options.from_
    upper_bound = options.to

    logger.log('dumping...')
    for packageset in repoproc.iter_parsed(reponames=options.reponames, logger=logger):
        # Range filtering uses the effname of the first package in the set.
        if lower_bound is not None and packageset[0].effname < lower_bound:
            continue
        if upper_bound is not None and packageset[0].effname > upper_bound:
            break

        fill_packageset_versions(packageset)

        # Shadow-only package sets are printed only when explicitly requested.
        if options.all or not packageset_is_shadow_only(packageset):
            for package in packageset:
                columns = [
                    format_package_field(name, getattr(package, name, None))
                    for name in options.fields
                ]
                print(options.field_separator.join(columns))

    return 0
def update_project(database: Database, change: UpdatedProject) -> None:
    """Store an updated project's packages and its new hash.

    Args:
        database: Receives the packages and the project hash.
        change: Supplies ``packages``, ``effname`` and ``hash``.
    """
    packages = change.packages

    # Version classes have to be filled in before the packages are stored.
    fill_packageset_versions(packages)
    database.add_packages(packages)

    database.update_project_hash(change.effname, change.hash)
def test_versionclasses_big(self) -> None:
    """Large combined scenario covering eleven repos with mixed flags and versions."""
    devel = PackageFlags.DEVEL
    ignore = PackageFlags.IGNORE
    cases = [
        # Reference repo
        (
            Package(repo='1', family='1', name='a', version='2.2.20990101', flags=ignore),
            PackageStatus.IGNORED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2alpha1.20990101', flags=devel | ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.LEGACY,
        ),
        # See #338: an ignored version sitting between branches could be
        # classified in several ways; for now it is expected to stay ignored.
        (
            Package(repo='1', family='1', name='a', version='2.1.20990101', flags=ignore),
            PackageStatus.IGNORED,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0.20990101', flags=ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.2.20990101', flags=ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.2beta1', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.2alpha1.20990101', flags=devel | ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.2alpha1', flags=devel),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.1.20990101', flags=ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.1'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.0.20990101', flags=ignore),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='1', family='1', name='a', version='1.0'),
            PackageStatus.LEGACY,
        ),
        # devel + legacy
        (
            Package(repo='2', family='2', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='2', family='2', name='a', version='2.0'),
            PackageStatus.OUTDATED,
        ),
        # devel + newest + legacy
        (
            Package(repo='3', family='3', name='a', version='2.2beta1', flags=devel),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='3', family='3', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='3', family='3', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        # newest + legacy
        (
            Package(repo='4', family='4', name='a', version='2.1'),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='4', family='4', name='a', version='2.0'),
            PackageStatus.LEGACY,
        ),
        # outdated + legacy
        (
            Package(repo='5', family='5', name='a', version='1.1'),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='5', family='5', name='a', version='1.0'),
            PackageStatus.LEGACY,
        ),
        # outdated outdated/ignored + legacy
        (
            Package(repo='6', family='6', name='a', version='1.1.20990101', flags=ignore),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='6', family='6', name='a', version='1.1'),
            PackageStatus.LEGACY,
        ),
        (
            Package(repo='6', family='6', name='a', version='1.0'),
            PackageStatus.LEGACY,
        ),
        # Ignored classes become unignored once real classes back them.
        (
            Package(repo='8', family='8', name='a', version='2.2beta1', flags=devel | ignore),
            PackageStatus.DEVEL,
        ),
        (
            Package(repo='9', family='9', name='a', version='2.1', flags=ignore),
            PackageStatus.NEWEST,
        ),
        (
            Package(repo='10', family='10', name='a', version='2.0', flags=ignore),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='10', family='10', name='a', version='1.9', flags=ignore),
            PackageStatus.LEGACY,
        ),
        # A version between newest and devel should be outdated when the
        # repo has no devel.
        (
            Package(repo='11', family='11', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.OUTDATED,
        ),
        # Outdated in devel and normal at the same time.
        (
            Package(repo='12', family='12', name='a', version='2.2alpha1', flags=devel),
            PackageStatus.OUTDATED,
        ),
        (
            Package(repo='12', family='12', name='a', version='2.0'),
            PackageStatus.OUTDATED,
        ),
    ]

    fill_packageset_versions([case[0] for case in cases])

    for pkg, expected in cases:
        self.assertEqual(
            pkg.versionclass,
            expected,
            msg='repo {}, pkg {}, ver {}'.format(pkg.repo, pkg.name, pkg.version),
        )
def _check_fill_versions(self, *samples: PackageSample) -> None:
    """Fill version classes for the samples' packages, then check every sample.

    Args:
        *samples: Each sample supplies a ``package`` and a ``check`` method
            that is called with this test case.
    """
    packageset = []
    for sample in samples:
        packageset.append(sample.package)

    fill_packageset_versions(packageset)

    for sample in samples:
        sample.check(self)