def _check_transformer(self, rulestext: str, *samples: PackageSample) -> None:
    """Build a transformer from the given rules text, run every sample's
    package through it, then let each sample verify its expectations."""
    xformer = PackageTransformer(repomgr, rulestext=rulestext)
    for current in samples:
        xformer.process(current.package)
        current.check(self)
def test_match_verlonger(self):
    """A `verlonger` rule matches only packages whose version has more
    components than the given threshold."""
    long_version = Package(name='p1', version='1.0.0')
    short_version = Package(name='p2', version='1.0')

    transformer = PackageTransformer(rulestext='[ { verlonger: 2, setname: bar } ]')
    for pkg in (long_version, short_version):
        transformer.Process(pkg)

    self.assertEqual(long_version.effname, 'bar')
    self.assertEqual(short_version.effname, 'p2')
def test_match_namepat(self):
    """A `namepat` rule matches by regular expression on the package name."""
    matching = Package(name='p1', version='1.0')
    nonmatching = Package(name='p2', version='2.0')

    transformer = PackageTransformer(rulestext='[ { namepat: ".*1", setname: bar } ]')
    for pkg in (matching, nonmatching):
        transformer.Process(pkg)

    self.assertEqual(matching.effname, 'bar')
    self.assertEqual(nonmatching.effname, 'p2')
def test_match_name_multi(self):
    """A `name` rule with a list matches any of the listed names."""
    listed_a = Package(name='p1', version='1.0')
    listed_b = Package(name='p2', version='2.0')
    unlisted = Package(name='p3', version='2.0')

    transformer = PackageTransformer(rulestext='[ { name: [p1,p2], setname: bar } ]')
    for pkg in (listed_a, listed_b, unlisted):
        transformer.Process(pkg)

    self.assertEqual(listed_a.effname, 'bar')
    self.assertEqual(listed_b.effname, 'bar')
    self.assertEqual(unlisted.effname, 'p3')
def test_match_ver(self):
    """A `ver` rule matches the exact version, given as scalar or list."""
    first = Package(name='p1', version='1.0')
    second = Package(name='p2', version='2.0')
    third = Package(name='p3', version='3.0')

    transformer = PackageTransformer(
        rulestext='[ { ver: "1.0", setname: bar }, { ver: ["3.0"], setname: baz } ]')
    for pkg in (first, second, third):
        transformer.Process(pkg)

    self.assertEqual(first.effname, 'bar')
    self.assertEqual(second.effname, 'p2')
    self.assertEqual(third.effname, 'baz')
def test_match_category(self):
    """A `category` rule matches the package category, scalar or list."""
    in_foo = Package(name='p1', version='1.0', category='foo')
    in_bar = Package(name='p2', version='2.0', category='bar')
    in_baz = Package(name='p3', version='3.0', category='baz')

    transformer = PackageTransformer(
        rulestext='[ { category: foo, setname: quux }, { category: [ baz ] , setname: bat } ]')
    for pkg in (in_foo, in_bar, in_baz):
        transformer.Process(pkg)

    self.assertEqual(in_foo.effname, 'quux')
    self.assertEqual(in_bar.effname, 'p2')
    self.assertEqual(in_baz.effname, 'bat')
def check_transformer(rulestext: str, *samples: PackageSample) -> None:
    """Group samples by their package's repository, run each group through a
    transformer built for that repository, and verify every sample."""
    __tracebackhide__ = True

    ruleset = Ruleset(YamlConfig.from_text(rulestext))

    grouped = defaultdict(list)
    for item in samples:
        grouped[item.package.repo].append(item)

    for reponame, group in grouped.items():
        # one transformer per repository, scoped to just that repository
        xformer = PackageTransformer(ruleset, reponame, {reponame})
        for item in group:
            xformer.process(item.package)
            item.check_pytest()
def check_transformer(self, rulestext: str, *packages: Dict[str, Any]) -> None:
    """For each package spec dict, keys prefixed with 'expect_' are assertions
    on the post-transform package; the rest are Package constructor kwargs."""
    transformer = PackageTransformer(repomgr, rulestext=rulestext)
    for spec in packages:
        expectations = {key[7:]: val for key, val in spec.items() if key.startswith('expect_')}
        construction = {key: val for key, val in spec.items() if not key.startswith('expect_')}

        pkg = Package(**construction)
        transformer.process(pkg)

        for attr, expected in expectations.items():
            self.assertEqual(pkg.__dict__[attr], expected)
def Main():
    """Entry point: fetch/parse/store repository data per CLI options.

    Returns 0 on success, 1 if some repositories failed to update.
    """
    opts = ParseArguments()

    repo_manager = RepositoryManager(opts.repos_dir)
    repo_processor = RepositoryProcessor(repo_manager, opts.statedir)

    # listing mode short-circuits everything else
    if opts.list:
        print('\n'.join(repo_manager.GetNames(reponames=opts.reponames)))
        return 0

    transformer = PackageTransformer(repo_manager, opts.rules_dir)

    logger = StderrLogger()
    if opts.logfile:
        logger = FileLogger(opts.logfile)

    updated = []
    not_updated = []

    start = timer()

    if opts.fetch or opts.parse or opts.reprocess:
        updated, not_updated = ProcessRepositories(options=opts, logger=logger, repoproc=repo_processor, transformer=transformer, reponames=repo_manager.GetNames(reponames=opts.reponames))

    if opts.initdb or opts.database:
        ProcessDatabase(options=opts, logger=logger, repoproc=repo_processor, repositories_updated=updated)

    # unmatched-rule stats are only reliable when every repository updated
    if (opts.parse or opts.reprocess) and opts.show_unmatched_rules:
        ShowUnmatchedRules(options=opts, logger=logger, transformer=transformer, reliable=not_updated == [])

    logger.Log('total time taken: {:.2f} seconds'.format(timer() - start))

    return 1 if not_updated else 0
def test_unignorever(self):
    """`unignorever` clears the flag a preceding `ignorever` rule set."""
    pkg = Package(name='foo', version='1.0')
    self.assertEqual(pkg.ignoreversion, False)

    transformer = PackageTransformer(rulestext='[ { ignorever: true }, { unignorever: true } ]')
    transformer.Process(pkg)

    self.assertEqual(pkg.ignoreversion, False)
def Main():
    """Entry point: parse CLI arguments and run the requested actions.

    Returns 0 on success, 1 if some repositories failed to update.
    """
    argparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument('-S', '--statedir', default=repology.config.STATE_DIR, help='path to directory with repository state')
    argparser.add_argument('-L', '--logfile', help='path to log file (log to stderr by default)')
    argparser.add_argument('-E', '--repos-dir', default=repology.config.REPOS_DIR, help='path to directory with repository configs')
    argparser.add_argument('-U', '--rules-dir', default=repology.config.RULES_DIR, help='path to directory with rules')
    argparser.add_argument('-D', '--dsn', default=repology.config.DSN, help='database connection params')

    actions = argparser.add_argument_group('Actions')
    actions.add_argument('-l', '--list', action='store_true', help='list repositories repology will work on')
    actions.add_argument('-f', '--fetch', action='store_true', help='fetching repository data')
    actions.add_argument('-u', '--update', action='store_true', help='when fetching, allow updating (otherwise, only fetch once)')
    actions.add_argument('-p', '--parse', action='store_true', help='parse, process and serialize repository data')

    # XXX: this is dangerous as long as ignored packages are removed from dumps
    actions.add_argument('-P', '--reprocess', action='store_true', help='reprocess repository data')
    actions.add_argument('-i', '--initdb', action='store_true', help='(re)initialize database schema')
    actions.add_argument('-d', '--database', action='store_true', help='store in the database')
    actions.add_argument('-r', '--show-unmatched-rules', action='store_true', help='show unmatched rules when parsing')

    argparser.add_argument('reponames', default=repology.config.REPOSITORIES, metavar='repo|tag', nargs='*', help='repository or tag name to process')

    options = argparser.parse_args()

    repoman = RepositoryManager(options.repos_dir, options.statedir)

    # listing mode short-circuits everything else
    if options.list:
        print('\n'.join(sorted(repoman.GetNames(reponames=options.reponames))))
        return 0

    transformer = PackageTransformer(options.rules_dir)

    logger = StderrLogger()
    if options.logfile:
        logger = FileLogger(options.logfile)

    repositories_updated = []
    repositories_not_updated = []

    start = timer()

    if options.fetch or options.parse or options.reprocess:
        repositories_updated, repositories_not_updated = ProcessRepositories(options=options, logger=logger, repoman=repoman, transformer=transformer)

    if options.initdb or options.database:
        ProcessDatabase(options=options, logger=logger, repoman=repoman, repositories_updated=repositories_updated)

    # unmatched-rule stats are only reliable when every repository updated
    if (options.parse or options.reprocess) and options.show_unmatched_rules:
        ShowUnmatchedRules(options=options, logger=logger, transformer=transformer, reliable=repositories_not_updated == [])

    logger.Log('total time taken: {:.2f} seconds'.format(timer() - start))

    return 1 if repositories_not_updated else 0
def process_repositories(env: Environment) -> None:
    """Fetch and parse every processable repository.

    The fetch step honors each repository's configured update period; the
    parse step is skipped when neither the source data nor the ruleset
    changed since the last successful run (tracked via a per-repository
    ruleset hash in the database).
    """
    database = env.get_main_database_connection()

    for reponame in env.get_processable_repo_names():
        repository = env.get_repo_manager().get_repository(reponame)
        update_period = repository.update_period

        since_last_fetched = database.get_repository_since_last_fetched(reponame)
        skip_fetch = since_last_fetched is not None and since_last_fetched < update_period

        if env.get_options().fetch and skip_fetch:
            # fix: dropped a dead `.format(reponame)` chained onto this
            # already-evaluated f-string (no-op at best, TypeError/KeyError
            # risk if the formatted text ever contained stray braces)
            env.get_main_logger().log(
                f'not fetching {reponame} to honor update period ({update_period - since_last_fetched} left)')
        elif env.get_options().fetch:
            env.get_main_logger().log('fetching {}'.format(reponame))

            # make sure hash is reset until it's known that the update did not introduce any changes
            old_hash = database.get_repository_ruleset_hash(reponame)
            database.update_repository_ruleset_hash(reponame, None)
            database.commit()

            # NOTE(review): `fetch` appears to be a counted option; >= 1
            # allows updating an existing checkout — confirm intended threshold
            allow_update = env.get_options().fetch >= 1

            have_changes = False
            try:
                with LogRunManager(env.get_logging_database_connection(), reponame, 'fetch') as runlogger:
                    have_changes = env.get_repo_processor().fetch([reponame], update=allow_update, logger=runlogger)
                    if not have_changes:
                        runlogger.set_no_changes()
                env.get_main_logger().get_indented().log('done' + ('' if have_changes else ' (no changes)'))
            except KeyboardInterrupt:
                raise
            except Exception as e:
                env.get_main_logger().get_indented().log('failed: ' + str(e), severity=Logger.ERROR)
                if env.get_options().fatal:
                    raise

            if not have_changes:
                # no new data: the previously stored hash is still valid
                database.update_repository_ruleset_hash(reponame, old_hash)

            database.mark_repository_fetched(reponame)
            database.commit()

        if env.get_options().parse:
            ruleset = env.get_ruleset()
            ruleset_hash_changed = ruleset.get_hash() != database.get_repository_ruleset_hash(reponame)

            if ruleset_hash_changed:
                env.get_main_logger().log('parsing {}'.format(reponame))
            elif env.get_options().parse >= 2:
                # `-p -p` (or equivalent) forces a reparse even without changes
                env.get_main_logger().log('parsing {} (forced)'.format(reponame))
            else:
                env.get_main_logger().log('not parsing {} due to no data changes since last run'.format(reponame))
                continue

            # likewise, make sure hash is reset until the source is successfully reparsed
            database.update_repository_ruleset_hash(reponame, None)
            database.commit()

            try:
                transformer = PackageTransformer(ruleset, reponame, repository.ruleset)
                maintainermgr = env.get_maintainer_manager()

                with LogRunManager(env.get_logging_database_connection(), reponame, 'parse') as runlogger:
                    env.get_repo_processor().parse([reponame], transformer=transformer, maintainermgr=maintainermgr, logger=runlogger)

                env.get_main_logger().get_indented().log('done')

                transformer.finalize()
            except KeyboardInterrupt:
                raise
            except Exception as e:
                env.get_main_logger().get_indented().log('failed: ' + str(e), severity=Logger.ERROR)
                if env.get_options().fatal:
                    raise

            database.update_repository_ruleset_hash(reponame, ruleset.get_hash())
            database.mark_repository_parsed(reponame)
            database.commit()
def test_tolowername(self):
    """`tolowername` lowercases effname while leaving name untouched."""
    pkg = Package(name='fOoBaR', version='1.0')

    transformer = PackageTransformer(rulestext='[ { tolowername: true } ]')
    transformer.Process(pkg)

    self.assertEqual(pkg.name, 'fOoBaR')
    self.assertEqual(pkg.effname, 'foobar')
def test_setname_subst(self):
    """`$0` in a setname value substitutes the original package name."""
    pkg = Package(name='foo', version='1.0')

    transformer = PackageTransformer(rulestext='[ { setname: "bar_$0" } ]')
    transformer.Process(pkg)

    self.assertEqual(pkg.name, 'foo')
    self.assertEqual(pkg.effname, 'bar_foo')
def iter_parse(
        self, path: str, factory: PackageFactory, transformer: PackageTransformer
) -> Generator[PackageMaker, None, None]:
    """Yield one PackageMaker per (entity, version, guessed project name).

    For every entry read from *path*, each version is emitted as a separate
    package; project names are guessed by pushing per-distro package names
    through *transformer* and collecting the resulting effnames.
    """
    for packagedata in _iter_packages(path):
        # 'project' is a URL; take only its trailing ID component
        entity = packagedata['project'].rsplit('/', 1)[-1]

        pkg = factory.begin(entity)
        pkg.set_extra_field('entity', entity)

        # generate a package for each version
        # NOTE(review): versions come as a ', '-joined string; each item may
        # carry '|'-separated single-character flags — confirm flag meanings
        # (apparently: U=devel, o/O=OS mismatch/match, p/P=platform) upstream
        for version in sorted(packagedata['versions'].split(', ')):
            version, *flags = version.split('|')

            verpkg = pkg.clone(append_ident=' ' + version)

            is_devel = 'U' in flags
            # foreign release: lowercase flag present without its uppercase counterpart
            is_foreign_os_release = 'o' in flags and 'O' not in flags
            is_foreign_platform_release = 'p' in flags and 'P' not in flags

            if is_foreign_os_release:
                verpkg.log('version {} skipped due to bad OS'.format(version), severity=Logger.NOTICE)
                continue

            if is_foreign_platform_release:
                verpkg.log('version {} skipped due to bad Platform'.format(version), severity=Logger.NOTICE)
                continue

            verpkg.set_flags(PackageFlags.devel, is_devel)
            verpkg.set_version(version)
            verpkg.set_name(packagedata['projectLabel'])

            if 'projectDescription' in packagedata:
                verpkg.set_summary(packagedata['projectDescription'])

            if packagedata['licenses']:
                verpkg.add_licenses(packagedata['licenses'].split(', '))

            if packagedata['websites']:
                verpkg.add_homepages(packagedata['websites'].split(', '))

            # extract project name(s) from packages information: try donor
            # repos in order, keeping the first one that yields any names
            names = set()
            for fieldname, fakerepo in _DONOR_REPOS:
                for name in packagedata[fieldname].split(', '):
                    if name:
                        # run a throwaway clone through the ruleset as if it
                        # came from the donor repo, to obtain its effname
                        fakepkgmaker = verpkg.clone()
                        fakepkgmaker.set_name(name)
                        fakepkg = fakepkgmaker.unwrap()
                        fakepkg.repo = fakerepo
                        transformer.process(fakepkg)
                        names.add(fakepkg.effname)
                if names:
                    break

            if not names:
                verpkg.log('could not guess project name (no Arch/AUR packages defined?)', severity=Logger.ERROR)

            # NOTE(review): `fakerepo` here is the loop variable left over
            # from the search above (the donor repo that produced the names)
            if len(names) > 1:
                verpkg.log('multiple project names extracted (from {}): {}'.format(fakerepo, ','.join(names)), severity=Logger.WARNING)

            # generate package for each guessed name; in most cases, these will be merged anyway
            for name in names:
                namepkg = verpkg.clone()
                namepkg.set_basename(name)
                yield namepkg
def get_package_transformer(self) -> PackageTransformer:
    """Build a PackageTransformer from the repository manager and the
    configured rules directory."""
    repo_manager = self.get_repo_manager()
    return PackageTransformer(repo_manager, self.options.rules_dir)
def test_last(self):
    """A matching rule with `last: true` stops processing, so the
    following setname rule never applies."""
    pkg = Package(name='foo', version='1.0')

    transformer = PackageTransformer(rulestext='[ { last: true }, { setname: "bar" } ]')
    transformer.Process(pkg)

    self.assertEqual(pkg.effname, 'foo')