def transport_response_simplified(self, response):
    """Decompress and parse the package index fetched in `transport_request`.

    For each package, we "pivot" the file list entries (Files,
    Checksums-Sha1, Checksums-Sha256), to return a files dict mapping
    filenames to their checksums.
    """
    raw = response.raw
    # Decompress on the fly when a decompressor is configured.
    stream = self.decompressor(raw) if self.decompressor else raw
    for paragraph in Sources.iter_paragraphs(stream.readlines()):
        file_infos = defaultdict(dict)
        for field in paragraph._multivalued_fields:
            # "checksums-<algo>" fields carry that algorithm's sums;
            # every other multivalued file field carries MD5 sums.
            if field.startswith('checksums-'):
                algo = field[len('checksums-'):]
            else:
                algo = 'md5sum'
            for item in paragraph.get(field, []):
                info = file_infos[item['name']]
                info['name'] = item['name']
                info['size'] = int(item['size'], 10)
                info[algo] = item[algo]
        yield {
            'name': paragraph['Package'],
            'version': paragraph['Version'],
            'directory': paragraph['Directory'],
            'files': file_infos,
        }
def extract_deb_packages(data, packages):
    """Extract package metadata from debian Packages file.

    Parses each stanza of the (already fetched) index, builds a
    PackageString for it and adds it to the *packages* set, emitting
    progress signals along the way.
    """
    extracted = extract(data)
    package_re = re.compile('^Package: ', re.M)
    plen = len(package_re.findall(extracted))
    if plen > 0:
        ptext = 'Extracting packages: '
        progress_info_s.send(sender=None, ptext=ptext, plen=plen)
        sio = StringIO(extracted)
        for i, stanza in enumerate(Sources.iter_paragraphs(sio)):
            fullversion = Version(stanza['version'])
            arch = stanza['architecture']
            name = stanza['package']
            # Use BaseVersion's public properties instead of the
            # name-mangled private attributes (_BaseVersion__epoch etc.),
            # which break if python-debian changes its internals.
            epoch = fullversion.epoch
            if epoch is None:
                epoch = ''
            version = fullversion.upstream_version
            release = fullversion.debian_revision
            if release is None:
                release = ''
            progress_update_s.send(sender=None, index=i + 1)
            package = PackageString(name=name, epoch=epoch, version=version,
                                    release=release, arch=arch,
                                    packagetype='D')
            packages.add(package)
    else:
        info_message.send(sender=None, text='No packages found in repo\n')
def _init_test(
    swh_scheduler: SchedulerInterface,
    debian_sources: Dict[Suite, SourcesText],
    requests_mock,
) -> Tuple[DebianLister, DebianSuitePkgSrcInfo]:
    """Build a DebianLister over the given suites and register mocked
    HTTP responses for their Sources indexes.

    Returns the lister plus, per suite, the parsed source packages
    grouped by package name.
    """
    lister = DebianLister(
        scheduler=swh_scheduler,
        mirror_url=_mirror_url,
        suites=list(debian_sources.keys()),
        components=_components,
    )
    suite_pkg_info: DebianSuitePkgSrcInfo = {}
    for suite, sources in debian_sources.items():
        pkg_info = defaultdict(list)
        for paragraph in Sources.iter_paragraphs(sources):
            pkg_info[paragraph["Package"]].append(paragraph)
        suite_pkg_info[suite] = pkg_info
        # Only the uncompressed index is served; every compressed
        # variant answers 404 so the lister falls through to it.
        for idx_url, compression in lister.debian_index_urls(suite, _components[0]):
            if compression:
                requests_mock.get(idx_url, status_code=404)
            else:
                requests_mock.get(idx_url, text=sources)
    return lister, suite_pkg_info
async def iter_sources(url):
    """Fetch a Sources index from *url* and asynchronously yield its
    paragraphs, transparently gunzipping ".gz" URLs.

    Raises a generic Exception on any non-200 response.
    """
    async with ClientSession() as session:
        async with session.get(url) as resp:
            if resp.status != 200:
                raise Exception(
                    "URL %s returned response code %d" % (url, resp.status))
            payload = await resp.read()
            if url.endswith(".gz"):
                payload = gzip.decompress(payload)
            # Yield inside the session context, matching the original
            # lifetime of the HTTP session during iteration.
            for paragraph in Sources.iter_paragraphs(payload):
                yield paragraph
def find_dsc(self, source):
    """Locate *source* in this repository's Sources.gz index.

    Returns a (directory, dsc-filename) tuple; the filename is None when
    the matching stanza lists no ".dsc" entry.  Raises Exception when the
    package/version is not present in the index.
    """
    sources = "{root}/dists/{suite}/{component}/source/Sources.gz".format(
        root=self.root, suite=source.suite.name, component=source.component.name
    )
    # Use a context manager so the gzip handle is closed deterministically
    # instead of leaking until garbage collection.
    with GzipFile(filename=sources) as index:
        for entry in Sources.iter_paragraphs(index):
            if entry['Package'] == source.name and entry['Version'] == source.version:
                dsc = None
                for line in entry['Files']:
                    if line['name'].endswith(".dsc"):
                        dsc = line['name']
                        break
                return (entry['Directory'], dsc)
    raise Exception("Package not found in Sources.gz")
def find_dsc(self, source):
    """Locate *source* in this repository's Sources.gz index.

    Returns a (directory, dsc-filename) tuple; the filename is None when
    the matching stanza lists no ".dsc" entry.  Raises RepoPackageNotFound
    when the package/version is not present in the index.
    """
    sources = "{root}/dists/{suite}/{component}/source/Sources.gz".format(
        root=self.root, suite=source.suite.name, component=source.component.name)
    # Use a context manager so the gzip handle is closed deterministically
    # instead of leaking until garbage collection.
    with GzipFile(filename=sources) as index:
        for entry in Sources.iter_paragraphs(index):
            if entry['Package'] == source.name and entry[
                    'Version'] == source.version:
                dsc = None
                for line in entry['Files']:
                    if line['name'].endswith(".dsc"):
                        dsc = line['name']
                        break
                return (entry['Directory'], dsc)
    raise RepoPackageNotFound('{0}-{1}'.format(source.name, source.version))
def page_request(self, suite: Suite, component: Component) -> DebianPageType:
    """Return parsed package Sources file for a given debian suite and component.

    Tries every candidate index URL in order and parses the first one
    answering 200; raises Exception when none does.
    """
    for url, compression in self.debian_index_urls(suite, component):
        response = requests.get(url, stream=True)
        logging.debug("Fetched URL: %s, status code: %s", url, response.status_code)
        if response.status_code == 200:
            break
    else:
        # Bug fix: printf-style args were passed to Exception(), which
        # does not interpolate them -- format the message explicitly.
        raise Exception(
            "Could not retrieve sources index for %s/%s" % (suite, component))
    decompressor = decompressors.get(compression)
    if decompressor:
        data = decompressor(response.raw)
    else:
        data = response.raw
    return Sources.iter_paragraphs(data.readlines())
def _init_test(
    swh_scheduler: SchedulerInterface,
    debian_sources: Dict[Suite, SourcesText],
    requests_mock,
) -> Tuple[DebianLister, DebianSuitePkgSrcInfo]:
    """Build a DebianLister over the given suites and register mocked
    HTTP responses (with distinct Last-Modified headers) for their
    Sources indexes.

    Returns the lister plus, per suite, the parsed source packages
    grouped by package name.
    """
    global _last_modified
    lister = DebianLister(
        scheduler=swh_scheduler,
        mirror_url=_mirror_url,
        suites=list(debian_sources.keys()),
        components=_components,
    )
    suite_pkg_info: DebianSuitePkgSrcInfo = {}
    for i, (suite, sources) in enumerate(debian_sources.items()):
        # ensure to generate a different date for each suite
        last_modified = formatdate(
            timeval=datetime.now().timestamp() + i, usegmt=True)
        pkg_info = defaultdict(list)
        for paragraph in Sources.iter_paragraphs(sources):
            pkg_info[paragraph["Package"]].append(paragraph)
            # backup package last update date
            _last_modified[paragraph["Package"]] = last_modified
        suite_pkg_info[suite] = pkg_info
        # First component: only the uncompressed index is served.
        for idx_url, compression in lister.debian_index_urls(suite, _components[0]):
            if compression:
                requests_mock.get(idx_url, status_code=404)
            else:
                requests_mock.get(
                    idx_url,
                    text=sources,
                    headers={"Last-Modified": last_modified},
                )
        # Second component: nothing is served at all.
        for idx_url, _ in lister.debian_index_urls(suite, _components[1]):
            requests_mock.get(idx_url, status_code=404)
    return lister, suite_pkg_info
def extract_deb_packages(data, url):
    """Extract package metadata from debian Packages file.

    Parses each stanza of the decompressed index, builds a PackageString
    for it and returns the resulting set, emitting progress signals along
    the way.  Returns an empty set when the index holds no packages.
    """
    extracted = extract(data, url)
    package_re = re.compile(b'^Package: ', re.M)
    plen = len(package_re.findall(extracted))
    packages = set()
    if plen > 0:
        ptext = 'Extracting packages: '
        progress_info_s.send(sender=None, ptext=ptext, plen=plen)
        bio = BytesIO(extracted)
        for i, stanza in enumerate(Sources.iter_paragraphs(bio)):
            fullversion = Version(stanza['version'])
            arch = stanza['architecture']
            name = stanza['package']
            # Use BaseVersion's public properties instead of the
            # name-mangled private attributes (_BaseVersion__epoch etc.),
            # which break if python-debian changes its internals.
            epoch = fullversion.epoch
            if epoch is None:
                epoch = ''
            version = fullversion.upstream_version
            release = fullversion.debian_revision
            if release is None:
                release = ''
            progress_update_s.send(sender=None, index=i + 1)
            package = PackageString(name=name, epoch=epoch, version=version,
                                    release=release, arch=arch,
                                    packagetype='D')
            packages.add(package)
    else:
        info_message.send(sender=None, text='No packages found in repo')
    return packages
def page_request(self, suite: Suite, component: Component) -> DebianPageType:
    """Return parsed package Sources file for a given debian suite and component.

    Tries every candidate index URL in order; when none answers 200 an
    empty parse is returned and a debug message is logged.
    """
    data = ""
    found = False
    for url, compression in self.debian_index_urls(suite, component):
        response = requests.get(url, stream=True)
        logging.debug("Fetched URL: %s, status code: %s", url, response.status_code)
        if response.status_code != 200:
            continue
        # Remember the index's publication date for incremental listing.
        last_modified = response.headers.get("Last-Modified")
        self.last_sources_update = (
            parsedate_to_datetime(last_modified) if last_modified else None)
        decompressor = decompressors.get(compression)
        stream = decompressor(response.raw) if decompressor else response.raw
        data = stream.readlines()
        found = True
        break
    if not found:
        logger.debug("Could not retrieve sources index for %s/%s", suite, component)
    return Sources.iter_paragraphs(data)
def _queue_sources_gz_pkgs(self, path):
    """Parse the 'Sources' index found under *path* and enqueue a watch
    for every source package it lists."""
    index_path = os.path.join(path, 'Sources')
    with open(index_path) as index:
        for paragraph in Sources.iter_paragraphs(index):
            super().watch(**self.package_to_enqueue(paragraph))
def get_sources(self, dist, component):
    """Download the gzipped Sources index for *dist*/*component* and
    yield an Upload for each paragraph in it."""
    response = requests.get(self._get_source_url(dist, component))
    # Wrap the downloaded bytes so GzipFile can decompress them lazily.
    stream = gzip.GzipFile(fileobj=io.BytesIO(response.content))
    for paragraph in Sources.iter_paragraphs(stream):
        yield Upload(paragraph, self)
# NOTE(review): fragment of a larger function -- the opening if/for and the
# body of the trailing loop are outside this view.
elif (
    f.endswith('/sources.deb822.gz')
    or f.endswith('.sources.deb822.gz')
):
    # Anything named like a deb822 Sources index is a source list ...
    source_lists.add(f)
else:
    # ... everything else is treated as a runtime (binary) package list.
    runtime_package_lists.add(f)
# Map: source package name -> {version -> Source}.
sources = {}  # type: typing.Dict[str, typing.Dict[str, Version]]
for f in source_lists:
    with GzipFile(f, 'rb') as gzip_reader:
        for source_stanza in Sources.iter_paragraphs(
            sequence=gzip_reader,
            encoding='utf-8',
        ):
            source = Source(
                source_stanza['package'],
                Version(source_stanza['version']),
                stanza=source_stanza,
            )
            # Index every version of every source package seen.
            sources.setdefault(source.name, {})[source.version] = source
for f in runtime_package_lists:
    test.diag('Examining runtime %s...' % f)
    with GzipFile(f, 'rb') as gzip_reader:
        # Loop body continues beyond this view.
        for binary_stanza in Packages.iter_paragraphs(
            sequence=gzip_reader,
            encoding='utf-8',
        ):
def fetch_sources(self):
    """Fetch the 'Sources' index and return an iterator over its
    paragraphs, parsed without the optional apt_pkg backend."""
    index = self.fetch_indexed_file('Sources')
    return Sources.iter_paragraphs(index, use_apt_pkg=False)