class CollectionSyncFirstStage(Stage):
    """
    The first stage of a pulp_ansible sync pipeline.
    """

    def __init__(self, remote, repository, optimize):
        """
        The first stage of a pulp_ansible sync pipeline.

        Args:
            remote (CollectionRemote): The remote data to be used when syncing.
            repository (AnsibleRepository): The repository being synced.
            optimize (boolean): Whether to optimize sync or not.
        """
        super().__init__()
        msg = _("Parsing CollectionVersion Metadata")
        self.parsing_metadata_progress_bar = ProgressReport(message=msg, code="parsing.metadata")
        self.remote = remote
        self.repository = repository
        self.optimize = optimize
        self.collection_info = parse_collections_requirements_file(remote.requirements_file)
        self.deprecations = Q()
        self.add_dependents = self.collection_info and self.remote.sync_dependencies
        self.already_synced = set()
        self._unpaginated_collection_metadata = None
        self._unpaginated_collection_version_metadata = None
        self.last_synced_metadata_time = None

        # Interpret download policy
        self.deferred_download = self.remote.policy != Remote.IMMEDIATE

    @alru_cache(maxsize=128)
    async def _get_root_api(self, root):
        """
        Returns the root api path and api version.

        Based on https://git.io/JTMxE.
        """
        if root == "https://galaxy.ansible.com" or root == "https://galaxy.ansible.com/":
            root = "https://galaxy.ansible.com/api/"

        downloader = self.remote.get_downloader(url=root)

        try:
            api_data = parse_metadata(await downloader.run())
        except (json.decoder.JSONDecodeError, ClientResponseError):
            if root.endswith("/api/"):
                raise

            root = urljoin(root, "api/")
            downloader = self.remote.get_downloader(url=root)
            api_data = parse_metadata(await downloader.run())

        if "available_versions" not in api_data:
            raise RuntimeError(_("Could not find 'available_versions' at {}").format(root))

        if "v3" in api_data.get("available_versions", {}):
            api_version = 3
        elif "v2" in api_data.get("available_versions", {}):
            api_version = 2
        else:
            raise RuntimeError(_("Unsupported API versions at {}").format(root))

        endpoint = f"{root}v{api_version}"

        return endpoint, api_version

    @alru_cache(maxsize=128)
    async def _get_paginated_collection_api(self, root):
        """
        Returns the collection api path and api version.

        Based on https://git.io/JTMxE.
        """
        endpoint, api_version = await self._get_root_api(root)
        return f"{endpoint}/collections/", api_version

    async def _fetch_collection_version_metadata(self, api_version, collection_version_url):
        downloader = self.remote.get_downloader(url=collection_version_url)
        metadata = parse_metadata(await downloader.run())
        await self._add_collection_version(api_version, collection_version_url, metadata)

    async def _add_collection_version(self, api_version, collection_version_url, metadata):
        """Add CollectionVersion to the sync pipeline."""
        url = metadata["download_url"]
        collection_version = CollectionVersion(
            namespace=metadata["namespace"]["name"],
            name=metadata["collection"]["name"],
            version=metadata["version"],
        )
        cv_unique = attrgetter("namespace", "name", "version")(collection_version)
        if cv_unique in self.already_synced:
            return
        self.already_synced.add(cv_unique)

        info = metadata["metadata"]

        if self.add_dependents:
            dependencies = info["dependencies"]
            tasks = []
            loop = asyncio.get_event_loop()
            for full_name, version in dependencies.items():
                namespace, name = full_name.split(".")
                if (namespace, name, version) not in self.already_synced:
                    new_req = RequirementsFileEntry(
                        name=full_name,
                        version=version,
                        source=None,
                    )
                    tasks.append(loop.create_task(self._fetch_collection_metadata(new_req)))
            await asyncio.gather(*tasks)

        info.pop("tags")
        for attr_name, attr_value in info.items():
            if attr_value is None or attr_name not in collection_version.__dict__:
                continue
            setattr(collection_version, attr_name, attr_value)

        artifact = metadata["artifact"]

        d_artifact = DeclarativeArtifact(
            artifact=Artifact(sha256=artifact["sha256"], size=artifact["size"]),
            url=url,
            relative_path=collection_version.relative_path,
            remote=self.remote,
            deferred_download=self.deferred_download,
        )

        extra_data = {}
        if api_version != 2:  # V2 never implemented the docs-blob requests
            extra_data["docs_blob_url"] = f"{collection_version_url}docs-blob/"

        d_content = DeclarativeContent(
            content=collection_version,
            d_artifacts=[d_artifact],
            extra_data=extra_data,
        )
        self.parsing_metadata_progress_bar.increment()
        await self.put(d_content)

    def _collection_versions_list_downloader(
        self, api_version, collection_endpoint, namespace, name, page_num, page_size
    ):
        url_without_get_params = f"{collection_endpoint}{namespace}/{name}/versions/"
        if api_version == 2:
            versions_list_url = f"{url_without_get_params}?page={page_num}&page_size={page_size}"
        else:
            offset = (page_num - 1) * page_size
            versions_list_url = f"{url_without_get_params}?limit={page_size}&offset={offset}"
        return self.remote.get_downloader(url=versions_list_url)

    async def _fetch_paginated_collection_metadata(self, name, namespace, requirement, source=None):
        root = source or self.remote.url

        collection_endpoint, api_version = await self._get_paginated_collection_api(root)
        collection_url = f"{collection_endpoint}{namespace}/{name}"
        collection_metadata_downloader = self.remote.get_downloader(url=collection_url)
        collection_metadata = parse_metadata(await collection_metadata_downloader.run())
        loop = asyncio.get_event_loop()

        tasks = []
        page_num = 1
        while True:
            versions_list_downloader = self._collection_versions_list_downloader(
                api_version, collection_endpoint, namespace, name, page_num, PAGE_SIZE
            )
            collection_versions_list = parse_metadata(await versions_list_downloader.run())
            if api_version == 2:
                collection_versions = collection_versions_list["results"]
            else:
                collection_versions = collection_versions_list["data"]
            for collection_version in collection_versions:
                if collection_version["version"] in requirement:
                    version_num = collection_version["version"]
                    collection_version_detail_url = f"{collection_url}/versions/{version_num}/"
                    if collection_metadata["deprecated"]:
                        self.deprecations |= Q(namespace=namespace, name=name)
                    tasks.append(
                        loop.create_task(
                            self._fetch_collection_version_metadata(
                                api_version,
                                collection_version_detail_url,
                            )
                        )
                    )
            next_value = self._get_response_next_value(api_version, collection_versions_list)
            if not next_value:
                break
            page_num = page_num + 1

        await asyncio.gather(*tasks)

    async def _read_from_downloaded_metadata(self, name, namespace, requirement):
        tasks = []
        loop = asyncio.get_event_loop()

        if self._unpaginated_collection_metadata[namespace][name]["deprecated"]:
            self.deprecations |= Q(namespace=namespace, name=name)

        all_versions_of_collection = self._unpaginated_collection_version_metadata[namespace][name]

        for col_version_metadata in all_versions_of_collection:
            if col_version_metadata["version"] in requirement:
                collection_version_url = urljoin(self.remote.url, f"{col_version_metadata['href']}")
                tasks.append(
                    loop.create_task(
                        self._add_collection_version(
                            self._api_version, collection_version_url, col_version_metadata
                        )
                    )
                )

        await asyncio.gather(*tasks)

    async def _fetch_collection_metadata(self, requirements_entry):
        if requirements_entry.version == "*":
            requirement_version = Requirement.parse("collection")
        else:
            requirement_version = Requirement.parse(f"collection{requirements_entry.version}")

        namespace, name = requirements_entry.name.split(".")

        if self._unpaginated_collection_version_metadata and requirements_entry.source is None:
            await self._read_from_downloaded_metadata(name, namespace, requirement_version)
        else:
            await self._fetch_paginated_collection_metadata(
                name, namespace, requirement_version, requirements_entry.source
            )

    @staticmethod
    def _get_response_next_value(api_version, response):
        if api_version == 2:
            return response["next"]
        else:
            return response["links"]["next"]

    def _collection_list_downloader(self, api_version, collection_endpoint, page_num, page_size):
        if api_version == 2:
            collection_list_url = f"{collection_endpoint}?page={page_num}&page_size={page_size}"
        else:
            offset = (page_num - 1) * page_size
            collection_list_url = f"{collection_endpoint}?limit={page_size}&offset={offset}"
        return self.remote.get_downloader(url=collection_list_url)

    async def _download_unpaginated_metadata(self):
        root_endpoint, api_version = await self._get_root_api(self.remote.url)
        self._api_version = api_version
        if api_version > 2:
            collection_endpoint = f"{root_endpoint}/collections/all/"
            downloader = self.remote.get_downloader(
                url=collection_endpoint, silence_errors_for_response_status_codes={404}
            )
            try:
                collection_metadata_list = parse_metadata(await downloader.run())
            except FileNotFoundError:
                pass
            else:
                self._unpaginated_collection_metadata = defaultdict(dict)
                for collection in collection_metadata_list:
                    namespace = collection["namespace"]
                    name = collection["name"]
                    self._unpaginated_collection_metadata[namespace][name] = collection

                collection_version_endpoint = f"{root_endpoint}/collection_versions/all/"
                downloader = self.remote.get_downloader(url=collection_version_endpoint)
                collection_version_metadata_list = parse_metadata(await downloader.run())

                self._unpaginated_collection_version_metadata = defaultdict(
                    lambda: defaultdict(list)
                )
                for collection_version_metadata in collection_version_metadata_list:
                    namespace = collection_version_metadata["namespace"]["name"]
                    name = collection_version_metadata["name"]
                    self._unpaginated_collection_version_metadata[namespace][name].append(
                        collection_version_metadata
                    )

    async def _find_all_collections_from_unpaginated_data(self):
        tasks = []
        loop = asyncio.get_event_loop()
        for collection_namespace_dict in self._unpaginated_collection_metadata.values():
            for collection in collection_namespace_dict.values():
                if collection["deprecated"]:
                    self.deprecations |= Q(
                        namespace=collection["namespace"], name=collection["name"]
                    )

        for collections_in_namespace in self._unpaginated_collection_version_metadata.values():
            for collection_versions in collections_in_namespace.values():
                for collection_version in collection_versions:
                    collection_version_url = urljoin(
                        self.remote.url, f"{collection_version['href']}"
                    )
                    tasks.append(
                        loop.create_task(
                            self._add_collection_version(
                                self._api_version, collection_version_url, collection_version
                            )
                        )
                    )
        await asyncio.gather(*tasks)

    async def _find_all_collections(self):
        if self._unpaginated_collection_version_metadata:
            await self._find_all_collections_from_unpaginated_data()
            return

        collection_endpoint, api_version = await self._get_paginated_collection_api(self.remote.url)
        loop = asyncio.get_event_loop()

        tasks = []
        page_num = 1
        while True:
            collection_list_downloader = self._collection_list_downloader(
                api_version, collection_endpoint, page_num, PAGE_SIZE
            )
            collection_list = parse_metadata(await collection_list_downloader.run())

            if api_version == 2:
                collections = collection_list["results"]
            else:
                collections = collection_list["data"]

            for collection in collections:
                if api_version == 2:
                    namespace = collection["namespace"]["name"]
                else:
                    namespace = collection["namespace"]
                name = collection["name"]
                requirements_file = RequirementsFileEntry(
                    name=".".join([namespace, name]),
                    version="*",
                    source=None,
                )
                tasks.append(loop.create_task(self._fetch_collection_metadata(requirements_file)))

            next_value = self._get_response_next_value(api_version, collection_list)
            if not next_value:
                break
            page_num = page_num + 1

        await asyncio.gather(*tasks)

    async def _should_we_sync(self):
        """Check last synced metadata time."""
        msg = _("no-op: Checking if remote changed since last sync.")
        noop = ProgressReport(message=msg, code="noop")
        noop.state = TASK_STATES.COMPLETED
        noop.save()

        if not self.repository.remote:
            return True

        if self.remote != self.repository.remote.cast():
            return True

        root, api_version = await self._get_root_api(self.remote.url)
        if api_version == 3:
            downloader = self.remote.get_downloader(
                url=root, silence_errors_for_response_status_codes={404}
            )
            try:
                metadata = parse_metadata(await downloader.run())
            except FileNotFoundError:
                return True

            try:
                self.last_synced_metadata_time = parse_datetime(metadata["published"])
            except KeyError:
                return True

            sources = set()
            if self.collection_info:
                sources = {r.source for r in self.collection_info if r.source}
            sources.add(self.remote.url)
            if len(sources) > 1:
                return True

            if self.last_synced_metadata_time == self.repository.last_synced_metadata_time:
                noop.message = _(
                    "no-op: {remote} did not change since last sync - {published}".format(
                        remote=self.remote.url, published=self.last_synced_metadata_time
                    )
                )
                noop.save()
                return False

        return True

    async def run(self):
        """
        Build and emit `DeclarativeContent` from the ansible metadata.
        """
        if self.optimize:
            should_we_sync = await self._should_we_sync()
            if should_we_sync is False:
                log.debug(_("no-op: remote wasn't updated since last sync."))
                return

        tasks = []
        loop = asyncio.get_event_loop()

        await self._download_unpaginated_metadata()

        if self.collection_info:
            for requirement_entry in self.collection_info:
                tasks.append(loop.create_task(self._fetch_collection_metadata(requirement_entry)))
        else:
            tasks.append(loop.create_task(self._find_all_collections()))
        await asyncio.gather(*tasks)

        self.parsing_metadata_progress_bar.state = TASK_STATES.COMPLETED
        self.parsing_metadata_progress_bar.save()
async def run(self):
    """
    ContainerFirstStage.
    """
    future_manifests = []
    tag_list = []
    to_download = []
    man_dcs = {}
    total_blobs = []

    with ProgressReport(
        message='Downloading tag list', code='downloading.tag_list', total=1
    ) as pb:
        repo_name = self.remote.namespaced_upstream_name
        relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
        tag_list_url = urljoin(self.remote.url, relative_url)
        list_downloader = self.remote.get_downloader(url=tag_list_url)
        await list_downloader.run(extra_data={'repo_name': repo_name})

        with open(list_downloader.path) as tags_raw:
            tags_dict = json.loads(tags_raw.read())
            tag_list = tags_dict['tags']

        # check for the presence of the pagination link header
        link = list_downloader.response_headers.get('Link')
        await self.handle_pagination(link, repo_name, tag_list)
        tag_list = self.filter_tags(tag_list)
        pb.increment()

    for tag_name in tag_list:
        relative_url = '/v2/{name}/manifests/{tag}'.format(
            name=self.remote.namespaced_upstream_name, tag=tag_name,
        )
        url = urljoin(self.remote.url, relative_url)
        downloader = self.remote.get_downloader(url=url)
        to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))

    pb_parsed_tags = ProgressReport(
        message='Processing Tags',
        code='processing.tag',
        state=TASK_STATES.RUNNING,
        total=len(tag_list)
    )

    for download_tag in asyncio.as_completed(to_download):
        tag = await download_tag
        with open(tag.path, 'rb') as content_file:
            raw_data = content_file.read()
        content_data = json.loads(raw_data)
        media_type = content_data.get('mediaType')
        tag.artifact_attributes['file'] = tag.path
        saved_artifact = Artifact(**tag.artifact_attributes)
        try:
            saved_artifact.save()
        except IntegrityError:
            del tag.artifact_attributes['file']
            saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
        tag_dc = self.create_tag(saved_artifact, tag.url)

        if media_type in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI):
            list_dc = self.create_tagged_manifest_list(tag_dc, content_data)
            await self.put(list_dc)
            tag_dc.extra_data['man_relation'] = list_dc
            for manifest_data in content_data.get('manifests'):
                man_dc = self.create_manifest(list_dc, manifest_data)
                future_manifests.append(man_dc)
                man_dcs[man_dc.content.digest] = man_dc
                await self.put(man_dc)
        else:
            man_dc = self.create_tagged_manifest(tag_dc, content_data, raw_data)
            await self.put(man_dc)
            tag_dc.extra_data['man_relation'] = man_dc
            self.handle_blobs(man_dc, content_data, total_blobs)
        await self.put(tag_dc)
        pb_parsed_tags.increment()

    pb_parsed_tags.state = 'completed'
    pb_parsed_tags.save()

    for manifest_future in future_manifests:
        man = await manifest_future.resolution()
        with man._artifacts.get().file.open() as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        man_dc = man_dcs[man.digest]
        self.handle_blobs(man_dc, content_data, total_blobs)
    for blob in total_blobs:
        await self.put(blob)
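

# --- Illustrative sketch (not from the original source) -------------------------------------
# filter_tags() above is assumed to narrow the upstream tag list before any manifests are
# fetched. A minimal, hypothetical stand-in using include/exclude glob patterns is shown here;
# the real method's attribute names and matching semantics may differ.
from fnmatch import fnmatch


def filter_tags_sketch(tag_list, include_patterns=None, exclude_patterns=None):
    """Keep tags matching any include pattern, then drop tags matching any exclude pattern."""
    if include_patterns is not None:
        tag_list = [t for t in tag_list if any(fnmatch(t, p) for p in include_patterns)]
    if exclude_patterns is not None:
        tag_list = [t for t in tag_list if not any(fnmatch(t, p) for p in exclude_patterns)]
    return tag_list


# Example: filter_tags_sketch(["v1.0", "v1.1", "latest"], include_patterns=["v1.*"])
# returns ["v1.0", "v1.1"].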
async def run(self):
    """
    Build `DeclarativeContent` from the repodata.
    """
    packages_pb = ProgressReport(message='Parsed Packages', code='parsing.packages')
    errata_pb = ProgressReport(message='Parsed Erratum', code='parsing.errata')
    modulemd_pb = ProgressReport(message='Parse Modulemd', code='parsing.modulemds')
    modulemd_defaults_pb = ProgressReport(
        message='Parse Modulemd-defaults', code='parsing.modulemddefaults')
    comps_pb = ProgressReport(message='Parsed Comps', code='parsing.comps')

    packages_pb.save()
    errata_pb.save()
    comps_pb.save()

    remote_url = self.new_url or self.remote.url
    remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"

    progress_data = dict(message='Downloading Metadata Files', code='downloading.metadata')
    with ProgressReport(**progress_data) as metadata_pb:
        downloader = self.remote.get_downloader(
            url=urljoin(remote_url, 'repodata/repomd.xml'))
        # TODO: decide how to distinguish between a mirror list and a normal repo
        result = await downloader.run()
        metadata_pb.increment()

        if self.kickstart:
            d_artifacts = []
            for path, checksum in self.kickstart["download"]["images"].items():
                artifact = Artifact(**checksum)

                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=urljoin(remote_url, path),
                    relative_path=path,
                    remote=self.remote,
                    deferred_download=self.deferred_download)

                d_artifacts.append(da)

            distribution_tree = DistributionTree(**self.kickstart["distribution_tree"])
            dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
            dc.extra_data = self.kickstart
            await self.put(dc)

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)
        package_repodata_urls = {}
        downloaders = []
        modulemd_list = list()
        dc_groups = []
        dc_categories = []
        dc_environments = []
        nevra_to_module = defaultdict(dict)
        pkgname_to_groups = defaultdict(list)
        group_to_categories = defaultdict(list)
        group_to_environments = defaultdict(list)
        optionalgroup_to_environments = defaultdict(list)
        modulemd_results = None
        comps_downloader = None

        for record in repomd.records:
            if record.type in PACKAGE_REPODATA:
                package_repodata_urls[record.type] = urljoin(remote_url, record.location_href)
            elif record.type in UPDATE_REPODATA:
                updateinfo_url = urljoin(remote_url, record.location_href)
                downloader = self.remote.get_downloader(url=updateinfo_url)
                downloaders.append([downloader.run()])
            elif record.type in COMPS_REPODATA:
                comps_url = urljoin(remote_url, record.location_href)
                comps_downloader = self.remote.get_downloader(url=comps_url)
            elif record.type in SKIP_REPODATA:
                continue
            elif record.type in MODULAR_REPODATA:
                modules_url = urljoin(remote_url, record.location_href)
                modulemd_downloader = self.remote.get_downloader(url=modules_url)
                modulemd_results = await modulemd_downloader.run()
            elif record.type not in PACKAGE_DB_REPODATA:
                file_data = {record.checksum_type: record.checksum, "size": record.size}
                da = DeclarativeArtifact(
                    artifact=Artifact(**file_data),
                    url=urljoin(remote_url, record.location_href),
                    relative_path=record.location_href,
                    remote=self.remote,
                    deferred_download=False)
                repo_metadata_file = RepoMetadataFile(
                    data_type=record.type,
                    checksum_type=record.checksum_type,
                    checksum=record.checksum,
                )
                dc = DeclarativeContent(content=repo_metadata_file, d_artifacts=[da])
                await self.put(dc)

        # we have to sync module.yaml first if it exists, to make relations to packages
        if modulemd_results:
            modulemd_index = mmdlib.ModuleIndex.new()
            open_func = gzip.open if modulemd_results.url.endswith('.gz') else open
            with open_func(modulemd_results.path, 'r') as moduleyaml:
                modulemd_index.update_from_string(moduleyaml.read().decode(), True)

            modulemd_names = modulemd_index.get_module_names() or []
            modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

            modulemd_pb.total = len(modulemd_all)
            modulemd_pb.state = 'running'
            modulemd_pb.save()

            for modulemd in modulemd_all:
                artifact = modulemd.pop('artifact')
                relative_path = '{}{}{}{}{}snippet'.format(
                    modulemd[PULP_MODULE_ATTR.NAME], modulemd[PULP_MODULE_ATTR.STREAM],
                    modulemd[PULP_MODULE_ATTR.VERSION], modulemd[PULP_MODULE_ATTR.CONTEXT],
                    modulemd[PULP_MODULE_ATTR.ARCH])
                da = DeclarativeArtifact(
                    artifact=artifact, relative_path=relative_path, url=modules_url)
                modulemd_content = Modulemd(**modulemd)
                dc = DeclarativeContent(content=modulemd_content, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # dc.content.artifacts are Modulemd artifacts
                for artifact in json.loads(dc.content.artifacts):
                    nevra_to_module.setdefault(artifact, set()).add(dc)
                modulemd_list.append(dc)

            modulemd_default_names = parse_defaults(modulemd_index)

            modulemd_defaults_pb.total = len(modulemd_default_names)
            modulemd_defaults_pb.state = 'running'
            modulemd_defaults_pb.save()

            for default in modulemd_default_names:
                artifact = default.pop('artifact')
                relative_path = '{}{}snippet'.format(
                    default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                    default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                da = DeclarativeArtifact(
                    artifact=artifact, relative_path=relative_path, url=modules_url)
                default_content = ModulemdDefaults(**default)
                modulemd_defaults_pb.increment()
                dc = DeclarativeContent(content=default_content, d_artifacts=[da])
                await self.put(dc)

        if comps_downloader:
            comps_result = await comps_downloader.run()

            comps = libcomps.Comps()
            comps.fromxml_f(comps_result.path)

            comps_pb.total = (
                len(comps.groups) + len(comps.categories) + len(comps.environments))
            comps_pb.state = 'running'
            comps_pb.save()

            if comps.langpacks:
                langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
                packagelangpack = PackageLangpacks(
                    matches=strdict_to_dict(comps.langpacks),
                    digest=dict_digest(langpack_dict))
                dc = DeclarativeContent(content=packagelangpack)
                dc.extra_data = defaultdict(list)
                await self.put(dc)

            if comps.categories:
                for category in comps.categories:
                    category_dict = PackageCategory.libcomps_to_dict(category)
                    category_dict['digest'] = dict_digest(category_dict)
                    packagecategory = PackageCategory(**category_dict)
                    dc = DeclarativeContent(content=packagecategory)
                    dc.extra_data = defaultdict(list)

                    if packagecategory.group_ids:
                        for group_id in packagecategory.group_ids:
                            group_to_categories[group_id['name']].append(dc)
                    dc_categories.append(dc)

            if comps.environments:
                for environment in comps.environments:
                    environment_dict = PackageEnvironment.libcomps_to_dict(environment)
                    environment_dict['digest'] = dict_digest(environment_dict)
                    packageenvironment = PackageEnvironment(**environment_dict)
                    dc = DeclarativeContent(content=packageenvironment)
                    dc.extra_data = defaultdict(list)

                    if packageenvironment.option_ids:
                        for option_id in packageenvironment.option_ids:
                            optionalgroup_to_environments[option_id['name']].append(dc)

                    if packageenvironment.group_ids:
                        for group_id in packageenvironment.group_ids:
                            group_to_environments[group_id['name']].append(dc)
                    dc_environments.append(dc)

            if comps.groups:
                for group in comps.groups:
                    group_dict = PackageGroup.libcomps_to_dict(group)
                    group_dict['digest'] = dict_digest(group_dict)
                    packagegroup = PackageGroup(**group_dict)
                    dc = DeclarativeContent(content=packagegroup)
                    dc.extra_data = defaultdict(list)

                    if packagegroup.packages:
                        for package in packagegroup.packages:
                            pkgname_to_groups[package['name']].append(dc)

                    if dc.content.id in group_to_categories.keys():
                        for dc_category in group_to_categories[dc.content.id]:
                            dc.extra_data['category_relations'].append(dc_category)
                            dc_category.extra_data['packagegroups'].append(dc)

                    if dc.content.id in group_to_environments.keys():
                        for dc_environment in group_to_environments[dc.content.id]:
                            dc.extra_data['environment_relations'].append(dc_environment)
                            dc_environment.extra_data['packagegroups'].append(dc)

                    if dc.content.id in optionalgroup_to_environments.keys():
                        for dc_environment in optionalgroup_to_environments[dc.content.id]:
                            dc.extra_data['env_relations_optional'].append(dc_environment)
                            dc_environment.extra_data['optionalgroups'].append(dc)

                    dc_groups.append(dc)

            for dc_category in dc_categories:
                comps_pb.increment()
                await self.put(dc_category)

            for dc_environment in dc_environments:
                comps_pb.increment()
                await self.put(dc_environment)

        # to preserve order, downloaders are created after all repodata urls are identified
        package_repodata_downloaders = []
        for repodata_type in PACKAGE_REPODATA:
            downloader = self.remote.get_downloader(url=package_repodata_urls[repodata_type])
            package_repodata_downloaders.append(downloader.run())

        downloaders.append(package_repodata_downloaders)

        # asyncio.gather is used to preserve the order of results for package repodata
        pending = [
            asyncio.gather(*downloaders_group) for downloaders_group in downloaders
        ]

        while pending:
            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
            for downloader in done:
                results = downloader.result()
                if results[0].url == package_repodata_urls['primary']:
                    primary_xml_path = results[0].path
                    filelists_xml_path = results[1].path
                    other_xml_path = results[2].path
                    metadata_pb.done += 3
                    metadata_pb.save()

                    packages = await RpmFirstStage.parse_repodata(
                        primary_xml_path, filelists_xml_path, other_xml_path)
                    packages_pb.total = len(packages)
                    packages_pb.state = 'running'
                    packages_pb.save()

                    for pkg in packages.values():
                        package = Package(**Package.createrepo_to_dict(pkg))
                        artifact = Artifact(size=package.size_package)
                        checksum_type = getattr(CHECKSUM_TYPES, package.checksum_type.upper())
                        setattr(artifact, checksum_type, package.pkgId)
                        url = urljoin(remote_url, package.location_href)
                        filename = os.path.basename(package.location_href)
                        da = DeclarativeArtifact(
                            artifact=artifact,
                            url=url,
                            relative_path=filename,
                            remote=self.remote,
                            deferred_download=self.deferred_download)
                        dc = DeclarativeContent(content=package, d_artifacts=[da])
                        dc.extra_data = defaultdict(list)

                        # find if a package relates to a modulemd
                        if dc.content.nevra in nevra_to_module.keys():
                            dc.content.is_modular = True
                            for dc_modulemd in nevra_to_module[dc.content.nevra]:
                                dc.extra_data['modulemd_relation'].append(dc_modulemd)
                                dc_modulemd.extra_data['package_relation'].append(dc)

                        if dc.content.name in pkgname_to_groups.keys():
                            for dc_group in pkgname_to_groups[dc.content.name]:
                                dc.extra_data['group_relations'].append(dc_group)
                                dc_group.extra_data['related_packages'].append(dc)

                        packages_pb.increment()
                        await self.put(dc)

                elif results[0].url == updateinfo_url:
                    updateinfo_xml_path = results[0].path
                    metadata_pb.increment()

                    updates = await RpmFirstStage.parse_updateinfo(updateinfo_xml_path)

                    errata_pb.total = len(updates)
                    errata_pb.state = 'running'
                    errata_pb.save()

                    for update in updates:
                        update_record = UpdateRecord(**UpdateRecord.createrepo_to_dict(update))
                        update_record.digest = RpmFirstStage.hash_update_record(update)
                        future_relations = {
                            'collections': defaultdict(list),
                            'references': []
                        }

                        for collection in update.collections:
                            coll_dict = UpdateCollection.createrepo_to_dict(collection)
                            coll = UpdateCollection(**coll_dict)

                            for package in collection.packages:
                                pkg_dict = UpdateCollectionPackage.createrepo_to_dict(package)
                                pkg = UpdateCollectionPackage(**pkg_dict)
                                future_relations['collections'][coll].append(pkg)

                        for reference in update.references:
                            reference_dict = UpdateReference.createrepo_to_dict(reference)
                            ref = UpdateReference(**reference_dict)
                            future_relations['references'].append(ref)

                        errata_pb.increment()
                        dc = DeclarativeContent(content=update_record)
                        dc.extra_data = future_relations
                        await self.put(dc)

        # now send modules down the pipeline since all relations have been set up
        for modulemd in modulemd_list:
            modulemd_pb.increment()
            await self.put(modulemd)

        for dc_group in dc_groups:
            comps_pb.increment()
            await self.put(dc_group)

    packages_pb.state = 'completed'
    errata_pb.state = 'completed'
    modulemd_pb.state = 'completed'
    modulemd_defaults_pb.state = 'completed'
    comps_pb.state = 'completed'
    packages_pb.save()
    errata_pb.save()
    modulemd_pb.save()
    modulemd_defaults_pb.save()
    comps_pb.save()
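

# --- Illustrative sketch (not part of the original stage) -----------------------------------
# The loop over repomd.records above maps each repodata record to an absolute download URL.
# A standalone, simplified version of that mapping is sketched here with createrepo_c; the
# file path and return shape are assumptions for illustration only.
from urllib.parse import urljoin

import createrepo_c as cr


def list_repodata_records(repomd_path, remote_url):
    """Return {record_type: absolute_url} for every record listed in repomd.xml."""
    repomd = cr.Repomd(repomd_path)
    return {record.type: urljoin(remote_url, record.location_href) for record in repomd.records}


# Example: list_repodata_records("/tmp/repomd.xml", "https://example.org/repo/") might return
# {"primary": "https://example.org/repo/repodata/...-primary.xml.gz", ...}.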