Example #1
0
def parse_comps_components(comps_file):
    """Parse comps-related components found in the specified file."""
    # created = {"categories": [], "environments": [], "groups": [], "langpack": None}
    created_objects = []
    all_objects = []
    comps = libcomps.Comps()
    # Read the file and pass the string along because comps.fromxml_f() will only take a
    # path-string that doesn't work on things like S3 storage
    with comps_file.file.open("rb") as f:
        data = f.read()
        comps.fromxml_str(data.decode("utf-8"))

    if comps.langpacks:
        langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
        langpack, created = PackageLangpacks.objects.get_or_create(
            matches=strdict_to_dict(comps.langpacks),
            digest=dict_digest(langpack_dict))
        if created:
            created_objects.append(langpack)
        all_objects.append(langpack)

    if comps.categories:
        for category in comps.categories:
            category_dict = PackageCategory.libcomps_to_dict(category)
            category_dict["digest"] = dict_digest(category_dict)
            packagecategory, created = PackageCategory.objects.get_or_create(
                **category_dict)
            if created:
                created_objects.append(packagecategory)
            all_objects.append(packagecategory)

    if comps.environments:
        for environment in comps.environments:
            environment_dict = PackageEnvironment.libcomps_to_dict(environment)
            environment_dict["digest"] = dict_digest(environment_dict)
            packageenvironment, created = PackageEnvironment.objects.get_or_create(
                **environment_dict)
            if created:
                created_objects.append(packageenvironment)
            all_objects.append(packageenvironment)

    if comps.groups:
        for group in comps.groups:
            group_dict = PackageGroup.libcomps_to_dict(group)
            group_dict["digest"] = dict_digest(group_dict)
            packagegroup, created = PackageGroup.objects.get_or_create(
                **group_dict)
            if created:
                created_objects.append(packagegroup)
            all_objects.append(packagegroup)

    return created_objects, all_objects
Example #2
0
    def _init_dc_groups(self, comps):
        if comps.groups:
            for group in comps.groups:
                group_dict = PackageGroup.libcomps_to_dict(group)
                group_dict["digest"] = dict_digest(group_dict)
                packagegroup = PackageGroup(**group_dict)
                dc = DeclarativeContent(content=packagegroup)
                dc.extra_data = defaultdict(list)

                if packagegroup.packages:
                    for package in packagegroup.packages:
                        self.data.pkgname_to_groups[package["name"]].append(dc)

                if dc.content.id in self.group_to_categories.keys():
                    for dc_category in self.group_to_categories[dc.content.id]:
                        dc.extra_data["category_relations"].append(dc_category)
                        dc_category.extra_data["packagegroups"].append(dc)

                if dc.content.id in self.group_to_environments.keys():
                    for dc_environment in self.group_to_environments[
                            dc.content.id]:
                        dc.extra_data["environment_relations"].append(
                            dc_environment)
                        dc_environment.extra_data["packagegroups"].append(dc)

                if dc.content.id in self.optionalgroup_to_environments.keys():
                    for dc_environment in self.optionalgroup_to_environments[
                            dc.content.id]:
                        dc.extra_data["env_relations_optional"].append(
                            dc_environment)
                        dc_environment.extra_data["optionalgroups"].append(dc)

                self.data.dc_groups.append(dc)
Example #3
0
 def create_pulp3_content(self):
     """
     Create a Pulp 3 Package Category content for saving it later in a bulk operation.
     """
     cat = pkg_cat_to_libcomps(self)
     category_dict = PackageCategory.libcomps_to_dict(cat)
     category_dict['digest'] = dict_digest(category_dict)
     return (PackageCategory(**category_dict), None)
Example #4
0
 def create_pulp3_content(self):
     """
     Create a Pulp 3 Package Langpacks content for saving it later in a bulk operation.
     """
     langpacks = langpacks_to_libcomps(self)
     langpacks_dict = PackageLangpacks.libcomps_to_dict(langpacks)
     return (PackageLangpacks(matches=langpacks_dict['matches'],
                              digest=dict_digest(langpacks_dict)), None)
Example #5
0
 def create_pulp3_content(self):
     """
     Create a Pulp 3 Package Environment content for saving it later in a bulk operation.
     """
     env = pkg_env_to_libcomps(self)
     environment_dict = PackageEnvironment.libcomps_to_dict(env)
     environment_dict['digest'] = dict_digest(environment_dict)
     return (PackageEnvironment(**environment_dict), None)
Example #6
0
 def create_pulp3_content(self):
     """
     Create a Pulp 3 Package Group content for saving it later in a bulk operation.
     """
     group = pkg_grp_to_libcomps(self)
     group_dict = PackageGroup.libcomps_to_dict(group)
     packages = group_dict['packages']
     # ugly stuff
     for pkg in packages:
         pkg['requires'] = pkg['requires'] or None
     group_dict['digest'] = dict_digest(group_dict)
     return (PackageGroup(**group_dict), None)
Example #7
0
    def _init_dc_categories(self, comps):
        if comps.categories:
            for category in comps.categories:
                category_dict = PackageCategory.libcomps_to_dict(category)
                category_dict["digest"] = dict_digest(category_dict)
                packagecategory = PackageCategory(**category_dict)
                dc = DeclarativeContent(content=packagecategory)
                dc.extra_data = defaultdict(list)

                if packagecategory.group_ids:
                    for group_id in packagecategory.group_ids:
                        self.group_to_categories[group_id["name"]].append(dc)
                self.dc_categories.append(dc)
Example #8
0
    def _init_dc_environments(self, comps):
        if comps.environments:
            for environment in comps.environments:
                environment_dict = PackageEnvironment.libcomps_to_dict(
                    environment)
                environment_dict["digest"] = dict_digest(environment_dict)
                packageenvironment = PackageEnvironment(**environment_dict)
                dc = DeclarativeContent(content=packageenvironment)
                dc.extra_data = defaultdict(list)

                if packageenvironment.option_ids:
                    for option_id in packageenvironment.option_ids:
                        self.optionalgroup_to_environments[
                            option_id["name"]].append(dc)

                if packageenvironment.group_ids:
                    for group_id in packageenvironment.group_ids:
                        self.group_to_environments[group_id["name"]].append(dc)

                self.dc_environments.append(dc)
Example #9
0
    def parse(self):
        """Parse packages' components."""
        comps = libcomps.Comps()
        comps.fromxml_f(self.comps_result.path)

        with ProgressReport(message="Parsed Comps",
                            code="parsing.comps") as comps_pb:
            comps_total = len(comps.groups) + len(comps.categories) + len(
                comps.environments)
            comps_pb.total = comps_total
            comps_pb.done = comps_total

        if comps.langpacks:
            langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
            packagelangpack = PackageLangpacks(
                matches=strdict_to_dict(comps.langpacks),
                digest=dict_digest(langpack_dict))
            self.package_language_pack_dc = DeclarativeContent(
                content=packagelangpack)
            self.package_language_pack_dc.extra_data = defaultdict(list)

        self._init_dc_categories(comps)
        self._init_dc_environments(comps)
        self._init_dc_groups(comps)
Example #10
0
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
        optimize_sync = self.optimize

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)

            # Caution: we are not storing when the remote was last updated, so the order of this
            # logic must remain in this order where we first check the version number as other
            # changes than sync could have taken place such that the date or repo version will be
            # different from last sync
            if (optimize_sync and self.repository.last_sync_remote
                    and self.remote.pk == self.repository.last_sync_remote.pk
                    and (self.repository.last_sync_repo_version
                         == self.repository.latest_version().number)
                    and (self.remote.pulp_last_updated <=
                         self.repository.latest_version().pulp_created)
                    and is_previous_version(
                        repomd.revision,
                        self.repository.last_sync_revision_number)):
                optimize_data = dict(message='Optimizing Sync',
                                     code='optimizing.sync')
                with ProgressReport(**optimize_data) as optimize_pb:
                    optimize_pb.done = 1
                    optimize_pb.save()
                    return

            self.repository.last_sync_revision_number = repomd.revision

            if self.treeinfo:
                d_artifacts = [
                    DeclarativeArtifact(
                        artifact=Artifact(),
                        url=urljoin(remote_url, self.treeinfo["filename"]),
                        relative_path=".treeinfo",
                        remote=self.remote,
                        deferred_download=False,
                    )
                ]
                for path, checksum in self.treeinfo["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)
                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)
                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.treeinfo["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.treeinfo
                await self.put(dc)

            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None
            main_types = set()
            checksums = {}

            for record in repomd.records:
                checksums[record.type] = record.checksum_type.upper()
                if record.type in PACKAGE_REPODATA:
                    main_types.update([record.type])
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)

                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif '_zck' in record.type:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            missing_type = set(PACKAGE_REPODATA) - main_types
            if missing_type:
                raise FileNotFoundError(
                    _("XML file(s): {filename} not found").format(
                        filename=", ".join(missing_type)))

            self.repository.original_checksum_types = checksums

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    content = moduleyaml.read()
                    module_content = content if isinstance(
                        content, str) else content.decode()
                    modulemd_index.update_from_string(module_content, True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                # Parsing modules happens all at one time, and from here on no useful work happens.
                # So just report that it finished this stage.
                modulemd_pb_data = {
                    'message': 'Parsed Modulemd',
                    'code': 'parsing.modulemds'
                }
                with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                    modulemd_total = len(modulemd_all)
                    modulemd_pb.total = modulemd_total
                    modulemd_pb.done = modulemd_total

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in dc.content.artifacts:
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_all

                modulemd_default_names = parse_defaults(modulemd_index)

                # Parsing module-defaults happens all at one time, and from here on no useful
                # work happens. So just report that it finished this stage.
                modulemd_defaults_pb_data = {
                    'message': 'Parsed Modulemd-defaults',
                    'code': 'parsing.modulemd_defaults'
                }
                with ProgressReport(
                        **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                    modulemd_defaults_total = len(modulemd_default_names)
                    modulemd_defaults_pb.total = modulemd_defaults_total
                    modulemd_defaults_pb.done = modulemd_defaults_total

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_default_names

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                with ProgressReport(message='Parsed Comps',
                                    code='parsing.comps') as comps_pb:
                    comps_total = (len(comps.groups) + len(comps.categories) +
                                   len(comps.environments))
                    comps_pb.total = comps_total
                    comps_pb.done = comps_total

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments.keys(
                        ):
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    try:
                        results = downloader.result()
                    except ClientResponseError as exc:
                        raise HTTPNotFound(
                            reason=_("File not found: {filename}").format(
                                filename=exc.request_info.url))
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        # skip SRPM if defined
                        if 'srpm' in self.skip_types:
                            packages = {
                                pkgId: pkg
                                for pkgId, pkg in packages.items()
                                if pkg.arch != 'src'
                            }

                        progress_data = {
                            'message': 'Parsed Packages',
                            'code': 'parsing.packages',
                            'total': len(packages),
                        }
                        with ProgressReport(**progress_data) as packages_pb:
                            for pkg in packages.values():
                                package = Package(
                                    **Package.createrepo_to_dict(pkg))
                                artifact = Artifact(size=package.size_package)
                                checksum_type = getattr(
                                    CHECKSUM_TYPES,
                                    package.checksum_type.upper())
                                setattr(artifact, checksum_type, package.pkgId)
                                url = urljoin(remote_url,
                                              package.location_href)
                                filename = os.path.basename(
                                    package.location_href)
                                da = DeclarativeArtifact(
                                    artifact=artifact,
                                    url=url,
                                    relative_path=filename,
                                    remote=self.remote,
                                    deferred_download=self.deferred_download)
                                dc = DeclarativeContent(content=package,
                                                        d_artifacts=[da])
                                dc.extra_data = defaultdict(list)

                                # find if a package relates to a modulemd
                                if dc.content.nevra in nevra_to_module.keys():
                                    dc.content.is_modular = True
                                    for dc_modulemd in nevra_to_module[
                                            dc.content.nevra]:
                                        dc.extra_data[
                                            'modulemd_relation'].append(
                                                dc_modulemd)
                                        dc_modulemd.extra_data[
                                            'package_relation'].append(dc)

                                if dc.content.name in pkgname_to_groups.keys():
                                    for dc_group in pkgname_to_groups[
                                            dc.content.name]:
                                        dc.extra_data[
                                            'group_relations'].append(dc_group)
                                        dc_group.extra_data[
                                            'related_packages'].append(dc)

                                packages_pb.increment()
                                await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        progress_data = {
                            'message': 'Parsed Advisories',
                            'code': 'parsing.advisories',
                            'total': len(updates),
                        }
                        with ProgressReport(**progress_data) as advisories_pb:
                            for update in updates:
                                update_record = UpdateRecord(
                                    **UpdateRecord.createrepo_to_dict(update))
                                update_record.digest = hash_update_record(
                                    update)
                                future_relations = {
                                    'collections': defaultdict(list),
                                    'references': []
                                }

                                for collection in update.collections:
                                    coll_dict = UpdateCollection.createrepo_to_dict(
                                        collection)
                                    coll = UpdateCollection(**coll_dict)

                                    for package in collection.packages:
                                        pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                            package)
                                        pkg = UpdateCollectionPackage(
                                            **pkg_dict)
                                        future_relations['collections'][
                                            coll].append(pkg)

                                for reference in update.references:
                                    reference_dict = UpdateReference.createrepo_to_dict(
                                        reference)
                                    ref = UpdateReference(**reference_dict)
                                    future_relations['references'].append(ref)

                                advisories_pb.increment()
                                dc = DeclarativeContent(content=update_record)
                                dc.extra_data = future_relations
                                await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                await self.put(modulemd)

            for dc_group in dc_groups:
                await self.put(dc_group)
Example #11
0
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressReport(message='Parsed Packages',
                                     code='parsing.packages')
        errata_pb = ProgressReport(message='Parsed Erratum',
                                   code='parsing.errata')
        modulemd_pb = ProgressReport(message='Parse Modulemd',
                                     code='parsing.modulemds')
        modulemd_defaults_pb = ProgressReport(
            message='Parse Modulemd-defaults', code='parsing.modulemddefaults')
        comps_pb = ProgressReport(message='Parsed Comps', code='parsing.comps')

        packages_pb.save()
        errata_pb.save()
        comps_pb.save()

        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            if self.kickstart:
                d_artifacts = []
                for path, checksum in self.kickstart["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)

                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)

                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.kickstart["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.kickstart
                await self.put(dc)

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    modulemd_index.update_from_string(
                        moduleyaml.read().decode(), True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                modulemd_pb.total = len(modulemd_all)
                modulemd_pb.state = 'running'
                modulemd_pb.save()

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in json.loads(dc.content.artifacts):
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                modulemd_default_names = parse_defaults(modulemd_index)

                modulemd_defaults_pb.total = len(modulemd_default_names)
                modulemd_defaults_pb.state = 'running'
                modulemd_defaults_pb.save()

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    modulemd_defaults_pb.increment()
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                comps_pb.total = (len(comps.groups) + len(comps.categories) +
                                  len(comps.environments))
                comps_pb.state = 'running'
                comps_pb.save()

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments.keys(
                        ):
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    comps_pb.increment()
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    comps_pb.increment()
                    await self.put(dc_environment)

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(remote_url, package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            dc.extra_data = defaultdict(list)

                            # find if a package relates to a modulemd
                            if dc.content.nevra in nevra_to_module.keys():
                                dc.content.is_modular = True
                                for dc_modulemd in nevra_to_module[
                                        dc.content.nevra]:
                                    dc.extra_data['modulemd_relation'].append(
                                        dc_modulemd)
                                    dc_modulemd.extra_data[
                                        'package_relation'].append(dc)

                            if dc.content.name in pkgname_to_groups.keys():
                                for dc_group in pkgname_to_groups[
                                        dc.content.name]:
                                    dc.extra_data['group_relations'].append(
                                        dc_group)
                                    dc_group.extra_data[
                                        'related_packages'].append(dc)

                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        errata_pb.total = len(updates)
                        errata_pb.state = 'running'
                        errata_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            errata_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                modulemd_pb.increment()
                await self.put(modulemd)

            for dc_group in dc_groups:
                comps_pb.increment()
                await self.put(dc_group)

        packages_pb.state = 'completed'
        errata_pb.state = 'completed'
        modulemd_pb.state = 'completed'
        modulemd_defaults_pb.state = 'completed'
        comps_pb.state = 'completed'
        packages_pb.save()
        errata_pb.save()
        modulemd_pb.save()
        modulemd_defaults_pb.save()
        comps_pb.save()