Example #1
def is_optimized_sync(repository, remote, url):
    """
    Check whether the synchronization can be optimized.

    Caution: we are not storing when the remote was last updated, so these checks must
    stay in this order: compare the repo version number before the dates, because
    changes other than a sync may have altered the date or the repo version since the
    last sync.

    Args:
        repository(RpmRepository): An RpmRepository to check optimization for.
        remote(RpmRemote): An RPMRemote to check optimization for.
        url(str): A remote repository URL.

    Returns:
        bool: True if the sync can be optimized; False otherwise.

    """
    with WorkingDirectory():
        result = get_repomd_file(remote, url)
        if not result:
            return False

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)
        repomd_checksum = get_sha256(repomd_path)

    is_optimized = (
        repository.last_sync_remote
        and remote.pk == repository.last_sync_remote.pk
        and repository.last_sync_repo_version == repository.latest_version().number
        and remote.pulp_last_updated <= repository.latest_version().pulp_created
        and is_previous_version(repomd.revision, repository.last_sync_revision_number)
        and repository.last_sync_repomd_checksum == repomd_checksum
    )
    if is_optimized:
        optimize_data = dict(message="Optimizing Sync", code="optimizing.sync")
        with ProgressReport(**optimize_data) as optimize_pb:
            optimize_pb.done = 1
            optimize_pb.save()

    return is_optimized
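
The get_sha256 helper used above is not shown here. A minimal sketch, assuming it simply hashes the file contents; only the name and call site come from the example, the body is an assumption:

import hashlib

def get_sha256(path):
    """Return the hex SHA-256 digest of the file at `path` (sketch of the helper)."""
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        # Read in chunks so a large file does not need to fit in memory.
        for chunk in iter(lambda: f.read(8192), b""):
            sha256.update(chunk)
    return sha256.hexdigest()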
Example #2
    def test_is_previous_version(self):
        """Test version-comparator."""
        # Versions are compared component-wise as integers ("1", "1.2", "1.2.3", ...)
        # A non-numeric component returns False, unless an earlier component has
        # already decided the comparison
        # True if version <= target; None on either side counts as previous

        # None
        self.assertTrue(is_previous_version(None, "1"))
        self.assertTrue(is_previous_version("1", None))
        self.assertTrue(is_previous_version(None, None))

        # Integer versions
        # v = t : v < t : v > t
        self.assertTrue(is_previous_version("1", "1"))
        self.assertTrue(is_previous_version("1", "2"))
        self.assertFalse(is_previous_version("2", "1"))

        # m.n
        # v == t : v-major < t-major : v-minor < t-minor : v-major > t-major : v-minor > t-minor
        self.assertTrue(is_previous_version("1.2", "1.2"))
        self.assertTrue(is_previous_version("1.2", "2.2"))
        self.assertTrue(is_previous_version("1.2", "1.3"))
        self.assertFalse(is_previous_version("2.2", "1.2"))
        self.assertFalse(is_previous_version("2.2", "2.1"))

        # non-numeric components: v non-digit : t non-digit : v dot-non-digit :
        # t dot-non-digit : decided before the non-digit : tie on the non-digit
        self.assertFalse(is_previous_version("foo", "1.2"))
        self.assertFalse(is_previous_version("1.2", "bar"))
        self.assertFalse(is_previous_version("foo.2", "2.1"))
        self.assertFalse(is_previous_version("1.2", "bar.1"))
        self.assertTrue(is_previous_version("1.foo", "2.bar"))
        self.assertFalse(is_previous_version("1.foo", "1.bar"))
Example #3
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or the advisories can be merged into a newly created one which is then added
    to a repo version. Merging is done based on the criteria described below.

     1. If updated_dates and update_version are the same and pkglist intersection is empty
     (e.g. base repo merged with debuginfo repo) -> new UpdateRecord content unit with combined
     pkglist is created.
     2. If updated_dates or update_version differ and pkglist intersection is non-empty
     (update/re-sync/upload-new case) -> UpdateRecord with newer updated_date or update_version
     is added.
     3. If updated_dates differ and pkglist intersection is empty: ERROR CONDITION
     (e.g. base and debuginfo repos are from different versions, not at the same date)
     4. If updated_dates and update_version are the same, pkglist intersection is non-empty
     and not equal to either pkglist - ERROR CONDITION!
     (never-happen case - "something is Terribly Wrong Here")

     Args:
       previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous repo
                                                            version
       added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

     Returns:
       to_add(list): UUIDs of advisories to add to a repo version, can include newly
                     created ones
       to_remove(list): UUIDs of advisories to remove from a repo version
       to_exclude(list): UUIDs of advisories to exclude from the added set of content
                         for a repo version

    """
    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = parse_datetime(previous_advisory.updated_date
                                           or previous_advisory.issued_date)
    added_updated_date = parse_datetime(added_advisory.updated_date
                                        or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            raise AdvisoryConflict(
                _('Incoming and existing advisories have the same id and '
                  'timestamp but different and intersecting package lists. '
                  'At least one of them is wrong. '
                  f'Advisory id: {previous_advisory.id}'))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or version.
            # There is no way to find out which one is newer, and a user can't fix it,
            # so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates and not pkgs_intersection) or \
            (same_dates and not same_version and not pkgs_intersection):
        raise AdvisoryConflict(
            _('Incoming and existing advisories have the same id but '
              'different timestamps and non-intersecting package lists. It is '
              'likely that they are from two different incompatible remote '
              'repositories. E.g. RHELX-repo and RHELY-debuginfo repo. '
              'Ensure that you are adding content for the compatible '
              f'repositories. Advisory id: {previous_advisory.id}'))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version,
                               added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    return to_add, to_remove, to_exclude
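
The branching above reduces to a small decision table. A plain-values sketch for illustration; classify_conflict is a made-up name, not part of the pulp_rpm API:

def classify_conflict(same_dates, same_version, pkglists_intersect, pkglists_equal=False):
    """Illustrative reduction of resolve_advisory_conflict's decision table."""
    if same_dates and same_version and pkglists_intersect:
        # Equal pkglists: metadata changed silently, prefer the incoming advisory.
        return "keep incoming" if pkglists_equal else "error: unequal intersecting pkglists"
    if not (same_dates and same_version) and not pkglists_intersect:
        return "error: likely incompatible repositories"
    if pkglists_intersect:
        # Dates or versions differ: the newer advisory wins.
        return "keep the newer advisory"
    # Same dates and version, disjoint pkglists: e.g. base repo plus debuginfo repo.
    return "merge into a new advisory"

assert classify_conflict(True, True, False) == "merge into a new advisory"
assert classify_conflict(False, True, True) == "keep the newer advisory"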
Example #4
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
        optimize_sync = self.optimize

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)

            # Caution: we are not storing when the remote was last updated, so these
            # checks must stay in this order: compare the repo version number before
            # the dates, because changes other than a sync may have altered the date or
            # the repo version since the last sync
            if (optimize_sync and self.repository.last_sync_remote
                    and self.remote.pk == self.repository.last_sync_remote.pk
                    and (self.repository.last_sync_repo_version
                         == self.repository.latest_version().number)
                    and (self.remote.pulp_last_updated <=
                         self.repository.latest_version().pulp_created)
                    and is_previous_version(
                        repomd.revision,
                        self.repository.last_sync_revision_number)):
                optimize_data = dict(message='Optimizing Sync',
                                     code='optimizing.sync')
                with ProgressReport(**optimize_data) as optimize_pb:
                    optimize_pb.done = 1
                    optimize_pb.save()
                    return

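            # Record the revision being synced; the optimization check above compares
            # against it on the next sync.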
            self.repository.last_sync_revision_number = repomd.revision

            if self.treeinfo:
                d_artifacts = [
                    DeclarativeArtifact(
                        artifact=Artifact(),
                        url=urljoin(remote_url, self.treeinfo["filename"]),
                        relative_path=".treeinfo",
                        remote=self.remote,
                        deferred_download=False,
                    )
                ]
                for path, checksum in self.treeinfo["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)
                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)
                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.treeinfo["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.treeinfo
                await self.put(dc)

            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None
            main_types = set()
            checksums = {}
            # Defensive defaults: a repo may have no updateinfo or modules records, and
            # these names are compared against later.
            updateinfo_url = None
            modules_url = None

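            # Bucket each repomd record by type: package metadata URLs are collected so
            # their download order can be controlled, updateinfo/comps/modules get their
            # own downloaders, and unrecognized types become RepoMetadataFile content.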
            for record in repomd.records:
                checksums[record.type] = record.checksum_type.upper()
                if record.type in PACKAGE_REPODATA:
                    main_types.update([record.type])
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)

                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif '_zck' in record.type:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            missing_type = set(PACKAGE_REPODATA) - main_types
            if missing_type:
                raise FileNotFoundError(
                    _("XML file(s): {filename} not found").format(
                        filename=", ".join(missing_type)))

            self.repository.original_checksum_types = checksums

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    content = moduleyaml.read()
                    module_content = content if isinstance(
                        content, str) else content.decode()
                    modulemd_index.update_from_string(module_content, True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                # Parsing modules happens all at one time, and from here on no useful work happens.
                # So just report that it finished this stage.
                modulemd_pb_data = {
                    'message': 'Parsed Modulemd',
                    'code': 'parsing.modulemds'
                }
                with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                    modulemd_total = len(modulemd_all)
                    modulemd_pb.total = modulemd_total
                    modulemd_pb.done = modulemd_total

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in dc.content.artifacts:
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_all

                modulemd_default_names = parse_defaults(modulemd_index)

                # Parsing module-defaults happens all at one time, and from here on no useful
                # work happens. So just report that it finished this stage.
                modulemd_defaults_pb_data = {
                    'message': 'Parsed Modulemd-defaults',
                    'code': 'parsing.modulemd_defaults'
                }
                with ProgressReport(
                        **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                    modulemd_defaults_total = len(modulemd_default_names)
                    modulemd_defaults_pb.total = modulemd_defaults_total
                    modulemd_defaults_pb.done = modulemd_defaults_total

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_default_names

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                with ProgressReport(message='Parsed Comps',
                                    code='parsing.comps') as comps_pb:
                    comps_total = (len(comps.groups) + len(comps.categories) +
                                   len(comps.environments))
                    comps_pb.total = comps_total
                    comps_pb.done = comps_total

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments:
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    try:
                        results = downloader.result()
                    except ClientResponseError as exc:
                        raise HTTPNotFound(
                            reason=_("File not found: {filename}").format(
                                filename=exc.request_info.url))
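                    # Each asyncio.gather() preserves the order of its inputs, so the
                    # first result's URL identifies which downloader group finished.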
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        # skip SRPM if defined
                        if 'srpm' in self.skip_types:
                            packages = {
                                pkgId: pkg
                                for pkgId, pkg in packages.items()
                                if pkg.arch != 'src'
                            }

                        progress_data = {
                            'message': 'Parsed Packages',
                            'code': 'parsing.packages',
                            'total': len(packages),
                        }
                        with ProgressReport(**progress_data) as packages_pb:
                            for pkg in packages.values():
                                package = Package(
                                    **Package.createrepo_to_dict(pkg))
                                artifact = Artifact(size=package.size_package)
                                checksum_type = getattr(
                                    CHECKSUM_TYPES,
                                    package.checksum_type.upper())
                                setattr(artifact, checksum_type, package.pkgId)
                                url = urljoin(remote_url,
                                              package.location_href)
                                filename = os.path.basename(
                                    package.location_href)
                                da = DeclarativeArtifact(
                                    artifact=artifact,
                                    url=url,
                                    relative_path=filename,
                                    remote=self.remote,
                                    deferred_download=self.deferred_download)
                                dc = DeclarativeContent(content=package,
                                                        d_artifacts=[da])
                                dc.extra_data = defaultdict(list)

                                # find if a package relates to a modulemd
                                if dc.content.nevra in nevra_to_module.keys():
                                    dc.content.is_modular = True
                                    for dc_modulemd in nevra_to_module[
                                            dc.content.nevra]:
                                        dc.extra_data[
                                            'modulemd_relation'].append(
                                                dc_modulemd)
                                        dc_modulemd.extra_data[
                                            'package_relation'].append(dc)

                                if dc.content.name in pkgname_to_groups.keys():
                                    for dc_group in pkgname_to_groups[
                                            dc.content.name]:
                                        dc.extra_data[
                                            'group_relations'].append(dc_group)
                                        dc_group.extra_data[
                                            'related_packages'].append(dc)

                                packages_pb.increment()
                                await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        progress_data = {
                            'message': 'Parsed Advisories',
                            'code': 'parsing.advisories',
                            'total': len(updates),
                        }
                        with ProgressReport(**progress_data) as advisories_pb:
                            for update in updates:
                                update_record = UpdateRecord(
                                    **UpdateRecord.createrepo_to_dict(update))
                                update_record.digest = hash_update_record(
                                    update)
                                future_relations = {
                                    'collections': defaultdict(list),
                                    'references': []
                                }

                                for collection in update.collections:
                                    coll_dict = UpdateCollection.createrepo_to_dict(
                                        collection)
                                    coll = UpdateCollection(**coll_dict)

                                    for package in collection.packages:
                                        pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                            package)
                                        pkg = UpdateCollectionPackage(
                                            **pkg_dict)
                                        future_relations['collections'][
                                            coll].append(pkg)

                                for reference in update.references:
                                    reference_dict = UpdateReference.createrepo_to_dict(
                                        reference)
                                    ref = UpdateReference(**reference_dict)
                                    future_relations['references'].append(ref)

                                advisories_pb.increment()
                                dc = DeclarativeContent(content=update_record)
                                dc.extra_data = future_relations
                                await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                await self.put(modulemd)

            for dc_group in dc_groups:
                await self.put(dc_group)
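
The download scheduling above combines two asyncio primitives: asyncio.gather preserves result order inside each group (primary/filelists/other must stay aligned), while asyncio.wait with FIRST_COMPLETED lets whole groups be processed as soon as they finish. A self-contained sketch of the same pattern; names and delays are illustrative:

import asyncio

async def fetch(name, delay):
    await asyncio.sleep(delay)
    return name

async def main():
    # Each inner list keeps its own order; groups may finish in any order.
    groups = [
        [fetch("primary", 0.2), fetch("filelists", 0.1), fetch("other", 0.3)],
        [fetch("updateinfo", 0.05)],
    ]
    pending = [asyncio.gather(*group) for group in groups]
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            # Prints ['updateinfo'] first, then ['primary', 'filelists', 'other'].
            print(task.result())

asyncio.run(main())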
Example #5
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or the advisories can be merged into a newly created one which is then added
    to a repo version. Merging is done based on the criteria described below.

     1. If updated_dates and update_version are the same and pkglist intersection is empty
     (e.g. base repo merged with debuginfo repo) -> new UpdateRecord content unit with combined
     pkglist is created.

     2. If updated_dates or update_version differ and pkglist intersection is non-empty
     (update/re-sync/upload-new case) -> UpdateRecord with newer updated_date or update_version
     is added.

     3. If updated_dates differ and pkglist intersection is empty:
       3.a If the pkglists differ only in EVR (i.e. the name-intersection is non-empty) ->
           the newer advisory is used
       3.b else -> ERROR CONDITION
         (e.g. base and debuginfo repos are from different versions, not at the same date)

     4. If updated_dates and update_version are the same, the pkglist intersection is
     non-empty and neither pkglist is a proper subset of the other - ERROR CONDITION!
     (never-happen case - "something is Terribly Wrong Here")

     Args:
       previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous repo
                                                            version
       added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

     Returns:
       to_add(list): UUIDs of advisories to add to a repo version, can be newly created ones
       to_remove(list): UUIDs of advisories to remove from a repo version
       to_exclude(list): UUIDs of advisories to exclude from the added set of content for a
                         repo version

    """
    def _datetime_heuristics(in_str):
        # issue- and update-dates can be datetimes, empty, or timestamps. Alas.
        # Try to Do The Right Thing.
        # Return None if we give up
        if not in_str:
            return None

        dt = parse_datetime(in_str)
        if not dt:
            try:
                tstamp = int(in_str)
                dt = datetime.fromtimestamp(tstamp)
            except (TypeError, ValueError, OverflowError, OSError):
                # No idea what this is - give up and return None
                return None
        return dt

    def _do_merge():
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    def _name_intersect(prev_pkgs, new_pkgs):
        prev_names = {x[0] for x in prev_pkgs}
        new_names = {x[0] for x in new_pkgs}
        return prev_names.intersection(new_names)

    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = _datetime_heuristics(
        previous_advisory.updated_date or previous_advisory.issued_date)
    added_updated_date = _datetime_heuristics(added_advisory.updated_date
                                              or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)
    names_intersection = _name_intersect(previous_pkglist, added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            # prev and new have different pkg-lists. See if one is a proper-subset of the other;
            # if so, choose the one with the *larger* pkglist. Otherwise, error.
            if previous_pkglist < added_pkglist:
                # new has more pkgs - remove previous
                to_remove.append(previous_advisory.pk)
            elif added_pkglist < previous_pkglist:
                # prev has more pkgs - exclude new
                to_exclude.append(added_advisory.pk)
            else:
                if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
                    _do_merge()
                else:
                    raise AdvisoryConflict(
                        _("Incoming and existing advisories have the same id and timestamp "
                          "but different and intersecting package lists, "
                          "and neither package list is a proper subset of the other. "
                          "At least one of the advisories is wrong. "
                          "To allow this behavior, set "
                          "ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION = True (q.v.) "
                          "in your configuration. Advisory id: {}").format(
                              previous_advisory.id))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or version.
            # There is no way to find out which one is newer, and a user can't fix it,
            # so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates or
          (same_dates and not same_version)) and not pkgs_intersection:
        if names_intersection or settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
            # Keep "newer" advisory
            if not same_dates:
                if previous_updated_date < added_updated_date:
                    to_remove.append(previous_advisory.pk)
                else:
                    to_exclude.append(added_advisory.pk)
            elif not same_version:
                if is_previous_version(previous_updated_version,
                                       added_updated_version):
                    to_remove.append(previous_advisory.pk)
                else:
                    to_exclude.append(added_advisory.pk)
        else:
            raise AdvisoryConflict(
                _("Incoming and existing advisories have the same id but "
                  "different timestamps and non-intersecting package lists. "
                  "It is likely that they are from two different incompatible remote "
                  "repositories. E.g. RHELX-repo and RHELY-debuginfo repo. "
                  "Ensure that you are adding content for the compatible repositories. "
                  "To allow this behavior, set "
                  "ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION = True (q.v.) "
                  "in your configuration. Advisory id: {}").format(
                      previous_advisory.id))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version,
                               added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        _do_merge()

    return to_add, to_remove, to_exclude
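
_datetime_heuristics exists because advisory dates appear both as datetime strings and as epoch timestamps. A standalone rendition for illustration; the real helper uses parse_datetime (presumably Django's), while datetime.fromisoformat stands in here so the sketch runs on its own:

from datetime import datetime

def datetime_heuristics_demo(in_str):
    if not in_str:
        return None
    try:
        return datetime.fromisoformat(in_str)
    except ValueError:
        pass
    try:
        return datetime.fromtimestamp(int(in_str))
    except (ValueError, OverflowError, OSError):
        # No idea what this is - give up and return None.
        return None

print(datetime_heuristics_demo("2021-03-01 12:00:00"))  # parsed as a datetime
print(datetime_heuristics_demo("1614600000"))           # parsed as an epoch timestamp
print(datetime_heuristics_demo("not-a-date"))           # None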
Example #6
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or the advisories can be merged into a newly created one which is then added
    to a repo version. Merging is done based on the criteria described below.

     1. If updated_dates and update_version are the same and pkglist intersection is empty
     (e.g. base repo merged with debuginfo repo) -> new UpdateRecord content unit with combined
     pkglist is created.
     2. If updated_dates or update_version differ and pkglist intersection is non-empty
     (update/re-sync/upload-new case) -> UpdateRecord with newer updated_date or update_version
     is added.
     3. If updated_dates differ and pkglist intersection is empty: ERROR CONDITION, unless
     ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION is enabled, in which case the
     previous advisory is removed
     (e.g. base and debuginfo repos are from different versions, not at the same date)
     4. If updated_dates and update_version are the same and the pkglist intersection is
     non-empty: if one pkglist is a proper subset of the other, the advisory with the larger
     pkglist is kept; if neither is - ERROR CONDITION, unless
     ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION is enabled, in which case the
     advisories are merged
     (never-happen case - "something is Terribly Wrong Here")

     Args:
       previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous repo
                                                            version
       added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

     Returns:
       to_add(list): UUIDs of advisories to add to a repo version, can include newly
                     created ones
       to_remove(list): UUIDs of advisories to remove from a repo version
       to_exclude(list): UUIDs of advisories to exclude from the added set of content
                         for a repo version

    """
    def _do_merge():
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = parse_datetime(previous_advisory.updated_date
                                           or previous_advisory.issued_date)
    added_updated_date = parse_datetime(added_advisory.updated_date
                                        or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            # prev and new have different pkg-lists. See if one is a proper-subset of the other;
            # if so, choose the one with the *larger* pkglist. Otherwise, error.
            if previous_pkglist < added_pkglist:
                # new has more pkgs - remove previous
                to_remove.append(previous_advisory.pk)
            elif added_pkglist < previous_pkglist:
                # prev has more pkgs - exclude new
                to_exclude.append(added_advisory.pk)
            else:
                if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
                    _do_merge()
                else:
                    raise AdvisoryConflict(
                        _("Incoming and existing advisories have the same id and timestamp "
                          "but different and intersecting package lists, "
                          "and neither package list is a proper subset of the other. "
                          "At least one of the advisories is wrong. "
                          "Advisory id: {}").format(previous_advisory.id))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or version.
            # There is no way to find out which one is newer, and a user can't fix it,
            # so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates or
          (same_dates and not same_version)) and not pkgs_intersection:
        if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
            to_remove.append(previous_advisory.pk)
        else:
            raise AdvisoryConflict(
                _("Incoming and existing advisories have the same id but "
                  "different timestamps and non-intersecting package lists. "
                  "It is likely that they are from two different incompatible remote "
                  "repositories. E.g. RHELX-repo and RHELY-debuginfo repo. "
                  "Ensure that you are adding content for the compatible repositories. "
                  "Advisory id: {}").format(previous_advisory.id))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version,
                               added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        _do_merge()

    return to_add, to_remove, to_exclude
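
The proper-subset branches above rely on Python's set ordering operators: a < b is True only when a is a subset of b and a != b. A quick illustration with made-up NEVRA tuples:

prev = {("pkg-a", "0", "1.0", "1", "x86_64"), ("pkg-b", "0", "1.0", "1", "x86_64")}
added = prev | {("pkg-c", "0", "1.0", "1", "x86_64")}

assert prev < added          # proper subset: the incoming advisory covers more, remove previous
assert not added < prev
assert not prev < prev       # equal sets are not proper subsets of each other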