Exemple #1
0
    def _resolve_new_units(self, existing, wanted):
        """
        Decide which units still need to be downloaded.

        Every wanted unit that is found by the lookup and whose file is present on the
        filesystem is dropped from the download list. If such a unit is not yet listed
        in ``existing``, it is also associated with the repository.

        :param existing: units which are already in a repository
        :type existing: list of unit keys as namedtuples
        :param wanted: units which should be imported into a repository
        :type wanted: list of unit keys as namedtuples

        :return: list of unit keys to download; empty list if all units are already downloaded
        :rtype:  list of unit keys as namedtuples
        """
        model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in wanted)
        still_wanted = set(wanted)
        # Build the lookup once so that each membership test below is a hash lookup
        # instead of a linear scan over the ``existing`` list.
        already_in_repo = set(existing)
        for unit in units_controller.find_units(unit_generator):
            # A unit without its file on disk still has to be downloaded.
            if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                continue
            unit_key = unit.unit_key_as_named_tuple
            if unit_key not in already_in_repo:
                repo_controller.associate_single_unit(self.repo.repo_obj, unit)
            still_wanted.discard(unit_key)

        return list(still_wanted)
Exemple #2
0
    def _resolve_new_units(self, existing, wanted):
        """
        Decide what units are needed to be downloaded.

        Units that are found by the lookup and have their file on the filesystem do not
        need to be downloaded. Those of them that are not listed in ``existing`` are
        associated with the repository as well.

        :param existing: units which are already in a repository
        :type existing: list of unit keys as namedtuples
        :param wanted: units which should be imported into a repository
        :type wanted: list of unit keys as namedtuples

        :return: list of unit keys to download; empty list if all units are already downloaded
        :rtype:  list of unit keys as namedtuples
        """
        model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in wanted)
        still_wanted = set(wanted)
        # ``existing`` is a list; converting it once avoids rescanning it for every
        # unit that the lookup returns.
        existing_keys = set(existing)
        for unit in units_controller.find_units(unit_generator):
            file_exists = unit._storage_path is not None and os.path.isfile(unit._storage_path)
            if not file_exists:
                # The unit is known but its file is missing, so keep it on the list.
                continue
            unit_key = unit.unit_key_as_named_tuple
            if unit_key not in existing_keys:
                repo_controller.associate_single_unit(self.repo.repo_obj, unit)
            still_wanted.discard(unit_key)

        return list(still_wanted)
Exemple #3
0
    def process_main(self, item=None):
        """
        Split the available units into those that the lookup finds and those that
        still have to be downloaded.

        Every unit returned by the lookup is associated with the repository; every
        other unit is appended to ``self.units_to_download``.

        :param item: The item to process or None if get_iterator is not defined
        :type  item: object or None
        """
        # Prefer the units handed to this step; fall back to the parent's otherwise.
        source_units = self.available_units
        if source_units is None:
            source_units = self.parent.available_units

        # hashes of the units that were found by the lookup
        known_hashes = set()

        for page in misc.paginate(source_units, self.unit_pagination_size):
            for known_unit in units_controller.find_units(page):
                known_hashes.add(hash(known_unit))
                repo_controller.associate_single_unit(self.get_repo().repo_obj, known_unit)

            # Whatever was not found in this page still needs to be downloaded.
            for candidate in page:
                if hash(candidate) in known_hashes:
                    continue
                self.units_to_download.append(candidate)
Exemple #4
0
    def process_main(self, item=None):
        """
        Given the available units, determine which of them are found by the lookup,
        associate those with the repository, and queue the rest for download.

        Units that are not found are appended to ``self.units_to_download``.

        :param item: The item to process or None if get_iterator is not defined
        :type  item: object or None
        """
        # any units that are already in pulp
        units_we_already_had = set()

        # If available_units was set on this step, use it; otherwise use the default of
        # self.parent.available_units. The check must be "is not None" rather than a
        # truthiness test: an explicitly supplied but empty collection must not fall
        # back to the parent's units.
        if self.available_units is not None:
            available_units = self.available_units
        else:
            available_units = self.parent.available_units

        for units_group in misc.paginate(available_units, self.unit_pagination_size):
            # Get this group of units
            query = units_controller.find_units(units_group)

            for found_unit in query:
                units_we_already_had.add(hash(found_unit))
                repo_controller.associate_single_unit(self.get_repo().repo_obj, found_unit)

            for unit in units_group:
                if hash(unit) not in units_we_already_had:
                    self.units_to_download.append(unit)
Exemple #5
0
    def test_paginate(self, mock_paginate):
        """
        find_units should call paginate exactly once, with the iterable it was given
        and the value 50
        """
        units = (DemoModel(key_field='a'), DemoModel(key_field='B'))

        # wrap in list() so the generator will be evaluated
        list(units_controller.find_units(units))

        mock_paginate.assert_called_once_with(units, 50)
Exemple #6
0
    def test_paginate(self, mock_paginate):
        """
        ensure that paginate is called once with the given units and the value 50
        """
        first_unit = DemoModel(key_field='a')
        second_unit = DemoModel(key_field='B')
        requested = (first_unit, second_unit)

        # evaluate the generator by exhausting it into a list
        list(units_controller.find_units(requested))

        mock_paginate.assert_called_once_with(requested, 50)
Exemple #7
0
    def test_results(self):
        """
        Test that find_units yields exactly the units returned by DemoModel.objects
        """
        requested = (DemoModel(key_field='a'), DemoModel(key_field='B'))
        stored_unit = DemoModel(key_field='B', id='foo')
        DemoModel.objects.return_value = [stored_unit]

        # turn into list so the generator will be evaluated
        found = list(units_controller.find_units(requested))
        self.assertEqual(found, [stored_unit])
Exemple #8
0
    def test_query(self):
        """
        The query passed to DemoModel.objects should be an $or over the key_field of
        each requested unit
        """
        requested = (DemoModel(key_field='a'), DemoModel(key_field='B'))

        # turn into list so the generator will be evaluated
        list(units_controller.find_units(requested))

        # first positional argument of the most recent DemoModel.objects call
        query_object = DemoModel.objects.call_args[0][0]
        self.assertDictEqual(
            query_object.to_query(DemoModel),
            {'$or': [{'key_field': u'a'}, {'key_field': u'B'}]})
Exemple #9
0
    def test_results(self):
        """
        Test that the units produced by find_units are the ones DemoModel.objects returns
        """
        unit_a = DemoModel(key_field='a')
        unit_b = DemoModel(key_field='B')
        requested = (unit_a, unit_b)
        unit_b_with_id = DemoModel(key_field='B', id='foo')
        DemoModel.objects.return_value = [unit_b_with_id]

        # exhaust the generator into a list so it is evaluated
        produced = list(units_controller.find_units(requested))
        self.assertEqual(produced, [unit_b_with_id])
Exemple #10
0
    def test_query(self):
        """
        Test that the generated mongo query ORs together the key_field of every unit
        """
        unit_a = DemoModel(key_field='a')
        unit_b = DemoModel(key_field='B')
        requested = (unit_a, unit_b)

        # exhaust the generator into a list so it is evaluated
        list(units_controller.find_units(requested))

        positional_args = DemoModel.objects.call_args[0]
        actual_query = positional_args[0].to_query(DemoModel)
        self.assertDictEqual(actual_query,
                             {'$or': [{'key_field': u'a'}, {'key_field': u'B'}]})
Exemple #11
0
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Associate the wanted units that Pulp already has with the repo and report the rest.

    Each wanted unit key is looked up in the db. A unit that is found is added to the
    catalog, associated with the conduit's repo and dropped from the result. Two kinds
    of found units stay in the result instead:

    * when downloading is not deferred, RPMs, SRPMs and DRPMs whose package file does
      not exist on the filesystem (these are not added to the catalog either);
    * when signature checking is enabled, units for which the signature filter raises
      a PulpCodedException (these were already added to the catalog).

    :param wanted:            iterable of units as namedtuples
    :type  wanted:            iterable
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param config:            configuration instance passed to the importer
    :type  config:            pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found on the server.
    :rtype:     set
    """
    units_by_type = _sort_by_type(wanted)
    for unit_type, unit_keys in units_by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)

        # The package file is only required for RPMs, SRPMs and DRPMs, and only when
        # downloading is not deferred.
        file_required = not download_deferred and unit_type in (
            ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)

        # Iterate over a copy, because unit_keys shrinks inside the loop below.
        candidates = (model(**key._asdict()) for key in unit_keys.copy())
        for unit in units_controller.find_units(candidates):
            if file_required:
                path = unit._storage_path
                if path is None or not os.path.isfile(path):
                    continue
            catalog.add(unit)
            if rpm_parse.signature_enabled(config):
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    _LOGGER.debug(e)
                    continue
            repo_controller.associate_single_unit(conduit.repo, unit)
            unit_keys.discard(unit.unit_key_as_named_tuple)

    # Whatever is left in the per-type sets was not satisfied locally.
    return set().union(*units_by_type.values())
Exemple #12
0
def check_repo(wanted):
    """
    Given an iterable of units as namedtuples, this function will search for them
    in the db and return the set of tuples that were not found. For RPMs, SRPMs and
    DRPMs the actual file must also exist on the filesystem; a unit of those types
    whose file is missing is reported as not found.

    This is useful in a case where you know what units you want to have in a repo,
    but need to know which you need to actually download by eliminating the ones
    you already have.

    :param wanted:          iterable of units as namedtuples
    :type  wanted:          iterable

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found by the search.
    :rtype:     set
    """
    # sort by type
    sorted_units = _sort_by_type(wanted)
    # one lookup for each type
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)

        rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM,
                                      ids.TYPE_ID_DRPM)

        # FIXME find_units() does not accept a "fields" argument, so it is not passed
        # here. Once it does, restrict the lookup to
        # model.unit_key_fields + ('_storage_path',).
        # Iterate over a copy, because values shrinks inside the loop below.
        unit_generator = (model(**unit_tuple._asdict())
                          for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            if rpm_srpm_drpm:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on the filesystem.
                # If not, we do not want to skip downloading the unit.
                if unit._storage_path is None or not os.path.isfile(
                        unit._storage_path):
                    continue
            values.discard(unit.unit_key_as_named_tuple)

    ret = set()
    ret.update(*sorted_units.values())
    return ret
Exemple #13
0
def check_repo(wanted):
    """
    Search the db for the given units and return the ones that were not found.

    The units are given as namedtuples. For RPMs, SRPMs and DRPMs the actual file
    must exist on the filesystem too; a unit of those types whose file is missing
    is treated as not found.

    This is useful in a case where you know what units you want to have in a repo,
    but need to know which you need to actually download by eliminating the ones
    you already have.

    :param wanted:          iterable of units as namedtuples
    :type  wanted:          iterable

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found by the search.
    :rtype:     set
    """
    # sort by type
    sorted_units = _sort_by_type(wanted)
    # one lookup for each type
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)

        rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM,
                                      ids.TYPE_ID_SRPM,
                                      ids.TYPE_ID_DRPM)

        # FIXME find_units() has no "fields" parameter, so the keyword is left out of
        # the call. When it gains one, limit the lookup to
        # model.unit_key_fields + ('_storage_path',).
        # A copy is iterated because values is modified inside the loop below.
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            if rpm_srpm_drpm:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on the filesystem.
                # If not, we do not want to skip downloading the unit.
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            values.discard(unit.unit_key_as_named_tuple)

    ret = set()
    ret.update(*sorted_units.values())
    return ret
Exemple #14
0
def check_all_and_associate(wanted, conduit, download_deferred, catalog):
    """
    Associate the wanted units that Pulp already has with the repo and report the rest.

    Each wanted unit key is looked up in the db. A unit that is found is added to the
    catalog, associated with the conduit's repo and dropped from the result. When
    downloading is not deferred, found RPMs, SRPMs and DRPMs whose package file does
    not exist on the filesystem are skipped instead, so they stay in the result.

    :param wanted:            iterable of units as namedtuples
    :type  wanted:            iterable
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found on the server.
    :rtype:     set
    """
    units_by_type = _sort_by_type(wanted)
    for unit_type, unit_keys in units_by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)

        # The package file is only required for RPMs, SRPMs and DRPMs, and only when
        # downloading is not deferred.
        file_required = not download_deferred and unit_type in (
            ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)

        # Iterate over a copy, because unit_keys shrinks inside the loop below.
        candidates = (model(**key._asdict()) for key in unit_keys.copy())
        for unit in units_controller.find_units(candidates):
            if file_required:
                path = unit._storage_path
                if path is None or not os.path.isfile(path):
                    continue
            catalog.add(unit)
            repo_controller.associate_single_unit(conduit.repo, unit)
            unit_keys.discard(unit.unit_key_as_named_tuple)

    # Whatever is left in the per-type sets was not satisfied locally.
    return set().union(*units_by_type.values())
Exemple #15
0
    def _decide_what_to_download(self, metadata_files):
        """
        Work out which units listed in the primary metadata have to be downloaded.

        The packages are read from the primary metadata file and grouped by model
        class. For each class the unit keys are looked up with find_units: units whose
        key is found are re-associated with the conduit's repo, the others are selected
        for download. The per-type counts and the total size of the selected units that
        have files are recorded on the content report.

        :param metadata_files: object providing get_metadata_file_handle() for the
                               primary metadata file

        :return: tuple of two sets of units to process: those whose model has a
                 'filename' field, and those whose model does not
        :rtype:  tuple
        """
        with metadata_files.get_metadata_file_handle(primary.METADATA_FILE_NAME) as primary_fh:  # noqa
            package_infos = packages.package_list_generator(
                primary_fh, primary.PACKAGE_TAG, self._process_package_element)
            units_by_model = self._separate_units_by_type(package_infos)

        to_download = {}
        for model_class, units in sorted(units_by_model.items()):
            # Because models don't implement an __eq__, we can't simply throw
            # them in a set (even though they do implement __hash__), so they
            # are tracked by their unit key instead.
            units_by_key = {u.unit_key_as_named_tuple: u for u in units}
            # Fresh model instances carrying only the unit key, for the lookup
            lookups = [model_class(**u.unit_key) for u in sorted(units)]
            found_units = units_controller.find_units(lookups)
            upstream_keys = set(units_by_key)
            # Keys that the lookup did not return are the ones to download
            missing_keys = upstream_keys.difference(
                found.unit_key_as_named_tuple for found in found_units)
            for present_key in upstream_keys.difference(missing_keys):
                # Existing units get re-associated
                yumsync.repo_controller.associate_single_unit(
                    self.conduit.repo, units_by_key[present_key])
            to_download[model_class] = [units_by_key[key] for key in missing_keys]

        unit_counts = {}
        with_file = set()
        without_file = set()
        for model_class, pending in to_download.items():
            unit_counts[model_class.TYPE_ID] = len(pending)
            target = with_file if 'filename' in model_class._fields else without_file
            target.update(pending)

        # Units without a truthy size do not contribute to the total.
        total_size = sum(unit.size for unit in with_file if unit.size)
        self.content_report.set_initial_values(unit_counts, total_size)
        self.set_progress()
        return with_file, without_file
Exemple #16
0
def check_all_and_associate(wanted, sync_conduit):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not
    found. This checks for the unit in the db and, for RPMs, SRPMs and DRPMs, also
    for the actual file on the filesystem. A unit that passes these checks is
    associated to the given repo and left out of the result.

    :param wanted:          iterable of units as namedtuples
    :type  wanted:          iterable
    :param sync_conduit:    repo sync conduit
    :type  sync_conduit:    pulp.plugins.conduits.repo_sync.RepoSync

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found on the server.
    :rtype:     set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM,
                                      ids.TYPE_ID_SRPM,
                                      ids.TYPE_ID_DRPM)

        # Iterate over a copy of the set: the generator is consumed while the loop
        # below discards entries from ``values``, and iterating a set that changes
        # size raises RuntimeError.
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            if rpm_srpm_drpm:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on the filesystem.
                # If not, we do not want to skip downloading the unit.
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            # Add the existing unit to the repository
            repo_controller.associate_single_unit(sync_conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)

    ret = set()
    ret.update(*sorted_units.values())
    return ret
Exemple #17
0
    def _decide_what_to_download(self, metadata_files):
        """
        Work out which units listed in the primary metadata have to be downloaded.

        The packages are read from the primary metadata file and grouped by model
        class. For each class the units are looked up with find_units: every unit
        returned by the lookup is re-associated with the conduit's repo, and the
        upstream units whose key was not returned are selected for download. The
        per-type counts and the total size of the selected units that have files are
        recorded on the content report.

        :param metadata_files: object providing get_metadata_file_handle() for the
                               primary metadata file

        :return: tuple of two sets of units to process: those whose model has a
                 'filename' field, and those whose model does not
        :rtype:  tuple
        """
        with metadata_files.get_metadata_file_handle(primary.METADATA_FILE_NAME) as primary_fh:  # noqa
            package_infos = packages.package_list_generator(
                primary_fh, primary.PACKAGE_TAG, self._process_package_element)
            units_by_model = self._separate_units_by_type(package_infos)

        to_download = {}
        for model_class, units in sorted(units_by_model.items()):
            upstream_keys = set(u.unit_key_as_named_tuple for u in units)
            # Units from the database, materialized because they are walked twice
            found_units = list(units_controller.find_units(units))
            # Keys that the lookup did not return are the ones to download
            missing_keys = upstream_keys.difference(
                found.unit_key_as_named_tuple for found in found_units)
            for found in found_units:
                # Existing units get re-associated
                yumsync.repo_controller.associate_single_unit(self.conduit.repo, found)
            pending = []
            for upstream_unit in units:
                if upstream_unit.unit_key_as_named_tuple in missing_keys:
                    pending.append(upstream_unit)
            to_download[model_class] = pending

        unit_counts = {}
        with_file = set()
        without_file = set()
        for model_class, pending in to_download.items():
            unit_counts[model_class.TYPE_ID] = len(pending)
            target = with_file if 'filename' in model_class._fields else without_file
            target.update(pending)

        # Units without a truthy size do not contribute to the total.
        total_size = sum(unit.size for unit in with_file if unit.size)
        self.content_report.set_initial_values(unit_counts, total_size)
        self.set_progress()
        return with_file, without_file
Exemple #18
0
def check_all_and_associate(wanted, conduit, config, download_deferred,
                            catalog):
    """
    Associate the wanted units that Pulp already has with the repo and report the rest.

    Each wanted unit key is looked up in the db. For a found RPM, SRPM or DRPM whose
    file exists on the filesystem, the downloaded flag is set and saved if it was not
    set yet. A found unit is then added to the catalog together with its download
    path, associated with the conduit's repo unless its id is already among the
    repo's associated RPM/SRPM/DRPM ids, and dropped from the result. Two kinds of
    found units stay in the result instead:

    * when downloading is not deferred, RPMs, SRPMs and DRPMs whose package file does
      not exist on the filesystem (these are not added to the catalog either);
    * not yet associated units for which, with signature checking enabled, the
      signature filter raises a PulpCodedException (these were already added to the
      catalog).

    :param wanted:            dict where keys are units as namedtuples, and values are
                              WantedUnitInfo instances
    :type  wanted:            dict
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param config:            configuration instance passed to the importer
    :type  config:            pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found on the server.
    :rtype:     set
    """
    file_backed_types = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)

    # ids of the RPM, SRPM and DRPM units that the repo already has associated
    associated_ids = set()
    for file_backed_type in file_backed_types:
        associated_ids.update(
            repo_controller.get_associated_unit_ids(conduit.repo.repo_id, file_backed_type))

    units_by_type = _sort_by_type(wanted.iterkeys())
    for unit_type, unit_keys in units_by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        is_file_backed = unit_type in file_backed_types

        # Iterate over a copy, because unit_keys shrinks inside the loop below.
        candidates = (model(**key._asdict()) for key in unit_keys.copy())
        for unit in units_controller.find_units(candidates):
            path = unit._storage_path
            has_file = path is not None and os.path.isfile(path)
            if is_file_backed:
                # no matter what is the download policy, if existing unit has a valid
                # storage_path, we need to set the downloaded flag to True
                if has_file and not unit.downloaded:
                    unit.downloaded = True
                    unit.save()
                # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
                # package file does not exist and downloading is not deferred.
                if not (download_deferred or has_file):
                    continue
            catalog.add(unit, wanted[unit.unit_key_as_named_tuple].download_path)
            if unit.id not in associated_ids:
                if rpm_parse.signature_enabled(config):
                    try:
                        rpm_parse.filter_signature(unit, config)
                    except PulpCodedException as e:
                        _LOGGER.debug(e)
                        continue
                repo_controller.associate_single_unit(conduit.repo, unit)
            unit_keys.discard(unit.unit_key_as_named_tuple)

    # Whatever is left in the per-type sets was not satisfied locally.
    return set().union(*units_by_type.values())