def _resolve_new_units(self, existing, wanted):
    """
    Decide what units are needed to be downloaded.

    Filter out units which are already in a repository,
    associate units which are already downloaded,

    :param existing: units which are already in a repository
    :type existing: list of unit keys as namedtuples
    :param wanted: units which should be imported into a repository
    :type wanted: list of unit keys as namedtuples

    :return: list of unit keys to download; empty list if all units are
             already downloaded
    :rtype: list of unit keys as namedtuples
    """
    model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
    candidates = (model(**key._asdict()) for key in wanted)
    to_download = set(wanted)
    for found in units_controller.find_units(candidates):
        # A unit only counts as already downloaded when its file is on disk.
        if found._storage_path is None or not os.path.isfile(found._storage_path):
            continue
        # Units on disk but missing from the repo get re-associated.
        if found.unit_key_as_named_tuple not in existing:
            repo_controller.associate_single_unit(self.repo.repo_obj, found)
        to_download.discard(found.unit_key_as_named_tuple)
    return list(to_download)
def _resolve_new_units(self, existing, wanted):
    """
    Determine which of the wanted units still need to be downloaded.

    Units whose files already exist on disk are removed from the result;
    any such unit not yet in the repository is associated with it.

    :param existing: units which are already in a repository
    :type existing: list of unit keys as namedtuples
    :param wanted: units which should be imported into a repository
    :type wanted: list of unit keys as namedtuples

    :return: list of unit keys to download; empty list if all units are
             already downloaded
    :rtype: list of unit keys as namedtuples
    """
    model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
    remaining = set(wanted)
    db_units = units_controller.find_units(
        model(**key._asdict()) for key in wanted)
    for db_unit in db_units:
        path = db_unit._storage_path
        # Only units whose file exists on disk can be skipped.
        if path is not None and os.path.isfile(path):
            key = db_unit.unit_key_as_named_tuple
            if key not in existing:
                # Present on disk but not in this repo: re-associate it.
                repo_controller.associate_single_unit(self.repo.repo_obj, db_unit)
            remaining.discard(key)
    return list(remaining)
def process_main(self, item=None):
    """
    Given the passed-in unit keys, determine which of them already exist in
    pulp, and save those with the conduit found on the parent.

    :param item: The item to process or none if get_iterator is not defined
    :param item: object or None
    """
    # Hashes of units found to already exist in pulp.
    known_hashes = set()
    # Prefer the units handed to the constructor; fall back to the parent's.
    units = (self.available_units if self.available_units is not None
             else self.parent.available_units)
    for page in misc.paginate(units, self.unit_pagination_size):
        # Associate every unit in this page that pulp already has.
        for existing_unit in units_controller.find_units(page):
            known_hashes.add(hash(existing_unit))
            repo_controller.associate_single_unit(
                self.get_repo().repo_obj, existing_unit)
        # Whatever was not found must be downloaded.
        self.units_to_download.extend(
            unit for unit in page if hash(unit) not in known_hashes)
def process_main(self, item=None):
    """
    given the passed-in unit keys, determine which of them already exist in
    pulp, and save those with the conduit found on the parent.

    :param item: The item to process or none if get_iterator is not defined
    :param item: object or None
    """
    # any units that are already in pulp
    units_we_already_had = set()
    # If available_units was defined in the constructor, let's use it. Otherwise
    # use the default of self.parent.available_units. An explicit None check is
    # required here: ``self.available_units or ...`` would wrongly fall back to
    # the parent's units whenever an *empty* list was passed to the constructor.
    if self.available_units is not None:
        available_units = self.available_units
    else:
        available_units = self.parent.available_units
    for units_group in misc.paginate(available_units, self.unit_pagination_size):
        # Get this group of units
        query = units_controller.find_units(units_group)
        for found_unit in query:
            units_we_already_had.add(hash(found_unit))
            repo_controller.associate_single_unit(self.get_repo().repo_obj, found_unit)
        # Any unit in this page that was not found in the db must be downloaded.
        for unit in units_group:
            if hash(unit) not in units_we_already_had:
                self.units_to_download.append(unit)
def test_paginate(self, mock_paginate):
    """
    ensure that paginate is used
    """
    units = (DemoModel(key_field='a'), DemoModel(key_field='B'))
    # Evaluate the generator returned by find_units so paginate actually runs.
    list(units_controller.find_units(units))
    mock_paginate.assert_called_once_with(units, 50)
def test_results(self):
    """
    Test that the mongo query generated is the one we expect
    """
    first = DemoModel(key_field='a')
    second = DemoModel(key_field='B')
    units = (first, second)
    persisted = DemoModel(key_field='B', id='foo')
    DemoModel.objects.return_value = [persisted]
    # Evaluate the generator so the mocked query is actually issued.
    found = list(units_controller.find_units(units))
    self.assertEqual(found, [persisted])
def test_query(self):
    """
    Test that the mongo query generated is the one we expect
    """
    units = (DemoModel(key_field='a'), DemoModel(key_field='B'))
    # Evaluate the generator so the query object is built and captured.
    list(units_controller.find_units(units))
    generated = DemoModel.objects.call_args[0][0].to_query(DemoModel)
    expected = {'$or': [{'key_field': u'a'}, {'key_field': u'B'}]}
    self.assertDictEqual(generated, expected)
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this
    function also associates the unit to the given repo. Note that the check
    for the actual file is performed only for the supported unit types.

    :param wanted: iterable of units as namedtuples
    :type wanted: iterable
    :param conduit: repo sync conduit
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSync
    :param config: configuration instance passed to the importer
    :type config: pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type download_deferred: bool
    :param catalog: Deferred downloading catalog.
    :type catalog: pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the
             named tuples received as input were not found on the server.
    :rtype: set
    """
    package_types = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)
    by_type = _sort_by_type(wanted)
    for type_id, keys in by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(type_id)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        candidates = (model(**key._asdict()) for key in keys.copy())
        for unit in units_controller.find_units(candidates):
            # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
            # package file does not exist and downloading is not deferred.
            if type_id in package_types and not download_deferred:
                missing_file = (unit._storage_path is None or
                                not os.path.isfile(unit._storage_path))
                if missing_file:
                    continue
            catalog.add(unit)
            if rpm_parse.signature_enabled(config):
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    # Signature rejected: keep the unit in the wanted set.
                    _LOGGER.debug(e)
                    continue
            repo_controller.associate_single_unit(conduit.repo, unit)
            keys.discard(unit.unit_key_as_named_tuple)
    not_found = set()
    not_found.update(*by_type.values())
    return not_found
def check_repo(wanted):
    """
    Given an iterable of units as namedtuples, this function will search for
    them using the given search method and return the set of tuples that were
    not found. This checks for the unit in the db as well as for the actual
    file on the filesystem. Note that the check for the actual file is
    performed only for the supported unit types.

    This is useful in a case where you know what units you want to have in a
    repo, but need to know which you need to actually download by eliminating
    the ones you already have.

    :param wanted: iterable of units as namedtuples
    :type wanted: iterable
    :param sync_conduit:
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found by the search method.
    :rtype: set
    """
    # group the wanted keys by their unit type
    by_type = _sort_by_type(wanted)
    for type_id, keys in by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(type_id)
        fields = model.unit_key_fields + ('_storage_path',)
        needs_file_check = type_id in (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM,
                                       ids.TYPE_ID_DRPM)
        # FIXME this function being called doesn't have a fields parameter
        candidates = (model(**key._asdict()) for key in keys.copy())
        for unit in units_controller.find_units(candidates, fields=fields):
            if needs_file_check:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on
                # the filesystem. If not, we do not want to skip downloading
                # the unit.
                path = unit._storage_path
                if path is None or not os.path.isfile(path):
                    continue
            keys.discard(unit.unit_key_as_named_tuple)
    missing = set()
    missing.update(*by_type.values())
    return missing
def check_repo(wanted):
    """
    Search for the given units and return the set of unit-key namedtuples
    that were not found.

    Besides the db lookup, the actual file on the filesystem is checked too,
    but only for the supported unit types. This is useful when you know what
    units you want in a repo but need to know which must actually be
    downloaded because you do not already have them.

    :param wanted: iterable of units as namedtuples
    :type wanted: iterable
    :param sync_conduit:
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found by the search method.
    :rtype: set
    """
    sorted_units = _sort_by_type(wanted)
    file_checked_types = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)
    for unit_type, unit_keys in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        wanted_fields = model.unit_key_fields + ('_storage_path',)
        check_file = unit_type in file_checked_types
        # FIXME this function being called doesn't have a fields parameter
        generator = (model(**t._asdict()) for t in unit_keys.copy())
        for found in units_controller.find_units(generator, fields=wanted_fields):
            if check_file and (found._storage_path is None or
                               not os.path.isfile(found._storage_path)):
                # For RPMs, SRPMs and DRPMs a missing file means the unit must
                # still be downloaded, so keep it in the wanted set.
                continue
            unit_keys.discard(found.unit_key_as_named_tuple)
    return set().union(*sorted_units.values())
def check_all_and_associate(wanted, conduit, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file on the
    filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the
    actual file is performed only for the supported unit types.

    :param wanted: iterable of units as namedtuples
    :type wanted: iterable
    :param conduit: repo sync conduit
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSync
    :param download_deferred: indicates downloading is deferred (or not).
    :type download_deferred: bool
    :param catalog: Deferred downloading catalog.
    :type catalog: pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the
             named tuples received as input were not found on the server.
    :rtype: set
    """
    sorted_units = _sort_by_type(wanted)
    package_type_ids = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        db_units = units_controller.find_units(
            model(**t._asdict()) for t in values.copy())
        for unit in db_units:
            # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
            # package file does not exist and downloading is not deferred.
            if not download_deferred and unit_type in package_type_ids:
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            catalog.add(unit)
            repo_controller.associate_single_unit(conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    remaining = set()
    remaining.update(*sorted_units.values())
    return remaining
def _decide_what_to_download(self, metadata_files):
    """
    Parse the repo's primary metadata and split the upstream packages into
    those that must be downloaded and those already present in the database.

    Units already in the database are re-associated with the repo; the units
    still needed are tallied into the content report before being returned.

    :param metadata_files: provides access to the repo metadata files;
        assumed to expose get_metadata_file_handle() -- TODO confirm type

    :return: (units whose model has a 'filename' field and need downloading,
              fileless units)
    :rtype: (set, set)
    """
    with metadata_files.get_metadata_file_handle(primary.METADATA_FILE_NAME) as primary_file_handle:  # noqa
        package_info_generator = packages.package_list_generator(
            primary_file_handle, primary.PACKAGE_TAG, self._process_package_element)
        sep_units = self._separate_units_by_type(package_info_generator)
        to_download = dict()
        for model_class, units in sorted(sep_units.items()):
            # Because models don't implement an __eq__, we can't simply throw
            # them in a set (even though they do implement __hash__)
            k2u = dict((u.unit_key_as_named_tuple, u) for u in units)
            # Units from the database
            unit_generator = [model_class(**unit.unit_key) for unit in sorted(units)]
            unit_generator = units_controller.find_units(unit_generator)
            upstream_unit_keys = set(k2u)
            # Compute the unit keys we need to download
            wanted = upstream_unit_keys.difference(
                u.unit_key_as_named_tuple for u in unit_generator)
            for existing_key in upstream_unit_keys.difference(wanted):
                existing = k2u[existing_key]
                # Existing units get re-associated
                yumsync.repo_controller.associate_single_unit(
                    self.conduit.repo, existing)
            to_download[model_class] = [k2u[k] for k in wanted]
        # Tally per-type counts and total byte size for the content report.
        unit_counts = dict()
        flattened = set()
        fileless = set()
        for model_class, wanted in to_download.items():
            unit_counts[model_class.TYPE_ID] = len(wanted)
            if 'filename' in model_class._fields:
                flattened.update(wanted)
            else:
                fileless.update(wanted)
        # Units with a falsy size (None/0) are skipped in the sum.
        total_size = sum(x.size for x in flattened if x.size)
        self.content_report.set_initial_values(unit_counts, total_size)
        self.set_progress()
        return flattened, fileless
def check_all_and_associate(wanted, sync_conduit):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this
    function also associates the unit to the given repo. Note that the check
    for the actual file is performed only for the supported unit types.

    :param wanted: iterable of units as namedtuples
    :type wanted: iterable
    :param sync_conduit: repo sync conduit
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSync

    :return: set of unit keys as namedtuples, identifying which of the
             named tuples received as input were not found on the server.
    :rtype: set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM,
                                      ids.TYPE_ID_DRPM)
        # Iterate a copy: the loop body below discards from ``values`` while
        # find_units is still consuming this generator lazily; mutating a set
        # during iteration raises RuntimeError.
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            if rpm_srpm_drpm:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on
                # the filesystem. If not, we do not want to skip downloading
                # the unit.
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            # Add the existing unit to the repository
            repo_controller.associate_single_unit(sync_conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    ret = set()
    ret.update(*sorted_units.values())
    return ret
def _decide_what_to_download(self, metadata_files):
    """
    Parse the primary metadata and decide which upstream packages must be
    downloaded; packages already in the database are re-associated with the
    repo instead.

    :param metadata_files: provides access to the repo metadata files;
        assumed to expose get_metadata_file_handle() -- TODO confirm type

    :return: (units whose model has a 'filename' field and need downloading,
              fileless units)
    :rtype: (set, set)
    """
    handle = metadata_files.get_metadata_file_handle(primary.METADATA_FILE_NAME)
    with handle as primary_file_handle:  # noqa
        upstream_units = packages.package_list_generator(
            primary_file_handle, primary.PACKAGE_TAG,
            self._process_package_element)
        grouped = self._separate_units_by_type(upstream_units)
        to_download = dict()
        for model_class, units in sorted(grouped.items()):
            all_keys = set(u.unit_key_as_named_tuple for u in units)
            # Units from the database
            in_db = list(units_controller.find_units(units))
            # Anything not already in the database must be downloaded.
            missing_keys = all_keys.difference(
                u.unit_key_as_named_tuple for u in in_db)
            for existing in in_db:
                # Existing units get re-associated
                yumsync.repo_controller.associate_single_unit(
                    self.conduit.repo, existing)
            to_download[model_class] = [
                u for u in units if u.unit_key_as_named_tuple in missing_keys]
        # Tally per-type counts and total byte size for the content report.
        counts = dict()
        with_files = set()
        fileless = set()
        for model_class, chosen in to_download.items():
            counts[model_class.TYPE_ID] = len(chosen)
            if 'filename' in model_class._fields:
                with_files.update(chosen)
            else:
                fileless.update(chosen)
        total_size = sum(u.size for u in with_files if u.size)
        self.content_report.set_initial_values(counts, total_size)
        self.set_progress()
        return with_files, fileless
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this
    function also associates the unit to the given repo. Note that the check
    for the actual file is performed only for the supported unit types.

    :param wanted: dict where keys are units as namedtuples, and values are
                   WantedUnitInfo instances
    :type wanted: dict
    :param conduit: repo sync conduit
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSync
    :param config: configuration instance passed to the importer
    :type config: pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type download_deferred: bool
    :param catalog: Deferred downloading catalog.
    :type catalog: pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the
             named tuples received as input were not found on the server.
    :rtype: set
    """
    rpm_drpm_srpm = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)
    # Pre-fetch the ids of units already associated with the repo so that
    # association (and signature filtering) is skipped for them below.
    all_associated_units = set()
    for unit_type in rpm_drpm_srpm:
        units_generator = repo_controller.get_associated_unit_ids(
            conduit.repo.repo_id, unit_type)
        all_associated_units.update(units_generator)
    sorted_units = _sort_by_type(wanted.iterkeys())
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        # Iterate a copy because values is mutated (discard) inside the loop.
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            is_rpm_drpm_srpm = unit_type in rpm_drpm_srpm
            file_exists = unit._storage_path is not None and os.path.isfile(
                unit._storage_path)
            if is_rpm_drpm_srpm:
                # no matter what is the download policy, if existing unit has a
                # valid storage_path, we need to set the downloaded flag to True
                if file_exists and not unit.downloaded:
                    unit.downloaded = True
                    unit.save()
                # Existing RPMs, DRPMs and SRPMs are disqualified when the
                # associated package file does not exist and downloading is
                # not deferred.
                if not download_deferred and not file_exists:
                    continue
            catalog.add(unit, wanted[unit.unit_key_as_named_tuple].download_path)
            if unit.id not in all_associated_units:
                if rpm_parse.signature_enabled(config):
                    try:
                        rpm_parse.filter_signature(unit, config)
                    except PulpCodedException as e:
                        # Signature rejected: keep the unit in the wanted set.
                        _LOGGER.debug(e)
                        continue
                repo_controller.associate_single_unit(conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted