def list_content_units(self, content_type, db_spec=None, model_fields=None, start=0, limit=None):
    """
    DEPRECATED!!! Please use find_by_criteria() instead.

    List the content units in a content type collection.
    @param content_type: unique id of content collection
    @type content_type: str
    @param db_spec: spec document used to filter the results, None means no filter
    @type db_spec: None or dict
    @param model_fields: fields of each content unit to report, None means all fields
    @type model_fields: None or list of str's
    @param start: offset from the beginning of the results to return as the first element
    @type start: non-negative int
    @param limit: the maximum number of results to return, None means no limit
    @type limit: None or non-negative int
    @return: list of content units in the content type collection that match the parameters
    @rtype: (possibly empty) tuple of dicts
    """
    collection = content_types_db.type_units_collection(content_type)
    if db_spec is None:
        db_spec = {}
    cursor = collection.find(db_spec, fields=model_fields)
    if start > 0:
        cursor.skip(start)
    if limit is not None:
        cursor.limit(limit)
    return tuple(cursor)
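# Usage sketch for the deprecated list_content_units() API above: page through a
# type collection with start/limit. Assumes `cqm` is a content query manager
# instance (e.g. from manager_factory.content_query_manager()); the type id and
# page size are illustrative.
page_size = 100
offset = 0
while True:
    units = cqm.list_content_units('rpm', start=offset, limit=page_size)
    if not units:
        break
    for unit in units:
        print(unit['_id'])
    offset += page_size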
def test_migrate_category(self):
    # Setup
    orig_cat_id = add_unit('c1', self.source_repo_id, ids.TYPE_ID_PKG_CATEGORY)
    associate_unit(orig_cat_id, self.source_repo_id, ids.TYPE_ID_PKG_CATEGORY)
    associate_unit(orig_cat_id, self.dest_repo_id, ids.TYPE_ID_PKG_CATEGORY)

    # Test
    self.migration.migrate()

    group_coll = types_db.type_units_collection(ids.TYPE_ID_PKG_CATEGORY)
    all_cats = group_coll.find({}).sort('repo_id', 1)
    self.assertEqual(2, all_cats.count())
    dest_cat = all_cats[0]  # sorted by repo_id, so the dest repo's copy comes first
    self.assertEqual(dest_cat['id'], 'c1')
    self.assertEqual(dest_cat['repo_id'], self.dest_repo_id)
    source_cat = all_cats[1]
    self.assertEqual(source_cat['id'], 'c1')
    self.assertEqual(source_cat['repo_id'], self.source_repo_id)

    # Verify the associations
    query_manager = factory.repo_unit_association_query_manager()
    source_units = query_manager.get_units(self.source_repo_id)
    self.assertEqual(1, len(source_units))
    self.assertEqual(source_units[0]['unit_type_id'], ids.TYPE_ID_PKG_CATEGORY)
    self.assertEqual(source_units[0]['unit_id'], source_cat['_id'])
    dest_units = query_manager.get_units(self.dest_repo_id)
    self.assertEqual(1, len(dest_units))
    self.assertEqual(dest_units[0]['unit_type_id'], ids.TYPE_ID_PKG_CATEGORY)
    self.assertEqual(dest_units[0]['unit_id'], dest_cat['_id'])
def delete_orphans_by_type(content_type_id, content_unit_ids=None):
    """
    Delete the orphaned content units for the given content type.

    If the content_unit_ids parameter is not None, it acts as a filter of
    the specific orphaned content units that may be deleted.

    NOTE: this method deletes the content unit's bits from disk, if applicable.

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param content_unit_ids: list of content unit ids to delete; None means delete them all
    :type content_unit_ids: iterable or None
    """
    content_units_collection = content_types_db.type_units_collection(content_type_id)
    for content_unit in OrphanManager.generate_orphans_by_type(content_type_id,
                                                               fields=['_id', '_storage_path']):
        if content_unit_ids is not None and content_unit['_id'] not in content_unit_ids:
            continue
        content_units_collection.remove(content_unit['_id'], safe=False)
        storage_path = content_unit.get('_storage_path', None)
        if storage_path is not None:
            OrphanManager.delete_orphaned_file(storage_path)
def create_content_unit(unit_data, relative_path=None):
    collection = types_database.type_units_collection(_TYPE_YUM_REPO_METADATA_FILE)
    unit_data['_id'] = str(uuid.uuid4())
    unit_data['_content_type_id'] = _TYPE_YUM_REPO_METADATA_FILE
    unit_data['_storage_path'] = get_content_storage_path(relative_path)
    collection.insert(unit_data, safe=True)
    return unit_data
def delete_orphans_by_id(self, orphans):
    """
    Delete a list of orphaned content units by their content type and unit ids.
    @param orphans: list of documents with 'content_type_id' and 'unit_id' keys
    @type orphans: list
    """
    # XXX this does no validation of the orphans

    # munge the orphans into something more programmatically convenient
    orphans_by_id = {}
    for o in orphans:
        if 'content_type_id' not in o or 'unit_id' not in o:
            raise pulp_exceptions.InvalidValue(['content_type_id', 'unit_id'])
        id_list = orphans_by_id.setdefault(o['content_type_id'], [])
        id_list.append(o['unit_id'])

    # iterate through the types and ids
    content_query_manager = manager_factory.content_query_manager()
    for content_type, content_id_list in orphans_by_id.items():
        # build a list of the on-disk contents
        orphaned_paths = []
        for unit_id in content_id_list:
            content_unit = content_query_manager.get_content_unit_by_id(
                content_type, unit_id, model_fields=['_storage_path'])
            if content_unit['_storage_path'] is not None:
                orphaned_paths.append(content_unit['_storage_path'])
        # remove the orphans from the db
        collection = content_types_db.type_units_collection(content_type)
        spec = {'_id': {'$in': content_id_list}}
        collection.remove(spec, safe=True)
        # delete the on-disk contents
        for path in orphaned_paths:
            self.delete_orphaned_file(path)
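# Usage sketch for delete_orphans_by_id() above: the payload is a list of dicts
# keyed exactly as the validation check requires. The ids and the
# `orphan_manager` instance are hypothetical placeholders.
orphans = [
    {'content_type_id': 'rpm', 'unit_id': 'unit-id-1'},      # placeholder id
    {'content_type_id': 'erratum', 'unit_id': 'unit-id-2'},  # placeholder id
]
orphan_manager.delete_orphans_by_id(orphans)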
def test_migrate(self):
    # Setup
    for type_id in (TYPE_ID_RPM, TYPE_ID_SRPM, TYPE_ID_DRPM):
        self.add_sample_data(type_id)

    # Test
    migration = _import_all_the_way('pulp_rpm.plugins.migrations.0008_version_sort_index')
    migration.migrate()

    # Verify
    # The migration should cover these three types, so make sure they were all included
    for type_id in (TYPE_ID_RPM, TYPE_ID_SRPM, TYPE_ID_DRPM):
        collection = types_db.type_units_collection(type_id)
        test_me = collection.find_one({'version': '1.1'})
        self.assertEqual(test_me['version_sort_index'], version_utils.encode('1.1'))
        self.assertEqual(test_me['release_sort_index'], version_utils.encode('1.1'))

        # Make sure the script didn't run on units that already have the indexes
        test_me = collection.find_one({'version': '3.1'})
        self.assertEqual(test_me['version_sort_index'], 'fake')
        self.assertEqual(test_me['release_sort_index'], 'fake')
def test_pulp_manage_db_loads_types(self, listdir_mock):
    """
    Test calling pulp-manage-db imports types on a clean types database.
    """
    manage.main()

    all_collection_names = types_db.all_type_collection_names()
    self.assertEqual(len(all_collection_names), 1)
    self.assertEqual(['units_test_type_id'], all_collection_names)

    # Let's make sure we loaded the type definitions correctly
    db_type_definitions = types_db.all_type_definitions()
    self.assertEquals(len(db_type_definitions), 1)
    test_json = json.loads(_test_type_json)
    for attribute in ['id', 'display_name', 'description', 'unit_key', 'search_indexes']:
        self.assertEquals(test_json['types'][0][attribute], db_type_definitions[0][attribute])

    # Now let's ensure that we have the correct indexes
    collection = types_db.type_units_collection('test_type_id')
    indexes = collection.index_information()
    self.assertEqual(indexes['_id_']['key'], [(u'_id', 1)])
    # Make sure we have the unique constraint on all three attributes
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['unique'], True)
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['key'],
                     [(u'attribute_1', 1), (u'attribute_2', 1), (u'attribute_3', 1)])
    # Make sure we indexed attributes 1 and 3
    self.assertEqual(indexes['attribute_1_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_1_1']['key'], [(u'attribute_1', 1)])
    self.assertEqual(indexes['attribute_3_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_3_1']['key'], [(u'attribute_3', 1)])
    # Make sure we only have the indexes that we've hand inspected here
    self.assertEqual(indexes.keys(), [u'_id_', u'attribute_1_1_attribute_2_1_attribute_3_1',
                                      u'attribute_1_1', u'attribute_3_1'])
def _associated_units_by_type_cursor(unit_type_id, criteria, associated_unit_ids):
    """
    Retrieve a pymongo cursor for units associated with a repository of a given
    unit type that meet the provided criteria.

    :type unit_type_id: str
    :type criteria: UnitAssociationCriteria
    :type associated_unit_ids: list
    :rtype: pymongo.cursor.Cursor
    """
    collection = types_db.type_units_collection(unit_type_id)

    spec = criteria.unit_filters.copy()
    spec['_id'] = {'$in': associated_unit_ids}

    fields = criteria.unit_fields

    # The _content_type_id is required for looking up the association.
    if fields is not None and '_content_type_id' not in fields:
        fields = list(fields)
        fields.append('_content_type_id')

    cursor = collection.find(spec, fields=fields)

    sort = criteria.unit_sort
    if sort is None:
        sort = [('_id', SORT_ASCENDING)]
    cursor.sort(sort)

    return cursor
def delete_orphans_by_type(self, content_type_id, content_unit_ids=None, flush=True):
    """
    Delete the orphaned content units for the given content type.

    If the content_unit_ids parameter is not None, it acts as a filter of
    the specific orphaned content units that may be deleted.

    NOTE: this method deletes the content unit's bits from disk, if applicable.

    NOTE: `flush` should not be set to False unless you know what you're doing

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param content_unit_ids: list of content unit ids to delete; None means delete them all
    :type content_unit_ids: iterable or None
    :param flush: flush the database updates to disk on completion
    :type flush: bool
    """
    content_units_collection = content_types_db.type_units_collection(content_type_id)
    for content_unit in self.generate_orphans_by_type(content_type_id,
                                                      fields=['_id', '_storage_path']):
        if content_unit_ids is not None and content_unit['_id'] not in content_unit_ids:
            continue
        content_units_collection.remove(content_unit['_id'], safe=False)
        storage_path = content_unit.get('_storage_path', None)
        if storage_path is not None:
            self.delete_orphaned_file(storage_path)
    # this forces the database to flush any cached changes to the disk
    # in the background; for example: the unsafe deletes in the loop above
    if flush:
        db_connection.flush_database()
def generate_orphans_by_type(content_type_id, fields=None):
    """
    Return a generator of all orphaned content units of the given content type.

    If fields is not specified, only the `_id` field will be present.

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param fields: list of fields to include in each content unit
    :type fields: list or None
    :return: generator of orphaned content units for the given content type
    :rtype: generator
    """
    fields = fields if fields is not None else ['_id']
    content_units_collection = content_types_db.type_units_collection(content_type_id)
    repo_content_units_collection = RepoContentUnit.get_collection()

    for content_unit in content_units_collection.find({}, fields=fields):
        repo_content_units_cursor = repo_content_units_collection.find(
            {'unit_id': content_unit['_id']})
        if repo_content_units_cursor.count() > 0:
            continue
        yield content_unit
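# Usage sketch for generate_orphans_by_type() above: stream orphaned rpm units
# and collect their on-disk paths without loading every unit into memory. The
# type id is illustrative.
orphan_paths = []
for unit in generate_orphans_by_type('rpm', fields=['_id', '_storage_path']):
    if unit.get('_storage_path') is not None:
        orphan_paths.append(unit['_storage_path'])
print('%d orphaned files found' % len(orphan_paths))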
def test_migrate_duplicates_doesnt_delete_from_source_repo(self):
    """
    This tests the correct behavior when we try to change the repo_id on an object,
    and end up causing a duplicate error due to our uniqueness constraint. It also
    makes sure the units are not deleted from the source repository if they are in
    the source repository.
    """
    # Let's put two units here with the same IDs with two different repo_ids, and then run
    # the migration.
    source_repo_group_id = add_unit('group', self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    dest_repo_group_id = add_unit('group', self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    # Associate the source_repo_group_id with both source and destination repos
    associate_unit(source_repo_group_id, self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(source_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(dest_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)

    # Migrate should not cause a DuplicateKeyError
    self.migration.migrate()

    # Verify that both groups remain, because the migration should not have removed either
    group_collection = types_db.type_units_collection(ids.TYPE_ID_PKG_GROUP)
    all_groups = list(group_collection.find())
    self.assertEqual(len(all_groups), 2)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.dest_repo_id}).count(), 1)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.source_repo_id}).count(), 1)

    # Let's make sure that there are two associations, and that they are correct.
    query_manager = factory.repo_unit_association_query_manager()
    dest_units = query_manager.get_units(self.dest_repo_id)
    self.assertEqual(len(dest_units), 1)
    dest_unit = dest_units[0]
    self.assertEqual(dest_unit['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(dest_unit['unit_id'], dest_repo_group_id)
    source_units = query_manager.get_units(self.source_repo_id)
    self.assertEqual(len(source_units), 1)
    source_unit = source_units[0]
    self.assertEqual(source_unit['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(source_unit['unit_id'], source_repo_group_id)

    # Verify the repo counts
    source = model.Repository.objects.get(repo_id='source-repo')
    self.assertEqual(source.content_unit_counts, {'package_group': 1})
    dest = model.Repository.objects.get(repo_id='dest-repo')
    self.assertEqual(dest.content_unit_counts, {'package_group': 1})
def get_content_unit_keys(self, content_type, unit_ids):
    """
    Return the keys and values that will uniquely identify the content units
    that match the given unique ids.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_ids: list of unique content unit ids
    @type unit_ids: list of str's
    @return: two tuples of the same length, the first of ids and the second of
             key dicts; the same index in each tuple corresponds to a single
             content unit
    @rtype: tuple of (possibly empty) tuples
    """
    try:
        key_fields = units_controller.get_unit_key_fields_for_type(content_type)
    except ValueError:
        raise InvalidValue(['content_type'])
    all_fields = ['_id']
    _flatten_keys(all_fields, key_fields)
    collection = content_types_db.type_units_collection(content_type)
    cursor = collection.find({'_id': {'$in': unit_ids}}, projection=all_fields)
    dicts = tuple(dict(d) for d in cursor)
    ids = tuple(d.pop('_id') for d in dicts)
    return (ids, dicts)
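# Usage sketch for get_content_unit_keys() above: the two returned tuples line
# up by index. The `cqm` manager instance and the unit ids are hypothetical.
ids, key_dicts = cqm.get_content_unit_keys('rpm', ['unit-id-1', 'unit-id-2'])
for unit_id, unit_key in zip(ids, key_dicts):
    print('%s -> %s' % (unit_id, unit_key))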
def delete_orphans_by_type(content_type_id, content_unit_ids=None):
    """
    Delete the orphaned content units for the given content type.

    If the content_unit_ids parameter is not None, it acts as a filter of
    the specific orphaned content units that may be deleted.

    NOTE: this method deletes the content unit's bits from disk, if applicable.

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param content_unit_ids: list of content unit ids to delete; None means delete them all
    :type content_unit_ids: iterable or None
    :return: count of units deleted
    :rtype: int
    """
    content_units_collection = content_types_db.type_units_collection(content_type_id)
    count = 0
    for content_unit in OrphanManager.generate_orphans_by_type(
            content_type_id, fields=['_id', '_storage_path']):
        if content_unit_ids is not None and content_unit['_id'] not in content_unit_ids:
            continue
        model.LazyCatalogEntry.objects(
            unit_id=content_unit['_id'], unit_type_id=content_type_id).delete()
        content_units_collection.remove(content_unit['_id'])
        storage_path = content_unit.get('_storage_path', None)
        if storage_path is not None:
            OrphanManager.delete_orphaned_file(storage_path)
        count += 1
    return count
def _migrate_collection(type_id):
    collection = types_db.type_units_collection(type_id)
    for package in collection.find():
        # grab the raw XML and parse it into the elements we'll need later
        try:
            # make a guess at the encoding
            codec = 'UTF-8'
            package['repodata']['primary'].encode(codec)
        except UnicodeEncodeError:
            # best second guess we have, and it will never fail due to the nature
            # of the encoding.
            codec = 'ISO-8859-1'
            package['repodata']['primary'].encode(codec)
        fake_xml = FAKE_XML % {'encoding': codec, 'xml': package['repodata']['primary']}
        fake_element = ET.fromstring(fake_xml.encode(codec))
        utils.strip_ns(fake_element)
        primary_element = fake_element.find('package')
        format_element = primary_element.find('format')
        provides_element = format_element.find('provides')
        requires_element = format_element.find('requires')

        # add these attributes, which we previously didn't track in the DB.
        package['size'] = int(primary_element.find('size').attrib['package'])
        if type_id == 'rpm':
            package['sourcerpm'] = format_element.find('sourcerpm').text
            package['summary'] = primary_element.find('summary').text

        # re-parse provides and requires. The format changed from 2.1, and the
        # 2.1 upload workflow was known to produce invalid data for these fields
        package['provides'] = map(primary._process_rpm_entry_element,
                                  provides_element.findall('entry')) if provides_element else []
        package['requires'] = map(primary._process_rpm_entry_element,
                                  requires_element.findall('entry')) if requires_element else []

        collection.save(package)
def test_migrate(self, start_logging_mock, listdir_mock, mock_plugin_definitions,
                 mock_drop_indices):
    """
    Ensure that migrate() imports types on a clean types database.
    """
    migration.migrate()
    self.assertTrue(mock_drop_indices.called)

    all_collection_names = types_db.all_type_collection_names()
    self.assertEqual(len(all_collection_names), 1)
    self.assertEqual(['units_test_type_id'], all_collection_names)

    # Let's make sure we loaded the type definitions correctly
    db_type_definitions = types_db.all_type_definitions()
    self.assertEquals(len(db_type_definitions), 1)
    test_json = json.loads(_test_type_json)
    for attribute in ['id', 'display_name', 'description', 'unit_key', 'search_indexes']:
        self.assertEquals(test_json['types'][0][attribute], db_type_definitions[0][attribute])

    # Now let's ensure that we have the correct indexes
    collection = types_db.type_units_collection('test_type_id')
    indexes = collection.index_information()
    self.assertEqual(indexes['_id_']['key'], [(u'_id', 1)])
    # Make sure we have the unique constraint on all three attributes
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['unique'], True)
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['key'],
                     [(u'attribute_1', 1), (u'attribute_2', 1), (u'attribute_3', 1)])
    # Make sure we indexed attributes 1 and 3
    self.assertEqual(indexes['attribute_1_1']['key'], [(u'attribute_1', 1)])
    self.assertEqual(indexes['attribute_3_1']['key'], [(u'attribute_3', 1)])
    # Make sure we only have the indexes that we've hand inspected here
    self.assertEqual(indexes.keys(), [u'_id_', u'attribute_1_1_attribute_2_1_attribute_3_1',
                                      u'attribute_1_1', u'attribute_3_1'])
def test_update_missing_no_error(self):
    """
    Tests that updating a previously loaded database with some missing
    definitions does not throw an error.
    """
    # Setup
    defs = [DEF_1, DEF_2, DEF_3]
    types_db.update_database(defs)

    # Test
    new_defs = [DEF_4]
    types_db.update_database(new_defs)

    # Verify
    all_collection_names = types_db.all_type_collection_names()
    self.assertEqual(len(defs) + len(new_defs), len(all_collection_names))  # old are not deleted

    for d in defs:
        self.assertTrue(types_db.unit_collection_name(d.id) in all_collection_names)

        # Quick sanity check on the indexes
        collection = types_db.type_units_collection(d.id)
        all_indexes = collection.index_information()

        total_index_count = 1 + 1 + len(d.search_indexes)  # _id + unit key + all search
        self.assertEqual(total_index_count, len(all_indexes))
def get_multiple_units_by_keys_dicts(self, content_type, unit_keys_dicts, model_fields=None):
    """
    Look up multiple content units in the collection for the given content type
    collection that match the list of keys dictionaries.
    :param content_type: unique id of content collection
    :type content_type: str
    :param unit_keys_dicts: list of dictionaries whose key, value pairs can uniquely
                            identify a content unit
    :type unit_keys_dicts: list of dict's
    :param model_fields: fields of each content unit to report, None means all fields
    :type model_fields: None or list of str's
    :return: generator of content units found in the content type collection that
             match the given unit keys dictionaries
    :rtype: generator of dict's
    :raises ValueError: if any of the keys dictionaries are invalid
    """
    collection = content_types_db.type_units_collection(content_type)
    for segment in paginate(unit_keys_dicts, page_size=50):
        spec = _build_multi_keys_spec(content_type, segment)
        cursor = collection.find(spec, fields=model_fields)
        for unit_dict in cursor:
            yield unit_dict
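# Usage sketch for get_multiple_units_by_keys_dicts() above: key dicts are paged
# in batches of 50 and matching units are yielded lazily. The `cqm` manager
# instance and the unit key values are hypothetical.
keys = [{'name': 'zsh', 'version': '5.0', 'release': '1', 'epoch': '0', 'arch': 'x86_64'}]
for unit in cqm.get_multiple_units_by_keys_dicts('rpm', keys, model_fields=['_id', 'name']):
    print(unit)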
def test_update_no_changes(self):
    """
    Tests the common use case of loading type definitions that have been
    loaded already and have not changed.
    """
    # Setup
    defs = [DEF_1, DEF_2, DEF_3, DEF_4]
    types_db.update_database(defs)

    # Test
    # no real reason for this ordering, just felt better than using the previous list
    same_defs = [DEF_4, DEF_3, DEF_2, DEF_1]
    types_db.update_database(same_defs)

    # Verify
    all_collection_names = types_db.all_type_collection_names()
    self.assertEqual(len(same_defs), len(all_collection_names))

    for d in defs:
        self.assertTrue(types_db.unit_collection_name(d.id) in all_collection_names)

        # Quick sanity check on the indexes
        collection = types_db.type_units_collection(d.id)
        all_indexes = collection.index_information()

        total_index_count = 1 + 1 + len(d.search_indexes)  # _id + unit key + all search
        self.assertEqual(total_index_count, len(all_indexes))
def link_referenced_content_units(self, from_type, from_id, to_type, to_ids):
    """
    Link referenced content units.
    @param from_type: unique id of the parent content collection
    @type from_type: str
    @param from_id: unique id of the parent content unit
    @type from_id: str
    @param to_type: unique id of the child content collection
    @type to_type: str
    @param to_ids: list of unique ids of child content units
    @type to_ids: list or tuple of str's
    """
    collection = content_types_db.type_units_collection(from_type)
    parent = collection.find_one({'_id': from_id})
    if parent is None:
        raise InvalidValue(['from_type'])
    parent_type_def = content_types_db.type_definition(from_type)
    if to_type not in parent_type_def['referenced_types']:
        raise Exception()
    children = parent.setdefault('_%s_references' % to_type, [])
    for id_ in to_ids:
        if id_ in children:
            continue
        children.append(id_)
    collection.update({'_id': from_id}, parent, safe=True)
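# Usage sketch for link_referenced_content_units() above: link two child units
# to a parent unit. All ids, the type names, and the `cm` manager instance are
# hypothetical; the call only succeeds if the parent's type definition lists
# the child type in its referenced_types.
cm.link_referenced_content_units('erratum', 'erratum-id-1', 'rpm',
                                 ['rpm-id-1', 'rpm-id-2'])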
def populate_units(self, env):
    # RPMs
    collection = database.type_units_collection(self.TYPE_ID)
    for i in range(0, env.repo_units):
        unit = UNIT_TEMPLATE.copy()
        unit['name'] = 'unit_%d' % i
        unit['version'] = '1.1'
        collection.save(unit, safe=True)

    # ERRATA
    errata = ERRATA_TEMPLATE.copy()
    packages = []
    for i in range(0, env.errata_packages):
        p = ERRATA_PACKAGE.copy()
        p['name'] = 'unit_%d' % i
        packages.append(p)
    errata['pkglist'][0]['packages'] = packages
    collection = database.type_units_collection(self.ERRATA_TYPE_ID)
    collection.save(errata, safe=True)
def _update_type(type_id):
    collection = types_db.type_units_collection(type_id)

    # Both indexes should be set at the same time, so this single check should be safe
    fix_us = collection.find({'version_sort_index': None})
    for package in fix_us:
        package['version_sort_index'] = version_utils.encode(package['version'])
        package['release_sort_index'] = version_utils.encode(package['release'])
        collection.save(package, safe=True)
def add_sample_data(self, type_id):
    collection = types_db.type_units_collection(type_id)
    collection.save({'version': '1.1', 'release': '1.1'})
    collection.save({'version': '3.1', 'version_sort_index': 'fake',
                     'release': '3.1', 'release_sort_index': 'fake'})
def remove_content_unit(self, content_type, unit_id):
    """
    Remove a content unit and its metadata from the corresponding pulp db collection.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_id: unique id of content unit
    @type unit_id: str
    """
    collection = content_types_db.type_units_collection(content_type)
    collection.remove({'_id': unit_id}, safe=True)
def test_migrate_duplicates(self):
    """
    This tests the correct behavior when we try to change the repo_id on an object,
    and end up causing a duplicate error due to our uniqueness constraint.
    """
    # Let's put two units here with the same IDs with two different repo_ids, and then run
    # the migration.
    source_repo_group_id = add_unit('group', self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    dest_repo_group_id = add_unit('group', self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(source_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(dest_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)

    # Migrate should not cause a DuplicateKeyError
    self.migration.migrate()

    # Verify that both groups remain.
    group_collection = types_db.type_units_collection(ids.TYPE_ID_PKG_GROUP)
    all_groups = list(group_collection.find())
    self.assertEqual(len(all_groups), 2)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.dest_repo_id}).count(), 1)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.source_repo_id}).count(), 1)

    # Let's make sure that the dest group is associated, but not the source one
    query_manager = factory.repo_unit_association_query_manager()
    dest_units = query_manager.get_units(self.dest_repo_id)
    self.assertEqual(len(dest_units), 1)
    dest_unit = dest_units[0]
    self.assertEqual(dest_unit['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(dest_unit['unit_id'], dest_repo_group_id)
    self.assertEqual(query_manager.get_units(self.source_repo_id), [])

    # Verify the repo counts
    self.assertEqual(
        Repo.get_collection().find({'id': 'source-repo'})[0]['content_unit_counts'], {})
    self.assertEqual(
        Repo.get_collection().find({'id': 'dest-repo'})[0]['content_unit_counts'],
        {'package_group': 1})
def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
    """
    Update a content unit's stored metadata.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_id: unique id of content unit
    @type unit_id: str
    @param unit_metadata_delta: metadata fields that have changed
    @type unit_metadata_delta: dict
    """
    collection = content_types_db.type_units_collection(content_type)
    collection.update({'_id': unit_id}, {'$set': unit_metadata_delta}, safe=True)
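# Usage sketch for update_content_unit() above: only the changed fields go in
# the delta; the $set update leaves all other metadata untouched. The `cm`
# manager instance, unit id, and field are hypothetical.
cm.update_content_unit('rpm', 'unit-id-123', {'description': 'patched build'})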
def get_content_unit_collection(type_id):
    """
    Get and return the PulpCollection that corresponds to a given ContentType id.
    @param type_id: ContentType id
    @type type_id: basestring
    @return: PulpCollection instance
    @rtype: PulpCollection
    """
    return content_types_db.type_units_collection(type_id)
def test_pulp_manage_db_loads_types(self, initialize, start_logging_mock, listdir_mock,
                                    mock_drop_indices):
    """
    Test calling pulp-manage-db imports types on a clean types database.
    """
    manage.main()

    all_collection_names = types_db.all_type_collection_names()
    self.assertFalse(mock_drop_indices.called)
    self.assertEqual(len(all_collection_names), 1)
    self.assertEqual(['units_test_type_id'], all_collection_names)

    # Let's make sure we loaded the type definitions correctly
    db_type_definitions = types_db.all_type_definitions()
    self.assertEquals(len(db_type_definitions), 1)
    test_json = json.loads(_test_type_json)
    for attribute in ['id', 'display_name', 'description', 'unit_key', 'search_indexes']:
        self.assertEquals(test_json['types'][0][attribute], db_type_definitions[0][attribute])

    # Now let's ensure that we have the correct indexes
    collection = types_db.type_units_collection('test_type_id')
    indexes = collection.index_information()
    self.assertEqual(indexes['_id_']['key'], [(u'_id', 1)])
    # Make sure we have the unique constraint on all three attributes
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['unique'], True)
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_1_1_attribute_2_1_attribute_3_1']['key'],
                     [(u'attribute_1', 1), (u'attribute_2', 1), (u'attribute_3', 1)])
    # Make sure we indexed attributes 1 and 3
    self.assertEqual(indexes['attribute_1_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_1_1']['key'], [(u'attribute_1', 1)])
    self.assertEqual(indexes['attribute_3_1']['dropDups'], False)
    self.assertEqual(indexes['attribute_3_1']['key'], [(u'attribute_3', 1)])
    # Make sure we only have the indexes that we've hand inspected here
    self.assertEqual(indexes.keys(), [u'_id_', u'attribute_1_1_attribute_2_1_attribute_3_1',
                                      u'attribute_1_1', u'attribute_3_1'])

    initialize.assert_called_once_with(max_timeout=1)
def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
    """
    Update a content unit's stored metadata.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_id: unique id of content unit
    @type unit_id: str
    @param unit_metadata_delta: metadata fields that have changed
    @type unit_metadata_delta: dict
    """
    unit_metadata_delta['_last_updated'] = dateutils.now_utc_timestamp()
    collection = content_types_db.type_units_collection(content_type)
    collection.update({'_id': unit_id}, {'$set': unit_metadata_delta})
def _associated_units_by_type_cursor(unit_type_id, criteria, associated_unit_ids):
    """
    Retrieve a pymongo cursor for units associated with a repository of a given
    unit type that meet the provided criteria.

    :type unit_type_id: str
    :type criteria: UnitAssociationCriteria
    :type associated_unit_ids: list
    :rtype: pymongo.cursor.Cursor
    """
    collection = types_db.type_units_collection(unit_type_id)
    serializer = units.get_model_serializer_for_type(unit_type_id)

    unit_filter = criteria.unit_filters.copy()
    if unit_filter and serializer:
        unit_filter = serializer.translate_filters(serializer.model, unit_filter)

    spec = {'$and': [{'_id': {'$in': associated_unit_ids}}, unit_filter]}

    fields = criteria.unit_fields
    if fields is not None:
        fields = list(fields)
        # The _content_type_id is required for looking up the association
        if '_content_type_id' not in fields:
            fields.append('_content_type_id')
        # translate incoming fields (e.g. id -> foo_id)
        if serializer:
            for index, field in enumerate(fields):
                fields[index] = serializer.translate_field(serializer.model, field)

    cursor = collection.find(spec, projection=fields)

    sort = criteria.unit_sort
    if sort is None:
        sort = [('_id', SORT_ASCENDING)]
    elif serializer:
        sort = list(sort)
        for index, (field, direction) in enumerate(sort):
            sort[index] = (serializer.translate_field(serializer.model, field), direction)
    cursor.sort(sort)

    return cursor
def delete_orphans_by_type(content_type_id, content_unit_ids=None):
    """
    Delete the orphaned content units for the given content type.

    If the content_unit_ids parameter is not None, it acts as a filter of
    the specific orphaned content units that may be deleted.

    NOTE: this method deletes the content unit's bits from disk, if applicable.

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param content_unit_ids: list of content unit ids to delete; None means delete them all
    :type content_unit_ids: iterable or None
    :return: count of units deleted
    :rtype: int
    """
    content_units_collection = content_types_db.type_units_collection(content_type_id)
    content_model = plugin_api.get_unit_model_by_id(content_type_id)
    try:
        unit_key_fields = units_controller.get_unit_key_fields_for_type(content_type_id)
    except ValueError:
        raise MissingResource(content_type_id=content_type_id)
    fields = ('_id', '_storage_path') + unit_key_fields

    count = 0
    for content_unit in OrphanManager.generate_orphans_by_type(content_type_id, fields=fields):
        if content_unit_ids is not None and content_unit['_id'] not in content_unit_ids:
            continue
        model.LazyCatalogEntry.objects(
            unit_id=content_unit['_id'], unit_type_id=content_type_id).delete()
        content_units_collection.remove(content_unit['_id'])
        if hasattr(content_model, 'do_post_delete_actions'):
            content_model.do_post_delete_actions(content_unit)
        storage_path = content_unit.get('_storage_path', None)
        if storage_path is not None:
            OrphanManager.delete_orphaned_file(storage_path)
        count += 1
    return count
def test_migrate_duplicates_doesnt_delete_from_source_repo(self):
    """
    This tests the correct behavior when we try to change the repo_id on an object,
    and end up causing a duplicate error due to our uniqueness constraint. It also
    makes sure the units are not deleted from the source repository if they are in
    the source repository.
    """
    # Let's put two units here with the same IDs with two different repo_ids, and then run
    # the migration.
    source_repo_group_id = add_unit('group', self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    dest_repo_group_id = add_unit('group', self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    # Associate the source_repo_group_id with both source and destination repos
    associate_unit(source_repo_group_id, self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(source_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(dest_repo_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)

    # Migrate should not cause a DuplicateKeyError
    self.migration.migrate()

    # Verify that both groups remain, because the migration should not have removed either
    group_collection = types_db.type_units_collection(ids.TYPE_ID_PKG_GROUP)
    all_groups = list(group_collection.find())
    self.assertEqual(len(all_groups), 2)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.dest_repo_id}).count(), 1)
    self.assertEqual(
        group_collection.find({'id': 'group', 'repo_id': self.source_repo_id}).count(), 1)

    # Let's make sure that there are two associations, and that they are correct.
    query_manager = factory.repo_unit_association_query_manager()
    dest_units = query_manager.get_units(self.dest_repo_id)
    self.assertEqual(len(dest_units), 1)
    dest_unit = dest_units[0]
    self.assertEqual(dest_unit['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(dest_unit['unit_id'], dest_repo_group_id)
    source_units = query_manager.get_units(self.source_repo_id)
    self.assertEqual(len(source_units), 1)
    source_unit = source_units[0]
    self.assertEqual(source_unit['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(source_unit['unit_id'], source_repo_group_id)

    # Verify the repo counts
    self.assertEqual(
        Repo.get_collection().find({'id': 'source-repo'})[0]['content_unit_counts'],
        {'package_group': 1})
    self.assertEqual(
        Repo.get_collection().find({'id': 'dest-repo'})[0]['content_unit_counts'],
        {'package_group': 1})
def associate_units(self):
    manager = managers.repo_unit_association_manager()

    # RPMs
    collection = database.type_units_collection(self.TYPE_ID)
    for unit in collection.find():
        manager.associate_unit_by_id(
            self.REPO_ID,
            self.TYPE_ID,
            unit['_id'],
            RepoContentUnit.OWNER_TYPE_IMPORTER,
            'stuffed',
            False)

    # ERRATA
    collection = database.type_units_collection(self.ERRATA_TYPE_ID)
    for unit in collection.find():
        manager.associate_unit_by_id(
            self.REPO_ID,
            self.ERRATA_TYPE_ID,
            unit['_id'],
            RepoContentUnit.OWNER_TYPE_IMPORTER,
            'stuffed',
            False)
def open_cursors(unit_ids):
    """
    Get a generator of unit cursors.
    :param unit_ids: A dictionary of unit_ids keyed by type_id.
    :type unit_ids: dict
    :return: A generator of open cursors.
    :rtype: generator
    """
    for type_id, id_list in unit_ids.items():
        for page in paginate(id_list):
            query = {'_id': {'$in': page}}
            collection = type_units_collection(type_id)
            cursor = collection.find(query)
            yield cursor
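# Usage sketch for open_cursors() above: unit ids are grouped by type before the
# call, and each id list is paged into one or more cursors. The type ids and
# unit ids are hypothetical placeholders.
unit_ids = {'rpm': ['unit-id-1', 'unit-id-2'], 'erratum': ['unit-id-3']}
for cursor in open_cursors(unit_ids):
    for unit in cursor:
        print(unit['_id'])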
def delete_orphans_by_type(self, content_type):
    """
    Delete all orphaned content units of the given content type.
    @param content_type: content type of the orphans to delete
    @type content_type: str
    """
    orphaned_units = self.list_orphans_by_type(content_type)
    if not orphaned_units:
        return
    collection = content_types_db.type_units_collection(content_type)
    spec = {'_id': {'$in': [o['_id'] for o in orphaned_units]}}
    collection.remove(spec, safe=True)
    orphaned_paths = [o['_storage_path'] for o in orphaned_units
                      if o['_storage_path'] is not None]
    for path in orphaned_paths:
        self.delete_orphaned_file(path)
def migrate(*args, **kwargs):
    """
    Perform the migration as described in this module's docblock.

    :param args: unused
    :type args: list
    :param kwargs: unused
    :type kwargs: dict
    """
    plugin_manager = factory.plugin_manager()
    types = plugin_manager.types()
    for content_type in types:
        collection = database.type_units_collection(content_type['id'])
        collection.update({constants.PULP_USER_METADATA_FIELDNAME: {'$exists': False}},
                          {'$set': {constants.PULP_USER_METADATA_FIELDNAME: {}}},
                          multi=True)
def get_multiple_units_by_ids(self, content_type, unit_ids, model_fields=None):
    """
    Look up multiple content units in the collection for the given content type
    collection that match the list of ids.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_ids: list of unique content unit ids
    @type unit_ids: list of str's
    @param model_fields: fields of each content unit to report, None means all fields
    @type model_fields: None or list of str's
    @return: tuple of content units found in the content type collection that
             match the given ids
    @rtype: (possibly empty) tuple of dict's
    """
    collection = content_types_db.type_units_collection(content_type)
    cursor = collection.find({'_id': {'$in': unit_ids}}, projection=model_fields)
    return tuple(cursor)
def get_multiple_units_by_ids(self, content_type, unit_ids, model_fields=None):
    """
    Look up multiple content units in the collection for the given content type
    collection that match the list of ids.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_ids: list of unique content unit ids
    @type unit_ids: list of str's
    @param model_fields: fields of each content unit to report, None means all fields
    @type model_fields: None or list of str's
    @return: tuple of content units found in the content type collection that
             match the given ids
    @rtype: (possibly empty) tuple of dict's
    """
    collection = content_types_db.type_units_collection(content_type)
    cursor = collection.find({'_id': {'$in': unit_ids}}, fields=model_fields)
    return tuple(cursor)
def list_orphans_by_type(self, content_type):
    """
    List all content units of a given type that are not associated with a repository.
    @param content_type: content type of orphaned units
    @type content_type: str
    @return: list of content units of the given type
    @rtype: list
    """
    # find units of this type that are associated with one or more repositories
    associated_collection = RepoContentUnit.get_collection()
    associated_units = associated_collection.find({'unit_type_id': content_type},
                                                  fields=['unit_id'])
    associated_unit_ids = set(d['unit_id'] for d in associated_units)

    # find units that are not associated with any repositories
    units_collection = content_types_db.type_units_collection(content_type)
    spec = {'_id': {'$nin': list(associated_unit_ids)}}
    orphaned_units = units_collection.find(spec)
    return list(orphaned_units)
def _associated_units_by_type_cursor(unit_type_id, criteria, associated_unit_ids):
    """
    Retrieve a pymongo cursor for units associated with a repository of a given
    unit type that meet the provided criteria.

    :type unit_type_id: str
    :type criteria: UnitAssociationCriteria
    :type associated_unit_ids: list
    :rtype: pymongo.cursor.Cursor
    """
    collection = types_db.type_units_collection(unit_type_id)

    spec = criteria.unit_filters.copy()
    spec['_id'] = {'$in': associated_unit_ids}

    fields = criteria.unit_fields

    # The _content_type_id is required for looking up the association.
    if fields is not None and '_content_type_id' not in fields:
        fields = list(fields)
        fields.append('_content_type_id')

    cursor = collection.find(spec, fields=fields)

    sort = criteria.unit_sort
    if sort is None:
        try:
            unit_key = units_controller.get_unit_key_fields_for_type(unit_type_id)
        except ValueError:
            unit_key = None
        if unit_key is not None:
            sort = [(u, SORT_ASCENDING) for u in unit_key]
    if sort is not None:
        cursor.sort(sort)

    return cursor
def unlink_referenced_content_units(self, from_type, from_id, to_type, to_ids):
    """
    Unlink referenced content units.
    @param from_type: unique id of the parent content collection
    @type from_type: str
    @param from_id: unique id of the parent content unit
    @type from_id: str
    @param to_type: unique id of the child content collection
    @type to_type: str
    @param to_ids: list of unique ids of child content units
    @type to_ids: list or tuple of str's
    """
    collection = content_types_db.type_units_collection(from_type)
    parent = collection.find_one({'_id': from_id})
    if parent is None:
        raise InvalidValue(['from_type'])
    key = '_%s_references' % to_type
    children = set(parent.get(key, []))
    parent[key] = list(children.difference(to_ids))
    collection.update({'_id': from_id}, parent, safe=True)
def add_content_unit(self, content_type, unit_id, unit_metadata):
    """
    Add a content unit and its metadata to the corresponding pulp db collection.
    @param content_type: unique id of content collection
    @type content_type: str
    @param unit_id: unique id of content unit, None means to generate id
    @type unit_id: str or None
    @param unit_metadata: content unit metadata
    @type unit_metadata: dict
    @return: unit id, useful if it was generated
    @rtype: str
    """
    collection = content_types_db.type_units_collection(content_type)
    if unit_id is None:
        unit_id = str(uuid.uuid4())
    unit_doc = {'_id': unit_id, '_content_type_id': content_type}
    unit_doc.update(unit_metadata)
    collection.insert(unit_doc, safe=True)
    return unit_id
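# Usage sketch for add_content_unit() above: passing unit_id=None lets the
# method generate a uuid4 id and return it. The `cm` manager instance and the
# metadata values are hypothetical.
new_id = cm.add_content_unit('rpm', None, {'name': 'zsh', 'version': '5.0'})
print(new_id)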
def get_content_unit_ids(content_type, unit_keys):
    """
    Return a generator of ids that uniquely identify the content units that match
    the given unique keys dictionaries.

    :param content_type: unique id of content collection
    :type content_type: str
    :param unit_keys: list of keys dictionaries that uniquely identify content units
                      in the given content type collection
    :type unit_keys: list of dicts
    :return: generator of unit IDs as strings
    :rtype: generator
    """
    collection = content_types_db.type_units_collection(content_type)
    for segment in paginate(unit_keys):
        spec = _build_multi_keys_spec(content_type, segment)
        fields = ['_id']
        for item in collection.find(spec, fields=fields):
            yield str(item['_id'])
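# Usage sketch for get_content_unit_ids() above: resolve unit key dicts to unit
# ids lazily. The key values are hypothetical and must match the type's unit
# key fields for the generated spec to be valid.
keys = [{'name': 'zsh', 'version': '5.0', 'release': '1', 'epoch': '0', 'arch': 'x86_64'}]
for unit_id in get_content_unit_ids('rpm', keys):
    print(unit_id)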
def test_migrate_groups(self):
    # Setup
    orig_group_id = add_unit('g1', self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(orig_group_id, self.source_repo_id, ids.TYPE_ID_PKG_GROUP)
    associate_unit(orig_group_id, self.dest_repo_id, ids.TYPE_ID_PKG_GROUP)

    # Test
    self.migration.migrate()

    # Verify

    # Verify a new group was created with the correct metadata
    group_coll = types_db.type_units_collection(ids.TYPE_ID_PKG_GROUP)
    all_groups = group_coll.find({}).sort('repo_id', 1)
    self.assertEqual(2, all_groups.count())
    dest_group = all_groups[0]  # sorted by repo_id, so the dest repo's copy comes first
    self.assertEqual(dest_group['id'], 'g1')
    self.assertEqual(dest_group['repo_id'], self.dest_repo_id)
    source_group = all_groups[1]
    self.assertEqual(source_group['id'], 'g1')
    self.assertEqual(source_group['repo_id'], self.source_repo_id)

    # Verify the associations
    query_manager = factory.repo_unit_association_query_manager()
    source_units = query_manager.get_units(self.source_repo_id)
    self.assertEqual(1, len(source_units))
    self.assertEqual(source_units[0]['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(source_units[0]['unit_id'], source_group['_id'])
    dest_units = query_manager.get_units(self.dest_repo_id)
    self.assertEqual(1, len(dest_units))
    self.assertEqual(dest_units[0]['unit_type_id'], ids.TYPE_ID_PKG_GROUP)
    self.assertEqual(dest_units[0]['unit_id'], dest_group['_id'])