def create(self, validated_data):
    """
    Create UpdateRecord and its subclasses from JSON file.

    Returns: UpdateRecord instance
    """
    references = validated_data.pop("references", [])
    pkglist = validated_data.pop("pkglist", [])
    update_collection_packages_to_save = list()
    update_references_to_save = list()
    try:
        update_record = super().create(validated_data)
    except IntegrityError:
        raise serializers.ValidationError("Advisory already exists in Pulp.")

    for collection in pkglist:
        new_coll = copy.deepcopy(collection)
        packages = new_coll.pop("packages", [])
        new_coll[PULP_UPDATE_COLLECTION_ATTRS.SHORTNAME] = new_coll.pop("short", "")
        coll = UpdateCollection(**new_coll)
        coll.save()
        coll.update_record.add(update_record)

        for package in packages:
            pkg = UpdateCollectionPackage(**package)
            try:
                pkg.sum_type = createrepo_c.checksum_type(pkg.sum_type)
            except TypeError:
                raise TypeError(f'"{pkg.sum_type}" is not supported.')
            pkg.update_collection = coll
            update_collection_packages_to_save.append(pkg)

    for reference in references:
        new_ref = dict()
        new_ref[PULP_UPDATE_REFERENCE_ATTRS.HREF] = reference.get(
            CR_UPDATE_REFERENCE_ATTRS.HREF, "")
        new_ref[PULP_UPDATE_REFERENCE_ATTRS.ID] = reference.get(
            CR_UPDATE_REFERENCE_ATTRS.ID, "")
        new_ref[PULP_UPDATE_REFERENCE_ATTRS.TITLE] = reference.get(
            CR_UPDATE_REFERENCE_ATTRS.TITLE, "")
        new_ref[PULP_UPDATE_REFERENCE_ATTRS.TYPE] = reference.get(
            CR_UPDATE_REFERENCE_ATTRS.TYPE, "")
        ref = UpdateReference(**new_ref)
        ref.update_record = update_record
        update_references_to_save.append(ref)

    if update_collection_packages_to_save:
        UpdateCollectionPackage.objects.bulk_create(update_collection_packages_to_save)
    if update_references_to_save:
        UpdateReference.objects.bulk_create(update_references_to_save)

    cr_update_record = update_record.to_createrepo_c()
    update_record.digest = hash_update_record(cr_update_record)
    update_record.save()

    return update_record
def _new_metadata(self, metadata_type):
    """Return a Metadata object for the metadata_type"""
    metadata = Metadata(metadata_type)
    metadata.checksum_type = self.checksum_type
    metadata.compression_type = DEFAULT_COMPRESSION_TYPE

    # Set output directory
    metadata.out_dir = self.new_repodata_path

    # Properties related to the first (old) repository
    old_rec = self.old_records.get(metadata_type)
    metadata.old_rec = old_rec
    if old_rec:
        # Build old filename
        metadata.old_fn = os.path.join(self.old_repo_path,
                                       old_rec.location_href)
        if os.path.isfile(metadata.old_fn):
            metadata.old_fn_exists = True
        else:
            msg = ("File {0} doesn't exist in the old repository"
                   " (but it should - delta may rely on "
                   "it)!".format(metadata.old_fn))
            self._warning(msg)
            if not self.ignore_missing:
                raise DeltaRepoError(msg + " Use --ignore-missing option "
                                           "to ignore this error")

    # Properties related to the second (delta) repository
    delta_rec = self.delta_records.get(metadata_type)
    metadata.delta_rec = delta_rec
    if delta_rec:
        metadata.delta_fn = os.path.join(self.delta_repo_path,
                                         delta_rec.location_href)
        if os.path.isfile(metadata.delta_fn):
            metadata.delta_fn_exists = True

            # Determine compression type
            detected_compression_type = cr.detect_compression(metadata.delta_fn)
            if detected_compression_type != cr.UNKNOWN_COMPRESSION:
                metadata.compression_type = detected_compression_type
            else:
                self._warning("Cannot detect compression type for "
                              "{0}".format(metadata.delta_fn))
        else:
            msg = ("The file {0} doesn't exist in the delta "
                   "repository!".format(metadata.delta_fn))
            self._warning(msg)
            if not self.ignore_missing:
                raise DeltaRepoError(msg + " Use --ignore-missing option "
                                           "to ignore this error")

        metadata.checksum_type = cr.checksum_type(delta_rec.checksum_type)

    return metadata
def check(self):
    """
    Check if the content of deltametadata seems to be valid

    :rtype: bool
    """
    if not self.revision_src or not self.revision_dst:
        return False
    if not self.contenthash_src or not self.contenthash_dst:
        return False
    if not self.contenthash_type:
        return False
    if cr.checksum_type(self.contenthash_type) == cr.UNKNOWN_CHECKSUM:
        return False
    if not isnonnegativeint(self.timestamp_src):
        return False
    if not isnonnegativeint(self.timestamp_dst):
        return False
    for pluginbundle in self.usedplugins:
        if not pluginbundle.check():
            return False
    return True
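# A plausible sketch of the isnonnegativeint() helper that check() above
# relies on. The helper's real definition is not part of this snippet set,
# so this is an assumption about its behavior, not the actual implementation.
def isnonnegativeint(value):
    """Return True only for int values >= 0."""
    return isinstance(value, int) and value >= 0

# Usage matching check()'s expectations for timestamps:
assert isnonnegativeint(1378724582)
assert not isnonnegativeint(-1)
assert not isnonnegativeint("1378724582")  # strings are rejected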
def create(self, validated_data):
    """
    Create UpdateRecord and its subclasses from JSON file.

    Returns: UpdateRecord instance
    """
    references = validated_data.pop("references", [])
    pkglist = validated_data.pop("pkglist", [])
    # detach any specified repository; attach once the advisory exists
    repository = validated_data.pop("repository", None)
    update_collection_packages_to_save = list()
    update_references_to_save = list()

    with transaction.atomic():
        try:
            # This persists an advisory with an empty digest
            update_record = super().create(validated_data)
        except IntegrityError:
            # At this point, the advisory has an empty digest. If we hit this,
            # it means a previous advisory-create failed to clean up after itself.
            raise serializers.ValidationError("Advisory already exists in Pulp.")

        for collection in pkglist:
            new_coll = copy.deepcopy(collection)
            packages = new_coll.pop("packages", [])
            new_coll[PULP_UPDATE_COLLECTION_ATTRS.SHORTNAME] = new_coll.pop("short", "")
            coll = UpdateCollection(**new_coll)
            coll.update_record = update_record
            coll.save()

            for package in packages:
                pkg = UpdateCollectionPackage(**package)
                try:
                    pkg.sum_type = createrepo_c.checksum_type(pkg.sum_type)
                except TypeError:
                    raise TypeError(f'"{pkg.sum_type}" is not supported.')
                pkg.update_collection = coll
                update_collection_packages_to_save.append(pkg)

        for reference in references:
            new_ref = dict()
            new_ref[PULP_UPDATE_REFERENCE_ATTRS.HREF] = reference.get(
                CR_UPDATE_REFERENCE_ATTRS.HREF, "")
            new_ref[PULP_UPDATE_REFERENCE_ATTRS.ID] = reference.get(
                CR_UPDATE_REFERENCE_ATTRS.ID, "")
            new_ref[PULP_UPDATE_REFERENCE_ATTRS.TITLE] = reference.get(
                CR_UPDATE_REFERENCE_ATTRS.TITLE, "")
            new_ref[PULP_UPDATE_REFERENCE_ATTRS.TYPE] = reference.get(
                CR_UPDATE_REFERENCE_ATTRS.TYPE, "")
            ref = UpdateReference(**new_ref)
            ref.update_record = update_record
            update_references_to_save.append(ref)

        if update_collection_packages_to_save:
            UpdateCollectionPackage.objects.bulk_create(update_collection_packages_to_save)
        if update_references_to_save:
            UpdateReference.objects.bulk_create(update_references_to_save)

        cr_update_record = update_record.to_createrepo_c()
        update_record.digest = hash_update_record(cr_update_record)
        # The advisory now has a digest - *if* this works
        update_record.save()

        # create new repo version with uploaded advisory
        if repository:
            repository.cast()
            content_to_add = self.Meta.model.objects.filter(pk=update_record.pk)
            with repository.new_version() as new_version:
                new_version.add_content(content_to_add)

    return update_record
def __init__(self, old_repo_path, new_repo_path, out_path=None,
             logger=None, contenthash_type="sha256",
             compression_type="xz", force_database=False,
             ignore_missing=False):
    # Initialization
    self.ignore_missing = ignore_missing
    LoggingInterface.__init__(self, logger)

    self.out_path = out_path or "./"
    self.final_path = os.path.join(self.out_path, "repodata")

    self.new_repo_path = new_repo_path
    self.new_repodata_path = os.path.join(self.new_repo_path, "repodata/")
    self.new_repomd_path = os.path.join(self.new_repodata_path, "repomd.xml")

    self.old_repo_path = old_repo_path
    self.old_repodata_path = os.path.join(self.old_repo_path, "repodata/")
    self.old_repomd_path = os.path.join(self.old_repodata_path, "repomd.xml")

    self.delta_repo_path = out_path
    self.delta_repodata_path = os.path.join(self.delta_repo_path, ".repodata/")
    self.delta_repomd_path = os.path.join(self.delta_repodata_path, "repomd.xml")

    # contenthash type
    self.contenthash_type_str = contenthash_type or "sha256"
    self.compression_type_str = compression_type or "xz"
    self.compression_type = cr.compression_type(self.compression_type_str)

    # Prepare Repomd objects
    self.old_repomd = cr.Repomd(self.old_repomd_path)
    self.new_repomd = cr.Repomd(self.new_repomd_path)
    self.delta_repomd = cr.Repomd()

    # Use revision and tags
    self.delta_repomd.set_revision(self.new_repomd.revision)
    for tag in self.new_repomd.distro_tags:
        self.delta_repomd.add_distro_tag(tag[1], tag[0])
    for tag in self.new_repomd.repo_tags:
        self.delta_repomd.add_repo_tag(tag)
    for tag in self.new_repomd.content_tags:
        self.delta_repomd.add_content_tag(tag)

    # Load records
    self.old_records = {}
    self.new_records = {}
    for record in self.old_repomd.records:
        self.old_records[record.type] = record
    for record in self.new_repomd.records:
        self.new_records[record.type] = record

    old_record_types = set(self.old_records.keys())
    new_record_types = set(self.new_records.keys())
    self.deleted_repomd_record_types = old_record_types - new_record_types
    self.added_repomd_record_types = new_record_types - old_record_types

    # Important sanity checks (repo without primary is definitely bad)
    if "primary" not in self.old_records:
        raise DeltaRepoError("Missing \"primary\" metadata in old repo")
    if "primary" not in self.new_records:
        raise DeltaRepoError("Missing \"primary\" metadata in new repo")

    # Detect type of checksum in the new repomd.xml (global)
    self.checksum_type = cr.checksum_type(
        self.new_records["primary"].checksum_type)
    if self.checksum_type == cr.UNKNOWN_CHECKSUM:
        raise DeltaRepoError("Unknown checksum type used in new repo: %s" %
                             self.new_records["primary"].checksum_type)

    # TODO: Do we really need to detect the checksum type here, when it
    # gets detected again for every record anyway?

    # Detect whether unique md filenames are used
    self.unique_md_filenames = False
    if self.new_records["primary"].location_href.split("primary")[0] != "":
        self.unique_md_filenames = True

    self.old_contenthash = self.old_repomd.contenthash
    self.new_contenthash = self.new_repomd.contenthash

    self.deltametadata = DeltaMetadata()

    # Prepare global bundle
    self.globalbundle = GlobalBundle()
    self.globalbundle.contenthash_type_str = self.contenthash_type_str
    self.globalbundle.unique_md_filenames = self.unique_md_filenames
    self.globalbundle.force_database = force_database
    self.globalbundle.ignore_missing = ignore_missing
def __init__(self, old_repo_path, delta_repo_path, out_path=None,
             logger=None, force_database=False, ignore_missing=False):
    # Initialization
    LoggingInterface.__init__(self, logger)

    self.contenthash_type = None
    self.unique_md_filenames = False
    self.force_database = force_database
    self.ignore_missing = ignore_missing
    self.deltametadata = DeltaMetadata()

    self.out_path = out_path or "./"
    self.final_path = os.path.join(self.out_path, "repodata")

    self.new_repo_path = out_path
    self.new_repodata_path = os.path.join(self.new_repo_path, ".repodata/")
    self.new_repomd_path = os.path.join(self.new_repodata_path, "repomd.xml")

    self.old_repo_path = old_repo_path
    self.old_repodata_path = os.path.join(self.old_repo_path, "repodata/")
    self.old_repomd_path = os.path.join(self.old_repodata_path, "repomd.xml")

    self.delta_repo_path = delta_repo_path
    self.delta_repodata_path = os.path.join(self.delta_repo_path, "repodata/")
    self.delta_repomd_path = os.path.join(self.delta_repodata_path, "repomd.xml")

    # Prepare repomd objects
    self.old_repomd = cr.Repomd(self.old_repomd_path)
    self.delta_repomd = cr.Repomd(self.delta_repomd_path)
    self.new_repomd = cr.Repomd()

    # Check if the delta repo id corresponds with the old repo id
    if not self.delta_repomd.contenthash or \
            len(self.delta_repomd.contenthash.split('-')) != 2:
        raise DeltaRepoError("Bad content hash")

    self.contenthash_type_str = self.delta_repomd.contenthash_type
    res = self.delta_repomd.contenthash.split('-')
    self.old_contenthash, self.new_contenthash = res
    self._debug("Delta %s -> %s" % (self.old_contenthash,
                                    self.new_contenthash))

    if self.old_repomd.contenthash_type == self.delta_repomd.contenthash_type:
        if self.old_repomd.contenthash and self.old_repomd.contenthash != self.old_contenthash:
            raise DeltaRepoError("Not suitable delta for current repo "
                                 "(Expected: {0} Real: {1})".format(
                                     self.old_contenthash,
                                     self.old_repomd.contenthash))
    else:
        self._debug("Different contenthash types repo: {0} vs delta: {1}".format(
            self.old_repomd.contenthash_type, self.delta_repomd.contenthash_type))

    # Use revision and tags
    self.new_repomd.set_revision(self.delta_repomd.revision)
    for tag in self.delta_repomd.distro_tags:
        self.new_repomd.add_distro_tag(tag[1], tag[0])
    for tag in self.delta_repomd.repo_tags:
        self.new_repomd.add_repo_tag(tag)
    for tag in self.delta_repomd.content_tags:
        self.new_repomd.add_content_tag(tag)

    # Load records
    self.old_records = {}
    self.delta_records = {}
    for record in self.old_repomd.records:
        self.old_records[record.type] = record
    for record in self.delta_repomd.records:
        self.delta_records[record.type] = record

    old_record_types = set(self.old_records.keys())
    delta_record_types = set(self.delta_records.keys())
    self.deleted_repomd_record_types = old_record_types - delta_record_types
    self.added_repomd_record_types = delta_record_types - old_record_types

    # Important sanity checks (repo without primary is definitely bad)
    if "primary" not in self.old_records:
        raise DeltaRepoError("Missing \"primary\" metadata in old repo")

    # Detect type of checksum in the delta repomd.xml
    self.checksum_type = cr.checksum_type(
        self.delta_records["deltametadata"].checksum_type)
    if self.checksum_type == cr.UNKNOWN_CHECKSUM:
        raise DeltaRepoError("Unknown checksum type used in delta repo: %s" %
                             self.delta_records["deltametadata"].checksum_type)

    # Detect whether unique md filenames are used
    if self.delta_records["deltametadata"].location_href.split(
            "deltametadata")[0] != "":
        self.unique_md_filenames = True

    # Load deltametadata
    self.removedxml_path = None
    if "deltametadata" in self.delta_records:
        self.deltametadata_path = os.path.join(
            self.delta_repo_path,
            self.delta_records["deltametadata"].location_href)
        self.deltametadata.load(self.deltametadata_path)
    else:
        self._warning("\"deltametadata\" record is missing in repomd.xml "
                      "of delta repo")

    # Prepare global bundle
    self.globalbundle = GlobalBundle()
    self.globalbundle.contenthash_type_str = self.contenthash_type_str
    self.globalbundle.unique_md_filenames = self.unique_md_filenames
    self.globalbundle.force_database = self.force_database
    self.globalbundle.ignore_missing = self.ignore_missing
def test_checksum_type(self):
    self.assertEqual(cr.checksum_type("sha256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("SHA256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("Sha256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("sHa256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("ShA256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("md5"), cr.MD5)
    self.assertEqual(cr.checksum_type("sha"), cr.SHA)
    self.assertEqual(cr.checksum_type("sha1"), cr.SHA1)
    self.assertEqual(cr.checksum_type("sha224"), cr.SHA224)
    self.assertEqual(cr.checksum_type("sha256"), cr.SHA256)
    self.assertEqual(cr.checksum_type("sha384"), cr.SHA384)
    self.assertEqual(cr.checksum_type("sha512"), cr.SHA512)
    self.assertEqual(cr.checksum_type("foobar"), cr.UNKNOWN_CHECKSUM)
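# Round-trip sketch: cr.checksum_type() maps (case-insensitive) names to
# constants, and the bindings also expose cr.checksum_name_str() for the
# reverse direction -- assuming a createrepo_c build that provides it.
import createrepo_c as cr

assert cr.checksum_type("SHA512") == cr.SHA512
assert cr.checksum_name_str(cr.SHA512) == "sha512"
assert cr.checksum_type("nonsense") == cr.UNKNOWN_CHECKSUM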
def _apply_basic_delta(self, md, notes):
    """ """
    if not md:
        # No metadata - Nothing to do
        return (True, None)

    # Init some stuff in md
    # These variables should be set only if a new record was generated,
    # otherwise they should be None/False
    md.new_rec = None
    md.new_fn_exists = False

    if not notes:
        # No notes - Nothing to do
        return (True, None)

    if not md.old_rec and not md.delta_rec:
        # No metadata record exists.
        self._debug("\"{0}\": Doesn't exist "
                    "in any repo".format(md.metadata_type))
        return (True, None)

    if not md.delta_rec:
        # This record is missing in the delta repo
        if notes.get("unchanged") != "1":
            # This metadata was removed in the new version of the repo
            self._debug("\"{0}\": Removed in the new version of repodata"
                        "".format(md.metadata_type))
            return (True, None)

        # Copy from the old repo should be used
        if not md.old_fn_exists:
            # This is missing in the old repo
            self._warning("\"{0}\": From old repo should be used, but "
                          "it is missing".format(md.metadata_type))
            return (True, None)

        # Use copy from the old repo

        # Check if the old file should have a new name
        basename = notes.get("new_name")
        if not basename:
            basename = os.path.basename(md.old_fn)

        md.new_fn = os.path.join(md.out_dir, basename)

        checksum_name = notes.get("checksum_name", DEFAULT_CHECKSUM_NAME)
        checksum_type = cr.checksum_type(checksum_name)

        # Copy the file and create a repomd record
        shutil.copy2(md.old_fn, md.new_fn)
        rec = cr.RepomdRecord(md.metadata_type, md.new_fn)
        rec.fill(checksum_type)
        if self.globalbundle.unique_md_filenames:
            rec.rename_file()
            md.new_fn = rec.location_real

        md.new_rec = rec
        md.new_fn_exists = True

        return (True, rec)

    if not md.delta_fn_exists:
        # Delta is missing
        self._warning("\"{0}\": Delta file is missing"
                      "".format(md.metadata_type))
        return (True, None)

    # At this point we are sure we have a delta file

    if notes.get("original") == "1":
        # Delta file is the target file

        # Check if the file should be uncompressed
        decompress = False
        if notes.get("compressed") == "1":
            decompress = True

        rec = self.apply_use_original(md, decompress)
        self._debug("\"{0}\": Used delta is just a copy".format(md.metadata_type))

        md.new_rec = rec
        md.new_fn_exists = True

        return (True, rec)

    if not md.old_fn_exists:
        # Old file is missing
        self._warning("\"{0}\": Old file is missing"
                      "".format(md.metadata_type))
        return (True, None)

    # The delta file exists and it is neither a copy, nor should the
    # metadata file from the old repo be used.
    # This is a job for a real delta plugin :)
    return (False, None)
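# Self-contained sketch of the copy-and-record pattern that
# _apply_basic_delta() uses for unchanged metadata: copy the old file into
# the output dir, then build a filled repomd record for it.
# copy_with_record() is a hypothetical helper name; the cr calls mirror the
# ones above.
import shutil
import createrepo_c as cr

def copy_with_record(metadata_type, old_fn, new_fn, checksum_name="sha256"):
    """Copy old_fn to new_fn and return a filled RepomdRecord for it."""
    shutil.copy2(old_fn, new_fn)
    rec = cr.RepomdRecord(metadata_type, new_fn)
    rec.fill(cr.checksum_type(checksum_name))  # fills checksum, size, timestamp
    return rec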
def gendelta(self, old_path, new_path, out_path=None,
             do_only=None, skip=None):
    removedxml = RemovedXml()
    hash_in_the_name = False

    # Prepare variables with paths
    new_repodata_path = os.path.join(new_path, "repodata/")
    old_repodata_path = os.path.join(old_path, "repodata/")

    old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
    new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")

    # Prepare Repomd objects
    old_repomd = cr.Repomd(old_repomd_path)
    new_repomd = cr.Repomd(new_repomd_path)
    delta_repomd = cr.Repomd()

    # Prepare output path
    delta_path = os.path.join(out_path, ".deltarepo/")
    delta_repodata_path = os.path.join(delta_path, "repodata/")
    os.mkdir(delta_path)
    os.mkdir(delta_repodata_path)

    # Do repomd delta
    delta_repomd.set_revision(new_repomd.revision)
    for tag in new_repomd.distro_tags:
        delta_repomd.add_distro_tag(tag[1], tag[0])
    for tag in new_repomd.repo_tags:
        delta_repomd.add_repo_tag(tag)
    for tag in new_repomd.content_tags:
        delta_repomd.add_content_tag(tag)

    old_records = dict([(record.type, record) for record in old_repomd.records])
    new_records = dict([(record.type, record) for record in new_repomd.records])
    old_record_types = set(old_records.keys())
    new_record_types = set(new_records.keys())
    deleted_repomd_record_types = old_record_types - new_record_types
    added_repomd_record_types = new_record_types - old_record_types

    # Important sanity check (repo without primary is definitely bad)
    if "primary" not in old_records or "primary" not in new_records:
        raise DeltaRepoError("Missing primary metadata")

    # Detect type of checksum in the new repomd.xml
    self.checksum_type = cr.checksum_type(new_records["primary"].checksum_type)
    if self.checksum_type == cr.UNKNOWN_CHECKSUM:
        raise DeltaRepoError("Unknown checksum type detected: %s" %
                             new_records["primary"].checksum_type)

    # Detect whether unique md filenames are used
    if new_records["primary"].location_href.split("primary")[0] != "":
        hash_in_the_name = True

    # Do deltas for the "primary", "filelists" and "other"
    pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
    pri_new_fn = os.path.join(new_path, new_records["primary"].location_href)
    pri_out_fn = os.path.join(delta_repodata_path, "primary.xml.gz")
    pri_out_f_stat = cr.ContentStat(self.checksum_type)
    pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)

    fil_new_fn = None
    fil_out_fn = None
    fil_out_f_stat = None
    fil_out_f = None
    if "filelists" in new_records:
        fil_new_fn = os.path.join(new_path, new_records["filelists"].location_href)
        fil_out_fn = os.path.join(delta_repodata_path, "filelists.xml.gz")
        fil_out_f_stat = cr.ContentStat(self.checksum_type)
        fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)

    oth_new_fn = None
    oth_out_fn = None
    oth_out_f_stat = None
    oth_out_f = None
    if "other" in new_records:
        oth_new_fn = os.path.join(new_path, new_records["other"].location_href)
        oth_out_fn = os.path.join(delta_repodata_path, "other.xml.gz")
        oth_out_f_stat = cr.ContentStat(self.checksum_type)
        oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)

    deltamodule = MainDeltaModule(id_type=self.id_type, logger=self.logger)
    ids = deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
                         fil_out_f, oth_new_fn, oth_out_f, removedxml)

    # Prepare repomd.xml records
    pri_rec = cr.RepomdRecord("primary", pri_out_fn)
    pri_rec.load_contentstat(pri_out_f_stat)
    pri_rec.fill(self.checksum_type)
    if hash_in_the_name:
        pri_rec.rename_file()
    delta_repomd.set_record(pri_rec)

    if fil_out_fn:
        fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
        fil_rec.load_contentstat(fil_out_f_stat)
        fil_rec.fill(self.checksum_type)
        if hash_in_the_name:
            fil_rec.rename_file()
        delta_repomd.set_record(fil_rec)

    if oth_out_fn:
        oth_rec = cr.RepomdRecord("other", oth_out_fn)
        oth_rec.load_contentstat(oth_out_f_stat)
        oth_rec.fill(self.checksum_type)
        if hash_in_the_name:
            oth_rec.rename_file()
        delta_repomd.set_record(oth_rec)

    # Write out removed.xml
    # TODO: Compression via compression wrapper
    removedxml_path = os.path.join(delta_repodata_path, "removed.xml")
    #removedxml_path_gz = os.path.join(delta_repodata_path, "removed.xml.gz")
    removedxml_xml = removedxml.xml_dump()
    self._debug("Writing removed.xml")
    open(removedxml_path, "w").write(removedxml_xml)
    stat = cr.ContentStat(self.checksum_type)
    #cr.compress_file(removedxml_path, removedxml_path_gz, cr.GZ, stat)
    #os.remove(removedxml_path)
    #removedxml_rec = cr.RepomdRecord("removed", removedxml_path_gz)
    removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
    removedxml_rec.load_contentstat(stat)
    removedxml_rec.fill(self.checksum_type)
    if hash_in_the_name:
        removedxml_rec.rename_file()
    delta_repomd.set_record(removedxml_rec)

    # Write out repomd.xml
    deltarepoid = "%s-%s" % ids
    delta_repomd.set_repoid(deltarepoid, self.id_type)
    delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
    delta_repomd_xml = delta_repomd.xml_dump()
    self._debug("Writing repomd.xml")
    open(delta_repomd_path, "w").write(delta_repomd_xml)

    # Final move
    final_destination = os.path.join(out_path, "%s-%s" % ids)
    if os.path.exists(final_destination):
        self._warning("Destination dir already exists! Removing %s" %
                      final_destination)
        shutil.rmtree(final_destination)
    self._info("Moving %s -> %s" % (delta_path, final_destination))
    os.rename(delta_path, final_destination)
def applydelta(self, old_path, delta_path, out_path=None, database=False):
    removedxml = RemovedXml()
    hash_in_the_name = False

    # Prepare variables with paths
    old_repodata_path = os.path.join(old_path, "repodata/")
    delta_repodata_path = os.path.join(delta_path, "repodata/")

    old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
    delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")

    # Prepare Repomd objects
    old_repomd = cr.Repomd(old_repomd_path)
    delta_repomd = cr.Repomd(delta_repomd_path)
    new_repomd = cr.Repomd()

    # Check if the delta id corresponds with the used repo
    if not delta_repomd.repoid or len(delta_repomd.repoid.split('-')) != 2:
        raise DeltaRepoError("Bad DeltaRepoId")

    self.id_type = delta_repomd.repoid_type

    old_id, new_id = delta_repomd.repoid.split('-')
    self._debug("Delta %s -> %s" % (old_id, new_id))

    if old_repomd.repoid_type == delta_repomd.repoid_type:
        if old_repomd.repoid and old_repomd.repoid != old_id:
            raise DeltaRepoError("Not suitable delta for current repo "
                                 "(Expected: %s Real: %s)" % (old_id, old_repomd.repoid))
    else:
        self._debug("Different repoid types repo: %s vs delta: %s" %
                    (old_repomd.repoid_type, delta_repomd.repoid_type))

    # Prepare output path
    new_path = os.path.join(out_path, ".repodata/")
    new_repodata_path = os.path.join(new_path, "repodata/")
    os.mkdir(new_path)
    os.mkdir(new_repodata_path)

    # Apply repomd delta
    new_repomd.set_revision(delta_repomd.revision)
    for tag in delta_repomd.distro_tags:
        new_repomd.add_distro_tag(tag[1], tag[0])
    for tag in delta_repomd.repo_tags:
        new_repomd.add_repo_tag(tag)
    for tag in delta_repomd.content_tags:
        new_repomd.add_content_tag(tag)

    old_records = dict([(record.type, record) for record in old_repomd.records])
    delta_records = dict([(record.type, record) for record in delta_repomd.records])
    old_record_types = set(old_records.keys())
    delta_record_types = set(delta_records.keys())
    deleted_repomd_record_types = old_record_types - delta_record_types
    added_repomd_record_types = delta_record_types - old_record_types

    # Prepare removedxml
    if "removed" in delta_records:
        removedxml_path = os.path.join(delta_path,
                                       delta_records["removed"].location_href)
        removedxml.xml_parse(removedxml_path)
    else:
        self._warning("\"removed\" record is missing in repomd.xml "
                      "of delta repo")

    # Important sanity check (repo without primary is definitely bad)
    if "primary" not in old_records or "primary" not in delta_records:
        raise DeltaRepoError("Missing primary metadata")

    # Detect type of checksum in the delta repomd.xml
    self.checksum_type = cr.checksum_type(delta_records["primary"].checksum_type)
    if self.checksum_type == cr.UNKNOWN_CHECKSUM:
        raise DeltaRepoError("Unknown checksum type detected: %s" %
                             delta_records["primary"].checksum_type)

    # Detect whether unique md filenames are used
    if delta_records["primary"].location_href.split("primary")[0] != "":
        hash_in_the_name = True

    # Apply delta on primary, filelists and other
    pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
    pri_delta_fn = os.path.join(delta_path, delta_records["primary"].location_href)
    pri_out_fn = os.path.join(new_repodata_path, "primary.xml.gz")
    pri_out_f_stat = cr.ContentStat(self.checksum_type)
    pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
    pri_db_fn = None
    pri_db = None
    if database:
        pri_db_fn = os.path.join(new_repodata_path, "primary.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_fn)

    fil_old_fn = None
    fil_delta_fn = None
    fil_out_fn = None
    fil_out_f_stat = None
    fil_out_f = None
    fil_db_fn = None
    fil_db = None
    if "filelists" in delta_records:
        fil_old_fn = os.path.join(old_path, old_records["filelists"].location_href)
        fil_delta_fn = os.path.join(delta_path, delta_records["filelists"].location_href)
        fil_out_fn = os.path.join(new_repodata_path, "filelists.xml.gz")
        fil_out_f_stat = cr.ContentStat(self.checksum_type)
        fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
        if database:
            fil_db_fn = os.path.join(new_repodata_path, "filelists.sqlite")
            fil_db = cr.FilelistsSqlite(fil_db_fn)

    oth_old_fn = None
    oth_delta_fn = None
    oth_out_fn = None
    oth_out_f_stat = None
    oth_out_f = None
    oth_db_fn = None
    oth_db = None
    if "other" in delta_records:
        oth_old_fn = os.path.join(old_path, old_records["other"].location_href)
        oth_delta_fn = os.path.join(delta_path, delta_records["other"].location_href)
        oth_out_fn = os.path.join(new_repodata_path, "other.xml.gz")
        oth_out_f_stat = cr.ContentStat(self.checksum_type)
        oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
        if database:
            oth_db_fn = os.path.join(new_repodata_path, "other.sqlite")
            oth_db = cr.OtherSqlite(oth_db_fn)

    deltamodule = MainDeltaModule(id_type=self.id_type, logger=self.logger)
    ids = deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, pri_db,
                            fil_old_fn, fil_delta_fn, fil_out_f, fil_db,
                            oth_old_fn, oth_delta_fn, oth_out_f, oth_db,
                            removedxml)

    pri_out_f.close()
    if fil_out_f:
        fil_out_f.close()
    if oth_out_f:
        oth_out_f.close()

    # Check returned IDs
    cold_id, cnew_id = ids  # Calculated ids

    if cold_id != old_id:
        raise DeltaRepoError("Calculated old RepoId doesn't match!")
    if cnew_id != new_id:
        raise DeltaRepoError("Calculated new RepoId doesn't match!")

    self._debug("RepoIds match")

    # Prepare repomd.xml records
    pri_rec = cr.RepomdRecord("primary", pri_out_fn)
    pri_rec.load_contentstat(pri_out_f_stat)
    pri_rec.fill(self.checksum_type)
    if hash_in_the_name:
        pri_rec.rename_file()
    new_repomd.set_record(pri_rec)

    if database:
        pri_db.dbinfo_update(pri_rec.checksum)
        pri_db.close()
        pri_db_stat = cr.ContentStat(self.checksum_type)
        pri_db_compressed = pri_db_fn + ".bz2"
        cr.compress_file(pri_db_fn, None, cr.BZ2, pri_db_stat)
        os.remove(pri_db_fn)
        pri_db_rec = cr.RepomdRecord("primary_db", pri_db_compressed)
        pri_db_rec.load_contentstat(pri_db_stat)
        pri_db_rec.fill(self.checksum_type)
        if hash_in_the_name:
            pri_db_rec.rename_file()
        new_repomd.set_record(pri_db_rec)

    if fil_out_fn:
        fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
        fil_rec.load_contentstat(fil_out_f_stat)
        fil_rec.fill(self.checksum_type)
        if hash_in_the_name:
            fil_rec.rename_file()
        new_repomd.set_record(fil_rec)
        if database:
            fil_db.dbinfo_update(fil_rec.checksum)
            fil_db.close()
            fil_db_stat = cr.ContentStat(self.checksum_type)
            fil_db_compressed = fil_db_fn + ".bz2"
            cr.compress_file(fil_db_fn, None, cr.BZ2, fil_db_stat)
            os.remove(fil_db_fn)
            fil_db_rec = cr.RepomdRecord("filelists_db", fil_db_compressed)
            fil_db_rec.load_contentstat(fil_db_stat)
            fil_db_rec.fill(self.checksum_type)
            if hash_in_the_name:
                fil_db_rec.rename_file()
            new_repomd.set_record(fil_db_rec)

    if oth_out_fn:
        oth_rec = cr.RepomdRecord("other", oth_out_fn)
        oth_rec.load_contentstat(oth_out_f_stat)
        oth_rec.fill(self.checksum_type)
        if hash_in_the_name:
            oth_rec.rename_file()
        new_repomd.set_record(oth_rec)
        if database:
            oth_db.dbinfo_update(oth_rec.checksum)
            oth_db.close()
            oth_db_stat = cr.ContentStat(self.checksum_type)
            oth_db_compressed = oth_db_fn + ".bz2"
            cr.compress_file(oth_db_fn, None, cr.BZ2, oth_db_stat)
            os.remove(oth_db_fn)
            oth_db_rec = cr.RepomdRecord("other_db", oth_db_compressed)
            oth_db_rec.load_contentstat(oth_db_stat)
            oth_db_rec.fill(self.checksum_type)
            if hash_in_the_name:
                oth_db_rec.rename_file()
            new_repomd.set_record(oth_db_rec)

    # Write out repomd.xml
    new_repomd.set_repoid(ids[1], self.id_type)
    new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
    new_repomd_xml = new_repomd.xml_dump()
    self._debug("Writing repomd.xml")
    open(new_repomd_path, "w").write(new_repomd_xml)

    # Final move
    final_destination = os.path.join(out_path, "repodata/")
    if os.path.exists(final_destination):
        self._warning("Destination dir already exists! Removing %s" %
                      final_destination)
        shutil.rmtree(final_destination)
    self._info("Moving %s -> %s" % (new_path, final_destination))
    os.rename(new_path, final_destination)