def verify(self, tempfile=None, allowEmptyChecksums=False):
    """Verify this replica's data against the Datafile checksums.

    The datafile must have at least one checksum hash to verify unless
    ``allowEmptyChecksums`` is True.  If a file handle is passed via
    ``tempfile``, the downloaded content is written to it instead of
    being discarded as it is read.

    :param tempfile: optional writable file handle to capture the data
    :param allowEmptyChecksums: permit verification without any checksum
    :return: True if the replica verified, False otherwise
    """
    df = self.datafile
    if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
        logger.error("Datafile for %s has no checksums", self.url)
        return False

    sourcefile = self.get_file(requireVerified=False)
    if not sourcefile:
        logger.error("%s content not accessible", self.url)
        return False
    logger.info("Downloading %s for verification", self.url)
    md5sum, sha512sum, size, mimetype_buffer = \
        generate_file_checksums(sourcefile, tempfile)

    if not (df.size and size == int(df.size)):
        if (df.sha512sum or df.md5sum) and not df.size:
            # If the size is missing but we have a checksum to check,
            # the missing size is harmless ... we will fill it in below.
            # (logger.warn is deprecated; use warning() with lazy args.)
            logger.warning("%s size is missing", self.url)
        else:
            logger.error("%s failed size check: %d != %s",
                         self.url, size, df.size)
            return False

    if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
        logger.error("%s failed SHA-512 sum check: %s != %s",
                     self.url, sha512sum, df.sha512sum)
        return False

    if df.md5sum and md5sum.lower() != df.md5sum.lower():
        logger.error("%s failed MD5 sum check: %s != %s",
                     self.url, md5sum, df.md5sum)
        return False

    if df.mimetype:
        mimetype = df.mimetype
    elif len(mimetype_buffer) > 0:
        mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
    else:
        mimetype = ''

    # Backfill any datafile metadata that was missing before verification.
    if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
        df.md5sum = md5sum.lower()
        df.sha512sum = sha512sum.lower()
        df.size = str(size)
        df.mimetype = mimetype
        df.save()
    self.verified = True
    return True
def get_metadata(self, replica):
    """Compute checksum metadata for the stored copy of *replica*.

    Opens the file backing ``replica.url`` in local storage, checksums
    it, and returns a dict with its MD5 and SHA-512 digests plus its
    length (as a string).  Raises TransferError if reading fails.
    """
    path = self._uri_to_filename(replica.url)
    # NOTE(review): opened in text mode 'r' — presumably the storage
    # backend treats this as raw bytes; confirm, since text decoding
    # would corrupt the checksums.
    with self.storage.open(path, 'r') as stream:
        try:
            checksums = generate_file_checksums(stream, None)
        except OSError as e:
            raise TransferError(e.strerror)
    md5sum, sha512sum, size = checksums[0], checksums[1], checksums[2]
    return {'md5sum': md5sum,
            'sha512sum': sha512sum,
            'length': str(size)}
def check_file_transferred(replica, location):
    """
    Check that a replica has been successfully transferred to a remote
    storage location.

    :param replica: the replica whose remote copy is being checked
    :param location: the remote storage location holding the copy
    :return: True if the remote copy was verified against the datafile's
        checksums; False if only the (trusted) length could be checked
        and it matched
    :raises MigrationError: when there is not enough metadata to verify
    """
    from tardis.tardis_portal.models import Dataset_File
    datafile = Dataset_File.objects.get(pk=replica.datafile.id)

    # If the remote is capable, get it to send us the checksums and / or
    # file length for its copy of the file
    try:
        # Fetch the remote's metadata for the file
        m = location.provider.get_metadata(replica)
        # Raises on a mismatch; the return value here is deliberately
        # ignored (a missing size is tolerated at this point).
        _check_attribute(m, datafile.size, 'length')
        if (_check_attribute(m, datafile.sha512sum, 'sha512sum') or
                _check_attribute(m, datafile.md5sum, 'md5sum')):
            return True
        if location.trust_length and \
                _check_attribute(m, datafile.size, 'length'):
            return False
        raise MigrationError('Not enough metadata for verification')
    except NotImplementedError:
        pass
    except HTTPError as e:
        # Bad request means that the remote didn't recognize the query
        if e.code != 400:
            raise

    if location.provider.trust_length:
        try:
            length = location.provider.get_length(replica)
            if _check_attribute2(length, datafile.size, 'length'):
                return False
        except NotImplementedError:
            pass

    # Fetch back the remote file and verify it locally.
    f = location.provider.get_opener(replica)()
    # mimetype buffer from the checksum helper is not needed here
    md5sum, sha512sum, size, _ = generate_file_checksums(f, None)
    _check_attribute2(str(size), datafile.size, 'length')
    if _check_attribute2(sha512sum, datafile.sha512sum, 'sha512sum') or \
            _check_attribute2(md5sum, datafile.md5sum, 'md5sum'):
        return True
    raise MigrationError('Not enough metadata for file verification')
def check_file_transferred(replica, location):
    """Verify that *replica* arrived intact at *location*.

    Prefers remote-supplied metadata (checksums and/or length) for the
    check; if the provider cannot supply it, falls back to fetching the
    file back and checksumming it locally.
    """
    from tardis.tardis_portal.models import Dataset_File
    datafile = Dataset_File.objects.get(pk=replica.datafile.id)

    # First attempt: ask the remote provider for its copy's metadata.
    try:
        metadata = location.provider.get_metadata(replica)
        _check_attribute(metadata, datafile.size, 'length')
        checksums_ok = (
            _check_attribute(metadata, datafile.sha512sum, 'sha512sum') or
            _check_attribute(metadata, datafile.md5sum, 'md5sum'))
        if checksums_ok:
            return True
        if location.trust_length and \
                _check_attribute(metadata, datafile.size, 'length'):
            return False
        raise MigrationError('Not enough metadata for verification')
    except NotImplementedError:
        pass
    except HTTPError as e:
        # A 400 response means the remote didn't recognize the metadata
        # query; anything else is a real failure.
        if e.code != 400:
            raise

    if location.provider.trust_length:
        try:
            remote_length = location.provider.get_length(replica)
            if _check_attribute2(remote_length, datafile.size, 'length'):
                return False
        except NotImplementedError:
            pass

    # Last resort: pull the file back and checksum it locally.
    remote_file = location.provider.get_opener(replica)()
    md5sum, sha512sum, size, _ = generate_file_checksums(remote_file, None)
    _check_attribute2(str(size), datafile.size, 'length')
    if _check_attribute2(sha512sum, datafile.sha512sum, 'sha512sum') or \
            _check_attribute2(md5sum, datafile.md5sum, 'md5sum'):
        return True
    raise MigrationError('Not enough metadata for file verification')
def verify(self, tempfile=None, allowEmptyChecksums=None):
    """
    Verifies this replica's data matches the Datafile checksums.
    It must have at least one checksum hash to verify unless
    "allowEmptyChecksums" is True.

    If "allowEmptyChecksums" is provided (whether True or False), it
    overrides the system-wide REQUIRE_DATAFILE_CHECKSUMS setting.

    If passed a file handle via ``tempfile``, the file is written to it
    instead of the data being discarded as it is read.

    :param tempfile: optional writable file handle to capture the data
    :param allowEmptyChecksums: tri-state override of the settings flag
    :return: True if the replica verified, False otherwise
    """
    if allowEmptyChecksums is None:
        # Fall back to the site-wide policy when the caller didn't say.
        allowEmptyChecksums = not getattr(
            settings, "REQUIRE_DATAFILE_CHECKSUMS", True)

    df = self.datafile
    if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
        logger.error("Datafile for %s has no checksums", self.url)
        return False

    try:
        sourcefile = self.get_file(requireVerified=False)
    except IOError:
        # Lazy %-args instead of eager string interpolation.
        logger.error("Replica %s not found/accessible at: %s",
                     self.id, self.url)
        return False
    if not sourcefile:
        logger.error("%s content not accessible", self.url)
        return False
    logger.info("Downloading %s for verification", self.url)
    md5sum, sha512sum, size, mimetype_buffer = generate_file_checksums(
        sourcefile, tempfile)

    if not (df.size and size == int(df.size)):
        if (df.sha512sum or df.md5sum) and not df.size:
            # If the size is missing but we have a checksum to check,
            # the missing size is harmless ... we will fill it in below.
            # (logger.warn is deprecated; use warning() with lazy args.)
            logger.warning("%s size is missing", self.url)
        else:
            logger.error("%s failed size check: %d != %s",
                         self.url, size, df.size)
            return False

    if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
        logger.error("%s failed SHA-512 sum check: %s != %s",
                     self.url, sha512sum, df.sha512sum)
        return False

    if df.md5sum and md5sum.lower() != df.md5sum.lower():
        logger.error("%s failed MD5 sum check: %s != %s",
                     self.url, md5sum, df.md5sum)
        return False

    if df.mimetype:
        mimetype = df.mimetype
    elif len(mimetype_buffer) > 0:
        mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
    else:
        mimetype = ""

    # Backfill any datafile metadata that was missing before verification.
    if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
        df.md5sum = md5sum.lower()
        df.sha512sum = sha512sum.lower()
        df.size = str(size)
        df.mimetype = mimetype
        df.save()
    self.verified = True
    return True
def verify(self, tempfile=None, allowEmptyChecksums=None):
    '''
    Verifies this replica's data matches the Datafile checksums.
    It must have at least one checksum hash to verify unless
    "allowEmptyChecksums" is True.

    If "allowEmptyChecksums" is provided (whether True or False), it
    overrides the system-wide REQUIRE_DATAFILE_CHECKSUMS setting.

    If passed a file handle via ``tempfile``, the file is written to it
    instead of the data being discarded as it is read.

    :param tempfile: optional writable file handle to capture the data
    :param allowEmptyChecksums: tri-state override of the settings flag
    :return: True if the replica verified, False otherwise
    '''
    if allowEmptyChecksums is None:
        # Fall back to the site-wide policy when the caller didn't say.
        allowEmptyChecksums = not getattr(
            settings, "REQUIRE_DATAFILE_CHECKSUMS", True)

    df = self.datafile
    if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
        logger.error("Datafile for %s has no checksums", self.url)
        return False

    try:
        sourcefile = self.get_file(requireVerified=False)
    except IOError:
        # Lazy %-args instead of eager string interpolation.
        logger.error("Replica %s not found/accessible at: %s",
                     self.id, self.url)
        return False
    if not sourcefile:
        logger.error("%s content not accessible", self.url)
        return False
    logger.info("Downloading %s for verification", self.url)
    md5sum, sha512sum, size, mimetype_buffer = \
        generate_file_checksums(sourcefile, tempfile)

    if not (df.size and size == int(df.size)):
        if (df.sha512sum or df.md5sum) and not df.size:
            # If the size is missing but we have a checksum to check,
            # the missing size is harmless ... we will fill it in below.
            # (logger.warn is deprecated; use warning() with lazy args.)
            logger.warning("%s size is missing", self.url)
        else:
            logger.error("%s failed size check: %d != %s",
                         self.url, size, df.size)
            return False

    if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
        logger.error("%s failed SHA-512 sum check: %s != %s",
                     self.url, sha512sum, df.sha512sum)
        return False

    if df.md5sum and md5sum.lower() != df.md5sum.lower():
        logger.error("%s failed MD5 sum check: %s != %s",
                     self.url, md5sum, df.md5sum)
        return False

    if df.mimetype:
        mimetype = df.mimetype
    elif len(mimetype_buffer) > 0:
        mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
    else:
        mimetype = ''

    # Backfill any datafile metadata that was missing before verification.
    if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
        df.md5sum = md5sum.lower()
        df.sha512sum = sha512sum.lower()
        df.size = str(size)
        df.mimetype = mimetype
        df.save()
    self.verified = True
    return True