Example #1
0
    def verify(self, tempfile=None, allowEmptyChecksums=False):
        """
        Verify that this replica's data matches the Datafile checksums.

        The datafile must have at least one checksum hash to verify
        unless "allowEmptyChecksums" is True.

        :param tempfile: optional file handle; if provided, the data is
            written to it as it is read instead of being discarded.
        :param allowEmptyChecksums: when True, permit verification of a
            datafile that records no checksums at all.
        :return: True if the replica verified, False otherwise.
        """
        # Fix: dropped an unused function-scope import of Dataset_File.
        df = self.datafile
        if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
            logger.error("Datafile for %s has no checksums", self.url)
            return False

        sourcefile = self.get_file(requireVerified=False)
        if not sourcefile:
            logger.error("%s content not accessible", self.url)
            return False
        logger.info("Downloading %s for verification", self.url)
        md5sum, sha512sum, size, mimetype_buffer = \
            generate_file_checksums(sourcefile, tempfile)

        if not (df.size and size == int(df.size)):
            if (df.sha512sum or df.md5sum) and not df.size:
                # If the size is missing but we have a checksum to check
                # the missing size is harmless ... we will fill it in below.
                # Fix: logger.warn is a deprecated alias of warning; use
                # lazy %-args instead of eager string formatting.
                logger.warning("%s size is missing", self.url)
            else:
                logger.error("%s failed size check: %d != %s",
                             self.url, size, df.size)
                return False

        if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
            logger.error("%s failed SHA-512 sum check: %s != %s",
                         self.url, sha512sum, df.sha512sum)
            return False

        if df.md5sum and md5sum.lower() != df.md5sum.lower():
            logger.error("%s failed MD5 sum check: %s != %s",
                         self.url, md5sum, df.md5sum)
            return False

        # Prefer the mimetype already recorded on the datafile; otherwise
        # sniff it from the captured head of the file, if any.
        if df.mimetype:
            mimetype = df.mimetype
        elif len(mimetype_buffer) > 0:
            mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
        else:
            mimetype = ''
        # Backfill any metadata the datafile record was missing.
        if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
            df.md5sum = md5sum.lower()
            df.sha512sum = sha512sum.lower()
            df.size = str(size)
            df.mimetype = mimetype
            df.save()
        # NOTE(review): self.verified is set but the replica is not saved
        # here -- presumably the caller persists it; confirm.
        self.verified = True
        return True
Example #2
0
 def get_metadata(self, replica):
     """
     Return checksum metadata for *replica*'s file at this storage.

     The file is resolved from the replica URL, read in full, and a dict
     with 'md5sum', 'sha512sum' and 'length' (as a string) is returned.
     An OSError while checksumming is reported as a TransferError.
     """
     path = self._uri_to_filename(replica.url)
     with self.storage.open(path, 'r') as stream:
         try:
             md5, sha512, nbytes, _ = generate_file_checksums(stream, None)
         except OSError as err:
             raise TransferError(err.strerror)
         return {
             'md5sum': md5,
             'sha512sum': sha512,
             'length': str(nbytes),
         }
Example #3
0
def check_file_transferred(replica, location):
    """
    Check that a replica has been successfully transfered to a remote
    storage location

    Returns True when a remote checksum (sha512 or md5) check passed,
    False when only a length check could be performed against a
    length-trusting location/provider, and raises MigrationError when
    there is not enough metadata for verification.
    """

    from tardis.tardis_portal.models import Dataset_File
    # Re-fetch the datafile by primary key so the comparison uses the
    # current database state rather than the cached related object.
    datafile = Dataset_File.objects.get(pk=replica.datafile.id)

    # If the remote is capable, get it to send us the checksums and / or
    # file length for its copy of the file
    try:
        # Fetch the remote's metadata for the file
        m = location.provider.get_metadata(replica)
        # NOTE(review): the return value of this first length check is
        # discarded; presumably _check_attribute raises on mismatch --
        # confirm against its definition.
        _check_attribute(m, datafile.size, 'length')
        if (_check_attribute(m, datafile.sha512sum, 'sha512sum') or \
               _check_attribute(m, datafile.md5sum, 'md5sum')):
            return True
        # NOTE(review): this reads location.trust_length, while the
        # fallback below reads location.provider.trust_length -- confirm
        # both attributes exist and which is intended.
        if location.trust_length and \
                 _check_attribute(m, datafile.size, 'length') :
            return False
        raise MigrationError('Not enough metadata for verification')
    except NotImplementedError:
        pass
    except HTTPError as e:
        # Bad request means that the remote didn't recognize the query
        if e.code != 400:
            raise

    # Second preference: a bare length check, if the provider is trusted
    # on length alone.
    if location.provider.trust_length :
        try:
            length = location.provider.get_length(replica)
            if _check_attribute2(length, datafile.size, 'length'):
                return False
        except NotImplementedError:
            pass
    
    # Fetch back the remote file and verify it locally.
    f = location.provider.get_opener(replica)()
    md5sum, sha512sum, size, x = generate_file_checksums(f, None)
    _check_attribute2(str(size), datafile.size, 'length')
    if _check_attribute2(sha512sum, datafile.sha512sum, 'sha512sum') or \
            _check_attribute2(md5sum, datafile.md5sum, 'md5sum'):
        return True
    raise MigrationError('Not enough metadata for file verification')
Example #4
0
def check_file_transferred(replica, location):
    """
    Check that a replica has been successfully transfered to a remote
    storage location
    """

    from tardis.tardis_portal.models import Dataset_File
    datafile = Dataset_File.objects.get(pk=replica.datafile.id)

    # First preference: ask the remote itself for checksums and/or the
    # file length for its copy of the file, if it supports that.
    try:
        remote_meta = location.provider.get_metadata(replica)
        _check_attribute(remote_meta, datafile.size, 'length')
        checksum_matched = (
            _check_attribute(remote_meta, datafile.sha512sum, 'sha512sum')
            or _check_attribute(remote_meta, datafile.md5sum, 'md5sum'))
        if checksum_matched:
            return True
        if location.trust_length and \
                _check_attribute(remote_meta, datafile.size, 'length'):
            return False
        raise MigrationError('Not enough metadata for verification')
    except NotImplementedError:
        pass
    except HTTPError as e:
        # Bad request means that the remote didn't recognize the query
        if e.code != 400:
            raise

    # Second preference: a bare length comparison, when the provider is
    # trusted on length alone.
    if location.provider.trust_length:
        try:
            remote_length = location.provider.get_length(replica)
            if _check_attribute2(remote_length, datafile.size, 'length'):
                return False
        except NotImplementedError:
            pass

    # Last resort: pull the file back and verify the checksums locally.
    handle = location.provider.get_opener(replica)()
    md5, sha512, nbytes, _ = generate_file_checksums(handle, None)
    _check_attribute2(str(nbytes), datafile.size, 'length')
    if _check_attribute2(sha512, datafile.sha512sum, 'sha512sum') or \
            _check_attribute2(md5, datafile.md5sum, 'md5sum'):
        return True
    raise MigrationError('Not enough metadata for file verification')
Example #5
0
    def verify(self, tempfile=None, allowEmptyChecksums=None):
        """
        Verifies this replica's data matches the Datafile checksums.
        It must have at least one checksum hash to verify unless
        "allowEmptyChecksums" is True. If "allowEmptyChecksums" is provided
        (whether True or False), it will override the system-wide
        REQUIRE_DATAFILE_CHECKSUMS setting.

        If passed a file handle, it will write the file to it instead of
        discarding data as it's read.

        :param tempfile: optional writable file handle that receives the
            data as it is read.
        :param allowEmptyChecksums: True/False override, or None to use
            the REQUIRE_DATAFILE_CHECKSUMS setting.
        :return: True if verification succeeded, False otherwise.
        """
        if allowEmptyChecksums is None:
            # Empty checksums are acceptable exactly when checksums are
            # not required system-wide.
            allowEmptyChecksums = not getattr(
                settings, "REQUIRE_DATAFILE_CHECKSUMS", True)

        df = self.datafile
        if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
            logger.error("Datafile for %s has no checksums", self.url)
            return False

        try:
            sourcefile = self.get_file(requireVerified=False)
        except IOError:
            # Fix: lazy logger %-args instead of eager string formatting.
            logger.error("Replica %s not found/accessible at: %s",
                         self.id, self.url)
            return False
        if not sourcefile:
            logger.error("%s content not accessible", self.url)
            return False
        logger.info("Downloading %s for verification", self.url)
        md5sum, sha512sum, size, mimetype_buffer = generate_file_checksums(
            sourcefile, tempfile)

        if not (df.size and size == int(df.size)):
            if (df.sha512sum or df.md5sum) and not df.size:
                # If the size is missing but we have a checksum to check
                # the missing size is harmless ... we will fill it in below.
                # Fix: logger.warn is a deprecated alias of warning.
                logger.warning("%s size is missing", self.url)
            else:
                logger.error("%s failed size check: %d != %s",
                             self.url, size, df.size)
                return False

        if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
            logger.error("%s failed SHA-512 sum check: %s != %s",
                         self.url, sha512sum, df.sha512sum)
            return False

        if df.md5sum and md5sum.lower() != df.md5sum.lower():
            logger.error("%s failed MD5 sum check: %s != %s",
                         self.url, md5sum, df.md5sum)
            return False

        # Prefer the recorded mimetype; otherwise sniff it from the
        # captured head of the file, if any.
        if df.mimetype:
            mimetype = df.mimetype
        elif len(mimetype_buffer) > 0:
            mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
        else:
            mimetype = ""
        # Backfill any metadata the datafile record was missing.
        if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
            df.md5sum = md5sum.lower()
            df.sha512sum = sha512sum.lower()
            df.size = str(size)
            df.mimetype = mimetype
            df.save()
        # NOTE(review): self.verified is set but the replica is not saved
        # here -- presumably the caller persists it; confirm.
        self.verified = True
        return True
Example #6
0
    def verify(self, tempfile=None, allowEmptyChecksums=None):
        '''
        Verifies this replica's data matches the Datafile checksums.
        It must have at least one checksum hash to verify unless
        "allowEmptyChecksums" is True. If "allowEmptyChecksums" is provided
        (whether True or False), it will override the system-wide
        REQUIRE_DATAFILE_CHECKSUMS setting.

        If passed a file handle, it will write the file to it instead of
        discarding data as it's read.

        :param tempfile: optional writable file handle that receives the
            data as it is read.
        :param allowEmptyChecksums: True/False override, or None to use
            the REQUIRE_DATAFILE_CHECKSUMS setting.
        :return: True if verification succeeded, False otherwise.
        '''
        if allowEmptyChecksums is None:
            # Empty checksums are acceptable exactly when checksums are
            # not required system-wide.
            allowEmptyChecksums = not getattr(
                settings, "REQUIRE_DATAFILE_CHECKSUMS", True)

        df = self.datafile
        if not (allowEmptyChecksums or df.sha512sum or df.md5sum):
            logger.error("Datafile for %s has no checksums", self.url)
            return False

        try:
            sourcefile = self.get_file(requireVerified=False)
        except IOError:
            # Fix: lazy logger %-args instead of eager string formatting.
            logger.error("Replica %s not found/accessible at: %s",
                         self.id, self.url)
            return False
        if not sourcefile:
            logger.error("%s content not accessible", self.url)
            return False
        logger.info("Downloading %s for verification", self.url)
        md5sum, sha512sum, size, mimetype_buffer = \
            generate_file_checksums(sourcefile, tempfile)

        if not (df.size and size == int(df.size)):
            if (df.sha512sum or df.md5sum) and not df.size:
                # If the size is missing but we have a checksum to check
                # the missing size is harmless ... we will fill it in below.
                # Fix: logger.warn is a deprecated alias of warning.
                logger.warning("%s size is missing", self.url)
            else:
                logger.error("%s failed size check: %d != %s", self.url, size,
                             df.size)
                return False

        if df.sha512sum and sha512sum.lower() != df.sha512sum.lower():
            logger.error("%s failed SHA-512 sum check: %s != %s", self.url,
                         sha512sum, df.sha512sum)
            return False

        if df.md5sum and md5sum.lower() != df.md5sum.lower():
            logger.error("%s failed MD5 sum check: %s != %s", self.url, md5sum,
                         df.md5sum)
            return False

        # Prefer the recorded mimetype; otherwise sniff it from the
        # captured head of the file, if any.
        if df.mimetype:
            mimetype = df.mimetype
        elif len(mimetype_buffer) > 0:
            mimetype = Magic(mime=True).from_buffer(mimetype_buffer)
        else:
            mimetype = ''
        # Backfill any metadata the datafile record was missing.
        if not (df.size and df.md5sum and df.sha512sum and df.mimetype):
            df.md5sum = md5sum.lower()
            df.sha512sum = sha512sum.lower()
            df.size = str(size)
            df.mimetype = mimetype
            df.save()
        # NOTE(review): self.verified is set but the replica is not saved
        # here -- presumably the caller persists it; confirm.
        self.verified = True
        return True