def get_file_ctime_iso_date_str(file_path, fmt=DT_ISO_FORMAT, wd=None):
    """Return the ctime of a file as a date string in Europe/Vienna time.

    @param file_path: path of the file; it is passed through
        remove_protocol before use.
    @param fmt: strftime format of the result (defaults to DT_ISO_FORMAT).
    @param wd: optional directory the path is joined onto; when None the
        path is used as is.
    @return: os.path.getctime of the file, truncated to whole seconds and
        formatted with fmt as a timezone-aware Europe/Vienna datetime.
    """
    fp = remove_protocol(file_path)
    path = fp if wd is None else os.path.join(wd, fp)
    vienna = pytz.timezone('Europe/Vienna')
    # Convert the POSIX timestamp straight into Vienna time. Building a naive
    # datetime in the host's local zone and only then localizing it would
    # attach the Vienna offset to the wrong wall-clock time on any machine
    # whose system timezone is not Europe/Vienna.
    dt = datetime.datetime.fromtimestamp(os.path.getctime(path), vienna)
    return dt.replace(microsecond=0).strftime(fmt)
def package_sub_path_from_relative_path(root, containing_file_path,
                                        relative_path):
    """Resolve a path given relative to a file and hand it to strip_prefixes.

    @param root: second argument passed to strip_prefixes (presumably the
        package root whose prefix is removed - confirm against that helper).
    @param containing_file_path: path of the file that contains the
        reference; only its directory part is used.
    @param relative_path: referenced path, passed through remove_protocol
        and interpreted relative to the containing file's directory.
    @return: result of strip_prefixes applied to the absolute target path
        and root.
    """
    base_dir = os.path.dirname(containing_file_path)
    target = remove_protocol(relative_path)
    absolute_target = os.path.abspath(os.path.join(base_dir, target))
    return strip_prefixes(absolute_target, root)
def validate_file(self, file):
    '''
    Validates a single file referenced inside a METS document: existence,
    size and checksum.

    The path is taken from the xlink href attribute of the element's first
    child, passed through remove_protocol and joined onto self.rootpath. If
    the file exists, self.total_files is decremented and the file is checked
    against the element's SIZE, CHECKSUM and CHECKSUMTYPE attributes. Every
    problem found is printed and appended to self.validation_errors.

    @param file: XML Element of a file that will be validated.
    @return: None
    '''
    # Workaround: earkweb.log in the AIP metadata/ folder on IP root level is
    # exempt from the size and checksum comparison.
    exempt_suffix = './metadata/earkweb.log'
    err = []

    # get information about the file; the first child is reached by indexing
    # because Element.getchildren() is deprecated
    attr_path = file[0].attrib[q(XLINK_NS, 'href')]
    attr_size = file.attrib['SIZE']
    attr_checksum = file.attrib['CHECKSUM']
    attr_checksumtype = file.attrib['CHECKSUMTYPE']

    # check if file exists, if yes validate it
    fitem = remove_protocol(attr_path)
    file_path = os.path.join(self.rootpath, fitem).replace('\\', '/')

    if not os.path.exists(file_path):
        err.append(
            "Unable to find file referenced in delivery METS file: %s"
            % file_path)
    else:
        self.total_files -= 1
        exempt = file_path.endswith(exempt_suffix)

        # check if file size is valid (both values are still converted for
        # exempt files, so a malformed SIZE attribute raises as before)
        file_size = os.path.getsize(file_path)
        if int(file_size) != int(attr_size) and not exempt:
            err.append(
                "Actual file size %s does not equal file size attribute value %s"
                % (file_size, attr_size))

        # validate checksum; the validator is run for exempt files too and
        # only its result is ignored
        checksum_result = ChecksumValidation().validate_checksum(
            file_path, attr_checksum, attr_checksumtype)
        # The explicit comparison with True is kept because the return type
        # of validate_checksum is not visible from here.
        if not exempt and not checksum_result == True:
            err.append('Checksum validation failed for: %s' % file_path)

    for error in err:
        # single-argument call form prints identically on Python 2 and 3
        print('File validation error: ' + error)
        self.validation_errors.append(error)
def fsize(file_path, wd=None):
    """Return the size of a file in bytes.

    @param file_path: path of the file; it is passed through
        remove_protocol before use.
    @param wd: optional directory the path is joined onto; when None the
        path is used as is.
    @return: os.path.getsize of the resolved path, as an int.
    """
    target = remove_protocol(file_path)
    if wd is not None:
        target = os.path.join(wd, target)
    size_in_bytes = os.path.getsize(target)
    return int(size_in_bytes)
def checksum(file_path, wd=None, alg=ChecksumAlgorithm.SHA256):
    """Compute the checksum of a file with the requested algorithm.

    @param file_path: path of the file; it is passed through
        remove_protocol before use.
    @param wd: optional directory the path is joined onto; when None the
        path is used as is.
    @param alg: algorithm handed to ChecksumFile.get (defaults to
        ChecksumAlgorithm.SHA256).
    @return: whatever ChecksumFile(path).get(alg) returns for the file.
    """
    target = remove_protocol(file_path)
    if wd is not None:
        target = os.path.join(wd, target)
    checksum_file = ChecksumFile(target)
    return checksum_file.get(alg)