def check_file(self, file_path):
    """Hash a single file and dump any metadata stored under that hash.

    Args:
        file_path (str): Path of the file to check; symlinks are resolved
            before hashing so identical targets hash identically.
    """
    # TODO: Build a DB table with "root paths" (repetitive Media base paths)
    #       root paths shall not be substrings of each other
    # TODO: Build a DB table with "root path id" | modtime | "relative path" | hash
    # TODO: Try to find the modtime:/filepath in the DB -> if yes return that
    #       metadata, otherwise fall back to this default behaviour
    # Canonicalize: absolute path first, then resolve symlinks.
    srcfullpath = os.path.realpath(os.path.abspath(file_path))
    hash_value = hash_file(srcfullpath)
    # Consistency fix: report via the module logger (lazy %-args) instead of
    # print(), matching the logging style used everywhere else in this file.
    LOGGER.info("hash: %s", hash_value)
    self.metadata_db.dump(hash_value)
def _verify_files_in_dir(self, reldirname, mediaabsdirname, force):
    """Verify the hash of every media file directly inside one shard dir.

    The expected hash of a file is reconstructed from the two shard path
    components plus the file name; the actual hash is recomputed with
    hash_file(). Results are cached in a 'verify.json' file in the same
    directory and rewritten only when something changed.

    Args:
        reldirname: Two-element sequence of shard path components.
        mediaabsdirname (str): Absolute path of the directory to scan.
        force: Passed through to _get_recheck_flag() to force re-checking.
    """
    results = {}
    jsonfilepath = os.path.join(mediaabsdirname, 'verify.json')
    if os.path.isfile(jsonfilepath):
        LOGGER.debug('Reading %s', jsonfilepath)
        try:
            with open(jsonfilepath, 'r') as jsonfile:
                results = json.load(jsonfile)
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not intercepted; the error is logged and re-raised either way.
        except Exception:
            LOGGER.error("Error during reading json file %s", jsonfilepath)
            raise
    changed = False
    # go through all files in directory and check hash
    for root, dirs, files in os.walk(mediaabsdirname):
        del dirs[:]  # we do not want to recurse
        for name in files:
            if not name.lower().endswith('.json'):
                try:
                    recheck = self._get_recheck_flag(force, results, name)
                except Exception:
                    # BUG FIX: pass root and name as separate lazy args; the
                    # original passed one tuple to a two-%s format string,
                    # which fails when the logger formats the record.
                    LOGGER.error(
                        "Error during _get_recheck_flag for file %s/%s",
                        root, name)
                    raise
                if recheck:
                    file_to_verify = os.path.join(root, name)
                    LOGGER.debug("Verifying '%s'", file_to_verify)
                    actual_hash_value = hash_file(file_to_verify)
                    # Shard components + file name reconstruct the hash the
                    # file was stored under.
                    expected_hash_value = '{}{}{}'.format(
                        reldirname[0], reldirname[1], name)
                    status = 'OK' if expected_hash_value == actual_hash_value else 'FAILED'
                    timestamp = datetime.now().isoformat()
                    changed = True
                    results[name] = {}
                    results[name]['status'] = status
                    results[name]['checked'] = timestamp
                    if status != 'OK':
                        # BUG FIX: separate args instead of a single tuple
                        # (same %-formatting defect as above).
                        LOGGER.error("Mismatching hash for file %s/%s",
                                     root, name)
                    else:
                        LOGGER.info("OK - %s", actual_hash_value)
    # Summarize every known failure, including ones cached from earlier runs.
    for name in sorted(results.keys()):
        if results[name]['status'] != 'OK':
            LOGGER.error("Mismatching hash for file %s%s", reldirname, name)
    if changed:
        self._flush_verification_status(jsonfilepath, results)
# NOTE(review): this is a byte-for-byte duplicate of the _verify_files_in_dir
# defined earlier in this file; with both present, this later definition is
# the one that takes effect. One of the two should be deleted.
def _verify_files_in_dir(self, reldirname, mediaabsdirname, force):
    """Verify the hash of every media file directly inside one shard dir.

    The expected hash of a file is reconstructed from the two shard path
    components plus the file name; the actual hash is recomputed with
    hash_file(). Results are cached in a 'verify.json' file in the same
    directory and rewritten only when something changed.

    Args:
        reldirname: Two-element sequence of shard path components.
        mediaabsdirname (str): Absolute path of the directory to scan.
        force: Passed through to _get_recheck_flag() to force re-checking.
    """
    results = {}
    jsonfilepath = os.path.join(mediaabsdirname, 'verify.json')
    if os.path.isfile(jsonfilepath):
        LOGGER.debug('Reading %s', jsonfilepath)
        try:
            with open(jsonfilepath, 'r') as jsonfile:
                results = json.load(jsonfile)
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not intercepted; the error is logged and re-raised either way.
        except Exception:
            LOGGER.error("Error during reading json file %s", jsonfilepath)
            raise
    changed = False
    # go through all files in directory and check hash
    for root, dirs, files in os.walk(mediaabsdirname):
        del dirs[:]  # we do not want to recurse
        for name in files:
            if not name.lower().endswith('.json'):
                try:
                    recheck = self._get_recheck_flag(force, results, name)
                except Exception:
                    # BUG FIX: pass root and name as separate lazy args; the
                    # original passed one tuple to a two-%s format string,
                    # which fails when the logger formats the record.
                    LOGGER.error(
                        "Error during _get_recheck_flag for file %s/%s",
                        root, name)
                    raise
                if recheck:
                    file_to_verify = os.path.join(root, name)
                    LOGGER.debug("Verifying '%s'", file_to_verify)
                    actual_hash_value = hash_file(file_to_verify)
                    # Shard components + file name reconstruct the hash the
                    # file was stored under.
                    expected_hash_value = '{}{}{}'.format(
                        reldirname[0], reldirname[1], name)
                    status = 'OK' if expected_hash_value == actual_hash_value else 'FAILED'
                    timestamp = datetime.now().isoformat()
                    changed = True
                    results[name] = {}
                    results[name]['status'] = status
                    results[name]['checked'] = timestamp
                    if status != 'OK':
                        # BUG FIX: separate args instead of a single tuple
                        # (same %-formatting defect as above).
                        LOGGER.error("Mismatching hash for file %s/%s",
                                     root, name)
                    else:
                        LOGGER.info("OK - %s", actual_hash_value)
    # Summarize every known failure, including ones cached from earlier runs.
    for name in sorted(results.keys()):
        if results[name]['status'] != 'OK':
            LOGGER.error("Mismatching hash for file %s%s", reldirname, name)
    if changed:
        self._flush_verification_status(jsonfilepath, results)
def import_file(self, src, reflink):
    """Import a single media file.

    Args:
        src (str): The path to the file to import
        reflink (bool): Use a reflink if the backend FS supports it

    Returns:
        Nothing.
    """
    # Canonicalize the source path: make it absolute, then resolve symlinks
    # so the media store always operates on the real file.
    resolved_src = os.path.realpath(os.path.abspath(src))
    # The content hash fixes both identity and storage location: the file
    # lives under a 2/2-character sharded relative directory.
    digest = hash_file(resolved_src)
    shard_dir = shard(digest, 2, 2)
    dstfullpath, _is_duplicate = self.media_db.import_file(
        resolved_src, shard_dir, reflink)
    # Record the mapping from the original location to its stored location.
    self.metadata_db.import_file(resolved_src, dstfullpath, shard_dir)