def _generate_db(source): """Regenerate the hash.json database which contains all of the sha1 signatures of media files. """ result = Result() source = os.path.abspath(os.path.expanduser(source)) extensions = set() all_files = set() valid_files = set() if not os.path.isdir(source): log.error('Source is not a valid directory %s' % source) sys.exit(1) subclasses = get_all_subclasses(Base) for cls in subclasses: extensions.update(cls.extensions) all_files.update(FILESYSTEM.get_all_files(source, None)) db = Db() db.backup_hash_db() db.reset_hash_db() for current_file in all_files: if os.path.splitext(current_file)[1][1:].lower() not in extensions: log.info('Skipping invalid file %s' % current_file) result.append((current_file, False)) continue result.append((current_file, True)) db.add_hash(db.checksum(current_file), current_file) db.update_hash_db() result.write()
def test_get_hash_exists():
    db = Db()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)

    assert db.get_hash(random_key) == random_value, 'Lookup for hash that exists did not return value'
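# Complementary sketch to the test above, using the same Db API: get_hash()
# for a key that was never added. The process_file variants further down rely
# on this returning None to mean "not a duplicate". The checksum string is a
# hypothetical example.
db = Db()
missing = db.get_hash('no-such-checksum')
print(missing is None)  # True -> process_file treats the file as new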
def test_reset_hash_db():
    db = Db()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)

    assert random_key in db.hash_db, random_key
    db.reset_hash_db()
    assert random_key not in db.hash_db, random_key
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    if(not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)

    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if(checksum is None):
        if(constants.debug is True):
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return
    if(allow_duplicate is False and db.check_hash(checksum) is True):
        if(constants.debug is True):
            print('%s already exists at %s. Skipping...' % (
                _file,
                db.get_hash(checksum)
            ))
        return

    self.create_directory(dest_directory)

    if(move is True):
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def test_add_hash_explicit_write():
    db = Db()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit True value as 3rd param
    db.add_hash(random_key, random_value, True)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key exists
    db2 = Db()
    assert db2.check_hash(random_key) == True
def test_add_hash_default_do_not_write():
    db = Db()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with default False value as 3rd param
    db.add_hash(random_key, random_value)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key does not exist
    db2 = Db()
    assert db2.check_hash(random_key) == False
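# Minimal sketch of the write-flag behaviour the two tests above exercise,
# assuming the same Db API used throughout this section. The checksum strings
# and paths are hypothetical.
db = Db()
db.add_hash('abc123', '/photos/a.jpg')         # default: kept in memory only
db.add_hash('def456', '/photos/b.jpg', True)   # True: flushed to hash.json immediately

db2 = Db()                        # a fresh instance reads hash.json from disk
print(db2.check_hash('abc123'))   # False - never written to disk
print(db2.check_hash('def456'))   # True  - persisted by the explicit flag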
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if ('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if ('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    if (not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)

    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if (checksum is None):
        if (constants.debug is True):
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return
    if (allow_duplicate is False and db.check_hash(checksum) is True):
        if (constants.debug is True):
            print('%s already exists at %s. Skipping...' % (
                _file, db.get_hash(checksum)))
        return

    self.create_directory(dest_directory)

    if (move is True):
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def test_get_all():
    db = Db()
    db.reset_hash_db()

    random_keys = []
    random_values = []
    for _ in range(10):
        random_keys.append(helper.random_string(10))
        random_values.append(helper.random_string(12))
        db.add_hash(random_keys[-1], random_values[-1], False)

    counter = 0
    for key, value in db.all():
        assert key in random_keys, key
        assert value in random_values, value
        counter += 1

    assert counter == 10, counter
def _generate_db(source): """Regenerate the hash.json database which contains all of the sha1 signatures of media files. """ result = Result() source = os.path.abspath(os.path.expanduser(source)) if not os.path.isdir(source): log.error('Source is not a valid directory %s' % source) sys.exit(1) db = Db() db.backup_hash_db() db.reset_hash_db() for current_file in FILESYSTEM.get_all_files(source): result.append((current_file, True)) db.add_hash(db.checksum(current_file), current_file) log.progress() db.update_hash_db() log.progress('', True) result.write()
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    stat_info_original = os.stat(_file)
    metadata = media.get_metadata()

    if(not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    checksum = self.process_checksum(_file, allow_duplicate)
    if(checksum is None):
        log.info('Original checksum returned None for %s. Skipping...' % _file)
        return

    # Run `before()` for every loaded plugin and if any of them raise an exception
    # then we skip importing the file and log a message.
    plugins_run_before_status = self.plugins.run_all_before(_file, destination)
    if(plugins_run_before_status == False):
        log.warn('At least one plugin pre-run failed for %s' % _file)
        return

    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(metadata)
    dest_path = os.path.join(dest_directory, file_name)

    media.set_original_name()

    # If source and destination are identical then
    # we should not write the file. gh-210
    if(_file == dest_path):
        print('Final source and destination path should not be identical')
        return

    self.create_directory(dest_directory)

    # exiftool renames the original file by appending '_original' to the
    # file name. A new file is written with new tags with the initial file
    # name. See exiftool man page for more details.
    exif_original_file = _file + '_original'

    # Check if the source file was processed by exiftool and an _original
    # file was created.
    exif_original_file_exists = False
    if(os.path.exists(exif_original_file)):
        exif_original_file_exists = True

    if(move is True):
        stat = os.stat(_file)
        # Move the processed file into the destination directory
        shutil.move(_file, dest_path)

        if(exif_original_file_exists is True):
            # We can remove it as we don't need the initial file.
            os.remove(exif_original_file)

        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        if(exif_original_file_exists is True):
            # Move the newly processed file with any updated tags to the
            # destination directory
            shutil.move(_file, dest_path)
            # Move the exif _original back to the initial source file
            shutil.move(exif_original_file, _file)
        else:
            compatability._copyfile(_file, dest_path)

        # Set the utime based on what the original file contained
        # before we made any changes.
        # Then set the utime on the destination file based on metadata.
        os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
        self.set_utime_from_metadata(metadata, dest_path)

    db = Db()
    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    # Run `after()` for every loaded plugin and if any of them raise an exception
    # then we log a message; the file has already been imported at this point.
    plugins_run_after_status = self.plugins.run_all_after(_file, destination, dest_path, metadata)
    if(plugins_run_after_status == False):
        log.warn('At least one plugin post-run failed for %s' % _file)
        return

    return dest_path
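# Hypothetical plugin sketch, inferred only from the run_all_before()/
# run_all_after() calls in the version above; the hook signatures mirror the
# arguments passed there and are not a verified API.
class RefuseImport(object):
    """Example plugin whose before() fails, causing process_file to skip the file."""

    def before(self, file_path, destination_folder):
        # Raising here is assumed to make run_all_before() report a failure,
        # so process_file logs a warning and returns early.
        raise RuntimeError('refusing to import %s' % file_path)

    def after(self, file_path, destination_folder, final_file_path, metadata):
        # Called once the file has been written to its final destination.
        pass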
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if ('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if ('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    if (not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)

    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if (checksum is None):
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed then we check if we've seen this file
    # before via checksum. We also check that the file exists at the
    # location we believe it to be.
    # If we find a checksum match but the file doesn't exist where we
    # believe it to be then we write a debug log and proceed to import.
    checksum_file = db.get_hash(checksum)
    if (allow_duplicate is False and checksum_file is not None):
        if (os.path.isfile(checksum_file)):
            log.info('%s already exists at %s. Skipping...' % (_file, checksum_file))
            return
        else:
            log.info(
                '%s matched checksum but file not found at %s. Importing again...' % (  # noqa
                    _file, checksum_file))

    self.create_directory(dest_directory)

    if (move is True):
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        # Do not use copy2(); it has an issue when copying to a
        # network/mounted drive. Using copy() plus a manual
        # set_date_from_filename gets the job done.
        shutil.copy(_file, dest_path)
        self.set_utime(media)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if ('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if ('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    if (not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    media.set_original_name()
    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)

    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if (checksum is None):
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed then we check if we've seen this file
    # before via checksum. We also check that the file exists at the
    # location we believe it to be.
    # If we find a checksum match but the file doesn't exist where we
    # believe it to be then we write a debug log and proceed to import.
    checksum_file = db.get_hash(checksum)
    if (allow_duplicate is False and checksum_file is not None):
        if (os.path.isfile(checksum_file)):
            log.info('%s already exists at %s. Skipping...' % (_file, checksum_file))
            return
        else:
            log.info(
                '%s matched checksum but file not found at %s. Importing again...' % (  # noqa
                    _file, checksum_file))

    # If source and destination are identical then
    # we should not write the file. gh-210
    if (_file == dest_path):
        print('Final source and destination path should not be identical')
        return

    self.create_directory(dest_directory)

    if (move is True):
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        compatability._copyfile(_file, dest_path)
        self.set_utime(media)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    move = False
    if('move' in kwargs):
        move = kwargs['move']

    allow_duplicate = False
    if('allowDuplicate' in kwargs):
        allow_duplicate = kwargs['allowDuplicate']

    if(not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)

    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if(checksum is None):
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed then we check if we've seen this file
    # before via checksum. We also check that the file exists at the
    # location we believe it to be.
    # If we find a checksum match but the file doesn't exist where we
    # believe it to be then we write a debug log and proceed to import.
    checksum_file = db.get_hash(checksum)
    if(allow_duplicate is False and checksum_file is not None):
        if(os.path.isfile(checksum_file)):
            log.info('%s already exists at %s. Skipping...' % (
                _file,
                checksum_file
            ))
            return
        else:
            log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
                _file,
                checksum_file
            ))

    self.create_directory(dest_directory)

    if(move is True):
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        # Do not use copy2(); it has an issue when copying to a
        # network/mounted drive. Using copy() plus a manual
        # set_date_from_filename gets the job done.
        shutil.copy(_file, dest_path)
        self.set_utime(media)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
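# Hedged usage sketch of process_file. The FileSystem/Photo import paths and
# the file paths are assumptions for illustration; the keyword arguments match
# the kwargs read at the top of the versions above.
from elodie.filesystem import FileSystem
from elodie.media.photo import Photo

filesystem = FileSystem()
source = '/photos/inbox/IMG_0001.jpg'      # hypothetical source file
photo = Photo(source)                      # media object providing metadata

dest = filesystem.process_file(
    source,
    '/photos/library',                     # destination root
    photo,
    move=False,                            # copy instead of move
    allowDuplicate=False                   # skip files already in hash.json
)
print(dest)                                # final path, or None if skipped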