def process_checksum(self, _file, allow_duplicate):
    """Compute the checksum of ``_file`` and decide whether it may be imported.

    :param str _file: Path of the file being considered.
    :param bool allow_duplicate: Only when this is exactly ``False`` is the
        duplicate check performed.
    :returns: The checksum when the import may proceed; ``None`` when no
        checksum could be computed or when the file is a known duplicate
        that still exists at its recorded location.
    """
    hash_db = Db()
    file_checksum = hash_db.checksum(_file)
    if file_checksum is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    known_path = hash_db.get_hash(file_checksum)

    # No duplicate check is needed when duplicates are permitted or when
    # this checksum has never been recorded.
    if allow_duplicate is not False or known_path is None:
        return file_checksum

    # The checksum is known: it is only a real duplicate if the recorded
    # file is still where the database believes it to be.
    if os.path.isfile(known_path):
        log.info('%s already at %s.' % (_file, known_path))
        return None

    # Stale database entry; log it and let the import proceed.
    log.info('%s matched checksum but file not found at %s.' % (  # noqa
        _file, known_path))
    return file_checksum
def _generate_db(source):
    """Regenerate the hash.json database from the media files under ``source``.

    The existing hash database is backed up and reset, then every file found
    under ``source`` whose extension belongs to a ``Base`` subclass is
    checksummed and recorded. A per-file success/failure report is written
    through ``Result``.

    :param str source: Directory to scan; ``~`` is expanded and the path is
        made absolute. The process exits with status 1 when it is not a
        directory.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Collect every extension handled by any media class.
    extensions = set()
    for cls in get_all_subclasses(Base):
        extensions.update(cls.extensions)

    all_files = set(FILESYSTEM.get_all_files(source, None))

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            result.append((current_file, False))
            continue

        # db.checksum() can return None (callers elsewhere guard for it).
        # Never store a None key in the hash db or report it as a success.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.info('Could not get checksum for %s.' % current_file)
            result.append((current_file, False))
            continue

        result.append((current_file, True))
        db.add_hash(checksum, current_file)

    db.update_hash_db()
    result.write()
def test_checksum():
    """Check that ``Db.checksum`` yields the expected digest for plain.jpg."""
    expected = (
        'd5eb755569ddbc8a664712d2d7d6e0fa1ddfcdb378475e4a6758dc38d5ea9a16'
    )
    actual = Db().checksum(helper.get_file('plain.jpg'))
    assert actual == expected, 'Checksum for plain.jpg did not match'
def process_file(self, _file, destination, media, **kwargs):
    """Copy or move ``_file`` into its computed folder under ``destination``.

    :param str _file: Path of the source file.
    :param str destination: Root directory the file is imported into.
    :param media: Media object; must provide ``is_valid()`` and
        ``get_metadata()``.
    :param kwargs: ``move`` (default ``False``) moves instead of copying;
        ``allowDuplicate`` (default ``False``) imports even when the
        checksum is already recorded.
    :returns: The destination path on success, ``None`` when the file is
        skipped (invalid media, no checksum, or a recorded duplicate).
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a SyntaxError on Python 3.
    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        if constants.debug is True:
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return without importing.
    if allow_duplicate is False and db.check_hash(checksum) is True:
        if constants.debug is True:
            print('%s already exists at %s. Skipping...' % (
                _file,
                db.get_hash(checksum)
            ))
        return

    self.create_directory(dest_directory)

    if move is True:
        # Capture the timestamps before moving so they can be restored on
        # the destination file.
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    """Import ``_file`` into ``destination`` by copying or moving it.

    :param str _file: Path of the source file.
    :param str destination: Root directory the file is imported into.
    :param media: Media object; must provide ``is_valid()`` and
        ``get_metadata()``.
    :param kwargs: ``move`` (default ``False``) moves instead of copying;
        ``allowDuplicate`` (default ``False``) imports even when the
        checksum is already recorded.
    :returns: The destination path on success, ``None`` when the file is
        skipped (invalid media, no checksum, or a recorded duplicate).
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    # The print statement is Python 2 only; the parenthesised form used
    # below produces the same output on Python 2 and Python 3.
    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        if constants.debug is True:
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return without importing.
    if allow_duplicate is False and db.check_hash(checksum) is True:
        if constants.debug is True:
            print('%s already exists at %s. Skipping...' % (
                _file, db.get_hash(checksum)))
        return

    self.create_directory(dest_directory)

    if move is True:
        # Read the timestamps first: they are re-applied to the moved file.
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def _verify():
    """Check every entry of the hash database against the file on disk.

    An entry passes only when its file exists and the freshly computed
    checksum equals the stored one. One ``(path, passed)`` tuple per entry
    is appended to a ``Result``, which is written once at the end.
    """
    report = Result()
    hash_db = Db()

    for stored_checksum, path in hash_db.all():
        # Short-circuit: the checksum is only computed for existing files.
        intact = (
            os.path.isfile(path)
            and stored_checksum == hash_db.checksum(path)
        )
        report.append((path, True if intact else False))

    report.write()
def process_file(self, _file, destination, media, **kwargs):
    """Validate ``media`` and run the duplicate check for ``_file``.

    :param str _file: Path of the source file.
    :param str destination: Root directory used to build the destination
        path.
    :param media: Media object; must provide ``is_valid()``,
        ``set_original_name()`` and ``get_metadata()``.
    :param kwargs: ``move`` and ``allowDuplicate`` (both default ``False``).
    :returns: The checksum of ``_file`` when the import may proceed;
        ``None`` when the media is invalid, no checksum could be computed,
        or the file is a known duplicate that still exists on disk.
    """
    # NOTE(review): ``move`` and ``dest_path`` are computed but not used any
    # further in this block as visible here.
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    media.set_original_name()
    metadata = media.get_metadata()

    folder = self.destination_folder.get_folder_path(metadata)
    dest_directory = os.path.join(destination, folder)
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    hash_db = Db()
    file_checksum = hash_db.checksum(_file)
    if file_checksum is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    known_path = hash_db.get_hash(file_checksum)

    # No duplicate check is needed when duplicates are permitted or when
    # this checksum has never been recorded.
    if allow_duplicate is not False or known_path is None:
        return file_checksum

    # Only treat it as a duplicate if the recorded file still exists.
    if os.path.isfile(known_path):
        log.info('%s already at %s.' % (_file, known_path))
        return None

    # Stale database entry; log it and let the import proceed.
    log.info('%s matched checksum but file not found at %s.' % (  # noqa
        _file, known_path))
    return file_checksum
def _verify():
    """Check every hash database entry against disk, printing progress.

    An entry passes only when its file exists and the freshly computed
    checksum equals the stored one. Each entry is recorded in a ``Result``
    and reported through ``log.progress`` (default marker on success,
    ``'x'`` on failure).
    """
    report = Result()
    hash_db = Db()

    for stored_checksum, path in hash_db.all():
        # Short-circuit: the checksum is only computed for existing files.
        intact = (
            os.path.isfile(path)
            and stored_checksum == hash_db.checksum(path)
        )
        if intact:
            report.append((path, True))
            log.progress()
        else:
            report.append((path, False))
            log.progress('x')

    log.progress('', True)
    report.write()
def _generate_db(source):
    """Regenerate the hash.json database from the files under ``source``.

    The existing hash database is backed up and reset, then every file
    returned by ``FILESYSTEM.get_all_files(source)`` is checksummed and
    recorded. Progress is emitted per file and a success/failure report is
    written through ``Result``.

    :param str source: Directory to scan; ``~`` is expanded and the path is
        made absolute. The process exits with status 1 when it is not a
        directory.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in FILESYSTEM.get_all_files(source):
        # db.checksum() can return None (callers elsewhere guard for it).
        # Never store a None key in the hash db or report it as a success.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.info('Could not get checksum for %s.' % current_file)
            result.append((current_file, False))
            log.progress('x')
            continue

        result.append((current_file, True))
        db.add_hash(checksum, current_file)
        log.progress()

    db.update_hash_db()
    log.progress('', True)
    result.write()
def process_file(self, _file, destination, media, **kwargs):
    """Import ``_file`` into its computed folder under ``destination``.

    :param str _file: Path of the source file.
    :param str destination: Root directory the file is imported into.
    :param media: Media object; must provide ``is_valid()`` and
        ``get_metadata()``.
    :param kwargs: ``move`` (default ``False``) moves instead of copying;
        ``allowDuplicate`` (default ``False``) skips the duplicate check.
    :returns: The destination path on success, ``None`` when the file is
        skipped (invalid media, no checksum, or a duplicate that still
        exists at its recorded location).
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()
    dest_directory = os.path.join(
        destination, self.get_folder_path(metadata))
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    hash_db = Db()
    file_checksum = hash_db.checksum(_file)
    if file_checksum is None:
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # A recorded checksum only counts as a duplicate when the recorded file
    # is still on disk; a stale entry is logged and the import continues.
    known_path = hash_db.get_hash(file_checksum)
    if allow_duplicate is False and known_path is not None:
        if os.path.isfile(known_path):
            log.info('%s already exists at %s. Skipping...' % (
                _file, known_path))
            return
        log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
            _file, known_path))

    self.create_directory(dest_directory)

    if move is not True:
        # Do not use copy2(): it has an issue when copying to a
        # network/mounted drive. A plain copy followed by set_utime()
        # gets the job done.
        shutil.copy(_file, dest_path)
        self.set_utime(media)
    else:
        # Capture the timestamps first so they can be re-applied after the
        # move.
        source_stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (source_stat.st_atime, source_stat.st_mtime))

    hash_db.add_hash(file_checksum, dest_path)
    hash_db.update_hash_db()

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    """Import ``_file`` into its computed folder under ``destination``.

    :param str _file: Path of the source file.
    :param str destination: Root directory the file is imported into.
    :param media: Media object; must provide ``is_valid()``,
        ``set_original_name()`` and ``get_metadata()``.
    :param kwargs: ``move`` (default ``False``) moves instead of copying;
        ``allowDuplicate`` (default ``False``) skips the duplicate check.
    :returns: The destination path on success, ``None`` when the file is
        skipped (invalid media, no checksum, a duplicate that still exists
        at its recorded location, or source equal to destination).
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    media.set_original_name()
    metadata = media.get_metadata()
    dest_directory = os.path.join(
        destination, self.get_folder_path(metadata))
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    hash_db = Db()
    file_checksum = hash_db.checksum(_file)
    if file_checksum is None:
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # A recorded checksum only counts as a duplicate when the recorded file
    # is still on disk; a stale entry is logged and the import continues.
    known_path = hash_db.get_hash(file_checksum)
    if allow_duplicate is False and known_path is not None:
        if os.path.isfile(known_path):
            log.info('%s already exists at %s. Skipping...' % (
                _file, known_path))
            return
        log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
            _file, known_path))

    # If source and destination are identical then
    # we should not write the file. gh-210
    if _file == dest_path:
        print('Final source and destination path should not be identical')
        return

    self.create_directory(dest_directory)

    if move is not True:
        compatability._copyfile(_file, dest_path)
        self.set_utime(media)
    else:
        # Capture the timestamps first so they can be re-applied after the
        # move.
        source_stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (source_stat.st_atime, source_stat.st_mtime))

    hash_db.add_hash(file_checksum, dest_path)
    hash_db.update_hash_db()

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    """Copy or move ``_file`` to its destination and record its checksum.

    :param str _file: Path of the source file.
    :param str destination: Root directory the file is imported into.
    :param media: Media object; must provide ``is_valid()`` and
        ``get_metadata()``.
    :param kwargs: ``move`` (default ``False``) moves instead of copying;
        ``allowDuplicate`` (default ``False``) skips the duplicate check.
    :returns: The destination path on success, ``None`` when the file is
        skipped (invalid media, no checksum, or a duplicate that still
        exists at its recorded location).
    """
    should_move = kwargs.get('move', False)
    duplicates_ok = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    target_dir = os.path.join(destination, self.get_folder_path(metadata))
    target_path = os.path.join(target_dir, self.get_file_name(media))

    db = Db()
    digest = db.checksum(_file)
    if digest is None:
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # When duplicates are disallowed, a previously seen checksum blocks the
    # import only if the recorded file is still present; otherwise the
    # stale entry is logged and the file is imported again.
    recorded_path = db.get_hash(digest)
    if duplicates_ok is False and recorded_path is not None:
        if os.path.isfile(recorded_path):
            log.info('%s already exists at %s. Skipping...' % (
                _file,
                recorded_path
            ))
            return
        log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
            _file,
            recorded_path
        ))

    self.create_directory(target_dir)

    if should_move is True:
        # Read the timestamps before the move so they can be restored.
        original_stat = os.stat(_file)
        shutil.move(_file, target_path)
        os.utime(
            target_path,
            (original_stat.st_atime, original_stat.st_mtime)
        )
    else:
        # Do not use copy2(): it has an issue when copying to a
        # network/mounted drive. A plain copy followed by set_utime()
        # gets the job done.
        shutil.copy(_file, target_path)
        self.set_utime(media)

    db.add_hash(digest, target_path)
    db.update_hash_db()

    return target_path