def process_checksum(self, _file, allow_duplicate):
    """Compute the checksum of ``_file`` and decide whether to import it.

    Returns the checksum string when the file should be imported, or
    None when the checksum could not be computed or the file is a known
    duplicate that still exists on disk.
    """
    hash_db = Db()
    file_hash = hash_db.checksum(_file)
    if file_hash is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    # When duplicates are disallowed, look the checksum up in the hash
    # database. A hit only counts as a duplicate if the previously
    # recorded file still exists at its recorded location; otherwise we
    # log it and let the import proceed.
    recorded_path = hash_db.get_hash(file_hash)
    if allow_duplicate is False and recorded_path is not None:
        if os.path.isfile(recorded_path):
            log.info('%s already at %s.' % (
                _file,
                recorded_path
            ))
            return None
        log.info('%s matched checksum but file not found at %s.' % (  # noqa
            _file,
            recorded_path
        ))
    return file_hash
def test_backup_hash_db():
    """backup_hash_db() must create a backup file on disk."""
    backup_path = Db().backup_hash_db()
    backup_exists = os.path.isfile(backup_path)
    # Clean up before asserting so a failure does not leave the file behind.
    os.remove(backup_path)
    assert backup_exists, backup_path
def test_checksum():
    """checksum() must produce the known SHA-256 digest of plain.jpg."""
    expected = 'd5eb755569ddbc8a664712d2d7d6e0fa1ddfcdb378475e4a6758dc38d5ea9a16'
    source_path = helper.get_file('plain.jpg')
    assert Db().checksum(source_path) == expected, 'Checksum for plain.jpg did not match'
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1
    signatures of media files.

    Backs up and resets the existing hash database, then re-hashes every
    file under *source* whose extension is claimed by a media subclass.
    Files with unsupported extensions are recorded as failures in the
    result report. Exits the process when *source* is not a directory.

    Fix: removed the unused local ``valid_files`` set that was created
    but never read or written.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Collect every extension supported by the registered media classes.
    extensions = set()
    subclasses = get_all_subclasses(Base)
    for cls in subclasses:
        extensions.update(cls.extensions)

    all_files = set()
    all_files.update(FILESYSTEM.get_all_files(source, None))

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        # Skip files whose extension no media class recognizes.
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            result.append((current_file, False))
            continue

        result.append((current_file, True))
        db.add_hash(db.checksum(current_file), current_file)

    db.update_hash_db()
    result.write()
def test_check_hash_does_not_exist():
    """check_hash() must be False for a key that was never added."""
    missing_key = helper.random_string(10)
    assert Db().check_hash(missing_key) == False, 'Lookup for hash that should not exist returned True'
def test_get_hash_does_not_exist():
    """get_hash() must return None for an unknown key."""
    missing_key = helper.random_string(10)
    assert Db().get_hash(missing_key) is None, 'Lookup for hash that should not exist did not return None'
def test_add_location():
    """A stored location must be retrievable by its own coordinates."""
    db = Db()
    latitude, longitude, name = helper.get_test_location()
    db.add_location(latitude, longitude, name)
    # A 5 meter threshold: looking up the exact coordinates must match.
    assert name == db.get_location_name(latitude, longitude, 5)
def test_get_hash_exists():
    """get_hash() must return the stored value for a known key."""
    db = Db()
    key = helper.random_string(10)
    value = helper.random_string(12)
    # Explicitly pass False for the write flag (third parameter).
    db.add_hash(key, value, False)
    assert db.get_hash(key) == value, 'Lookup for hash that exists did not return value'
def test_get_all_empty():
    """Iterating a freshly reset hash db must yield no entries."""
    db = Db()
    db.reset_hash_db()
    seen = 0
    for _key, _value in db.all():
        seen += 1
    assert seen == 0, seen
def test_get_location_coordinates_does_not_exists():
    """Coordinate lookup for an unknown place name must return None."""
    db = Db()
    latitude, longitude, name = helper.get_test_location()
    # Randomize the name and coordinates so no stored entry can match.
    name = '%s-%s' % (name, helper.random_string(10))
    latitude = helper.random_coordinate(latitude, 1)
    longitude = helper.random_coordinate(longitude, 1)
    assert db.get_location_coordinates(name) is None
def test_get_location_name_outside_threshold():
    """A lookup farther away than the distance threshold must not match."""
    db = Db()
    latitude, longitude, name = helper.get_test_location()
    db.add_location(latitude, longitude, name)
    # Shift by ~1 degree, far outside the 800 meter threshold below.
    shifted_latitude = helper.random_coordinate(latitude, 1)
    shifted_longitude = helper.random_coordinate(longitude, 1)
    # 800 meters
    assert db.get_location_name(shifted_latitude, shifted_longitude, 800) is None
def _verify():
    """Check that every file recorded in the hash db still exists and
    still matches its recorded checksum, writing one pass/fail result
    per file."""
    result = Result()
    db = Db()
    for recorded_checksum, file_path in db.all():
        # A file passes only if it is still on disk and unchanged.
        ok = (
            os.path.isfile(file_path) and
            recorded_checksum == db.checksum(file_path)
        )
        result.append((file_path, ok))
    result.write()
def test_get_location_name_within_threshold():
    """A lookup within the distance threshold must return the stored name.

    Fix: removed leftover debug ``print()`` calls that polluted test
    output and served no assertion purpose.
    """
    db = Db()
    latitude, longitude, name = helper.get_test_location()
    db.add_location(latitude, longitude, name)
    # Shift by a small amount that stays inside the threshold below.
    new_latitude = helper.random_coordinate(latitude, 4)
    new_longitude = helper.random_coordinate(longitude, 4)
    # 10 miles
    retrieved_name = db.get_location_name(new_latitude, new_longitude, 1600*10)
    assert name == retrieved_name, 'Name (%r) did not match retrieved name (%r)' % (name, retrieved_name)
def test_get_location_coordinates_exists():
    """Coordinates stored under a name must be returned by name lookup."""
    db = Db()
    latitude, longitude, name = helper.get_test_location()
    # Randomize so this entry is unique to the test run.
    name = '%s-%s' % (name, helper.random_string(10))
    latitude = helper.random_coordinate(latitude, 1)
    longitude = helper.random_coordinate(longitude, 1)
    db.add_location(latitude, longitude, name)
    location = db.get_location_coordinates(name)
    assert location is not None
    assert location[0] == latitude
    assert location[1] == longitude
def process_file(self, _file, destination, media, **kwargs):
    """Validate a media file, compute its destination path and return
    its checksum, or None when the file is invalid, unhashable, or a
    duplicate that still exists on disk."""
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    media.set_original_name()
    metadata = media.get_metadata()

    folder = self.destination_folder.get_folder_path(metadata)
    dest_directory = os.path.join(destination, folder)
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    # Duplicate detection: a checksum hit only blocks the import when
    # the previously imported file still exists at its recorded path.
    checksum_file = db.get_hash(checksum)
    if allow_duplicate is False and checksum_file is not None:
        if os.path.isfile(checksum_file):
            log.info('%s already at %s.' % (
                _file,
                checksum_file
            ))
            return None
        log.info('%s matched checksum but file not found at %s.' % (  # noqa
            _file,
            checksum_file
        ))
    return checksum
def place_name(lat, lon):
    """Resolve coordinates to a place name, preferring the local cache
    (3km match radius) over a reverse-geocoding API call. Returns None
    when nothing could be resolved."""
    db = Db()
    # A cache hit within 3km wins outright.
    cached = db.get_location_name(lat, lon, 3000)
    if cached is not None:
        return cached

    resolved = None
    geolocation_info = reverse_lookup(lat, lon)
    if geolocation_info is not None and 'address' in geolocation_info:
        address = geolocation_info['address']
        # Prefer the most specific component: city, then state, country.
        for component in ('city', 'state', 'country'):
            if component in address:
                resolved = address[component]
                break

    if resolved is not None:
        db.add_location(lat, lon, resolved)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    return resolved
def process_file(self, _file, destination, media, **kwargs):
    """Copy or move ``_file`` into ``destination``.

    The destination folder and file name are derived from the media
    metadata. Files whose checksum already exists in the hash database
    are skipped unless ``allowDuplicate`` is passed. Returns the final
    destination path, or None when the file was skipped.

    Fix: the original used Python 2 ``print`` statements, which are a
    syntax error under Python 3; they are now ``print()`` calls.
    """
    move = False
    if 'move' in kwargs:
        move = kwargs['move']

    allow_duplicate = False
    if 'allowDuplicate' in kwargs:
        allow_duplicate = kwargs['allowDuplicate']

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()
    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        if constants.debug is True:
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return
    if allow_duplicate is False and db.check_hash(checksum) is True:
        if constants.debug is True:
            print('%s already exists at %s. Skipping...' % (
                _file, db.get_hash(checksum)))
        return

    self.create_directory(dest_directory)

    if move is True:
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        # Preserve the original access/modification times after the move.
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def coordinates_by_name(name):
    """Resolve a place name to coordinates, consulting the cache before
    the geocoding API. Returns a dict with 'latitude'/'longitude' keys
    or None when the name cannot be resolved."""
    # Try to get cached location first
    cached = Db().get_location_coordinates(name)
    if cached is not None:
        return {
            'latitude': cached[0],
            'longitude': cached[1]
        }

    # If the name is not cached then we go ahead with an API lookup
    geolocation_info = lookup(name)
    if geolocation_info is None:
        return None
    if 'results' not in geolocation_info:
        return None
    results = geolocation_info['results']
    if len(results) == 0:
        return None
    if 'locations' not in results[0] or len(results[0]['locations']) == 0:
        return None

    # By default we use the first entry unless we find one with
    # geocodeQuality=city; if a city is found it wins and we stop.
    locations = results[0]['locations']
    use_location = locations[0]['latLng']
    for location in locations:
        if (
            'latLng' in location and
            'lat' in location['latLng'] and
            'lng' in location['latLng'] and
            location['geocodeQuality'].lower() == 'city'
        ):
            use_location = location['latLng']
            break

    return {
        'latitude': use_location['lat'],
        'longitude': use_location['lng']
    }
def coordinates_by_name(name):
    """Look up the coordinates for *name*, using the location cache
    first and falling back to the geocoding API. Returns a
    latitude/longitude dict or None."""
    # Try to get cached location first
    db = Db()
    cached_coordinates = db.get_location_coordinates(name)
    if cached_coordinates is not None:
        return {
            'latitude': cached_coordinates[0],
            'longitude': cached_coordinates[1]
        }

    # If the name is not cached then we go ahead with an API lookup
    geolocation_info = lookup(location=name)
    has_locations = (
        geolocation_info is not None and
        'results' in geolocation_info and
        len(geolocation_info['results']) != 0 and
        'locations' in geolocation_info['results'][0] and
        len(geolocation_info['results'][0]['locations']) != 0
    )
    if not has_locations:
        return None

    # By default we use the first entry unless we find one with
    # geocodeQuality=city, which takes precedence.
    candidates = geolocation_info['results'][0]['locations']
    chosen = candidates[0]['latLng']
    for candidate in candidates:
        if (
            'latLng' in candidate and
            'lat' in candidate['latLng'] and
            'lng' in candidate['latLng'] and
            candidate['geocodeQuality'].lower() == 'city'
        ):
            chosen = candidate['latLng']
            break

    return {
        'latitude': chosen['lat'],
        'longitude': chosen['lng']
    }
def _verify():
    """Verify every entry in the hash db against the filesystem,
    printing progress ('x' marks a missing or modified file) and
    writing one pass/fail result per file."""
    result = Result()
    db = Db()
    for expected_checksum, file_path in db.all():
        # Pass only if the file still exists and its checksum matches.
        if os.path.isfile(file_path) and expected_checksum == db.checksum(file_path):
            result.append((file_path, True))
            log.progress()
        else:
            result.append((file_path, False))
            log.progress('x')
    log.progress('', True)
    result.write()
def test_update_hash_db():
    """add_hash() without writing is in-memory only; update_hash_db()
    persists it so new Db instances can see it."""
    db = Db()
    key = helper.random_string(10)
    value = helper.random_string(12)

    # Added with the default write=False, so only this instance sees it.
    db.add_hash(key, value)
    assert db.check_hash(key) == True, 'Lookup for hash did not return True'

    # A fresh Db reads from disk and must not see the key yet.
    assert Db().check_hash(key) == False

    db.update_hash_db()

    # After persisting, a fresh Db must see the key.
    assert Db().check_hash(key) == True
def process_file(self, _file, destination, media, **kwargs):
    """Import a media file into ``destination``, skipping files whose
    checksum is already recorded unless ``allowDuplicate`` is passed.

    Supports ``move=True`` to move (preserving atime/mtime) instead of
    copying. Returns the destination path, or None when skipped.

    Fix: the original used Python 2 ``print`` statements, which are a
    syntax error under Python 3; they are now ``print()`` calls.
    """
    move = False
    if 'move' in kwargs:
        move = kwargs['move']

    allow_duplicate = False
    if 'allowDuplicate' in kwargs:
        allow_duplicate = kwargs['allowDuplicate']

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()
    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(media)
    dest_path = os.path.join(dest_directory, file_name)

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        if constants.debug is True:
            print('Could not get checksum for %s. Skipping...' % _file)
        return

    # If duplicates are not allowed and this hash exists in the db then we
    # return
    if allow_duplicate is False and db.check_hash(checksum) is True:
        if constants.debug is True:
            print('%s already exists at %s. Skipping...' % (
                _file,
                db.get_hash(checksum)
            ))
        return

    self.create_directory(dest_directory)

    if move is True:
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        # Preserve the original access/modification times after the move.
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        shutil.copy2(_file, dest_path)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def place_name(lat, lon):
    """Return a cached or reverse-geocoded place name for lat/lon, or
    None when nothing could be resolved."""
    db = Db()
    # A cache hit within a 3km radius wins outright.
    hit = db.get_location_name(lat, lon, 3000)
    if hit is not None:
        return hit

    resolved = None
    info = reverse_lookup(lat, lon)
    if info is not None and "address" in info:
        address = info["address"]
        # Most specific component first.
        if "city" in address:
            resolved = address["city"]
        elif "state" in address:
            resolved = address["state"]
        elif "country" in address:
            resolved = address["country"]

    if resolved is not None:
        db.add_location(lat, lon, resolved)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    return resolved
def test_add_hash_default_do_not_write():
    """By default add_hash() must not persist the entry to disk."""
    db = Db()
    key = helper.random_string(10)
    value = helper.random_string(12)
    # Default third parameter means write=False.
    db.add_hash(key, value)
    assert db.check_hash(key) == True, 'Lookup for hash did not return True'
    # A second Db instance reads from disk and must not see the key.
    assert Db().check_hash(key) == False
def test_add_hash_explicit_write():
    """add_hash(..., True) must persist the entry to disk immediately."""
    db = Db()
    key = helper.random_string(10)
    value = helper.random_string(12)
    # Explicit True third parameter means write-through.
    db.add_hash(key, value, True)
    assert db.check_hash(key) == True, 'Lookup for hash did not return True'
    # A second Db instance reads from disk and must see the key.
    assert Db().check_hash(key) == True
def place_name(lat, lon):
    """Map coordinates to a human-readable place name.

    The local cache is consulted first (3km match radius); only on a
    miss do we call the reverse-geocoding service, caching any result.
    Returns None when nothing could be resolved.
    """
    db = Db()
    cached_name = db.get_location_name(lat, lon, 3000)
    if cached_name is not None:
        return cached_name

    place = None
    response = reverse_lookup(lat, lon)
    if response is not None and 'address' in response:
        address = response['address']
        # Take the most specific available component.
        for part in ('city', 'state', 'country'):
            if part in address:
                place = address[part]
                break

    if place is not None:
        db.add_location(lat, lon, place)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    return place
def coordinates_by_name(name):
    """Resolve *name* to coordinates via the cache or the geocoding
    API. Returns {'latitude': ..., 'longitude': ...} or None."""
    # Try to get cached location first
    db = Db()
    cached_coordinates = db.get_location_coordinates(name)
    if cached_coordinates is not None:
        return {"latitude": cached_coordinates[0], "longitude": cached_coordinates[1]}

    # If the name is not cached then we go ahead with an API lookup
    geolocation_info = lookup(name)
    if geolocation_info is None:
        return None

    usable = (
        "results" in geolocation_info
        and len(geolocation_info["results"]) != 0
        and "locations" in geolocation_info["results"][0]
        and len(geolocation_info["results"][0]["locations"]) != 0
    )
    if not usable:
        return None

    # Default to the first location; a geocodeQuality=city entry
    # takes precedence and stops the scan.
    locations = geolocation_info["results"][0]["locations"]
    use_location = locations[0]["latLng"]
    for location in locations:
        if (
            "latLng" in location
            and "lat" in location["latLng"]
            and "lng" in location["latLng"]
            and location["geocodeQuality"].lower() == "city"
        ):
            use_location = location["latLng"]
            break

    return {"latitude": use_location["lat"], "longitude": use_location["lng"]}
def test_regenerate_valid_source():
    """_generate_db over a folder with one valid file must index it."""
    temporary_folder, folder = helper.create_working_folder()
    origin = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    reset_hash_db()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    db = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
def test_regenerate_valid_source():
    """Regenerating the db over a folder with one valid file must record
    that file's checksum."""
    temporary_folder, folder = helper.create_working_folder()
    origin = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    helper.reset_dbs()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    db = Db()
    helper.restore_dbs()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert '3c19a5d751cf19e093b7447297731124d9cc987d3f91a9d1872c3b1c1b15639a' in db.hash_db, db.hash_db
def test_reset_hash_db():
    """reset_hash_db() must clear previously added in-memory hashes."""
    db = Db()
    key = helper.random_string(10)
    value = helper.random_string(12)
    # Explicitly pass False for the write flag (third parameter).
    db.add_hash(key, value, False)
    assert key in db.hash_db, key
    db.reset_hash_db()
    assert key not in db.hash_db, key
def test_regenerate_valid_source_with_invalid_files():
    """_generate_db must index valid files and skip invalid ones."""
    temporary_folder, folder = helper.create_working_folder()
    origin_valid = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin_valid)
    origin_invalid = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid)

    reset_hash_db()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    db = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    # Valid file must be hashed; the invalid file's hash must be absent.
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db
def test_regenerate_valid_source_with_invalid_files():
    """Regeneration must record valid files and ignore invalid ones."""
    temporary_folder, folder = helper.create_working_folder()
    origin_valid = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin_valid)
    origin_invalid = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid)

    helper.reset_dbs()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    db = Db()
    helper.restore_dbs()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    # Valid file must be hashed; the invalid file's hash must be absent.
    assert '3c19a5d751cf19e093b7447297731124d9cc987d3f91a9d1872c3b1c1b15639a' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db
def test_get_all():
    """db.all() must yield exactly the key/value pairs that were added."""
    db = Db()
    db.reset_hash_db()

    keys = []
    values = []
    for _ in range(10):
        keys.append(helper.random_string(10))
        values.append(helper.random_string(12))
        db.add_hash(keys[-1], values[-1], False)

    count = 0
    for key, value in db.all():
        assert key in keys, key
        assert value in values, value
        count += 1

    assert count == 10, count
def place_name(lat, lon):
    """Resolve coordinates to a dict of place-name components.

    Checks the location cache (3km radius) first, then falls back to a
    geocoding lookup. Always returns a dict containing at least a
    'default' key; when nothing can be resolved the module default
    location is used.
    """
    fallback = {'default': __DEFAULT_LOCATION__}
    if lat is None or lon is None:
        return fallback

    # Callers may pass strings; normalize to floats once up front.
    lat = lat if isinstance(lat, float) else float(lat)
    lon = lon if isinstance(lon, float) else float(lon)

    db = Db()
    # 3km distance radius for a cache match.
    cached = db.get_location_name(lat, lon, 3000)
    # Only honor dict-shaped cache entries; older versions stored plain
    # strings and those must be refreshed. See gh-160.
    if isinstance(cached, dict):
        return cached

    lookup_place_name = {}
    geolocation_info = lookup(lat=lat, lon=lon)
    if geolocation_info is not None and 'address' in geolocation_info:
        address = geolocation_info['address']
        log.info('Location: "%s"' % geolocation_info['display_name'])
        # Ordered most-specific-first; town comes after city for
        # backwards compatibility (gh-386). The first component found
        # also becomes the 'default'.
        for loc in ['hamlet', 'village', 'city', 'town', 'state', 'country']:
            if loc in address:
                lookup_place_name[loc] = address[loc]
                if 'default' not in lookup_place_name:
                    lookup_place_name['default'] = address[loc]

    if lookup_place_name:
        db.add_location(lat, lon, lookup_place_name)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    if 'default' not in lookup_place_name:
        lookup_place_name = fallback

    return lookup_place_name
def place_name(lat, lon):
    """Resolve coordinates to place-name components, using the cache
    before a geocoding lookup.

    Returns a dict of address components (including a 'default' key) or
    the string 'Unknown Location' when nothing could be resolved.

    Fix: the cache-write guard was ``lookup_place_name is not {}`` — an
    identity comparison against a fresh dict literal, which is always
    True. Replaced with a truthiness check expressing the intended
    "only cache non-empty results" condition.
    """
    # Convert lat/lon to floats
    if not isinstance(lat, float):
        lat = float(lat)
    if not isinstance(lon, float):
        lon = float(lon)

    # Try to get cached location first
    db = Db()
    # 3km distance radius for a match
    cached_place_name = db.get_location_name(lat, lon, 3000)
    # We check that it's a dict to coerce an upgrade of the location
    # db from a string location to a dictionary. See gh-160.
    if isinstance(cached_place_name, dict):
        return cached_place_name

    lookup_place_name = {}
    geolocation_info = lookup(lat=lat, lon=lon)
    if geolocation_info is not None and 'address' in geolocation_info:
        address = geolocation_info['address']
        for loc in ['city', 'state', 'country']:
            if loc in address:
                lookup_place_name[loc] = address[loc]
                # In many cases the desired key is not available so we
                # set the most specific as the default.
                if 'default' not in lookup_place_name:
                    lookup_place_name['default'] = address[loc]

    if 'default' not in lookup_place_name:
        lookup_place_name = 'Unknown Location'

    if lookup_place_name:
        db.add_location(lat, lon, lookup_place_name)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    return lookup_place_name
def process_file(self, _file, destination, media, **kwargs):
    """Import a media file into ``destination`` with plugin hooks.

    Validates the media, deduplicates by checksum, runs every plugin's
    ``before()`` hook, moves or copies the file (handling exiftool's
    ``_original`` backup file), records the checksum, then runs every
    plugin's ``after()`` hook. Returns the destination path, or None
    when the file was skipped or a plugin failed.

    Fix: the failure message logged after ``run_all_after`` said
    "pre-run"; it now correctly says "post-run".
    """
    move = False
    if 'move' in kwargs:
        move = kwargs['move']

    allow_duplicate = False
    if 'allowDuplicate' in kwargs:
        allow_duplicate = kwargs['allowDuplicate']

    # Capture the source file's times before anything touches it.
    stat_info_original = os.stat(_file)
    metadata = media.get_metadata()

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    checksum = self.process_checksum(_file, allow_duplicate)
    if checksum is None:
        log.info('Original checksum returned None for %s. Skipping...' % _file)
        return

    # Run `before()` for every loaded plugin and if any of them raise an exception
    # then we skip importing the file and log a message.
    plugins_run_before_status = self.plugins.run_all_before(_file, destination)
    if plugins_run_before_status == False:
        log.warn('At least one plugin pre-run failed for %s' % _file)
        return

    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(metadata)
    dest_path = os.path.join(dest_directory, file_name)

    media.set_original_name()

    # If source and destination are identical then
    # we should not write the file. gh-210
    if _file == dest_path:
        print('Final source and destination path should not be identical')
        return

    self.create_directory(dest_directory)

    # exiftool renames the original file by appending '_original' to the
    # file name. A new file is written with new tags with the initial file
    # name. See exiftool man page for more details.
    exif_original_file = _file + '_original'

    # Check if the source file was processed by exiftool and an _original
    # file was created.
    exif_original_file_exists = False
    if os.path.exists(exif_original_file):
        exif_original_file_exists = True

    if move is True:
        stat = os.stat(_file)
        # Move the processed file into the destination directory
        shutil.move(_file, dest_path)

        if exif_original_file_exists is True:
            # We can remove it as we don't need the initial file.
            os.remove(exif_original_file)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        if exif_original_file_exists is True:
            # Move the newly processed file with any updated tags to the
            # destination directory
            shutil.move(_file, dest_path)
            # Move the exif _original back to the initial source file
            shutil.move(exif_original_file, _file)
        else:
            compatability._copyfile(_file, dest_path)

        # Set the utime based on what the original file contained
        # before we made any changes.
        # Then set the utime on the destination file based on metadata.
        os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
        self.set_utime_from_metadata(metadata, dest_path)

    db = Db()
    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    # Run `after()` for every loaded plugin and if any of them raise an exception
    # then we skip importing the file and log a message.
    plugins_run_after_status = self.plugins.run_all_after(_file, destination, dest_path, metadata)
    if plugins_run_after_status == False:
        log.warn('At least one plugin post-run failed for %s' % _file)
        return

    return dest_path
def process_file(self, _file, destination, media, **kwargs):
    """Copy or move a validated media file into its metadata-derived
    destination, skipping duplicates that still exist on disk. Returns
    the final path or None when skipped."""
    move = kwargs['move'] if 'move' in kwargs else False
    allow_duplicate = kwargs['allowDuplicate'] if 'allowDuplicate' in kwargs else False

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()
    dest_directory = os.path.join(destination, self.get_folder_path(metadata))
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # A checksum hit is only a duplicate when the recorded file still
    # exists at its recorded location; otherwise import it again.
    checksum_file = db.get_hash(checksum)
    if allow_duplicate is False and checksum_file is not None:
        if os.path.isfile(checksum_file):
            log.info('%s already exists at %s. Skipping...' % (_file, checksum_file))
            return
        log.info(
            '%s matched checksum but file not found at %s. Importing again...' % (  # noqa
                _file, checksum_file))

    self.create_directory(dest_directory)

    if move is True:
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        # Do not use copy2(), will have an issue when copying to a
        # network/mounted drive using copy and manual
        # set_date_from_filename gets the job done
        shutil.copy(_file, dest_path)
        self.set_utime(media)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path
def _generate_db(source, debug):
    """Regenerate the hash.json database which contains all of the
    sha256 signatures of media files. The hash.json file is located at
    ~/.elodie/."""
    constants.debug = debug
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    hash_db = Db()
    # Keep a backup before wiping the current database.
    hash_db.backup_hash_db()
    hash_db.reset_hash_db()

    for current_file in FILESYSTEM.get_all_files(source):
        result.append((current_file, True))
        hash_db.add_hash(hash_db.checksum(current_file), current_file)
        log.progress()

    hash_db.update_hash_db()
    log.progress('', True)
    result.write()
def test_check_hash_does_not_exist():
    """An unknown random key must not be reported as present."""
    unknown = helper.random_string(10)
    assert Db().check_hash(unknown) == False, 'Lookup for hash that should not exist returned True'
def test_get_hash_does_not_exist():
    """An unknown random key must resolve to None."""
    unknown = helper.random_string(10)
    assert Db().get_hash(unknown) is None, 'Lookup for hash that should not exist did not return None'
def process_file(self, _file, destination, media, **kwargs):
    """Import one media file into ``destination``.

    Skips invalid media and known duplicates whose recorded copy still
    exists. Moves (preserving times) or copies depending on the
    ``move`` kwarg, records the checksum, and returns the final path,
    or None when the file was skipped.
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    if not media.is_valid():
        print('%s is not a valid media file. Skipping...' % _file)
        return

    metadata = media.get_metadata()

    folder = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, folder)
    dest_path = os.path.join(dest_directory, self.get_file_name(media))

    db = Db()
    checksum = db.checksum(_file)
    if checksum is None:
        log.info('Could not get checksum for %s. Skipping...' % _file)
        return

    # Duplicate handling: a checksum match only blocks the import when
    # the previously imported file is still on disk; otherwise we log
    # the stale entry and import the file again.
    checksum_file = db.get_hash(checksum)
    if allow_duplicate is False and checksum_file is not None:
        if os.path.isfile(checksum_file):
            log.info('%s already exists at %s. Skipping...' % (
                _file,
                checksum_file
            ))
            return
        log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
            _file,
            checksum_file
        ))

    self.create_directory(dest_directory)

    if move is True:
        stat = os.stat(_file)
        shutil.move(_file, dest_path)
        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        # Do not use copy2(), will have an issue when copying to a
        # network/mounted drive using copy and manual
        # set_date_from_filename gets the job done
        shutil.copy(_file, dest_path)
        self.set_utime(media)

    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    return dest_path