def test_checksum():
    db = Manifest()
    src = helper.get_file('plain.jpg')
    checksum = db.checksum(src)
    assert checksum == 'd5eb755569ddbc8a664712d2d7d6e0fa1ddfcdb378475e4a6758dc38d5ea9a16', 'Checksum for plain.jpg did not match'

def test_backup_hash_db():
    db = Manifest()
    backup_file_name = db.backup_hash_db()
    file_exists = os.path.isfile(backup_file_name)
    os.remove(backup_file_name)

    assert file_exists, backup_file_name

def test_add_location():
    db = Manifest()
    latitude, longitude, name = helper.get_test_location()

    db.add_location(latitude, longitude, name)
    retrieved_name = db.get_location_name(latitude, longitude, 5)

    assert name == retrieved_name

def test_get_all_empty():
    db = Manifest()
    db.reset_hash_db()

    counter = 0
    for key, value in db.all():
        counter += 1

    # there's a final iteration because of the generator
    assert counter == 0, counter

def test_get_hash_exists():
    db = Manifest()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)

    assert db.get_hash(random_key) == random_value, 'Lookup for hash that exists did not return value'

def test_get_location_coordinates_does_not_exists():
    db = Manifest()
    latitude, longitude, name = helper.get_test_location()
    name = '%s-%s' % (name, helper.random_string(10))
    latitude = helper.random_coordinate(latitude, 1)
    longitude = helper.random_coordinate(longitude, 1)

    location = db.get_location_coordinates(name)

    assert location is None

def test_get_location_name_outside_threshold():
    db = Manifest()
    latitude, longitude, name = helper.get_test_location()
    db.add_location(latitude, longitude, name)

    new_latitude = helper.random_coordinate(latitude, 1)
    new_longitude = helper.random_coordinate(longitude, 1)

    # 800 meters
    retrieved_name = db.get_location_name(new_latitude, new_longitude, 800)

    assert retrieved_name is None

def test_get_location_name_within_threshold():
    db = Manifest()
    latitude, longitude, name = helper.get_test_location()
    db.add_location(latitude, longitude, name)

    new_latitude = helper.random_coordinate(latitude, 4)
    new_longitude = helper.random_coordinate(longitude, 4)

    # 10 miles (~1600 meters per mile)
    retrieved_name = db.get_location_name(new_latitude, new_longitude, 1600 * 10)

    assert name == retrieved_name, 'Name (%r) did not match retrieved name (%r)' % (name, retrieved_name)

def test_get_location_coordinates_exists():
    db = Manifest()
    latitude, longitude, name = helper.get_test_location()
    name = '%s-%s' % (name, helper.random_string(10))
    latitude = helper.random_coordinate(latitude, 1)
    longitude = helper.random_coordinate(longitude, 1)
    db.add_location(latitude, longitude, name)

    location = db.get_location_coordinates(name)

    assert location is not None
    assert location[0] == latitude
    assert location[1] == longitude

def _find(manifest_path, target_file_name):
    """Print every manifest entry whose target file name matches
    target_file_name."""
    manifest = Manifest().load_from_file(manifest_path)
    for k, v in manifest.entries.items():
        if v["target"]["name"] == target_file_name:
            print("Hash {}".format(k))
            print(json.dumps(v, indent=2))
    print("Search complete.")

def _verify(debug):
    """Verify that each file in the hash database still exists and still
    matches its recorded checksum."""
    constants.debug = debug
    result = Result()
    db = Manifest()
    for checksum, file_path in db.all():
        if not os.path.isfile(file_path):
            result.append((file_path, False))
            log.progress('x')
            continue

        actual_checksum = db.checksum(file_path)
        if checksum == actual_checksum:
            result.append((file_path, True))
            log.progress()
        else:
            result.append((file_path, False))
            log.progress('x')

    log.progress('', True)
    result.write()

def test_add_hash_explicit_write():
    db = Manifest()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit True value as 3rd param
    db.add_hash(random_key, random_value, True)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key exists
    db2 = Manifest()
    assert db2.check_hash(random_key) == True

def test_add_hash_default_do_not_write():
    db = Manifest()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with default False value as 3rd param
    db.add_hash(random_key, random_value)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key does not exist
    db2 = Manifest()
    assert db2.check_hash(random_key) == False

def _analyze(manifest_path, debug):
    """Print statistics for a manifest, including how many hashes have
    multiple source files."""
    constants.debug = debug
    manifest = Manifest()
    manifest.load_from_file(manifest_path)
    manifest_key_count = len(manifest)

    duplicate_source_file_count = {}
    # Could be made into a reduce, but I want more functionality here
    # (i.e. a list of the duplicated files)
    for k, v in manifest.entries.items():
        if len(v["sources"]) > 1:
            length = len(v["sources"])
            if length in duplicate_source_file_count:
                duplicate_source_file_count[length] += 1
            else:
                duplicate_source_file_count[length] = 1

    log.info("Statistics:")
    log.info("Manifest: Total Hashes {}".format(manifest_key_count))
    for k, v in duplicate_source_file_count.items():
        log.info("Manifest: Duplicate (x{}) Source Files {}".format(k, v))

def coordinates_by_name(name):
    # Try to get cached location first
    db = Manifest()
    cached_coordinates = db.get_location_coordinates(name)
    if cached_coordinates is not None:
        return {
            'latitude': cached_coordinates[0],
            'longitude': cached_coordinates[1]
        }

    # If the name is not cached then we go ahead with an API lookup
    geolocation_info = lookup(location=name)

    if geolocation_info is not None:
        if (
            'results' in geolocation_info and
            len(geolocation_info['results']) != 0 and
            'locations' in geolocation_info['results'][0] and
            len(geolocation_info['results'][0]['locations']) != 0
        ):
            # By default we use the first entry unless we find one with
            # geocodeQuality=city.
            geolocation_result = geolocation_info['results'][0]
            use_location = geolocation_result['locations'][0]['latLng']

            # Loop over the locations to see if we come across a
            # geocodeQuality=city.
            # If we find a city we set that to the use_location and break.
            for location in geolocation_result['locations']:
                if (
                    'latLng' in location and
                    'lat' in location['latLng'] and
                    'lng' in location['latLng'] and
                    location['geocodeQuality'].lower() == 'city'
                ):
                    use_location = location['latLng']
                    break

            return {
                'latitude': use_location['lat'],
                'longitude': use_location['lng']
            }

    return None

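# A minimal usage sketch for coordinates_by_name (hypothetical place name;
# assumes the geocoding backend behind lookup() is configured):
#
#   coords = coordinates_by_name('Sunnyvale, CA')
#   if coords is not None:
#       print(coords['latitude'], coords['longitude'])
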
def test_regenerate_valid_source():
    temporary_folder, folder = helper.create_working_folder()

    origin = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    helper.reset_dbs()
    runner = CliRunner()
    result = runner.invoke(elodie._generate_db, ['--source', folder])
    db = Manifest()
    helper.restore_dbs()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert '3c19a5d751cf19e093b7447297731124d9cc987d3f91a9d1872c3b1c1b15639a' in db.hash_db, db.hash_db

def _merge(manifest_paths, output_path, debug):
    """Merge multiple manifest files into one and write the result to
    output_path."""
    constants.debug = debug
    manifest = Manifest()
    for manifest_path in manifest_paths:
        manifest.load_from_file(manifest_path)

    manifest.write(output_path, overwrite=False)

    manifest_key_count = len(manifest)
    log.info("Statistics:")
    log.info("Merged Manifest: Total Hashes {}".format(manifest_key_count))

def test_reset_hash_db():
    db = Manifest()
    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)

    assert random_key in db.hash_db, random_key
    db.reset_hash_db()
    assert random_key not in db.hash_db, random_key

def test_regenerate_valid_source_with_invalid_files():
    temporary_folder, folder = helper.create_working_folder()

    origin_valid = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin_valid)
    origin_invalid = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid)

    helper.reset_dbs()
    runner = CliRunner()
    result = runner.invoke(elodie._generate_db, ['--source', folder])
    db = Manifest()
    helper.restore_dbs()

    shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert '3c19a5d751cf19e093b7447297731124d9cc987d3f91a9d1872c3b1c1b15639a' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db

def test_get_all():
    db = Manifest()
    db.reset_hash_db()

    random_keys = []
    random_values = []
    for _ in range(10):
        random_keys.append(helper.random_string(10))
        random_values.append(helper.random_string(12))
        db.add_hash(random_keys[-1], random_values[-1], False)

    counter = 0
    for key, value in db.all():
        assert key in random_keys, key
        assert value in random_values, value
        counter += 1

    assert counter == 10, counter

def place_name(lat, lon):
    lookup_place_name_default = {'default': __DEFAULT_LOCATION__}
    if lat is None or lon is None:
        return lookup_place_name_default

    # Convert lat/lon to floats
    if not isinstance(lat, float):
        lat = float(lat)
    if not isinstance(lon, float):
        lon = float(lon)

    # Try to get cached location first
    db = Manifest()
    # 3km distance radius for a match
    cached_place_name = db.get_location_name(lat, lon, 3000)
    # We check that it's a dict to coerce an upgrade of the location
    # db from a string location to a dictionary. See gh-160.
    if isinstance(cached_place_name, dict):
        return cached_place_name

    lookup_place_name = {}
    geolocation_info = lookup(lat=lat, lon=lon)
    if geolocation_info is not None and 'address' in geolocation_info:
        address = geolocation_info['address']
        for loc in ['city', 'state', 'country']:
            if loc in address:
                lookup_place_name[loc] = address[loc]
                # In many cases the desired key is not available so we
                # set the most specific as the default.
                if 'default' not in lookup_place_name:
                    lookup_place_name['default'] = address[loc]

    if lookup_place_name:
        db.add_location(lat, lon, lookup_place_name)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    if 'default' not in lookup_place_name:
        lookup_place_name = lookup_place_name_default

    return lookup_place_name

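# A minimal usage sketch for place_name (hypothetical coordinates; the
# function always returns a dict with at least a 'default' key):
#
#   place = place_name(37.3688, -122.0363)
#   print(place.get('city', place['default']))
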
def _generate_db(source, debug):
    """Regenerate the hash.json database which contains all of the sha256
    signatures of media files. The hash.json file is located at ~/.elodie/.
    """
    constants.debug = debug
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    db = Manifest()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in FILESYSTEM.get_all_files(source):
        result.append((current_file, True))
        db.add_hash(db.checksum(current_file), current_file)
        log.progress()

    db.update_hash_db()
    log.progress('', True)
    result.write()

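# A minimal invocation sketch for _generate_db, mirroring
# test_regenerate_valid_source above, which drives this command through
# Click's test runner (assumes _generate_db is wired up as a Click command
# in this module; the source path is hypothetical):
#
#   from click.testing import CliRunner
#   CliRunner().invoke(_generate_db, ['--source', '/path/to/media'])
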
def _import(source, config_path, manifest_path, allow_duplicates, dryrun,
            debug, move=False, indent_manifest=False,
            no_overwrite_manifest=False):
    """Import files or directories by reading their EXIF and organizing them
    accordingly.
    """
    start_time = round(time.time())

    constants.debug = debug
    has_errors = False
    result = Result()

    # Load the configuration from the json file.
    config = Config().load_from_file(config_path)

    source = config["sources"][0]  # For now, only one.
    target = config["targets"][0]  # For now, only one target allowed...but data structure allows more

    source_file_path = source["file_path"]

    manifest = Manifest()

    if manifest_path is not None:
        manifest.load_from_file(manifest_path)

    log_base_path, _ = os.path.split(manifest.file_path)
    FILESYSTEM.create_directory(os.path.join(log_base_path, '.elodie'))
    log_path = os.path.join(log_base_path, '.elodie',
                            'import_{}.log'.format(utility.timestamp_string()))

    def signal_handler(sig, frame):
        log.warn('[ ] Import cancelled')
        log.write(log_path)
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    original_manifest_key_count = len(manifest)

    # destination = _decode(destination)
    # destination = os.path.abspath(os.path.expanduser(destination))

    exiftool_addedargs = [
        # '-overwrite_original',
        u'-config',
        u'"{}"'.format(constants.exiftool_config)
    ]

    file_generator = FILESYSTEM.get_all_files(source_file_path, None)
    source_file_count = 0

    with ExifTool(addedargs=exiftool_addedargs) as et:
        while True:
            file_batch = list(
                itertools.islice(file_generator, constants.exiftool_batch_size))
            if len(file_batch) == 0:
                break

            # This will cause slight discrepancies in file counts: since
            # elodie.json is counted but not imported, each one will set the
            # count off by one.
            source_file_count += len(file_batch)

            metadata_list = et.get_metadata_batch(file_batch)
            if not metadata_list:
                raise Exception("Metadata scrape failed.")

            # Key on the filename to make for easy access.
            metadata_dict = dict((os.path.abspath(el["SourceFile"]), el)
                                 for el in metadata_list)

            for current_file in file_batch:
                # Don't import localized config files.
                if current_file.endswith("elodie.json"):  # Faster than an os.path.split
                    continue
                try:
                    result = import_file(current_file, config, manifest,
                                         metadata_dict, move=move,
                                         dryrun=dryrun,
                                         allow_duplicates=allow_duplicates)
                except Exception as e:
                    log.warn("[!] Error importing {}: {}".format(
                        current_file, e))
                    result = False
                has_errors = has_errors or not result

        exiftool_waiting_time = et.waiting_time

    manifest.write(indent=indent_manifest,
                   overwrite=(not no_overwrite_manifest))

    manifest_key_count = len(manifest)

    try:
        total_time = round(time.time() - start_time)
        log.info("Statistics:")
        log.info("Source: File Count {}".format(source_file_count))
        log.info("Manifest: New Hashes {}".format(
            manifest_key_count - original_manifest_key_count))
        log.info("Manifest: Total Hashes {}".format(manifest_key_count))
        log.info("Time: Total {}s".format(total_time))
        log.info("Time: Files/sec {}".format(
            round(source_file_count / total_time)))
        log.info("Time: Waiting on ExifTool {}s".format(
            round(exiftool_waiting_time)))
    except Exception as e:
        log.error("[!] Error generating statistics: {}".format(e))

    log.write(log_path)

    if has_errors:
        sys.exit(1)

def test_check_hash_does_not_exist():
    db = Manifest()
    random_key = helper.random_string(10)

    assert db.check_hash(random_key) == False, 'Lookup for hash that should not exist returned True'

def test_get_hash_does_not_exist():
    db = Manifest()
    random_key = helper.random_string(10)

    assert db.get_hash(random_key) is None, 'Lookup for hash that should not exist did not return None'

def test_init_writes_files():
    db = Manifest()

    assert os.path.isfile(constants.hash_db) == True
    assert os.path.isfile(constants.location_db) == True