def build_recording_toolchain(config, key_storage_pool, encryption_conf):
    """Instantiate the whole toolchain of sensors and aggregators, depending on the config.

    All settings are read from the "usersettings" section of `config`.

    Returns None if no sensor is enabled by config, or if none of the enabled
    sensors is allowed by app permissions. Otherwise returns a dict with the keys
    "sensors_manager", "data_aggregators", "tarfile_aggregators",
    "container_storage", "free_keys_generator_worker" (None when
    "max_free_keys_per_type" is 0) and "local_key_storage".
    """

    # TODO make this part more resilient against exceptions

    def read_setting(name, fallback, converter=None):
        # Fetch a user setting, then optionally coerce it to the wanted type
        raw_value = config.getdefault("usersettings", name, fallback)
        return converter(raw_value) if converter else raw_value

    # BEFORE ANYTHING we ensure that it's worth building all the nodes below
    # Note that values are stored as "0" or "1", so bool() is not a proper converter
    record_gyroscope = read_setting("record_gyroscope", False, converter=int)
    record_gps = read_setting("record_gps", False, converter=int)
    record_microphone = read_setting("record_microphone", False, converter=int)

    if not (record_gyroscope or record_gps or record_microphone):
        logger.warning("No sensor is enabled, aborting recorder setup")
        return None

    max_containers_count = read_setting("max_containers_count", 100, converter=int)
    container_recording_duration_s = read_setting(
        "container_recording_duration_s", 60, converter=float
    )
    container_member_duration_s = read_setting(
        "container_member_duration_s", 60, converter=float
    )
    polling_interval_s = read_setting("polling_interval_s", 0.5, converter=float)
    max_free_keys_per_type = read_setting("max_free_keys_per_type", 1, converter=int)

    logged_settings = {
        "max_containers_count": max_containers_count,
        "container_recording_duration_s": container_recording_duration_s,
        "container_member_duration_s": container_member_duration_s,
        "polling_interval_s": polling_interval_s,
    }
    logger.info("Toolchain configuration is %s", str(logged_settings))

    container_storage = ContainerStorage(
        default_encryption_conf=encryption_conf,
        containers_dir=INTERNAL_CONTAINERS_DIR,
        max_containers_count=max_containers_count,
        key_storage_pool=key_storage_pool,
    )

    # Tarfile builder level

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage,
        max_duration_s=container_recording_duration_s,
    )

    # Data aggregation level (both aggregators are built whatever the enabled sensors)

    gyroscope_json_aggregator = JsonDataAggregator(
        max_duration_s=container_member_duration_s,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="gyroscope",
    )

    gps_json_aggregator = JsonDataAggregator(
        max_duration_s=container_member_duration_s,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="gps",
    )

    # Sensors level

    sensors = []

    if record_gyroscope:  # No need for specific permission!
        sensors.append(
            get_gyroscope_sensor(
                json_aggregator=gyroscope_json_aggregator,
                polling_interval_s=polling_interval_s,
            )
        )

    # The permission is only checked when the sensor is enabled by config
    if record_gps and not warn_if_permission_missing("ACCESS_FINE_LOCATION"):
        sensors.append(
            get_gps_sensor(
                polling_interval_s=polling_interval_s,
                json_aggregator=gps_json_aggregator,
            )
        )

    if record_microphone and not warn_if_permission_missing("RECORD_AUDIO"):
        sensors.append(
            get_microphone_sensor(
                interval_s=container_member_duration_s,
                tarfile_aggregator=tarfile_aggregator,
            )
        )

    if not sensors:
        logger.warning("No sensor is allowed by app permissions, aborting recorder setup")
        return None

    sensors_manager = SensorsManager(sensors=sensors)

    local_key_storage = key_storage_pool.get_local_key_storage()

    # Off-band workers

    free_keys_generator_worker = None  # Stays None when key pregeneration is disabled
    if max_free_keys_per_type:
        free_keys_generator_worker = get_free_keys_generator_worker(
            key_storage=local_key_storage,
            max_free_keys_per_type=max_free_keys_per_type,
            sleep_on_overflow_s=0.5
            * max_free_keys_per_type
            * container_member_duration_s,  # TODO make it configurable?
            key_types=PREGENERATED_KEY_TYPES,
        )

    return {
        "sensors_manager": sensors_manager,
        "data_aggregators": [gyroscope_json_aggregator, gps_json_aggregator],
        "tarfile_aggregators": [tarfile_aggregator],
        "container_storage": container_storage,
        "free_keys_generator_worker": free_keys_generator_worker,
        "local_key_storage": local_key_storage,
    }
def test_tarfile_aggregator(tmp_path):
    """Check record accumulation, manual/automatic flushing and name conflicts
    of TarfileRecordsAggregator, using a frozen clock."""

    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"whatever": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=random.choice((True, False)),
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=10
    )

    def check_aggregator_is_empty():
        assert len(tarfile_aggregator) == 0
        assert not tarfile_aggregator._current_start_time

    def finalize_and_wait():
        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()

    def read_latest_tarfile():
        raw_bytes = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1
        )
        return TarfileRecordsAggregator.read_tarfile_from_bytestring(raw_bytes)

    # Time span shared by most of the records added below
    span_start = datetime(year=2017, month=10, day=11, tzinfo=timezone.utc)
    span_end = datetime(year=2017, month=12, day=1, tzinfo=timezone.utc)

    check_aggregator_is_empty()
    assert len(container_storage) == 0

    with freeze_time() as frozen_datetime:

        # Finalizing an empty aggregator produces no container
        finalize_and_wait()
        check_aggregator_is_empty()
        assert len(container_storage) == 0

        data1 = "hêllö".encode("utf8")
        tarfile_aggregator.add_record(
            sensor_name="smartphone_front_camera",
            from_datetime=datetime(
                year=2014, month=1, day=2, hour=22, minute=11, second=55, tzinfo=timezone.utc
            ),
            to_datetime=datetime(year=2015, month=2, day=3, tzinfo=timezone.utc),
            extension=".txt",
            data=data1,
        )
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time

        data2 = b"123xyz"
        tarfile_aggregator.add_record(
            sensor_name="smartphone_recorder",
            from_datetime=span_start,
            to_datetime=span_end,
            extension=".mp3",
            data=data2,
        )
        assert len(tarfile_aggregator) == 2

        frozen_datetime.tick(delta=timedelta(seconds=1))

        finalize_and_wait()
        assert len(container_storage) == 1
        tar_file = read_latest_tarfile()
        check_aggregator_is_empty()

        filenames = sorted(tar_file.getnames())
        assert filenames == [
            "20140102221155_20150203000000_smartphone_front_camera.txt",
            "20171011000000_20171201000000_smartphone_recorder.mp3",
        ]
        assert tar_file.extractfile(filenames[0]).read() == data1
        assert tar_file.extractfile(filenames[1]).read() == data2

        for _ in range(2):
            frozen_datetime.tick(delta=timedelta(seconds=1))
            finalize_and_wait()
            check_aggregator_is_empty()
            assert len(container_storage) == 1  # Unchanged

        data3 = b""
        tarfile_aggregator.add_record(
            sensor_name="abc",
            from_datetime=span_start,
            to_datetime=span_end,
            extension=".avi",
            data=data3,
        )
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=1))

        finalize_and_wait()
        assert len(container_storage) == 2
        tar_file = read_latest_tarfile()
        check_aggregator_is_empty()

        filenames = sorted(tar_file.getnames())
        assert filenames == ["20171011000000_20171201000000_abc.avi"]
        assert tar_file.extractfile(filenames[0]).read() == b""

        for _ in range(2):
            frozen_datetime.tick(delta=timedelta(seconds=1))
            finalize_and_wait()
            check_aggregator_is_empty()
            assert len(container_storage) == 2  # Unchanged

        # We test time-limited aggregation

        def simple_add_record():
            return tarfile_aggregator.add_record(
                sensor_name="somedata",
                from_datetime=span_start,
                to_datetime=span_end,
                extension=".dat",
                data=b"hiiii",
            )

        simple_add_record()
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time
        current_start_time_saved = tarfile_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=9))
        elapsed = datetime.now(tz=timezone.utc) - tarfile_aggregator._current_start_time
        assert elapsed == timedelta(seconds=9)

        simple_add_record()
        assert len(tarfile_aggregator) == 2
        assert tarfile_aggregator._current_start_time == current_start_time_saved

        # 11 seconds elapsed in total, beyond the max_duration_s=10 given above
        frozen_datetime.tick(delta=timedelta(seconds=2))

        simple_add_record()
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time
        assert (
            tarfile_aggregator._current_start_time != current_start_time_saved
        )  # AUTO FLUSH occurred

        container_storage.wait_for_idle_state()
        assert len(container_storage) == 3

        finalize_and_wait()  # CLEANUP
        assert len(container_storage) == 4

        # We test conflicts between identical tar record names
        for i in range(3):  # Three times the same file name!
            tarfile_aggregator.add_record(
                sensor_name="smartphone_recorder",
                from_datetime=span_start,
                to_datetime=span_end,
                extension=".mp3",
                data=bytes([i] * 500),
            )

        frozen_datetime.tick(delta=timedelta(seconds=1))
        finalize_and_wait()
        assert len(container_storage) == 5

        tar_file = read_latest_tarfile()
        assert len(tar_file.getmembers()) == 3
        assert len(tar_file.getnames()) == 3
        # The LAST record has priority over others with the same name
        assert tar_file.extractfile(tar_file.getnames()[0]).read() == bytes([2] * 500)
def test_periodic_value_poller(tmp_path):
    """Check PeriodicValuePoller with a normal task, a task slower than the
    polling interval, and a task which always raises."""

    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"zexcsc": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=random.choice((True, False)),
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100
    )
    assert len(tarfile_aggregator) == 0

    json_aggregator = JsonDataAggregator(
        max_duration_s=100,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="some_sensors",
    )

    # NOMINAL CASE #

    def task_func():
        return {"time": int(time.time()), "type": "current time"}

    poller = PeriodicValuePoller(
        interval_s=0.1, task_func=task_func, json_aggregator=json_aggregator
    )

    check_sensor_state_machine(poller, run_duration=0.45)

    # We have variations due to machine load (but data was fetched immediately on start)
    assert 5 <= len(json_aggregator) <= 6
    data_sets = json_aggregator._current_dataset
    assert all(rec["type"] == "current time" for rec in data_sets), data_sets

    json_aggregator.flush_dataset()  # From here on, everything is just standard
    assert len(json_aggregator) == 0

    # CASE OF SLOW FETCHER #

    def task_func_slow():
        time.sleep(0.2)
        return {"time": int(time.time()), "type": "current time 2"}

    poller = PeriodicValuePoller(
        interval_s=0.05, task_func=task_func_slow, json_aggregator=json_aggregator
    )
    poller.start()
    time.sleep(0.3)
    poller.stop()
    poller.join()

    assert len(json_aggregator) == 2  # Second fetching could complete
    data_sets = json_aggregator._current_dataset
    assert all(rec["type"] == "current time 2" for rec in data_sets), data_sets

    json_aggregator.flush_dataset()  # From here on, everything is just standard
    assert len(json_aggregator) == 0

    # CASE OF BROKEN TASK #

    broken_iterations = 0

    def task_func_broken():
        nonlocal broken_iterations
        broken_iterations += 1
        ABCDE  # Deliberately undefined name: raises NameError on every call

    poller = PeriodicValuePoller(
        interval_s=0.05, task_func=task_func_broken, json_aggregator=json_aggregator
    )
    check_sensor_state_machine(poller, run_duration=0.5)

    # The task kept being called despite failing each time
    assert broken_iterations > 5
def test_aggregators_thread_safety(tmp_path):
    """Call both aggregators concurrently from a thread pool, then check that
    every submitted record and data item ends up in the stored tarfiles."""

    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"zesvscc": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=random.choice((True, False)),
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100
    )
    json_aggregator = JsonDataAggregator(
        max_duration_s=1,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="some_sensors",
    )

    misc_futures = []

    record_data = "hêllo".encode("utf8")
    record_start = datetime(year=2017, month=10, day=11, tzinfo=timezone.utc)
    record_end = datetime(year=2017, month=12, day=1, tzinfo=timezone.utc)

    with ThreadPoolExecutor(max_workers=30) as executor:
        for burst in range(10):
            for idx in range(100):
                misc_futures.append(
                    executor.submit(json_aggregator.add_data, {"res": idx})
                )
                misc_futures.append(executor.submit(json_aggregator.flush_dataset))
                misc_futures.append(
                    executor.submit(
                        tarfile_aggregator.add_record,
                        sensor_name="some_recorder_%s_%s" % (burst, idx),
                        from_datetime=record_start,
                        to_datetime=record_end,
                        extension=".txt",
                        data=record_data,
                    )
                )
                misc_futures.append(
                    executor.submit(tarfile_aggregator.finalize_tarfile)
                )
            time.sleep(0.2)

    # All submitted calls are done once the executor context has exited,
    # so a last flush of each level pushes any remaining data to storage
    json_aggregator.flush_dataset()
    tarfile_aggregator.finalize_tarfile()
    container_storage.wait_for_idle_state()

    misc_results = {future.result() for future in misc_futures}
    assert misc_results == {None}  # No results expected from any of these methods

    container_names = container_storage.list_container_names(as_sorted=True)

    tarfiles_bytes = [
        container_storage.decrypt_container_from_storage(container_name)
        for container_name in container_names
    ]

    tarfiles = [
        TarfileRecordsAggregator.read_tarfile_from_bytestring(bytestring)
        for bytestring in tarfiles_bytes
        if bytestring
    ]

    tarfiles_count = len(tarfiles)
    print("Tarfiles count:", tarfiles_count)

    total_idx = 0
    txt_count = 0

    for tar_archive in tarfiles:
        print("NEW TARFILE")
        for member in tar_archive.getmembers():
            print(">>>>", member.name)
            ext = os.path.splitext(member.name)[1]
            record_bytes = tar_archive.extractfile(member).read()
            if ext == ".json":
                for data in load_from_json_bytes(record_bytes):
                    total_idx += data["res"]
            elif ext == ".txt":
                assert record_bytes == record_data
                txt_count += 1
            else:
                raise RuntimeError(ext)

    assert txt_count == 1000
    assert total_idx == 1000 * 99 / 2 == 49500  # Sum of idx sequences
def test_json_aggregator(tmp_path):
    """Check that JsonDataAggregator accumulates data items and hands them to
    the tarfile aggregator as json records, on timeout or on explicit flush."""

    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"qsdqsdsd": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=random.choice((True, False)),
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100
    )
    assert len(tarfile_aggregator) == 0

    json_aggregator = JsonDataAggregator(
        max_duration_s=2,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="some_sensors",
    )
    assert len(json_aggregator) == 0
    assert json_aggregator.sensor_name == "some_sensors"

    json_aggregator.flush_dataset()  # Does nothing
    assert len(tarfile_aggregator) == 0
    assert len(json_aggregator) == 0
    assert not json_aggregator._current_start_time

    with freeze_time() as frozen_datetime:

        json_aggregator.add_data({"pulse": 42})
        json_aggregator.add_data({"timing": True})

        assert len(tarfile_aggregator) == 0
        assert len(json_aggregator) == 2
        assert json_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=1))

        json_aggregator.add_data({"abc": 2.2})

        assert len(tarfile_aggregator) == 0
        assert len(json_aggregator) == 3

        # 2 seconds elapsed in total, i.e. the max_duration_s given above
        frozen_datetime.tick(delta=timedelta(seconds=1))

        json_aggregator.add_data({"x": "abc"})

        assert len(tarfile_aggregator) == 1  # Single json file
        assert len(json_aggregator) == 1
        assert json_aggregator._current_start_time

        json_aggregator.flush_dataset()
        assert not json_aggregator._current_start_time

        assert len(tarfile_aggregator) == 2  # 2 json files
        assert len(json_aggregator) == 0

        frozen_datetime.tick(delta=timedelta(seconds=10))

        json_aggregator.flush_dataset()

        # Unchanged
        assert len(tarfile_aggregator) == 2
        assert len(json_aggregator) == 0

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 1

        raw_tarfile = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1
        )
        tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(raw_tarfile)
        assert len(tarfile_aggregator) == 0

        filenames = sorted(tar_file.getnames())
        assert len(filenames) == 2

        for filename in filenames:
            assert "some_sensors" in filename
            assert filename.endswith(".json")

        first_payload = tar_file.extractfile(filenames[0]).read()
        assert (
            first_payload
            == b'[{"pulse": {"$numberInt": "42"}}, {"timing": true}, {"abc": {"$numberDouble": "2.2"}}]'
        )

        second_payload = tar_file.extractfile(filenames[1]).read()
        assert second_payload == b'[{"x": "abc"}]'

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 1  # Unchanged
        assert not json_aggregator._current_start_time
def test_nominal_recording_toolchain_case():
    """Run the full recording toolchain for a short while with all three sensors
    enabled, and check the content of the single container it produces."""

    config = ConfigParser()  # Empty but OK
    config.setdefaults(
        "usersettings",
        {"record_gyroscope": 1, "record_gps": 1, "record_microphone": 1},
    )

    key_storage_pool = FilesystemKeyStoragePool(INTERNAL_KEYS_DIR)
    encryption_conf = get_encryption_conf("test")
    toolchain = build_recording_toolchain(
        config, key_storage_pool=key_storage_pool, encryption_conf=encryption_conf
    )
    # The builder returns None when no sensor is enabled or allowed; fail clearly then
    assert toolchain is not None

    sensors_manager = toolchain["sensors_manager"]
    data_aggregators = toolchain["data_aggregators"]
    tarfile_aggregators = toolchain["tarfile_aggregators"]
    container_storage = toolchain["container_storage"]

    purge_test_containers()

    # TODO - make this a PURGE() methods of storage!!!
    # CLEANUP of already existing containers
    # for container_name in container_storage.list_container_names(sorted=True):
    #     container_storage._delete_container(container_name)
    # assert not len(container_storage)

    start_recording_toolchain(toolchain)
    time.sleep(2)
    stop_recording_toolchain(toolchain)

    # Checked twice, one second apart, so that the stopped state is seen to persist
    for _ in range(2):
        assert not sensors_manager.is_running
        for data_aggregator in data_aggregators:
            assert len(data_aggregator) == 0
        for tarfile_aggregator in tarfile_aggregators:
            assert len(tarfile_aggregator) == 0
        time.sleep(1)

    assert len(container_storage) == 1  # Too quick recording to have container rotation

    (container_name,) = container_storage.list_container_names(as_sorted=True)

    tarfile_bytestring = container_storage.decrypt_container_from_storage(container_name)

    tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(tarfile_bytestring)
    tarfile_members = tar_file.getnames()
    assert len(tarfile_members) == 3

    # Gyroscope data

    gyroscope_filenames = [m for m in tarfile_members if "gyroscope" in m]
    assert len(gyroscope_filenames) == 1
    assert gyroscope_filenames[0].endswith(".json")

    json_bytestring = tar_file.extractfile(gyroscope_filenames[0]).read()
    gyroscope_data = load_from_json_bytes(json_bytestring)
    assert isinstance(gyroscope_data, list)
    assert len(gyroscope_data) >= 4
    assert gyroscope_data[0] == {
        "rotation_rate_x": None,
        "rotation_rate_y": None,
        "rotation_rate_z": None,
    }

    # GPS data

    gps_filenames = [m for m in tarfile_members if "gps" in m]
    assert len(gps_filenames) == 1
    assert gps_filenames[0].endswith(".json")

    json_bytestring = tar_file.extractfile(gps_filenames[0]).read()
    gps_data = load_from_json_bytes(json_bytestring)
    # Fake data pushed by sensor
    assert gps_data == [
        {"altitude": 2.2},
        {"message_type": "some_message_type", "status": "some_status_value"},
    ]

    # Microphone data

    microphone_filenames = [m for m in tarfile_members if "microphone" in m]
    assert len(microphone_filenames) == 1
    assert microphone_filenames[0].endswith(".mp4")

    mp4_bytestring = tar_file.extractfile(microphone_filenames[0]).read()
    assert mp4_bytestring == b"fake_microphone_recording_data"