Example #1
def build_recording_toolchain(config, key_storage_pool, encryption_conf):
    """Instantiate the whole toolchain of sensors and aggregators, depending on the config.

    Returns None if no toolchain is enabled by config.
    """

    # TODO make this part more resilient against exceptions

    def get_conf_value(*args, converter=None, **kwargs):
        value = config.getdefault("usersettings", *args, **kwargs)
        if converter:
            value = converter(value)
        return value

    # BEFORE ANYTHING we ensure that it's worth building all the nodes below
    # Note that values are stored as "0" or "1", so bool() is not a proper converter
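    # (e.g. bool("0") is True, whereas int("0") == 0 is correctly falsy)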
    record_gyroscope = get_conf_value("record_gyroscope", False, converter=int)
    record_gps = get_conf_value("record_gps", False, converter=int)
    record_microphone = get_conf_value("record_microphone", False, converter=int)
    if not any([record_gyroscope, record_gps, record_microphone]):
        logger.warning("No sensor is enabled, aborting recorder setup")
        return None

    max_containers_count = get_conf_value("max_containers_count", 100, converter=int)
    container_recording_duration_s = get_conf_value(
        "container_recording_duration_s", 60, converter=float
    )
    container_member_duration_s = get_conf_value(
        "container_member_duration_s", 60, converter=float
    )
    polling_interval_s = get_conf_value("polling_interval_s", 0.5, converter=float)
    max_free_keys_per_type = get_conf_value("max_free_keys_per_type", 1, converter=int)

    logger.info(
        "Toolchain configuration is %s",
        dict(
            max_containers_count=max_containers_count,
            container_recording_duration_s=container_recording_duration_s,
            container_member_duration_s=container_member_duration_s,
            polling_interval_s=polling_interval_s,
        ),
    )

    container_storage = ContainerStorage(
        default_encryption_conf=encryption_conf,
        containers_dir=INTERNAL_CONTAINERS_DIR,
        max_containers_count=max_containers_count,
        key_storage_pool=key_storage_pool,
    )

    # Tarfile builder level

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage,
        max_duration_s=container_recording_duration_s,
    )

    # Data aggregation level

    gyroscope_json_aggregator = JsonDataAggregator(
        max_duration_s=container_member_duration_s,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="gyroscope",
    )

    gps_json_aggregator = JsonDataAggregator(
        max_duration_s=container_member_duration_s,
        tarfile_aggregator=tarfile_aggregator,
        sensor_name="gps",
    )

    # Sensors level

    sensors = []

    if record_gyroscope:  # No need for specific permission!
        gyroscope_sensor = get_gyroscope_sensor(
            json_aggregator=gyroscope_json_aggregator, polling_interval_s=polling_interval_s
        )
        sensors.append(gyroscope_sensor)

    if record_gps and not warn_if_permission_missing("ACCESS_FINE_LOCATION"):
        gps_sensor = get_gps_sensor(
            polling_interval_s=polling_interval_s, json_aggregator=gps_json_aggregator
        )
        sensors.append(gps_sensor)

    if record_microphone and not warn_if_permission_missing("RECORD_AUDIO"):
        microphone_sensor = get_microphone_sensor(
            interval_s=container_member_duration_s, tarfile_aggregator=tarfile_aggregator
        )
        sensors.append(microphone_sensor)

    if not sensors:
        logger.warning("No sensor is allowed by app permissions, aborting recorder setup")
        return None

    sensors_manager = SensorsManager(sensors=sensors)

    local_key_storage = key_storage_pool.get_local_key_storage()

    # Off-band workers

    if max_free_keys_per_type:
        free_keys_generator_worker = get_free_keys_generator_worker(
            key_storage=local_key_storage,
            max_free_keys_per_type=max_free_keys_per_type,
            sleep_on_overflow_s=0.5
            * max_free_keys_per_type
            * container_member_duration_s,  # TODO make it configurable?
            key_types=PREGENERATED_KEY_TYPES,
        )
    else:
        free_keys_generator_worker = None

    toolchain = dict(
        sensors_manager=sensors_manager,
        data_aggregators=[gyroscope_json_aggregator, gps_json_aggregator],
        tarfile_aggregators=[tarfile_aggregator],
        container_storage=container_storage,
        free_keys_generator_worker=free_keys_generator_worker,
        local_key_storage=local_key_storage,
    )
    return toolchain
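
A minimal usage sketch for this builder. It reuses names that appear in Example #6 (ConfigParser, FilesystemKeyStoragePool, INTERNAL_KEYS_DIR, get_encryption_conf and the start/stop helpers); the config values themselves are illustrative:

config = ConfigParser()
config.setdefaults("usersettings", {"record_gyroscope": 1, "record_gps": 0, "record_microphone": 0})

toolchain = build_recording_toolchain(
    config,
    key_storage_pool=FilesystemKeyStoragePool(INTERNAL_KEYS_DIR),
    encryption_conf=get_encryption_conf("test"),
)
if toolchain:  # None means no sensor was enabled by config
    start_recording_toolchain(toolchain)
    time.sleep(2)  # Let sensors poll and aggregate for a while
    stop_recording_toolchain(toolchain)
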
Example #2
def test_tarfile_aggregator(tmp_path):

    offload_data_ciphertext = random.choice((True, False))
    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"whatever": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=offload_data_ciphertext,
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=10)
    assert len(tarfile_aggregator) == 0
    assert not tarfile_aggregator._current_start_time
    assert len(container_storage) == 0

    with freeze_time() as frozen_datetime:

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(tarfile_aggregator) == 0
        assert not tarfile_aggregator._current_start_time
        assert len(container_storage) == 0

        data1 = "hêllö".encode("utf8")
        tarfile_aggregator.add_record(
            sensor_name="smartphone_front_camera",
            from_datetime=datetime(year=2014,
                                   month=1,
                                   day=2,
                                   hour=22,
                                   minute=11,
                                   second=55,
                                   tzinfo=timezone.utc),
            to_datetime=datetime(year=2015,
                                 month=2,
                                 day=3,
                                 tzinfo=timezone.utc),
            extension=".txt",
            data=data1,
        )
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time

        data2 = b"123xyz"
        tarfile_aggregator.add_record(
            sensor_name="smartphone_recorder",
            from_datetime=datetime(year=2017,
                                   month=10,
                                   day=11,
                                   tzinfo=timezone.utc),
            to_datetime=datetime(year=2017,
                                 month=12,
                                 day=1,
                                 tzinfo=timezone.utc),
            extension=".mp3",
            data=data2,
        )
        assert len(tarfile_aggregator) == 2

        frozen_datetime.tick(delta=timedelta(seconds=1))

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 1
        tarfile_bytestring = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1)
        tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(
            tarfile_bytestring)
        assert len(tarfile_aggregator) == 0
        assert not tarfile_aggregator._current_start_time

        filenames = sorted(tar_file.getnames())
        assert filenames == [
            "20140102221155_20150203000000_smartphone_front_camera.txt",
            "20171011000000_20171201000000_smartphone_recorder.mp3",
        ]
        assert tar_file.extractfile(filenames[0]).read() == data1
        assert tar_file.extractfile(filenames[1]).read() == data2

        for i in range(2):
            frozen_datetime.tick(delta=timedelta(seconds=1))
            tarfile_aggregator.finalize_tarfile()
            container_storage.wait_for_idle_state()
            assert len(tarfile_aggregator) == 0
            assert not tarfile_aggregator._current_start_time
            assert len(container_storage) == 1  # Unchanged

        data3 = b""
        tarfile_aggregator.add_record(
            sensor_name="abc",
            from_datetime=datetime(year=2017,
                                   month=10,
                                   day=11,
                                   tzinfo=timezone.utc),
            to_datetime=datetime(year=2017,
                                 month=12,
                                 day=1,
                                 tzinfo=timezone.utc),
            extension=".avi",
            data=data3,
        )
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=1))
        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 2
        tarfile_bytestring = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1)
        tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(
            tarfile_bytestring)
        assert len(tarfile_aggregator) == 0
        assert not tarfile_aggregator._current_start_time

        filenames = sorted(tar_file.getnames())
        assert filenames == ["20171011000000_20171201000000_abc.avi"]
        assert tar_file.extractfile(filenames[0]).read() == b""

        for i in range(2):
            frozen_datetime.tick(delta=timedelta(seconds=1))
            tarfile_aggregator.finalize_tarfile()
            container_storage.wait_for_idle_state()
            assert len(tarfile_aggregator) == 0
            assert not tarfile_aggregator._current_start_time
            assert len(container_storage) == 2  # Unchanged

        # We test time-limited aggregation
        def simple_add_record():
            tarfile_aggregator.add_record(
                sensor_name="somedata",
                from_datetime=datetime(
                    year=2017, month=10, day=11, tzinfo=timezone.utc),
                to_datetime=datetime(
                    year=2017, month=12, day=1, tzinfo=timezone.utc),
                extension=".dat",
                data=b"hiiii",
            )
        simple_add_record()
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time
        current_start_time_saved = tarfile_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=9))
        assert datetime.now(tz=timezone.utc) - tarfile_aggregator._current_start_time == timedelta(seconds=9)

        simple_add_record()
        assert len(tarfile_aggregator) == 2
        assert tarfile_aggregator._current_start_time == current_start_time_saved

        frozen_datetime.tick(delta=timedelta(seconds=2))

        simple_add_record()
        assert len(tarfile_aggregator) == 1
        assert tarfile_aggregator._current_start_time
        assert tarfile_aggregator._current_start_time != current_start_time_saved  # AUTO FLUSH occurred
        container_storage.wait_for_idle_state()

        assert len(container_storage) == 3

        tarfile_aggregator.finalize_tarfile()  # CLEANUP
        container_storage.wait_for_idle_state()

        assert len(container_storage) == 4

        # We test conflicts between identical tar record names
        for i in range(3):  # Three times the same file name!
            tarfile_aggregator.add_record(
                sensor_name="smartphone_recorder",
                from_datetime=datetime(year=2017,
                                       month=10,
                                       day=11,
                                       tzinfo=timezone.utc),
                to_datetime=datetime(year=2017,
                                     month=12,
                                     day=1,
                                     tzinfo=timezone.utc),
                extension=".mp3",
                data=bytes([i] * 500),
            )

        frozen_datetime.tick(delta=timedelta(seconds=1))
        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 5
        tarfile_bytestring = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1)
        tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(
            tarfile_bytestring)
        assert len(tar_file.getmembers()) == 3
        assert len(tar_file.getnames()) == 3
        # The LAST record has priority over others with the same name
        assert tar_file.extractfile(tar_file.getnames()[0]).read() == bytes(
            [2] * 500)
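
The duplicate-name assertion above relies on standard library tarfile semantics: an archive may hold several members with the same name, and lookup by name resolves to the last occurrence. A self-contained stdlib illustration:

import io
import tarfile

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    for payload in (b"first", b"second"):
        info = tarfile.TarInfo(name="dup.txt")
        info.size = len(payload)
        tar.addfile(info, io.BytesIO(payload))  # Same member name, twice

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r") as tar:
    assert len(tar.getmembers()) == 2  # Both members are stored...
    assert tar.extractfile("dup.txt").read() == b"second"  # ...but name lookup returns the last one
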
Example #3
def test_periodic_value_poller(tmp_path):

    offload_data_ciphertext = random.choice((True, False))
    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"zexcsc": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=offload_data_ciphertext,
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100)

    assert len(tarfile_aggregator) == 0

    json_aggregator = JsonDataAggregator(max_duration_s=100,
                                         tarfile_aggregator=tarfile_aggregator,
                                         sensor_name="some_sensors")

    def task_func():
        return dict(time=int(time.time()), type="current time")

    poller = PeriodicValuePoller(interval_s=0.1,
                                 task_func=task_func,
                                 json_aggregator=json_aggregator)

    check_sensor_state_machine(poller, run_duration=0.45)

    # We have variations due to machine load (but data was fetched immediately on start)
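    # (a 0.45s run at a 0.1s interval means polls at t=0, 0.1, 0.2, 0.3 and 0.4s, plus possibly one more on a busy machine)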
    assert 5 <= len(json_aggregator) <= 6
    data_sets = json_aggregator._current_dataset
    assert all(rec["type"] == "current time" for rec in data_sets), data_sets

    json_aggregator.flush_dataset()  # From here on, everything is just standard
    assert len(json_aggregator) == 0

    # CASE OF SLOW FETCHER #

    def task_func_slow():
        time.sleep(0.2)
        return dict(time=int(time.time()), type="current time 2")

    poller = PeriodicValuePoller(interval_s=0.05,
                                 task_func=task_func_slow,
                                 json_aggregator=json_aggregator)
    poller.start()
    time.sleep(0.3)
    poller.stop()
    poller.join()

    assert len(json_aggregator) == 2  # The 0.2s task only had time to complete twice during the 0.3s run
    data_sets = json_aggregator._current_dataset
    assert all(rec["type"] == "current time 2" for rec in data_sets), data_sets

    json_aggregator.flush_dataset()  # From here on, everything is just standard
    assert len(json_aggregator) == 0

    # CASE OF BROKEN TASK #

    broken_iterations = 0

    def task_func_broken():
        nonlocal broken_iterations
        broken_iterations += 1
        ABCDE  # Deliberately undefined name, so each iteration raises NameError

    poller = PeriodicValuePoller(interval_s=0.05,
                                 task_func=task_func_broken,
                                 json_aggregator=json_aggregator)

    check_sensor_state_machine(poller, run_duration=0.5)
    assert broken_iterations > 5
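
The behaviour exercised here (an immediate first poll, then fixed-interval polling that survives task exceptions) is the classic poller loop. Below is a minimal threading-based sketch of that pattern; it is purely illustrative, not the actual PeriodicValuePoller implementation:

import logging
import threading

class MiniPoller:
    """Illustrative poller: runs task_func immediately, then every interval_s,
    and logs (rather than propagates) task exceptions."""

    def __init__(self, interval_s, task_func, json_aggregator):
        self._interval_s = interval_s
        self._task_func = task_func
        self._json_aggregator = json_aggregator
        self._stop_event = threading.Event()
        self._thread = None

    def start(self):
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def _run(self):
        while not self._stop_event.is_set():
            try:
                self._json_aggregator.add_data(self._task_func())
            except Exception:
                logging.exception("Poller task failed, continuing")  # A broken task must not kill the loop
            self._stop_event.wait(self._interval_s)

    def stop(self):
        self._stop_event.set()

    def join(self):
        self._thread.join()
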
Example #4
def test_aggregators_thread_safety(tmp_path):

    offload_data_ciphertext = random.choice((True, False))
    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"zesvscc": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=offload_data_ciphertext,
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100)
    json_aggregator = JsonDataAggregator(max_duration_s=1,
                                         tarfile_aggregator=tarfile_aggregator,
                                         sensor_name="some_sensors")

    misc_futures = []

    record_data = "hêllo".encode("utf8")

    with ThreadPoolExecutor(max_workers=30) as executor:
        for burst in range(10):
            for idx in range(100):
                misc_futures.append(
                    executor.submit(json_aggregator.add_data, dict(res=idx)))
                misc_futures.append(
                    executor.submit(json_aggregator.flush_dataset))
                misc_futures.append(
                    executor.submit(
                        tarfile_aggregator.add_record,
                        sensor_name="some_recorder_%s_%s" % (burst, idx),
                        from_datetime=datetime(year=2017,
                                               month=10,
                                               day=11,
                                               tzinfo=timezone.utc),
                        to_datetime=datetime(year=2017,
                                             month=12,
                                             day=1,
                                             tzinfo=timezone.utc),
                        extension=".txt",
                        data=record_data,
                    ))
                misc_futures.append(
                    executor.submit(tarfile_aggregator.finalize_tarfile))
            time.sleep(0.2)

    json_aggregator.flush_dataset()
    tarfile_aggregator.finalize_tarfile()
    container_storage.wait_for_idle_state()

    misc_results = set(future.result() for future in misc_futures)
    assert misc_results == {None}  # No results expected from any of these methods

    container_names = container_storage.list_container_names(as_sorted=True)

    tarfiles_bytes = [
        container_storage.decrypt_container_from_storage(container_name)
        for container_name in container_names
    ]

    tarfiles = [
        TarfileRecordsAggregator.read_tarfile_from_bytestring(bytestring)
        for bytestring in tarfiles_bytes if bytestring
    ]

    tarfiles_count = len(tarfiles)
    print("Tarfiles count:", tarfiles_count)

    total_idx = 0
    txt_count = 0

    for tar_file in tarfiles:  # Avoid shadowing the stdlib "tarfile" module name
        print("NEW TARFILE")
        members = tar_file.getmembers()
        for member in members:
            print(">>>>", member.name)
            ext = os.path.splitext(member.name)[1]
            record_bytes = tar_file.extractfile(member).read()
            if ext == ".json":
                data_array = load_from_json_bytes(record_bytes)
                total_idx += sum(data["res"] for data in data_array)
            elif ext == ".txt":
                assert record_bytes == record_data
                txt_count += 1
            else:
                raise RuntimeError(ext)

    assert txt_count == 1000
    assert total_idx == 1000 * 99 / 2 == 49500  # 10 bursts of sum(range(100)) = 10 * 4950
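
The contract this stress test relies on (every public mutator returns None and may be called from many threads at once) is typically achieved by serializing mutations behind a single internal lock. A purely illustrative sketch of the pattern, not the actual aggregator code:

import threading

class ThreadSafeAggregatorSketch:
    """Hypothetical illustration: one lock guards all mutable state."""

    def __init__(self):
        self._lock = threading.Lock()
        self._items = []

    def add_data(self, data):
        with self._lock:
            self._items.append(data)  # Returns None, matching the tested contract

    def flush_dataset(self):
        with self._lock:
            items, self._items = self._items, []
            # ... hand "items" off to the next aggregation stage here ...
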
Example #5
def test_json_aggregator(tmp_path):

    offload_data_ciphertext = random.choice((True, False))
    container_storage = FakeTestContainerStorage(
        default_encryption_conf={"qsdqsdsd": True},
        containers_dir=tmp_path,
        offload_data_ciphertext=offload_data_ciphertext,
    )

    tarfile_aggregator = TarfileRecordsAggregator(
        container_storage=container_storage, max_duration_s=100)

    assert len(tarfile_aggregator) == 0

    json_aggregator = JsonDataAggregator(max_duration_s=2,
                                         tarfile_aggregator=tarfile_aggregator,
                                         sensor_name="some_sensors")
    assert len(json_aggregator) == 0
    assert json_aggregator.sensor_name == "some_sensors"

    json_aggregator.flush_dataset()  # Does nothing
    assert len(tarfile_aggregator) == 0
    assert len(json_aggregator) == 0
    assert not json_aggregator._current_start_time

    with freeze_time() as frozen_datetime:

        json_aggregator.add_data(dict(pulse=42))
        json_aggregator.add_data(dict(timing=True))

        assert len(tarfile_aggregator) == 0
        assert len(json_aggregator) == 2
        assert json_aggregator._current_start_time

        frozen_datetime.tick(delta=timedelta(seconds=1))

        json_aggregator.add_data(dict(abc=2.2))

        assert len(tarfile_aggregator) == 0
        assert len(json_aggregator) == 3

        frozen_datetime.tick(delta=timedelta(seconds=1))

        json_aggregator.add_data(dict(x="abc"))

        assert len(tarfile_aggregator) == 1  # Single json file
        assert len(json_aggregator) == 1
        assert json_aggregator._current_start_time

        json_aggregator.flush_dataset()
        assert not json_aggregator._current_start_time

        assert len(tarfile_aggregator) == 2  # 2 json files
        assert len(json_aggregator) == 0

        frozen_datetime.tick(delta=timedelta(seconds=10))

        json_aggregator.flush_dataset()

        # Unchanged
        assert len(tarfile_aggregator) == 2
        assert len(json_aggregator) == 0

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 1
        tarfile_bytestring = container_storage.decrypt_container_from_storage(
            container_name_or_idx=-1)
        tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(
            tarfile_bytestring)
        assert len(tarfile_aggregator) == 0

        filenames = sorted(tar_file.getnames())
        assert len(filenames) == 2

        for filename in filenames:
            assert "some_sensors" in filename
            assert filename.endswith(".json")

        data = tar_file.extractfile(filenames[0]).read()
        assert data == b'[{"pulse": {"$numberInt": "42"}}, {"timing": true}, {"abc": {"$numberDouble": "2.2"}}]'

        data = tar_file.extractfile(filenames[1]).read()
        assert data == b'[{"x": "abc"}]'

        tarfile_aggregator.finalize_tarfile()
        container_storage.wait_for_idle_state()
        assert len(container_storage) == 1  # Unchanged
        assert not json_aggregator._current_start_time
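
The $numberInt/$numberDouble wrappers in the asserted payload are MongoDB Extended JSON (canonical mode). Assuming the project's serializer wraps pymongo's bson.json_util (an assumption, not confirmed by this code), the encoding can be reproduced like this:

from bson.json_util import dumps, loads, CANONICAL_JSON_OPTIONS  # Ships with the pymongo distribution

encoded = dumps([{"pulse": 42}], json_options=CANONICAL_JSON_OPTIONS)
print(encoded)  # '[{"pulse": {"$numberInt": "42"}}]'
assert loads(encoded) == [{"pulse": 42}]  # Round-trips back to plain Python values
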
Example #6
def test_nominal_recording_toolchain_case():

    config = ConfigParser()  # Empty but OK
    config.setdefaults("usersettings", {
        "record_gyroscope": 1,
        "record_gps": 1,
        "record_microphone": 1
    })

    key_storage_pool = FilesystemKeyStoragePool(INTERNAL_KEYS_DIR)
    encryption_conf = get_encryption_conf("test")
    toolchain = build_recording_toolchain(config,
                                          key_storage_pool=key_storage_pool,
                                          encryption_conf=encryption_conf)
    sensors_manager = toolchain["sensors_manager"]
    data_aggregators = toolchain["data_aggregators"]
    tarfile_aggregators = toolchain["tarfile_aggregators"]
    container_storage = toolchain["container_storage"]

    purge_test_containers()

    # TODO - make this a purge() method of the storage class!
    # CLEANUP of already existing containers
    # for container_name in container_storage.list_container_names(sorted=True):
    #    container_storage._delete_container(container_name)
    # assert not len(container_storage)

    start_recording_toolchain(toolchain)
    time.sleep(2)
    stop_recording_toolchain(toolchain)

    for i in range(2):
        assert not sensors_manager.is_running
        for data_aggregator in data_aggregators:
            assert len(data_aggregator) == 0
        for tarfile_aggregator in tarfile_aggregators:
            assert len(tarfile_aggregator) == 0
        time.sleep(1)

    # Recording was too short for container rotation to occur
    assert len(container_storage) == 1
    (container_name,) = container_storage.list_container_names(as_sorted=True)

    tarfile_bytestring = container_storage.decrypt_container_from_storage(
        container_name)

    tar_file = TarfileRecordsAggregator.read_tarfile_from_bytestring(
        tarfile_bytestring)
    tarfile_members = tar_file.getnames()
    assert len(tarfile_members) == 3

    # Gyroscope data

    gyroscope_filenames = [m for m in tarfile_members if "gyroscope" in m]
    assert len(gyroscope_filenames) == 1
    assert gyroscope_filenames[0].endswith(".json")

    json_bytestring = tar_file.extractfile(gyroscope_filenames[0]).read()
    gyroscope_data = load_from_json_bytes(json_bytestring)
    assert isinstance(gyroscope_data, list)
    assert len(gyroscope_data) >= 4
    assert gyroscope_data[0] == {
        "rotation_rate_x": None,
        "rotation_rate_y": None,
        "rotation_rate_z": None,
    }

    # GPS data

    microphone_filenames = [m for m in tarfile_members if "gps" in m]
    assert len(microphone_filenames) == 1
    assert microphone_filenames[0].endswith(".json")

    json_bytestring = tar_file.extractfile(microphone_filenames[0]).read()
    gyroscope_data = load_from_json_bytes(json_bytestring)
    # Fake data pushed by sensor
    assert gyroscope_data == [{
        'altitude': 2.2
    }, {
        'message_type': 'some_message_type',
        'status': 'some_status_value'
    }]

    # Microphone data

    microphone_filenames = [m for m in tarfile_members if "microphone" in m]
    assert len(microphone_filenames) == 1
    assert microphone_filenames[0].endswith(".mp4")

    mp4_bytestring = tar_file.extractfile(microphone_filenames[0]).read()
    assert mp4_bytestring == b"fake_microphone_recording_data"