예제 #1
0
def test_watcher_parses_existing_files():
    """Construct a Watcher over a directory that already contains a raw-data file.

    A TemporaryDirectory serves as the watched raw-data root. Using it as a
    context manager guarantees the directory is removed even if the test body
    raises — the original called ``cleanup()`` manually, which leaked the
    directory on any failure before that line.
    """
    with TemporaryDirectory(dir="/tmp", prefix="raw_data") as raw_data_dir:
        # Drop a mock events file in place *before* the Watcher is created,
        # so it exists at construction time.
        with open(
            join(raw_data_dir, f"events_data_{str(random.getrandbits(50))}.json"), "w"
        ) as f:
            f.write(get_mock_json())
            f.flush()
        queue = Queue()
        Watcher(path=raw_data_dir, watchdog_queue=queue, rabbitmq_queue=TEST_QUEUE)
예제 #2
0
def test_extract_data():
    """Exercise extract_data end to end on a mock JSON payload.

    A NamedTemporaryFile holds the JSON input and a TemporaryDirectory holds
    the CSV output; the written file is read back and compared line by line
    against the expected CSV rows.
    """
    want = [
        "id,event_type,event_ts\n",
        "foo,created,2020-12-08 20:03:16.759617\n",
        "bar,created,2014-12-08 20:03:16.759617\n",
    ]
    with NamedTemporaryFile() as src, TemporaryDirectory(dir="/tmp") as out_dir:
        src.write(get_mock_json().encode("utf-8"))
        src.flush()
        dst = join(out_dir, "test")
        extract_data(src_path=src.name, dst_path=dst)
        with open(dst, "r") as result:
            assert result.readlines() == want
예제 #3
0
def test_watcher_detects_new_file():
    """Verify a running Watcher notices a JSON file created after it started.

    The Watcher runs on a daemon thread, so it is torn down automatically when
    the test process exits — no explicit stop/join is needed. The original
    ended with ``watcher_thread._running = False``, which is a no-op:
    ``threading.Thread`` has no ``_running`` attribute and setting an arbitrary
    attribute on the thread object stops nothing; that dead line is removed.
    The temp directory is now a context manager so it is cleaned up even if
    the test body raises (the manual ``cleanup()`` was skipped on failure).
    """
    with TemporaryDirectory(dir="/tmp", prefix="raw_data") as raw_data_dir:
        queue = Queue()
        watcher = Watcher(
            path=raw_data_dir, watchdog_queue=queue, rabbitmq_queue=TEST_QUEUE
        )
        watcher_thread = threading.Thread(target=watcher.start, daemon=True)
        watcher_thread.start()

        # Create the file *after* the watcher is running, so detection (not
        # startup scanning) is what's being exercised.
        with open(
            join(raw_data_dir, f"events_data_{str(random.getrandbits(50))}.json"), "w"
        ) as f:
            f.write(get_mock_json())
            f.flush()

        sleep(5)  # give the watcher time to pick up the new file
예제 #4
0
def test_import_sources():
    """End-to-end test of extracting raw data and loading it into the database.

    It then verifies that the data exists and asserts its content.
    TemporaryDirectories and NamedTemporaryFiles simulate the environment. The
    import_sources function uses prefix to pick up files for loading, and
    therefore requires that the raw data is stored in the following structure:
    .
    └── raw_data directory
        └── subdir (e.g. organization_data)
            ├── foo.csv
            └── bar.csv

    All temporary resources are now managed with ``with`` blocks so they are
    released even when an assertion or the import itself fails — the original
    called ``close()`` / ``cleanup()`` manually, leaking them on any failure.
    """
    with TemporaryDirectory(dir="/tmp", prefix="raw_data") as raw_data_dir:
        test_data_dir = join(raw_data_dir, "test")
        mkdir(test_data_dir)
        # Raw-data file name must start with "test" so import_sources'
        # prefix-based discovery picks it up.
        with NamedTemporaryFile(dir=test_data_dir,
                                prefix="test",
                                suffix=".json") as raw_data_file:
            raw_data_file.write(get_mock_json().encode("utf-8"))
            raw_data_file.flush()

            table_md_yaml = get_mock_table_md_yaml()
            with TemporaryDirectory(dir="/tmp", prefix="table_metadata") as md_dir:
                with NamedTemporaryFile(dir=md_dir) as md:
                    md.write(table_md_yaml.encode("utf-8"))
                    md.flush()
                    import_sources(tables_md_dir=md_dir,
                                   raw_data_dir=raw_data_dir)

    expected = [
        ("foo", "created", datetime(2020, 12, 8, 20, 3, 16, 759617)),
        ("bar", "created", datetime(2014, 12, 8, 20, 3, 16, 759617)),
    ]
    pg_hook = PgHook()
    with pg_hook.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM test.test_table_delta;")
            assert cur.fetchall() == expected
예제 #5
0
def publish_for_consumption():
    """Publish every mock event to the RabbitMQ test queue, one message each.

    The mock JSON payload is decoded into a list of events; each event is
    re-serialized individually and handed to the producer.
    """
    publisher = Producer(host=RABBIT_MQ_HOST, queue=TEST_QUEUE)
    events = json.loads(get_mock_json())
    for event in events:
        publisher.publish_event(msg=json.dumps(event))