예제 #1
0
파일: test_io.py 프로젝트: spotify/klio
def test_write_to_file():
    """Round-trip the text fixture through KlioReadFromText/KlioWriteToText.

    Checks that the lines written out equal the lines of the input fixture,
    and that the pipeline reports exactly two counters: a ``kmsg-read``
    counter and a ``kmsg-write`` counter, each committed once per input line.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (p
             | io_transforms.KlioReadFromText(file_path_read)
             | io_transforms.KlioWriteToText(tmp_path))

        # WriteToText will shard files, so gather the lines of every regular
        # file matching the prefix. The glob pattern also matches the
        # temporary directory itself, hence the isfile filter; sorting keeps
        # the iteration order deterministic.
        write_results = []
        for file_name in sorted(glob.glob(tmp_path + "*")):
            if os.path.isfile(file_name):
                with open(file_name, "rb") as f:
                    write_results.extend(f.readlines())

        with open(file_path_read, "rb") as fr:
            read_results = fr.readlines()
        assert write_results == read_results

    # The expected count is taken from the whole input file rather than from
    # whichever shard happened to be read last.
    exp_element_count = len(read_results)

    actual_counters = p.result.metrics().query()["counters"]
    assert 2 == len(actual_counters)

    # NOTE(review): the indexes assume the read counter is listed before the
    # write counter in the query result -- confirm ordering is guaranteed.
    read_counter = actual_counters[0]
    write_counter = actual_counters[1]

    assert exp_element_count == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name

    assert exp_element_count == write_counter.committed
    assert "KlioWriteToText" == write_counter.key.metric.namespace
    assert "kmsg-write" == write_counter.key.metric.name
예제 #2
0
def test_read_from_file():
    """Read the text fixture with KlioReadFromText and validate each message.

    Every element produced by the read is passed to
    ``assert_expected_klio_msg_from_file``; afterwards the transform's
    ``_REQUIRES_IO_READ_WRAP`` flag is checked to be ``False``.
    """
    fixture = os.path.join(FIXTURE_PATH, "elements_text_file.txt")
    transform = io_transforms.KlioReadFromText(fixture)

    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | "Read" >> transform
        _ = messages | beam.Map(assert_expected_klio_msg_from_file)

    assert transform._REQUIRES_IO_READ_WRAP is False
예제 #3
0
def test_write_to_file():
    """Round-trip the text fixture through KlioReadFromText/KlioWriteToText.

    The lines collected from the written output must equal the lines of the
    input fixture.
    """
    source_path = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as pipeline:
            messages = pipeline | io_transforms.KlioReadFromText(source_path)
            _ = messages | io_transforms.KlioWriteToText(tmp_path)

        # WriteToText will shard files, so walk every path matching the
        # prefix and skip anything that is not a regular file.
        written_lines = []
        for candidate in glob.glob(tmp_path + "*"):
            if not os.path.isfile(os.path.join(tmp_path, candidate)):
                continue
            with open(candidate, "rb") as shard:
                written_lines.extend(shard.readlines())

        with open(source_path, "rb") as source:
            expected_lines = source.readlines()

        assert written_lines == expected_lines
예제 #4
0
파일: test_io.py 프로젝트: spotify/klio
def test_read_from_file():
    """Read the text fixture with KlioReadFromText and check its read counter.

    Every element produced by the read is passed to
    ``assert_expected_klio_msg_from_file``. The pipeline must then report a
    single ``kmsg-read`` counter in the ``KlioReadFromText`` namespace whose
    committed value equals the number of lines in the fixture.
    """
    fixture = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with open(fixture, "r") as handle:
        expected_total = len(handle.readlines())

    transform = io_transforms.KlioReadFromText(fixture)
    with test_pipeline.TestPipeline() as pipeline:
        messages = pipeline | "Read" >> transform
        _ = messages | beam.Map(assert_expected_klio_msg_from_file)

    assert transform._reader._REQUIRES_IO_READ_WRAP is False

    counters = pipeline.result.metrics().query()["counters"]
    assert 1 == len(counters)

    read_counter = counters[0]
    assert expected_total == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name
예제 #5
0
def test_write_to_avro():
    """Write the text fixture to Avro, then read the Avro output back.

    The first pipeline reads the fixture with KlioReadFromText and writes it
    with KlioWriteToAvro using the temporary path as the file prefix. The
    test then checks that at least one regular file was produced under that
    prefix, and a second pipeline reads the output with KlioReadFromAvro and
    passes each element to ``assert_expected_klio_msg_from_avro_write``.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (p
             | io_transforms.KlioReadFromText(file_path_read)
             | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path))

        # The glob pattern also matches the temporary directory itself, so
        # keep only regular files; otherwise the "something was written"
        # check passes even when no output file exists.
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert len(written_files) > 0

        with test_pipeline.TestPipeline() as p2:
            (p2
             | io_transforms.KlioReadFromAvro(file_pattern=tmp_path + "*")
             | beam.Map(assert_expected_klio_msg_from_avro_write))
예제 #6
0
파일: test_io.py 프로젝트: spotify/klio
def test_write_to_avro():
    """Write the text fixture to Avro, read it back, and check the counters.

    The first pipeline reads the fixture with KlioReadFromText and writes it
    with KlioWriteToAvro using the temporary path as the file prefix. The
    test checks that at least one regular file was produced under that
    prefix, and a second pipeline reads the output with KlioReadFromAvro and
    passes each element to ``assert_expected_klio_msg_from_avro_write``.
    Finally the first pipeline must report exactly two counters, a
    ``kmsg-read`` and a ``kmsg-write`` counter, each committed once per line
    of the input fixture.
    """
    file_path_read = os.path.join(FIXTURE_PATH, "elements_text_file.txt")
    with open(file_path_read, "r") as f:
        exp_element_count = len(f.readlines())

    with tempfile.TemporaryDirectory() as tmp_path:
        with test_pipeline.TestPipeline() as p:
            (p
             | io_transforms.KlioReadFromText(file_path_read)
             | io_transforms.KlioWriteToAvro(file_path_prefix=tmp_path))

        # The glob pattern also matches the temporary directory itself, so
        # keep only regular files; otherwise the "something was written"
        # check passes even when no output file exists.
        written_files = [
            path for path in glob.glob(tmp_path + "*") if os.path.isfile(path)
        ]
        assert len(written_files) > 0

        with test_pipeline.TestPipeline() as p2:
            (p2
             | io_transforms.KlioReadFromAvro(file_pattern=tmp_path + "*")
             | beam.Map(assert_expected_klio_msg_from_avro_write))

    actual_counters = p.result.metrics().query()["counters"]
    assert 2 == len(actual_counters)

    # NOTE(review): the indexes assume the read counter is listed before the
    # write counter in the query result -- confirm ordering is guaranteed.
    read_counter = actual_counters[0]
    write_counter = actual_counters[1]

    assert exp_element_count == read_counter.committed
    assert "KlioReadFromText" == read_counter.key.metric.namespace
    assert "kmsg-read" == read_counter.key.metric.name

    assert exp_element_count == write_counter.committed
    assert "KlioWriteToAvro" == write_counter.key.metric.namespace
    assert "kmsg-write" == write_counter.key.metric.name