Example 1
def test_returns_csv_row_as_dictionary():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket = conn.create_bucket(Bucket="test_bucket")
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["id", "message", "comment"],
        rows=[["123", "A message", "A comment"],
              ["321", "Another message", "Another comment"]],
    ))

    s3_manager = S3DataManager(conn)

    expected = [
        {
            "id": "123",
            "message": "A message",
            "comment": "A comment"
        },
        {
            "id": "321",
            "message": "Another message",
            "comment": "Another comment"
        },
    ]

    actual = s3_manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    assert list(actual) == expected
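Every snippet on this page exercises build_gzip_csv, a test helper whose implementation the page does not show. A minimal sketch, assuming it returns the gzip-compressed bytes of a CSV assembled from the given header and rows:

import csv
import gzip
from io import StringIO


def build_gzip_csv(header, rows):
    # Write the header and rows into an in-memory CSV, then gzip the text.
    buffer = StringIO()
    writer = csv.writer(buffer)
    writer.writerow(header)
    writer.writerows(rows)
    return gzip.compress(buffer.getvalue().encode("utf-8"))

Returning bytes is consistent with how the examples use the result: it is passed directly as the Body of an S3 put and as the contents of a pyfakefs file.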
Example 2
def test_returns_csv_row_as_dictionary(fs):
    file_path = "input.csv.gz"
    fs.create_file(
        file_path,
        contents=build_gzip_csv(
            header=["id", "message", "comment"],
            rows=[["123", "A message", "A comment"],
                  ["321", "Another message", "Another comment"]],
        ),
    )

    expected = [
        {
            "id": "123",
            "message": "A message",
            "comment": "A comment"
        },
        {
            "id": "321",
            "message": "Another message",
            "comment": "Another comment"
        },
    ]

    actual = read_gzip_csv_file(file_path)

    assert list(actual) == expected
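read_gzip_csv_file is likewise not shown (the fs argument is pyfakefs's fake-filesystem pytest fixture, which create_file writes into). A plausible sketch, assuming the function lazily yields each data row as a dict keyed by the header, which would be why the test materialises it with list(actual):

import csv
import gzip


def read_gzip_csv_file(file_path):
    # Stream the gzipped CSV and yield one dict per data row.
    with gzip.open(file_path, mode="rt", encoding="utf-8") as f:
        yield from csv.DictReader(f)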
Example 3
def test_loads_one_file(fs):
    file_path = "input.csv.gz"
    fs.create_file(
        file_path,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["A", "A message"], ["B", "B message"], ["C", "C message"]],
        ),
    )

    input_files = [file_path]

    expected = [
        {
            "id": "A",
            "message": "A message"
        },
        {
            "id": "B",
            "message": "B message"
        },
        {
            "id": "C",
            "message": "C message"
        },
    ]

    actual = read_gzip_csv_files(input_files)

    assert list(actual) == expected
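read_gzip_csv_files plausibly just chains the single-file reader over each path, a sketch under the same assumptions as above; reading files in input order would explain the row ordering expected here and in the two-file test of Example 5:

def read_gzip_csv_files(input_files):
    # Concatenate the rows of every file, preserving the input order.
    for file_path in input_files:
        yield from read_gzip_csv_file(file_path)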
Example 4
def test_returns_csv_row_as_dictionary():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket = conn.create_bucket(Bucket="test_bucket")
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    ))

    s3_manager = S3DataManager(conn)

    expected = [
        {
            "header1": "row1-col1",
            "header2": "row1-col2"
        },
        {
            "header1": "row2-col1",
            "header2": "row2-col2"
        },
    ]

    actual = s3_manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    assert list(actual) == expected
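S3DataManager itself is not reproduced on this page. A sketch consistent with these tests, assuming it wraps a boto3 S3 resource, splits the s3://bucket/key URI, and decompresses the object into dict rows. The log call mirrors the event asserted in Example 6 below; because read_gzip_csv is written as a generator, the log only fires once iteration starts, which matches that test calling list(gzip_csv) before checking the mock:

import csv
import gzip
import logging
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


class S3DataManager:
    def __init__(self, s3_resource):
        self._s3 = s3_resource

    def read_gzip_csv(self, object_uri):
        logger.info(
            f"Reading file from: {object_uri}",
            extra={"event": "READING_FILE_FROM_S3", "object_uri": object_uri},
        )
        parsed = urlparse(object_uri)  # netloc is the bucket, path is the key
        obj = self._s3.Object(parsed.netloc, parsed.path.lstrip("/"))
        body = obj.get()["Body"].read()
        yield from csv.DictReader(gzip.decompress(body).decode("utf-8").splitlines())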
Example 5
def test_loads_two_files(fs):
    file_path_one = "input1.csv.gz"
    file_path_two = "input2.csv.gz"
    fs.create_file(
        file_path_one,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["A", "A message"], ["B", "B message"], ["C", "C message"]],
        ),
    )

    fs.create_file(
        file_path_two,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["D", "D message"], ["E", "E message"]],
        ),
    )

    input_files = [file_path_one, file_path_two]

    expected = [
        {
            "id": "A",
            "message": "A message"
        },
        {
            "id": "B",
            "message": "B message"
        },
        {
            "id": "C",
            "message": "C message"
        },
        {
            "id": "D",
            "message": "D message"
        },
        {
            "id": "E",
            "message": "E message"
        },
    ]

    actual = read_gzip_csv_files(input_files)

    assert list(actual) == expected
Example 6
def test_will_log_reading_file_event():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    bucket = conn.create_bucket(Bucket=bucket_name)
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    ))

    s3_manager = S3DataManager(conn)
    object_uri = f"s3://{bucket_name}/test_object.csv.gz"

    with mock.patch.object(logger, "info") as mock_log_info:
        gzip_csv = s3_manager.read_gzip_csv(object_uri)
        list(gzip_csv)
        mock_log_info.assert_called_once_with(
            f"Reading file from: {object_uri}",
            extra={
                "event": "READING_FILE_FROM_S3",
                "object_uri": object_uri
            },
        )
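None of the boto3 examples configure credentials or a real endpoint, so they presumably run with moto's mocked S3 backend active; MOTO_MOCK_REGION is also never defined on this page. A sketch of one common setup, assuming a module-wide pytest fixture (moto 5.x exposes mock_aws; older releases use per-service decorators such as mock_s3) and a placeholder region value:

import pytest
from moto import mock_aws

MOTO_MOCK_REGION = "us-east-1"  # assumed value; the constant is not defined on this page


@pytest.fixture(autouse=True)
def mocked_aws():
    # Run every test in the module against moto's in-memory AWS backend.
    with mock_aws():
        yield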
Example 7
def _build_input_asid_csv():
    return BytesIO(build_gzip_csv(header=INPUT_HEADERS, rows=INPUT_ROWS))
        "123433357014", "C12345", "Test GP 3", "Supplier", "Other System",
        "Practice", "HP87 1PQ"
    ],
    [
        "000044357014", "D12345", "Test GP 4", "Supplier", "System",
        "Practice", "HQ87 1PQ"
    ],
    [
        "000055357014", "E12345", "Test GP 5", "Supplier", "System",
        "Practice", "HZ87 1PQ"
    ],
]

INPUT_ASID_CSV = BytesIO(
    build_gzip_csv(
        header=["ASID", "NACS", "OrgName", "MName", "PName", "OrgType", "PostCode"],
        rows=INPUT_ROWS,
    )
)

MOCK_PRACTICE_RESPONSE_CONTENT = (
    b'{"Organisations": [{"Name": "Test GP", "OrgId": "A12345"}, '
    b'{"Name": "Test GP 2", "OrgId": "B12345"}, '
    b'{"Name": "Test GP 3", "OrgId": "C12345"}]}')
MOCK_CCG_RESPONSE_CONTENT = (
    b'{"Organisations": [{"Name": "Test CCG", "OrgId": "12A"}, '
    b'{"Name": "Test CCG 2", "OrgId": "13B"}, '
    b'{"Name": "Test CCG 3", "OrgId": "14C"}]}')

EXPECTED_PRACTICES = [
    {
        "ods_code": "A12345",
        "name": "Test GP",
    },
    # ... remaining entries truncated in the source ...
]
Example 8
def _spine_csv_gz(rows):
    return build_gzip_csv(
        header=_SPINE_CSV_COLUMNS,
        rows=[[row.get(field, a_string()) for field in _SPINE_CSV_COLUMNS] for row in rows],
    )
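_spine_csv_gz backfills any column a row dict omits with a_string(), a builder the page does not define (nor does it define _SPINE_CSV_COLUMNS). A hypothetical stand-in, assuming all it needs to do is produce an arbitrary string:

import uuid


def a_string():
    # Hypothetical: any source of arbitrary unique strings would do here.
    return str(uuid.uuid4())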