def test_returns_csv_row_as_dictionary():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket = conn.create_bucket(Bucket="test_bucket")
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["id", "message", "comment"],
        rows=[["123", "A message", "A comment"], ["321", "Another message", "Another comment"]],
    ))

    s3_manager = S3DataManager(conn)

    expected = [
        {
            "id": "123",
            "message": "A message",
            "comment": "A comment"
        },
        {
            "id": "321",
            "message": "Another message",
            "comment": "Another comment"
        },
    ]

    actual = s3_manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    assert list(actual) == expected
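# The tests in this section rely on a `build_gzip_csv` helper that turns a header
# and a list of rows into gzip-compressed CSV bytes. That helper is not shown
# here; the sketch below is an assumption of what it could look like, not the
# project's actual implementation.
import csv
import gzip
from io import BytesIO, StringIO


def build_gzip_csv(header, rows):
    # Write the header and rows to an in-memory CSV, then gzip the result.
    buffer = StringIO()
    writer = csv.writer(buffer)
    writer.writerow(header)
    writer.writerows(rows)

    gzipped = BytesIO()
    with gzip.GzipFile(fileobj=gzipped, mode="wb") as gzip_file:
        gzip_file.write(buffer.getvalue().encode("utf-8"))
    return gzipped.getvalue()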
def test_returns_csv_row_as_dictionary(fs):
    file_path = "input.csv.gz"
    fs.create_file(
        file_path,
        contents=build_gzip_csv(
            header=["id", "message", "comment"],
            rows=[["123", "A message", "A comment"], ["321", "Another message", "Another comment"]],
        ),
    )

    expected = [
        {
            "id": "123",
            "message": "A message",
            "comment": "A comment"
        },
        {
            "id": "321",
            "message": "Another message",
            "comment": "Another comment"
        },
    ]

    actual = read_gzip_csv_file(file_path)

    assert list(actual) == expected
def test_loads_one_file(fs):
    file_path = "input.csv.gz"
    fs.create_file(
        file_path,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["A", "A message"], ["B", "B message"], ["C", "C message"]],
        ),
    )

    input_files = [file_path]

    expected = [
        {
            "id": "A",
            "message": "A message"
        },
        {
            "id": "B",
            "message": "B message"
        },
        {
            "id": "C",
            "message": "C message"
        },
    ]

    actual = read_gzip_csv_files(input_files)

    assert list(actual) == expected
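# `read_gzip_csv_file` and `read_gzip_csv_files` are the functions under test in
# the two tests above. A plausible sketch consistent with those tests (lazily
# yielding each CSV row as a dictionary keyed by the header) is shown below; it
# is an assumption, not necessarily the code under test.
import csv
import gzip


def read_gzip_csv_file(file_path):
    # Open the gzipped CSV in text mode and yield each row as a dict.
    with gzip.open(file_path, mode="rt") as csv_file:
        yield from csv.DictReader(csv_file)


def read_gzip_csv_files(file_paths):
    # Chain the rows of several gzipped CSV files into a single iterator.
    for file_path in file_paths:
        yield from read_gzip_csv_file(file_path)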
def test_returns_csv_row_as_dictionary():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket = conn.create_bucket(Bucket="test_bucket")
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    ))

    s3_manager = S3DataManager(conn)

    expected = [
        {
            "header1": "row1-col1",
            "header2": "row1-col2"
        },
        {
            "header1": "row2-col1",
            "header2": "row2-col2"
        },
    ]

    actual = s3_manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    assert list(actual) == expected
def test_loads_two_files(fs):
    file_path_one = "input1.csv.gz"
    file_path_two = "input2.csv.gz"
    fs.create_file(
        file_path_one,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["A", "A message"], ["B", "B message"], ["C", "C message"]],
        ),
    )
    fs.create_file(
        file_path_two,
        contents=build_gzip_csv(
            header=["id", "message"],
            rows=[["D", "D message"], ["E", "E message"]],
        ),
    )

    input_files = [file_path_one, file_path_two]

    expected = [
        {
            "id": "A",
            "message": "A message"
        },
        {
            "id": "B",
            "message": "B message"
        },
        {
            "id": "C",
            "message": "C message"
        },
        {
            "id": "D",
            "message": "D message"
        },
        {
            "id": "E",
            "message": "E message"
        },
    ]

    actual = read_gzip_csv_files(input_files)

    assert list(actual) == expected
def test_will_log_reading_file_event():
    conn = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    bucket = conn.create_bucket(Bucket=bucket_name)
    s3_object = bucket.Object("test_object.csv.gz")
    s3_object.put(Body=build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    ))

    s3_manager = S3DataManager(conn)
    object_uri = f"s3://{bucket_name}/test_object.csv.gz"

    with mock.patch.object(logger, "info") as mock_log_info:
        gzip_csv = s3_manager.read_gzip_csv(object_uri)
        list(gzip_csv)

    mock_log_info.assert_called_once_with(
        f"Reading file from: {object_uri}",
        extra={
            "event": "READING_FILE_FROM_S3",
            "object_uri": object_uri
        },
    )
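# `S3DataManager.read_gzip_csv` is exercised by the S3 tests above. The sketch
# below is only an assumption of how it could be written so that those tests
# pass (moto-mocked boto3 resource, gzipped CSV object, and the logged
# READING_FILE_FROM_S3 event); the project's real class may differ.
import csv
import gzip
import logging
from io import BytesIO
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


class S3DataManager:
    def __init__(self, client):
        self._client = client

    def read_gzip_csv(self, object_uri):
        # Log the read event before streaming the object, as asserted in the test.
        logger.info(
            f"Reading file from: {object_uri}",
            extra={"event": "READING_FILE_FROM_S3", "object_uri": object_uri},
        )
        parsed = urlparse(object_uri)
        s3_object = self._client.Object(parsed.netloc, parsed.path.lstrip("/"))
        body = s3_object.get()["Body"].read()
        with gzip.open(BytesIO(body), mode="rt") as csv_file:
            yield from csv.DictReader(csv_file)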
def _build_input_asid_csv():
    return BytesIO(build_gzip_csv(header=INPUT_HEADERS, rows=INPUT_ROWS))
"123433357014", "C12345", "Test GP 3", "Supplier", "Other System", "Practice", "HP87 1PQ" ], [ "000044357014", "D12345", "Test GP 4", "Supplier", "System", "Practice", "HQ87 1PQ" ], [ "000055357014", "E12345", "Test GP 5", "Supplier", "System", "Practice", "HZ87 1PQ" ], ] INPUT_ASID_CSV = BytesIO( build_gzip_csv(header=[ "ASID", "NACS", "OrgName", "MName", "PName", "OrgType", "PostCode" ], rows=INPUT_ROWS)) MOCK_PRACTICE_RESPONSE_CONTENT = ( b'{"Organisations": [{"Name": "Test GP", "OrgId": "A12345"}, ' b'{"Name": "Test GP 2", "OrgId": "B12345"}, ' b'{"Name": "Test GP 3", "OrgId": "C12345"}]}') MOCK_CCG_RESPONSE_CONTENT = ( b'{"Organisations": [{"Name": "Test CCG", "OrgId": "12A"}, ' b'{"Name": "Test CCG 2", "OrgId": "13B"}, ' b'{"Name": "Test CCG 3", "OrgId": "14C"}]}') EXPECTED_PRACTICES = [ { "ods_code": "A12345", "name": "Test GP",
def _spine_csv_gz(rows):
    return build_gzip_csv(
        header=_SPINE_CSV_COLUMNS,
        rows=[[row.get(field, a_string()) for field in _SPINE_CSV_COLUMNS] for row in rows],
    )