def test_final_error_code_is_converted_to_column():
    """A transfer's final_error_code value is written into the final_error_code column."""
    transfer = build_transfer(final_error_code=5)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["final_error_code"]).to_pydict()

    assert actual == {"final_error_code": [5]}
def test_conversation_id_is_converted_to_column():
    """A transfer's conversation_id value is written into the conversation_id column."""
    transfer = build_transfer(conversation_id="123")

    table = convert_transfers_to_table([transfer])
    actual = table.select(["conversation_id"]).to_pydict()

    assert actual == {"conversation_id": ["123"]}
def test_sender_error_code_is_converted_to_column_when_missing():
    """A missing sender_error_code becomes a null entry in the sender_error_code column."""
    transfer = build_transfer(sender_error_code=None)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["sender_error_code"]).to_pydict()

    assert actual == {"sender_error_code": [None]}
def test_sla_duration_is_converted_to_column_when_missing():
    """A missing sla_duration becomes a null entry in the sla_duration column."""
    transfer = build_transfer(sla_duration=None)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["sla_duration"]).to_pydict()

    assert actual == {"sla_duration": [None]}
def test_sending_practice_asid_is_converted_to_column():
    """A transfer's sending_practice_asid is written into the sending_practice_asid column."""
    transfer = build_transfer(sending_practice_asid="001112345678")

    table = convert_transfers_to_table([transfer])
    actual = table.select(["sending_practice_asid"]).to_pydict()

    assert actual == {"sending_practice_asid": ["001112345678"]}
def test_date_completed_is_converted_to_column_when_missing():
    """A missing date_completed becomes a null entry in the date_completed column."""
    transfer = build_transfer(date_completed=None)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["date_completed"]).to_pydict()

    assert actual == {"date_completed": [None]}
def test_status_is_converted_to_column():
    """The TransferStatus enum is stored in the status column by its name string."""
    transfer = build_transfer(status=TransferStatus.INTEGRATED)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["status"]).to_pydict()

    assert actual == {"status": ["INTEGRATED"]}
def test_sla_duration_is_rounded_to_integer():
    """A sub-second sla_duration is rounded to a whole number of seconds.

    2 days + 1 hour + 3 minutes + 6 seconds = 176586 s; the extra millisecond
    must not survive into the column value.
    """
    duration = timedelta(days=2, hours=1, minutes=3, seconds=6, milliseconds=1)
    transfer = build_transfer(sla_duration=duration)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["sla_duration"]).to_pydict()

    assert actual == {"sla_duration": [176586]}
def test_intermediate_error_codes_is_converted_to_column_when_empty():
    """An empty intermediate_error_codes list is preserved as an empty list cell."""
    transfer = build_transfer(intermediate_error_codes=[])

    table = convert_transfers_to_table([transfer])
    actual = table.select(["intermediate_error_codes"]).to_pydict()

    assert actual == {"intermediate_error_codes": [[]]}
def test_date_completed_is_converted_to_column():
    """A transfer's date_completed datetime is written into the date_completed column."""
    completed_at = datetime(year=2020, month=7, day=28, hour=17)
    transfer = build_transfer(date_completed=completed_at)

    table = convert_transfers_to_table([transfer])
    actual = table.select(["date_completed"]).to_pydict()

    assert actual == {"date_completed": [completed_at]}
def test_converts_multiple_rows_into_table():
    """Each transfer in the input list becomes one row, preserving input order."""
    rows = [("123", 1), ("456", 2), ("789", 3)]
    transfers = [
        build_transfer(conversation_id=cid, final_error_code=code)
        for cid, code in rows
    ]

    table = convert_transfers_to_table(transfers)
    actual = table.select(["conversation_id", "final_error_code"]).to_pydict()

    assert actual == {
        "conversation_id": ["123", "456", "789"],
        "final_error_code": [1, 2, 3],
    }
def test_table_has_correct_schema():
    """The produced table carries the full expected column schema, in order."""
    # Field order matters: pyarrow schema equality is order-sensitive.
    expected_schema = pa.schema([
        ("conversation_id", pa.string()),
        ("sla_duration", pa.uint64()),
        ("requesting_practice_asid", pa.string()),
        ("sending_practice_asid", pa.string()),
        ("requesting_supplier", pa.string()),
        ("sending_supplier", pa.string()),
        ("sender_error_code", pa.int64()),
        ("final_error_code", pa.int64()),
        ("intermediate_error_codes", pa.list_(pa.int64())),
        ("status", pa.string()),
        ("date_requested", pa.timestamp("us")),
        ("date_completed", pa.timestamp("us")),
    ])

    table = convert_transfers_to_table([build_transfer()])

    assert table.schema == expected_schema
def main():
    """Entry point for the platform metrics calculator pipeline.

    Reads spine message CSVs and an organisation list, derives transfers and
    the practice/national metrics for the requested month, then writes the
    JSON outputs and a parquet transfer table either to a local directory or
    to an S3 bucket, depending on the parsed arguments.
    """
    args = parse_platform_metrics_calculator_pipeline_arguments(sys.argv[1:])
    time_range = _get_time_range(args.year, args.month)

    # Load the organisation list and parse it into structured metadata.
    organisation_data = read_json_file(args.organisation_list_file)
    organisation_metadata = construct_organisation_list_from_dict(data=organisation_data)

    # Parse transfers from the raw spine messages, restricted to the time range.
    spine_messages = _read_spine_csv_gz_files(args.input_files)
    transfers = list(parse_transfers_from_messages(spine_messages, time_range))

    practice_metrics_data = calculate_practice_metrics_data(
        transfers, organisation_metadata.practices, time_range
    )
    national_metrics_data = calculate_national_metrics_data(
        transfers=transfers, time_range=time_range
    )

    # NOTE(review): organisation_metadata is rebound here — the serialisable
    # form replaces the parsed list; confirm the original is not needed later.
    organisation_metadata = construct_organisation_metadata(organisation_metadata)
    transfer_table = convert_transfers_to_table(transfers)

    practice_metrics_file_name = "practiceMetrics.json"
    organisation_metadata_file_name = "organisationMetadata.json"
    national_metrics_file_name = "nationalMetrics.json"
    transfers_file_name = "transfers.parquet"

    if _is_outputting_to_file(args):
        # Local output: each file is prefixed with "<month>-<year>-".
        _write_data_platform_json_file(
            practice_metrics_data,
            f"{args.output_directory}/{args.month}-{args.year}-{practice_metrics_file_name}",
        )
        _write_data_platform_json_file(
            organisation_metadata,
            f"{args.output_directory}/{args.month}-{args.year}-{organisation_metadata_file_name}",
        )
        _write_data_platform_json_file(
            national_metrics_data,
            f"{args.output_directory}/{args.month}-{args.year}-{national_metrics_file_name}",
        )
        write_table(
            transfer_table,
            f"{args.output_directory}/{args.month}-{args.year}-{transfers_file_name}",
        )
    elif _is_outputting_to_s3(args):
        # S3 output: objects live under a versioned "v2/<year>/<month>/" prefix.
        s3 = boto3.resource("s3", endpoint_url=args.s3_endpoint_url)
        bucket_name = args.output_bucket
        version = "v2"
        s3_path = f"{version}/{args.year}/{args.month}"
        _upload_data_platform_json_object(
            practice_metrics_data,
            s3.Object(bucket_name, f"{s3_path}/{practice_metrics_file_name}"),
        )
        _upload_data_platform_json_object(
            organisation_metadata,
            s3.Object(bucket_name, f"{s3_path}/{organisation_metadata_file_name}"),
        )
        _upload_data_platform_json_object(
            national_metrics_data,
            s3.Object(bucket_name, f"{s3_path}/{national_metrics_file_name}"),
        )
        # The parquet writer takes a "bucket/key" path plus an S3 filesystem
        # pointed at the same endpoint override as the boto3 resource above.
        write_table(
            table=transfer_table,
            where=bucket_name + "/" + f"{s3_path}/{transfers_file_name}",
            filesystem=S3FileSystem(endpoint_override=args.s3_endpoint_url),
        )