def parse_daily_areas_pdf(date, country, local_pdf_file):
    """Extract per-area case totals from a daily PDF report.

    Args:
        date: Value copied unchanged into the "Date" column of every row.
        country: "Northern Ireland" or "Wales"; any other value yields None.
        local_pdf_file: Passed straight to ``pdfplumber.open``.

    Returns:
        For Northern Ireland, a list of rows starting with the header
        ``["Date", "Country", "AreaCode", "Area", "TotalCases"]``, taken from
        the first table whose top-left cell is "Local Government District".
        For Wales, the result of ``convert_wales_la_to_hb`` applied to such a
        list built from the first page that yields a table. ``None`` when the
        country is not handled or no page produces a usable table.
    """
    if country == "Northern Ireland":
        return _parse_northern_ireland_areas_pdf(date, country, local_pdf_file)
    if country == "Wales":
        return _parse_wales_areas_pdf(date, country, local_pdf_file)
    return None


def _parse_northern_ireland_areas_pdf(date, country, local_pdf_file):
    """Return rows from the first "Local Government District" table, or None."""
    # The context manager closes the PDF on every exit path, including the
    # early return from inside the page loop.
    with pdfplumber.open(local_pdf_file) as pdf:
        for page in pdf.pages:
            try:
                table = page.extract_table()
                if table is None:
                    # extract_table() may hand back None instead of raising
                    # when the page has no table.
                    continue
                if table[0][0] != "Local Government District":
                    continue
                output_rows = [[
                    "Date", "Country", "AreaCode", "Area", "TotalCases"
                ]]
                for table_row in table[1:]:
                    if table_row[0].lower() == "total":
                        continue  # summary line, not an area
                    area = normalize_whitespace(titlecase(table_row[0]))
                    # Rename to the form used for the district code lookup.
                    area = area.replace("Ards and North Down",
                                        "North Down and Ards")
                    area_code = lookup_local_government_district_code(area)
                    cases = normalize_int(table_row[1])
                    output_rows.append(
                        [date, country, area_code, area, cases])
                return output_rows
            except IndexError:
                continue  # no table on page (or a row that is too short)
    return None


def _parse_wales_areas_pdf(date, country, local_pdf_file):
    """Return health-board rows from the first page yielding a table, or None."""
    with pdfplumber.open(local_pdf_file) as pdf:
        for page in pdf.pages:
            try:
                table = page.extract_table(
                    table_settings={
                        # use text alignment since the table doesn't have lines
                        "vertical_strategy": "text",
                        "horizontal_strategy": "text",
                    })
                if table is None:
                    # No table detected; iterating None would raise TypeError.
                    continue
                found_start = False
                output_rows = [[
                    "Date", "Country", "AreaCode", "Area", "TotalCases"
                ]]
                for table_row in table:
                    # Data rows begin at the row whose first cell starts with
                    # "Aneurin"; everything before it is ignored.
                    if (table_row[0] is not None
                            and table_row[0].startswith("Aneurin")):
                        found_start = True
                    if found_start:
                        area = (
                            normalize_whitespace(table_row[2])
                            .replace("Anglesey", "Isle of Anglesey")
                            # Fix ligatures: map the single code points
                            # U+FB00 (ff) and U+FB01 (fi) to plain letters.
                            # Escapes are used so the literals cannot be
                            # silently normalised into no-op replacements.
                            .replace("\ufb00", "ff")
                            .replace("\ufb01", "fi")
                        )
                        if area.startswith("Wales total"):
                            continue
                        area_code = lookup_local_authority_code(area)
                        cases = normalize_int(table_row[4])
                        output_rows.append(
                            [date, country, area_code, area, cases])
                    # "Resident outside Wales" is the last row of interest.
                    if (table_row[2] is not None
                            and normalize_whitespace(table_row[2])
                            == 'Resident outside Wales'):
                        break
                return convert_wales_la_to_hb(date, country, output_rows)
            except IndexError:
                continue  # no table on page (or a row that is too short)
    return None
def parse_daily_areas_json(date, country, json_data):
    """Turn the England per-area JSON payload into tabular rows.

    Only ``country == "England"`` is handled; anything else returns None.
    Each entry under ``json_data["utlas"]`` is keyed by area code and carries
    ``name.value`` and ``totalCases.value``. If the key disagrees with the
    code returned by ``lookup_local_authority_code`` for the area name, a
    message is printed and None is returned instead of partial rows.

    Returns:
        A list of rows headed by
        ``["Date", "Country", "AreaCode", "Area", "TotalCases"]``, or None.
    """
    if country != "England":
        return None

    rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    for code, entry in json_data["utlas"].items():
        name = entry["name"]["value"]
        total = normalize_int(entry["totalCases"]["value"])
        if lookup_local_authority_code(name) != code:
            # Abort the whole parse rather than emit rows with a wrong code.
            message = (
                "Area code mismatch for {}, JSON file gave {}, but lookup was {}"
                .format(name, code, lookup_local_authority_code(name)))
            print(message)
            return None
        rows.append([date, country, code, name, total])
    return rows
def test_lookup_local_authority_code():
    """A known area name maps to its code; an unknown name maps to ""."""
    expectations = (
        ("Powys", "W06000023"),
        ("Bogus", ""),
    )
    for area_name, expected_code in expectations:
        assert lookup_local_authority_code(area_name) == expected_code