コード例 #1
0
def _parse_northern_ireland_table(date, country, table):
    """Turn a Northern Ireland district table into output rows.

    Returns None when the first cell of the table is not the
    "Local Government District" header, meaning this is some other table.
    """
    if table[0][0] != "Local Government District":
        return None
    output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    for table_row in table[1:]:
        # The summary row is not an area.
        if table_row[0].lower() == "total":
            continue
        area = normalize_whitespace(titlecase(table_row[0]))
        # Rename the district before looking up its code.
        area = area.replace("Ards and North Down", "North Down and Ards")
        area_code = lookup_local_government_district_code(area)
        cases = normalize_int(table_row[1])
        output_rows.append([date, country, area_code, area, cases])
    return output_rows


def _parse_wales_table(date, country, table):
    """Turn a Wales table into rows and pass them to convert_wales_la_to_hb."""
    found_start = False
    output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    for table_row in table:
        # Data rows begin at the row whose first cell starts with "Aneurin".
        if table_row[0] is not None and table_row[0].startswith("Aneurin"):
            found_start = True
        if found_start:
            area = (
                normalize_whitespace(table_row[2])
                .replace("Anglesey", "Isle of Anglesey")
                # Expand typographic ligatures that PDF text extraction can
                # emit as single code points. Escapes are used so the
                # characters cannot be silently normalised away in source.
                .replace("\ufb00", "ff")
                .replace("\ufb01", "fi"))
            if area.startswith("Wales total"):
                continue
            area_code = lookup_local_authority_code(area)
            cases = normalize_int(table_row[4])
            output_rows.append([date, country, area_code, area, cases])
        # "Resident outside Wales" is the last row of interest.
        if table_row[2] is not None and normalize_whitespace(
                table_row[2]) == 'Resident outside Wales':
            break
    return convert_wales_la_to_hb(date, country, output_rows)


def parse_daily_areas_pdf(date, country, local_pdf_file):
    """Extract per-area case totals from a daily PDF report.

    Args:
        date: Value copied into the "Date" column of every output row.
        country: "Northern Ireland" or "Wales"; any other value is
            unsupported and yields None without opening the file.
        local_pdf_file: Path (or file object) handed to pdfplumber.open.

    Returns:
        For Northern Ireland, a list of rows starting with the header
        ["Date", "Country", "AreaCode", "Area", "TotalCases"], taken from the
        first page whose table starts with "Local Government District".
        For Wales, the result of convert_wales_la_to_hb applied to the rows
        of the first page that yields a table.
        None when no page yields a usable table or the country is unsupported.
    """
    if country == "Northern Ireland":
        # The context manager closes the PDF on every exit path.
        with pdfplumber.open(local_pdf_file) as pdf:
            for page in pdf.pages:
                try:
                    table = page.extract_table()
                    # extract_table() may return None when the page has no
                    # table; skip instead of failing on table[0].
                    if table is None:
                        continue
                    output_rows = _parse_northern_ireland_table(
                        date, country, table)
                    if output_rows is not None:
                        return output_rows
                except IndexError:
                    # Treated as "no table on page"; a row with too few
                    # cells also ends up here and skips the page.
                    pass
    elif country == "Wales":
        with pdfplumber.open(local_pdf_file) as pdf:
            for page in pdf.pages:
                try:
                    table = page.extract_table(
                        table_settings={
                            # use text alignment since the table doesn't
                            # have lines
                            "vertical_strategy": "text",
                            "horizontal_strategy": "text"
                        })
                    if table is None:
                        continue
                    return _parse_wales_table(date, country, table)
                except IndexError:
                    # Treated as "no table on page"; a row with too few
                    # cells also ends up here and skips the page.
                    pass
    return None
コード例 #2
0
ファイル: parsers.py プロジェクト: janason/covid-19-uk-data
def parse_daily_areas_json(date, country, json_data):
    """Build per-area case rows from the daily JSON payload.

    Only "England" is handled; any other country returns None. Each entry
    under json_data["utlas"] is keyed by area code and supplies the area name
    and total cases. If a key disagrees with the code returned by
    lookup_local_authority_code for that name, a message is printed and None
    is returned. Otherwise the result is a list of rows beginning with the
    header row.
    """
    if country != "England":
        return None

    rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    for code, entry in json_data["utlas"].items():
        name = entry["name"]["value"]
        total = normalize_int(entry["totalCases"]["value"])
        if code != lookup_local_authority_code(name):
            template = (
                "Area code mismatch for {}, JSON file gave {}, but lookup was {}"
            )
            print(template.format(name, code,
                                  lookup_local_authority_code(name)))
            return None
        rows.append([date, country, code, name, total])
    return rows
コード例 #3
0
def test_lookup_local_authority_code():
    """A known area name yields its code; an unknown name yields ""."""
    expected_codes = {
        "Powys": "W06000023",
        "Bogus": "",
    }
    for area_name, expected in expected_codes.items():
        assert lookup_local_authority_code(area_name) == expected