Esempio n. 1
0
def __parse_df_from_2021_01_18(source: Source,
                               df: pd.DataFrame) -> Iterable[Vaccinated]:
    """Yield per-location dose counts from a sheet in the 2021-01-18 layout.

    The "Unnamed: 0" column is dropped. Rows are then skipped up to and
    including the header row, i.e. the first row whose first cell reads
    "region of residence", optionally prefixed with "nhs "
    (case-insensitive). Every following row is parsed until a row whose
    first cell equals "Data quality notes:" is reached. If no header row is
    found, nothing is yielded.

    Each parsed row must hold exactly three non-NaN values after its first
    cell; they are read, in order, as the first-dose count, the second-dose
    count and the cumulative count. A first cell of "Total" (optionally
    followed by one digit) or "England" maps to ALL_LOCATIONS; any other
    value is wrapped in Location.

    Args:
        source: Passed through unchanged to every Vaccinated that is yielded.
        df: The sheet to parse; must contain an "Unnamed: 0" column.

    Yields:
        Three Vaccinated records per parsed row, for Dose.DOSE_1,
        Dose.DOSE_2 and Dose.ALL, in that order.

    Raises:
        ValueError: If a parsed row does not hold exactly three non-NaN
            values after the location cell.
    """
    rows = df.drop("Unnamed: 0", axis=1).iterrows()

    # Both loops share one iterator: this one consumes everything up to and
    # including the header row, so the next loop starts on the first data row.
    for _, (title, *_rest) in rows:
        if isinstance(title, str) and re.match(
                r"^(nhs )?region of residence$", title.lower()):
            break

    for _, (location, *data) in rows:
        # Spacer rows have a NaN first cell. isinstance (rather than an exact
        # type comparison) also recognises float subclasses such as
        # numpy.float64.
        if isinstance(location, float) and math.isnan(location):
            continue
        if location == "Data quality notes:":
            break

        # Unpacking enforces that exactly three numeric cells are present.
        dose_1, dose_2, cumulative = (d for d in data if not math.isnan(d))

        if re.match(r"^(Total\d?|England)$", location):
            location = ALL_LOCATIONS
        else:
            location = Location(location)

        for dose, count in ((Dose.DOSE_1, dose_1),
                            (Dose.DOSE_2, dose_2),
                            (Dose.ALL, cumulative)):
            yield Vaccinated(source, count,
                             Slice(location=location, dose=dose))
Esempio n. 2
0
def vaccinated_to_df(vaccinated: List[Vaccinated]) -> pd.DataFrame:
    """Flatten Vaccinated records into a DataFrame of plain columns.

    A DataFrame is built from ``vaccinated``; its "source" and "slice" cells
    are indexed by key, so they are expected to be mappings (presumably the
    result of pandas expanding dataclass instances - TODO confirm). The
    nested values are lifted into top-level "data_date", "real_date", "dose",
    "group" and "location" columns, the two nested columns are removed, and
    "vaccinated" is cast to int.

    Args:
        vaccinated: Records to tabulate.

    Returns:
        The flattened DataFrame.
    """
    def dose_csv(slice_):
        return slice_["dose"].csv_str()

    def group_csv(slice_):
        return Group(**slice_["group"]).csv_str()

    def location_csv(slice_):
        return Location(**slice_["location"]).csv_str()

    # (new column, nested column it is read from, extractor), listed in the
    # order the new columns are appended to the frame.
    derived_columns = (
        ("data_date", "source", lambda src: src["data_date"]),
        ("real_date", "source", lambda src: src["real_date"]),
        ("dose", "slice", dose_csv),
        ("group", "slice", group_csv),
        ("location", "slice", location_csv),
    )

    frame = pd.DataFrame(vaccinated)
    for column, origin, extract in derived_columns:
        frame[column] = frame[origin].apply(extract)

    # The nested columns are no longer needed once their fields are lifted.
    frame = frame.drop(["source", "slice"], axis=1)
    frame["vaccinated"] = frame["vaccinated"].astype(int)
    return frame
Esempio n. 3
0
def __parse_df_weekly(source: Source,
                      df: pd.DataFrame) -> Iterable[Vaccinated]:
    """Yield vaccination counts per (dose, group, location) from a weekly sheet.

    The sheet is cut down to the region that starts at the single cell
    reading "region of residence" or "nhs region of residence"
    (case-insensitive) and ends just above the single cell reading
    "data quality notes:". Columns and rows that are entirely NaN are then
    removed. In what remains:

    * row 0 holds the dose headers (a blank header cell inherits the nearest
      header to its left),
    * row 1 holds the group headers,
    * column 0 holds the locations,
    * every other cell from row 2 on holds a count.

    Columns whose dose header contains one of the ignored fragments
    (population estimates, precalculated percentages, per-dose totals) are
    skipped, as are columns whose group header contains "percent of all".
    Counts whose cells resolve to the same Slice are summed.

    Args:
        source: Passed through unchanged to every Vaccinated that is yielded.
        df: The sheet to parse.

    Yields:
        One Vaccinated per distinct Slice, in order of first appearance.

    Raises:
        ValueError: If the start or end marker is not found exactly once.
        AssertionError: If a dose header is missing or not recognised.
    """
    def is_start(cell) -> bool:
        return isinstance(cell, str) and cell.lower() in (
            "region of residence", "nhs region of residence")

    def is_end(cell) -> bool:
        return isinstance(cell, str) and cell.lower() == "data quality notes:"

    def is_nan(cell) -> bool:
        return isinstance(cell, float) and math.isnan(cell)

    # Dose-header fragments marking columns that carry no vaccination counts.
    # Built once here rather than once per cell.
    ignored_dose_fragments = (
        # Ignore population estimates.
        "population estimates",
        # Ignore precalculated %
        "% who have had at least 1 dose",
        "% who have had both doses",
        # Ignore dose summaries.
        "total 1st doses",
        "total 2nd doses",
    )

    a = df.to_numpy()

    # Trim. The single-element unpacking requires each marker to occur
    # exactly once in the sheet.
    (start_y, ), (start_x, ) = np.where(np.vectorize(is_start)(a))
    (end_y, ), (_, ) = np.where(np.vectorize(is_end)(a))
    a = a[start_y:end_y, start_x:]

    # Remove NaNs. The mask is computed once; dropping all-NaN columns does
    # not change which rows are all-NaN, so it is reused for the row pass.
    is_nans = np.vectorize(is_nan)(a)
    a = a[:, ~np.all(is_nans, axis=0)]
    a = a[~np.all(is_nans, axis=1), :]

    # Fill in dose row: a blank cell takes the last header seen to its left.
    filled_in_doses = []
    current_dose = None
    for header in a[0, 1:]:
        if not is_nan(header):
            current_dose = header
        filled_in_doses.append(current_dose)
    a[0, 1:] = filled_in_doses

    vaccinated_by_slice: DefaultDict[Slice, int] = defaultdict(int)

    for y in range(2, a.shape[0]):
        for x in range(1, a.shape[1]):
            dose = a[0, x]
            group = a[1, x]
            location = a[y, 0]
            vaccinated = a[y, x]

            # A leading blank header leaves the dose as None; report it the
            # same way as any other unrecognised header instead of failing
            # on ``.lower()``.
            if not isinstance(dose, str):
                raise AssertionError(
                    f"Unexpected dose {dose} in source {source}")
            dose_lower = dose.lower()

            if any(fragment in dose_lower
                   for fragment in ignored_dose_fragments):
                continue
            if isinstance(group, str) and "percent of all" in group.lower():
                # Ignore percentage reports.
                continue

            is_dose_and_group_all = (
                "cumulative total doses to date" in dose_lower)

            if dose in ["1st dose", "1st dose5"]:
                dose = Dose.DOSE_1
            elif dose in ["2nd dose", "2nd dose5"]:
                dose = Dose.DOSE_2
            elif is_dose_and_group_all:
                dose = Dose.ALL
            else:
                raise AssertionError(
                    f"Unexpected dose {dose} in source {source}")

            if is_dose_and_group_all:
                group = ALL_AGES
            else:
                group = Group.from_csv_str(group)

            if re.match(r"^Total\d?$", location):
                location = ALL_LOCATIONS
            else:
                location = Location(location)

            vaccinated_by_slice[Slice(dose, group, location)] += vaccinated

    for slice_, vaccinated in vaccinated_by_slice.items():
        yield Vaccinated(source, vaccinated, slice_)