예제 #1
0
    def parse_dataframes(
        self, dataframes: Dict[Any, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Malaysia's case-level tables into our time-series format."""
        # The "geo" table is a lookup table, not case data
        tables = [df for name, df in dataframes.items() if name != "geo"]
        # Plain string: the original used an f-string with no placeholders (F541)
        column_adapter = dict(_column_adapter, state="idxs", date="date_new_confirmed")
        data = table_rename(concat(tables), column_adapter=column_adapter, drop=True)

        # Correct data types where necessary
        data["idxs"] = data["idxs"].astype(str)
        # Negative ages appear to be "unknown" sentinels — map them to None
        data["age"] = data["age"].apply(lambda x: None if x < 0 else x)
        data["sex"] = data["sex"].apply({0: "female", 1: "male"}.get)

        # Convert to our preferred time series format
        data = convert_cases_to_time_series(data, ["idxs"])

        # Geo name lookup: join case records against the geo lookup table by idxs
        geo_col_adapter = {"state": "subregion1_name", "district": "subregion2_name"}
        geo = table_rename(dataframes["geo"], geo_col_adapter, drop=False)
        geo["idxs"] = geo["idxs"].astype(str)
        # Strip the federal territory ("W.P.") prefix so names match our metadata
        geo["subregion1_name"] = geo["subregion1_name"].str.replace("W.P. ", "")
        geo = geo.groupby(["subregion1_name", "idxs"]).first().reset_index()
        data = table_merge([data, geo], on=["idxs"], how="inner")

        # Since only the cases have district level data, ignore it
        data["country_code"] = "MY"
        data["subregion2_name"] = None
        return data
    def parse_dataframes(
        self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Canada's confirmed and deceased tables, adding a province-level aggregate."""
        data = table_multimerge(
            [
                table_rename(dataframes["confirmed"], _column_adapter, drop=True),
                table_rename(dataframes["deceased"], _column_adapter, drop=True),
            ],
            how="outer",
        )

        # Province names are sometimes codes (but not always compliant with ISO codes)
        data["subregion1_code"] = data["subregion1_name"].apply(_province_map.get)
        data.drop(columns=["subregion1_name"], inplace=True)

        # Convert date to ISO format
        data["date"] = data["date"].apply(lambda x: datetime_isoformat(x, "%d-%m-%Y"))

        # Aggregate subregion1 level
        l1_index = ["date", "subregion1_code"]
        l1 = data.drop(columns=["match_string"]).groupby(l1_index).sum().reset_index()

        # Make sure all records have the country code and subregion2_name
        l1["country_code"] = "CA"
        l1["subregion2_name"] = None
        data["country_code"] = "CA"
        # Use None (not "") so both tables represent "no subregion2" the same way,
        # matching the stated intent of this section
        data["subregion2_name"] = None

        # Remove bogus data
        data = data[data["match_string"] != "Not Reported"]

        # Output the results
        return concat([l1, data])
예제 #3
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Build San Francisco's hospitalization time series from the DPH table."""
        records = dataframes[0]

        # Slice the table by DPH category and rename to our column schema
        def category_subset(category: str, value_column: str) -> DataFrame:
            return table_rename(
                records.loc[records["DPHCategory"] == category],
                {"reportDate": "date", "PatientCount": value_column},
                drop=True,
            )

        icu = category_subset("ICU", "current_intensive_care")
        hosp = category_subset("Med/Surg", "current_hospitalized")

        # Join both series on date, normalize the date and attach the key
        data = icu.merge(hosp, on="date")
        data["date"] = data["date"].apply(
            lambda x: datetime_isoformat(x, "%Y/%m/%d"))
        data["key"] = "US_CA_SFO"
        return data
예제 #4
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse Chile's region-level confirmed, deceased and tested tables."""
        # Each input table contributes one statistic column.
        # NOTE: the "deceased" file name indicates cumulative counts, but they are not.
        adapters = [
            ("confirmed", {"Fecha": "date", "Total": "new_confirmed", "Region": "match_string"}),
            ("deceased", {"Fecha": "date", "Total": "total_deceased", "Region": "match_string"}),
            ("tested", {"Fecha": "date", "numero": "new_tested", "Region": "match_string"}),
        ]
        data = table_merge(
            [table_rename(dataframes[name], adapter, drop=True) for name, adapter in adapters],
            how="outer",
        )

        # Convert date to ISO format
        data["date"] = data["date"].astype(str)

        # Extract cities from the regions
        city = _extract_cities(data)

        # Make sure all records have country code and no subregion code or key
        data["country_code"] = "CL"
        data["key"] = None
        data["subregion2_code"] = None

        # Country is reported as "Total"
        data.loc[data["match_string"] == "Total", "key"] = "CL"

        # Drop bogus records from the data
        data.dropna(subset=["date", "match_string"], inplace=True)

        return concat([data, city])
예제 #5
0
    def parse_dataframes(
        self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Chile's comuna-level confirmed and deceased tables."""
        data = table_multimerge(
            [
                table_rename(
                    dataframes["confirmed"],
                    {
                        "Fecha": "date",
                        "Casos confirmados": "new_confirmed",
                        "Codigo region": "subregion1_code",
                        "Codigo comuna": "subregion2_code",
                    },
                    drop=True,
                ),
                table_rename(
                    dataframes["deceased"],
                    {
                        "Fecha": "date",
                        "Casos fallecidos": "total_deceased",
                        "Codigo region": "subregion1_code",
                        "Codigo comuna": "subregion2_code",
                    },
                    drop=True,
                ),
            ],
            how="outer",
        )

        # Convert date to ISO format
        data["date"] = data["date"].astype(str)

        # Parse region codes as fixed-width strings (2 chars for regions,
        # 5 for comunas — presumably zero-padded by numeric_code_as_string)
        data["subregion1_code"] = data["subregion1_code"].apply(
            lambda x: numeric_code_as_string(x, 2)
        )
        data["subregion2_code"] = data["subregion2_code"].apply(
            lambda x: numeric_code_as_string(x, 5)
        )

        # Use proper ISO codes for the subregion1 level
        data["subregion1_code"] = data["subregion1_code"].apply(_SUBREGION1_CODE_MAP.get)

        # Extract cities from the municipalities
        city = _extract_cities(data)

        # We can build the key for the data directly from the subregion codes.
        # Records missing either code produce a null key here and are removed
        # by the dropna below, so the order of these two statements matters.
        data["key"] = "CL_" + data["subregion1_code"] + "_" + data["subregion2_code"]

        # Drop bogus records from the data
        data.dropna(subset=["subregion1_code", "subregion2_code"], inplace=True)

        return concat([data, city])
예제 #6
0
    def parse_dataframes(
        self, dataframes: Dict[Any, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Switzerland's vaccination tables into region-level records."""
        doses = table_rename(
            dataframes['vaccDosesAdministered'],
            {
                "date": "date",
                "geoRegion": "subregion1_code",
                "sumTotal": "total_vaccine_doses_administered",
            },
            drop=True,
        )
        fully = table_rename(
            dataframes['fullyVaccPersons'],
            {
                "date": "date",
                "geoRegion": "subregion1_code",
                "sumTotal": "total_persons_fully_vaccinated",
            },
            drop=True,
        )
        data = table_merge([doses, fully], on=["date", "subregion1_code"], how="outer")

        # Assuming fully and partially vaccinated persons have 2 and 1 doses respectively,
        # total_persons_partially_vaccinated = total_vaccine_doses_administered - 2 * total_persons_fully_vaccinated
        # Therefore, total_persons_vaccinated = total_persons_partially_vaccinated + total_persons_fully_vaccinated
        # = total_vaccine_doses_administered - total_persons_fully_vaccinated
        data["total_persons_vaccinated"] = (
            data["total_vaccine_doses_administered"] - data["total_persons_fully_vaccinated"]
        )

        # Make sure all records have the country code and match subregion1 only
        data["key"] = None
        data["country_code"] = "CH"
        data["subregion2_code"] = None
        data["locality_code"] = None

        # Country-level records have a known key
        data.loc[data["subregion1_code"] == "CH", "key"] = "CH"

        # Principality of Liechtenstein is not in CH but is in the data as FL
        data.loc[data["subregion1_code"] == "FL", "key"] = "LI"

        # Output the results
        return data
예제 #7
0
    def parse_dataframes(self, dataframes: Dict[Any, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Map England NHS-region records onto GB region keys."""
        data = table_rename(dataframes[0], _column_adapter, drop=True)

        # Convert data to int type (every column after the first two holds counts)
        for col in data.columns[2:]:
            data[col] = data[col].apply(safe_int_cast)

        # Match data with GB subregions
        data["key"] = None
        data["country_code"] = "GB"
        data["subregion2_code"] = None
        data["locality_code"] = None

        # Map each reported location onto its region key; "Total" is all of England
        data.loc[data["_location"] == "Total", "key"] = "GB_ENG"
        data.loc[data["_location"] == "East Of England", "key"] = "GB_UKH"
        data.loc[data["_location"] == "London", "key"] = "GB_UKI"
        # "Midlands" and "North East And Yorkshire" have no single matching key,
        # so they are intentionally left unmapped (key stays None)
        # data.loc[data["_location"] == "Midlands", "key"] = ""
        # data.loc[data["_location"] == "North East And Yorkshire", "key"] = ""
        data.loc[data["_location"] == "North West", "key"] = "GB_UKD"
        data.loc[data["_location"] == "South East", "key"] = "GB_UKJ"
        data.loc[data["_location"] == "South West", "key"] = "GB_UKK"

        return data
예제 #8
0
    def parse_dataframes(self, dataframes: Dict[Any, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Split Switzerland's vaccination table by statistic and merge wide."""
        data = table_rename(
            dataframes["fullyVaccPersons"],
            {
                "date": "date",
                "geoRegion": "subregion1_code",
                "type": "_statistic",
                "entries": "_new_count",
                "sumTotal": "_total_count",
            },
            drop=True,
        )

        # Combine all the different variable indicators: each statistic label
        # becomes its own pair of new_*/total_* columns
        statistics = {
            "COVID19AtLeastOneDosePersons": "persons_vaccinated",
            "COVID19FullyVaccPersons": "persons_fully_vaccinated",
        }
        tables = []
        for statistic, variable in statistics.items():
            subset = data.loc[data["_statistic"] == statistic]
            subset = subset.drop(columns=["_statistic"])
            subset = subset.rename(columns={
                "_new_count": f"new_{variable}",
                "_total_count": f"total_{variable}",
            })
            tables.append(subset)
        data = table_merge(tables, on=["date", "subregion1_code"], how="outer")

        # Output the results
        return _output_ch_data(data)
예제 #9
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse the Swiss country-level sheet whose header sits at row index 5."""
        # Rows 0-4 are preamble; row 5 holds column names, data starts at row 6
        table = dataframes[0]
        header = table.iloc[5]
        body = table.iloc[6:]
        body.columns = header
        data = table_rename(
            body,
            {
                "Datum": "date",
                "Fallzahlen pro Tag": "new_confirmed",
                "Fallzahlen pro Tag, kumuliert": "total_confirmed",
                "Hospitalisationen pro Tag": "new_hospitalized",
                "Hospitalisationen pro Tag, Kumuliert": "total_hospitalized",
                "Todesfälle pro Tag": "new_deceased",
                "Todesfälle pro Tag, kumuliert": "total_deceased",
            },
            drop=True,
        )

        # Parse date into ISO format by keeping only the first 10 characters
        data["date"] = data["date"].apply(lambda x: str(x)[:10])

        # The key is just the country code
        data["key"] = "CH"

        return data
예제 #10
0
    def parse(self, sources: Dict[str, str], aux: Dict[str, DataFrame],
              **parse_opts) -> DataFrame:
        """Download UTLA-level case counts from the UK government API."""
        # Request the raw columns as-is; they are renamed after aggregation
        structure = {
            "date": "date",
            "areaCode": "areaCode",
            "newCasesBySpecimenDate": "newCasesBySpecimenDate",
            "cumCasesBySpecimenDate": "cumCasesBySpecimenDate",
        }
        data = Cov19API(filters=["areaType=utla"], structure=structure).get_dataframe()

        # Several area codes map onto the same code, so aggregate after mapping
        data.areaCode = data.areaCode.apply(_apply_area_code_map)
        data = data.groupby(["date", "areaCode"], as_index=False).sum()

        data = table_rename(
            data,
            {
                "areaCode": "subregion2_code",
                "newCasesBySpecimenDate": "new_confirmed",
                "cumCasesBySpecimenDate": "total_confirmed",
                "date": "date",
            },
            drop=True,
        )

        # Normalize the date to ISO format
        data.date = data.date.apply(
            lambda x: datetime_isoformat(x, "%Y-%m-%d"))

        return data
예제 #11
0
    def parse_dataframes(self, dataframes: List[DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse the WHO daily report into per-country records."""
        # Input columns: Date_reported, Country_code, Country, WHO_region,
        # New_cases, Cumulative_cases, New_deaths, Cumulative_deaths
        data = table_rename(
            dataframes[0],
            {
                "Date_reported": "date",
                "Country_code": "key",
                "New_cases": "new_confirmed",
                "Cumulative_cases": "total_confirmed",
                "New_deaths": "new_deceased",
                # Fixed typo: was "total_deceases", which does not match the
                # "total_deceased" column name used by the rest of the pipeline
                "Cumulative_deaths": "total_deceased",
            },
            drop=True,
        )

        # Convert date to ISO format (keep only the YYYY-MM-DD prefix)
        data["date"] = data["date"].astype(str).apply(lambda x: x[:10])

        # Adjust the date of the records to match local reporting
        data = _adjust_date(data, aux["metadata"])

        # Remove bogus entries
        data = data[data["key"].str.strip() != ""]

        # We consider some countries as subregions of other countries
        data.loc[data["key"] == "BL", "key"] = "FR_BL"
        data.loc[data["key"] == "GP", "key"] = "FR_GUA"
        data.loc[data["key"] == "MF", "key"] = "FR_MF"
        data.loc[data["key"] == "PM", "key"] = "FR_PM"

        return data
예제 #12
0
    def parse(self, sources: Dict[str, str], aux: Dict[str, DataFrame],
              **parse_opts) -> DataFrame:
        """Parse Finland's case records from the ArcGIS-style JSON payload."""
        with open(sources[0], "r") as fd:
            features = json.load(fd)["features"]
        data = table_rename(
            DataFrame.from_records([feature["attributes"] for feature in features]),
            _column_adapter,
            remove_regex=r"[^a-z\s\d]",
            drop=True,
        )

        # Add the age bins: fixed 10-year buckets, with 80+ open-ended
        age_bins = ["0-9", "10-19", "20-29", "30-39", "40-49",
                    "50-59", "60-69", "70-79", "80-"]
        for idx, age_bin in enumerate(age_bins):
            data[f"age_bin_{idx:02d}"] = age_bin

        # Convert date to ISO format; timestamps are milliseconds since epoch
        data = data.dropna(subset=["date"])
        data.date = data.date.apply(lambda x: datetime.datetime.fromtimestamp(
            x // 1000).date().isoformat())

        data["key"] = "FI"
        return data
예제 #13
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Convert Czech case-level records into an aggregated time series.

        The statistic being parsed is selected via parse_opts["column_name"].
        """
        # Rename appropriate columns; the date column is suffixed with the
        # statistic name so downstream conversion can attribute it correctly
        col = parse_opts["column_name"]
        cases = table_rename(dataframes[0], _column_adapter)
        cases = cases.rename(columns={"date": f"date_{col}"})
        cases = _parse_region_codes(cases).dropna(subset=[f"date_{col}"])

        # Rename the sex values ("Z" is presumably "žena", i.e. female — Czech source)
        cases["sex"] = cases["sex"].apply({"M": "male", "Z": "female"}.get)

        # Go from individual case records to key-grouped records in a flat table
        data = convert_cases_to_time_series(
            cases, index_columns=["subregion1_code", "subregion2_code"])

        # Make sure the region codes are strings before parsing them
        data["subregion1_code"] = data["subregion1_code"].astype(str)
        data["subregion2_code"] = data["subregion2_code"].astype(str)

        # Aggregate L2 + L3 data
        data = _aggregate_regions(data,
                                  ["date", "subregion1_code", "age", "sex"])

        # Remove bogus values ("99" looks like an unknown-region placeholder — verify)
        data = data[data["key"] != "CZ_99"]
        data = data[data["key"] != "CZ_99_99Y"]

        # Convert all dates to ISO format; inputs mix DD.MM.YYYY and YYYY-MM-DD
        data["date"] = (
            data["date"].astype(str).apply(lambda x: datetime_isoformat(
                x, "%d.%m.%Y" if "." in x else "%Y-%m-%d")))

        return data
예제 #14
0
    def parse_dataframes(
        self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse the CDC national vaccination trends table."""
        column_adapter = {
            "Location": "key",
            "Date": "date",
            "Date_Type": "_date_type",
            "Administered_Daily": "new_vaccine_doses_administered",
            "Administered_Cumulative": "total_vaccine_doses_administered",
            "Admin_Dose_1_Daily": "new_persons_vaccinated",
            "Admin_Dose_1_Cumulative": "total_persons_vaccinated",
            "Admin_Dose_2_Daily": "new_persons_fully_vaccinated",
            "Admin_Dose_2_Cumulative": "total_persons_fully_vaccinated",
        }
        data = table_rename(
            dataframes["vaccination_trends_data"],
            column_adapter,
            drop=True,
            remove_regex=r"[^0-9a-z\s]",
        )

        # Keep only country-level records with the "Admin" date type, sorted by date
        country_mask = data["key"] == "US"
        admin_mask = data["_date_type"] == "Admin"
        data = data[country_mask & admin_mask].sort_values("date")
        return data.drop(columns=["_date_type"])
예제 #15
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse India's district-level table into match-string records."""
        # NOTE: a list of state columns was previously computed here but never
        # used; that dead code has been removed.
        data = table_rename(
            dataframes[0],
            {
                "Confirmed": "total_confirmed",
                "Deceased": "total_deceased",
                "Recovered": "total_recovered",
                "Tested": "total_tested",
                "Date": "date",
                "District": "match_string",
                "State": "subregion1_name",
            },
            drop=True,
        )
        # Normalize district names before matching
        data.match_string = data.match_string.apply(self._replace_subregion)

        # Remove the districts known not to belong in the output
        data = data[~data.match_string.isin(L3_INDIA_REMOVE_SET)]

        data["country_code"] = "IN"

        return data
예제 #16
0
def _get_country(url_tpl: str):
    """Fetch country-level data for France and tag it with the FR key."""
    data = read_file(url_tpl.format("FRA"))
    data["key"] = "FR"
    # For country level, there is no need to estimate confirmed from tests,
    # so remove that mapping from a copy of the shared column adapter
    column_adapter = dict(_column_adapter)
    del column_adapter["testsPositifs"]
    return table_rename(data, column_adapter, drop=True)
예제 #17
0
    def parse_dataframes(
        self, dataframes: Dict[Any, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Colombia's vaccination table and clean up region names."""
        data = table_rename(dataframes[0], _column_adapter, drop=True)

        # Drop records missing dose counts and coerce the rest to int
        int_cols = ["total_vaccine_doses_administered"]
        data = data.dropna(subset=int_cols)
        for col in int_cols:
            data[col] = data[col].apply(safe_int_cast)

        # Fix typos and merge subregions manually
        data["match_string"] = data["match_string_2"].fillna(data["match_string_1"])
        name_fixes = {
            "Amazionas": "Amazonas",
            "Baranquilla": "Atlántico",
            "Benaventura": "Valle del Cauca",
            "Cartagena": "Bolivar",
            "Santa Marta": "Magdalena",
        }
        for typo, replacement in name_fixes.items():
            data["match_string"] = data["match_string"].str.replace(typo, replacement)

        # Match string does not follow strict hierarchy
        data = data.groupby(["date", "match_string"]).sum().reset_index()

        # Make sure only subregion1 level is matched
        data["country_code"] = "CO"
        data["subregion2_code"] = None
        data["locality_code"] = None

        return data
예제 #18
0
    def parse_dataframes(
        self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse the CDC state-level case and death surveillance table."""
        data = table_rename(
            dataframes[0],
            {
                # Probable/confirmed breakdowns (conf_cases, prob_cases, conf_death,
                # prob_death, consent_* and created_at) are intentionally not mapped
                "submission_date": "date",
                "state": "subregion1_code",
                "tot_cases": "total_confirmed",
                "new_case": "new_confirmed",
                "tot_death": "total_deceased",
                "new_death": "new_deceased",
            },
            drop=True,
        )

        data["key"] = "US_" + data["subregion1_code"]
        data["date"] = data["date"].apply(lambda x: datetime_isoformat(x, "%m/%d/%Y"))

        # A few "states" are considered independent territories by our dataset or need correction
        key_overrides = {"PW": "PW", "FSM": "FM", "RMI": "MH", "NYC": "US_NY_NYC"}
        for state_code, key in key_overrides.items():
            data.loc[data["subregion1_code"] == state_code, "key"] = key

        return data
예제 #19
0
    def parse(self, sources: Dict[str, str], aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse Thailand's country-level JSON report."""
        with open(sources[0], "r") as fd:
            data = json.load(fd)["Data"]

        # Records look like: {"Date":"01/01/2020","NewConfirmed":0,"NewRecovered":0,
        # "NewHospitalized":0,"NewDeaths":0,"Confirmed":0,"Recovered":0,
        # "Hospitalized":0,"Deaths":0}
        data = table_rename(
            DataFrame.from_records(data),
            {
                "Date": "date",
                "NewConfirmed": "new_confirmed",
                "NewRecovered": "new_recovered",
                "NewHospitalized": "new_hospitalized",
                "NewDeaths": "new_deceased",
                # Fixed typo: the cumulative targets had a double underscore
                # ("total__confirmed"), which does not match the single-underscore
                # column names used by the rest of the pipeline
                "Confirmed": "total_confirmed",
                "Recovered": "total_recovered",
                "Hospitalized": "total_hospitalized",
                "Deaths": "total_deceased",
            },
            drop=True,
            remove_regex=r"[^0-9a-z\s]",
        )

        # Format date as ISO date
        data["date"] = data["date"].apply(lambda x: datetime_isoformat(x, "%m/%d/%Y"))

        # Add key and return data
        data["key"] = "TH"
        return data
예제 #20
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse Chile's regional intensive care occupancy table."""
        data = table_rename(
            dataframes["intensive_care"],
            {
                "fecha": "date",
                "numero": "current_intensive_care",
                "Region": "match_string",
            },
            drop=True,
        )

        # Force the date column to string type
        data["date"] = data["date"].astype(str)

        # Extract cities from the regions
        city = _extract_cities(data)

        # Remaining records match at the subregion1 level within Chile
        data["country_code"] = "CL"
        data["subregion2_code"] = None

        # Drop bogus records from the data
        data.dropna(subset=["date", "match_string"], inplace=True)

        return concat([data, city])
예제 #21
0
def _parse_summary(data: DataFrame) -> DataFrame:
    """Parse the DC summary sheet into a per-date table of statistics.

    The sheet lists one statistic per row with one column per date; this
    reshapes it so each date becomes a row and each statistic a column.
    """
    # Drop the first column (presumably a blank/label column — verify against
    # the source sheet) and name the first remaining column "statistic"
    data = data[data.columns[1:]]
    data.columns = ["statistic"] + list(data.columns[1:])
    # Remove statistic rows that have no value for any date
    data = data.dropna(subset=data.columns[1:], how="all")

    # Flatten the per-date columns into long (index, date, value) rows
    data = pivot_table_date_columns(data.set_index("statistic"), value_name="statistic")
    data = data.reset_index().dropna(subset=["date"])
    data.statistic = data.statistic.apply(safe_float_cast).astype(float)

    # Re-pivot so each statistic label becomes its own column, indexed by date
    data = data.pivot_table(index="date", columns=["index"], values="statistic")
    data = data.reset_index()

    # Map the sheet's row labels onto our output schema
    data = table_rename(
        data,
        {
            "date": "date",
            "Total Positives": "total_confirmed",
            "Number of Deaths": "total_deceased",
            "Total Overall Tested": "total_tested",
            "Cleared From Isolation": "total_recovered",
            "Total COVID-19 Patients in DC Hospitals": "total_hospitalized",
            "Total COVID-19 Patients in ICU": "total_intensive_care",
        },
        drop=True,
    )
    return data
예제 #22
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse Spain's deceased table, pivoted from one column per region."""
        # Pivot so each (date, region) pair becomes its own record
        deceased = table_rename(dataframes["deceased"],
                                {"FECHA / CCAA": "date"})
        deceased = pivot_table(deceased.set_index("date"),
                               value_name="new_deceased",
                               pivot_name="match_string")

        # Convert dates to ISO format: truncate to 10 chars, then re-parse to validate
        deceased["date"] = deceased["date"].apply(
            lambda x: datetime_isoformat(str(x)[:10], "%Y-%m-%d"))

        # Add the country code to all records and declare matching as subregion1
        deceased["country_code"] = "ES"
        deceased["subregion2_code"] = None
        deceased["locality_code"] = None

        # Country level is declared as "espana"
        deceased["key"] = None
        deceased.loc[deceased["match_string"] == "espana", "key"] = "ES"

        # Output the results
        return deceased.dropna(subset=["date"])
예제 #23
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse Luxembourg's country-level daily report."""
        # The headers are a bit funny-looking, so we must manually manipulate
        # them: the first row holds "name|...~..." strings with the real
        # column name in front
        raw = dataframes[0]
        raw.columns = [col.split("|")[0].split("~")[0] for col in raw.iloc[0]]
        raw = raw.iloc[1:]

        data = table_rename(
            raw,
            {
                "Date": "date",
                "Nombre de personnes en soins intensifs":
                "current_intensive_care",
                "Nombre cumulé de décès": "total_deceased",
                "Nombre de personnes testées COVID+": "new_tested",
            },
            drop=True,
        )

        # Get date in ISO format
        data["date"] = data["date"].apply(
            lambda x: datetime_isoformat(x, "%d/%m/%Y"))

        # Only country-level data is provided
        data["key"] = "LU"

        # Output the results
        return data
예제 #24
0
def _get_department(record: Dict[str, str]):
    """Fetch and key department-level data for a single French department."""
    region = record["subregion1_code"]
    department = record["subregion2_code"]
    # The API addresses departments with a "DEP-" prefixed code
    data = read_file(_api_url_tpl.format(f"DEP-{department}"))
    data["key"] = f"FR_{region}_{department}"
    return table_rename(data, _column_adapter, drop=True)
예제 #25
0
    def parse_dataframes(self, dataframes: List[DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Generic hospitalization parser, configurable via parse_opts."""
        # Default adapter covers the column spellings seen across sources;
        # callers may override it entirely via parse_opts["column_adapter"]
        default_adapter = {
            "discharged_cumulative": "total_discharged",
            "hospitalized_current": "current_hospitalized",
            "number hospitalised": "current_hospitalized",
            "hospitalized_cumulative": "total_hospitalized",
            "icu_current": "current_intensive_care",
            "number in icu": "current_intensive_care",
            "icu_cumulative": "cumulative_intensive_care",
            "ventilator_current": "current_ventilator",
            "ventilator_cumulative": "cumulative_ventilator",
            "new hospital admissions": "new_hospitalized",
            "new intensive care admissions": "new_intensive_care",
        }
        data = table_rename(
            dataframes[0], parse_opts.get("column_adapter", default_adapter))

        # Add key and parse date in ISO format
        data["key"] = parse_opts.get("key")
        data["date"] = data[parse_opts.get("date_column", "date")].astype(str)
        date_format = parse_opts.get("date_format", "%Y-%m-%d")
        data.date = data.date.apply(
            lambda x: datetime_isoformat(x, date_format))

        return data
예제 #26
0
    def parse_dataframes(self, dataframes: Dict[Any, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse the UN population projections table for the year 2020."""
        data = table_rename(
            dataframes[0],
            _column_adapter,
            drop=True,
        )

        # Filter data for 2020 and remove all other years
        data = data[data["date"] == 2020].drop(columns=["date"])

        # We only care about the "Medium" variant population indicators
        data = data[data["indicator"] == "Medium"]

        # Population counts are in thousands, convert back to single units
        population_columns = [
            col for col in data.columns if col.startswith("population")
        ]
        for col in population_columns:
            data[col] = data[col] * 1000

        # Derive key from our country names mapping
        return data.merge(aux["un_country_names"], how="left")
    def parse_dataframes(
        self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Massachusetts county-level case and death counts."""
        data = table_rename(
            dataframes["counties"],
            {
                "Date": "date",
                "County": "match_string",
                "Count": "total_confirmed",
                "Deaths": "total_deceased",
            },
        )

        # Convert date to ISO format
        data["date"] = data["date"].astype(str).apply(
            lambda x: datetime_isoformat(x, "%m/%d/%Y"))

        # Drop bogus values; "Dukes and Nantucket" are separate counties but
        # reported as one, so that combined record is removed as well
        bogus_values = ("Unknown", "Dukes and Nantucket")
        data = data[~data["match_string"].isin(bogus_values)]

        data["country_code"] = "US"
        data["subregion1_code"] = "MA"
        return data
예제 #28
0
 def _rename_columns(data: DataFrame,
                     column_adapter: Dict[str, str]) -> DataFrame:
     """Promote the first row to header, clean the names, then rename columns."""
     # The real column names live in the first row; newlines become spaces
     data.columns = data.iloc[0]
     data.columns = [str(col).replace("\n", " ") for col in data.columns]
     # "." cells appear to be missing-value placeholders — replaced with NaN
     data = table_rename(data.iloc[1:].replace(".", numpy.nan),
                         column_adapter)
     # Keep only the renamed columns, in the adapter's order
     return data[column_adapter.values()]
예제 #29
0
    def parse_dataframes(
        self, dataframes: Dict[Any, DataFrame], aux: Dict[str, DataFrame], **parse_opts
    ) -> DataFrame:
        """Parse Spain's vaccination reports, one table per report date."""
        tables = []
        for df in dataframes.values():
            df = table_rename(df, _column_adapter, drop=True, remove_regex=r"[^a-z]")

            # Make sure the date is a timestamp
            df["date"] = df["date"].apply(safe_datetime_parse)

            # Fill the date when blank with the report's latest date.
            # NOTE: previously rows with a blank date were dropped *before* this
            # fill, which made the fill dead code and contradicted its comment;
            # the drop now happens only after filling, as a safety net for the
            # case where every date in the table is unparseable.
            df["date"] = df["date"].fillna(df["date"].max())
            df.dropna(subset=["date"], inplace=True)
            df["date"] = df["date"].apply(lambda x: x.date().isoformat())

            # Correct the obvious date typos
            df["date"] = df["date"].apply(lambda x: x.replace("2022", "2021"))

            tables.append(df)

        data = concat(tables)

        # Estimate first doses from total doses and second doses
        data["total_persons_vaccinated"] = (
            data["total_vaccine_doses_administered"] - data["total_persons_fully_vaccinated"]
        )

        # Records match at the subregion1 level within Spain
        data["key"] = None
        data["country_code"] = "ES"
        data["subregion2_code"] = None
        data["locality_code"] = None

        # The country-level total is reported as "Totales"
        data.loc[data["match_string"] == "Totales", "key"] = "ES"

        return data
예제 #30
0
    def parse_dataframes(self, dataframes: Dict[str, DataFrame],
                         aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Parse the US life expectancy table into county-level estimates."""
        data = table_rename(
            dataframes[0],
            {
                "e(0)": "life_expectancy",
                "STATE2KX": "state_code",
                "CNTY2KX": "county_code",
            },
            drop=True,
        )

        # Derive the FIPS subregion code: 2-char state code + 3-char county code
        state = data["state_code"].apply(lambda x: numeric_code_as_string(x, 2))
        county = data["county_code"].apply(lambda x: numeric_code_as_string(x, 3))
        data["subregion2_code"] = state + county

        # Data is more granular than county level, use a crude average for estimate
        data = data.drop(columns=["state_code", "county_code"])
        data = data.groupby("subregion2_code").mean().reset_index()

        # Add country code to all records and return
        data["country_code"] = "US"
        return data