Esempio n. 1
0
    def read(self) -> pd.DataFrame:
        """Download primary and booster metric series and join them on date."""
        query_params = load_query("ireland-metrics", to_str=False)

        # Fetch and parse each endpoint separately.
        primary = self._parse_data_primary(
            request_json(self.source_url["primary"], params=query_params)
        )
        booster = self._parse_data_boosters(
            request_json(self.source_url["booster"], params=query_params)
        )

        # Outer join keeps dates present in only one of the two series;
        # validate guarantees each date appears at most once per side.
        return primary.merge(booster, how="outer", on="date", validate="one_to_one")
Esempio n. 2
0
 def read(self) -> pd.DataFrame:
     """Reads data from source."""
     raw = request_json(self.source_url)
     # Flatten the "features" records; drop rows missing the test counter.
     flattened = pd.json_normalize(raw, record_path=["features"])
     return flattened.dropna(subset=["attributes.unique_public_private_test"])
Esempio n. 3
0
 def read(self):
     """Dispatch parsing of the downloaded payload to the reader that
     matches ``self.source_name``."""
     payload = request_json(self.source_url[self.source_name])
     readers = {
         "mohfw": self.read_mohfw,
         "cowin": self.read_cowin,
     }
     reader = readers.get(self.source_name)
     if reader is None:
         raise ValueError(f"Not valid class attribute `source_name`: {self.source_name}")
     return reader(payload)
Esempio n. 4
0
 def _get_file_url(self) -> tuple:
     """Return the CSV urls (doses, people, manufacturer) from the API context.

     Fix: the original annotation claimed ``-> str`` but the function
     returns a 3-tuple of urls; the annotation now matches the code.
     """
     response = request_json("https://www.covid19.admin.ch/api/data/context")
     context = response["sources"]["individual"]["csv"]
     doses_url = context["vaccDosesAdministered"]
     people_url = context["vaccPersonsV2"]
     manufacturer_url = context["weeklyVacc"]["byVaccine"]["vaccDosesAdministered"]
     return doses_url, people_url, manufacturer_url
Esempio n. 5
0
 def _parse_data(self) -> dict:
     """Return the latest test count and the current local date.

     Fix: the original annotation claimed ``-> pd.Series`` but a plain
     dict is returned; the annotation now matches the code.
     """
     data = request_json(self.source_url)["stats"]
     # Keep only the first record's "tested" value.
     data = pd.DataFrame.from_records(data, columns=["tested"]).iloc[0]
     return {
         "count": clean_count(data[0]),
         "date": localdate("Atlantic/Faeroe"),
     }
Esempio n. 6
0
 def _parse_last_update_date(self):
     """Return the source's last-updated timestamp as a ``date`` object."""
     field_name = "lastUpdated"
     payload = request_json(self.source_url_date)
     # Guard clause: fail loudly if the expected field is absent.
     if field_name not in payload:
         raise ValueError(f"{field_name} field not found!")
     return datetime.fromisoformat(payload[field_name]).date()
Esempio n. 7
0
def main(paths):
    """Fetch Saudi Arabia vaccination data, clean it and write a CSV."""

    url = "https://services6.arcgis.com/bKYAIlQgwHslVRaK/arcgis/rest/services/Vaccination_Individual_Total/FeatureServer/0/query?f=json&cacheHint=true&outFields=*&resultType=standard&returnGeometry=false&spatialRel=esriSpatialRelIntersects&where=1%3D1"

    payload = request_json(url)
    records = (feature["attributes"] for feature in payload["features"])
    df = pd.DataFrame.from_records(records)

    # Drop bookkeeping fields and map source names to canonical columns.
    df = df.drop(columns=["ObjectId", "LastValue", "Total_Individuals"]).rename(
        columns={
            "Reportdt": "date",
            "Total_Vaccinations": "total_vaccinations",
            "FirstDose": "people_vaccinated",
            "SecondDose": "people_fully_vaccinated",
        }
    )

    # Timestamps arrive as epoch milliseconds; keep one row per day (max).
    df["date"] = pd.to_datetime(df.date, unit="ms").dt.date.astype(str)
    df = df.groupby("date", as_index=False).max()

    # Constant metadata columns; AstraZeneca enters the campaign 2021-02-18.
    df.loc[:, "location"] = "Saudi Arabia"
    df.loc[:, "vaccine"] = "Pfizer/BioNTech"
    df.loc[df.date >= "2021-02-18", "vaccine"] = "Oxford/AstraZeneca, Pfizer/BioNTech"
    df.loc[:, "source_url"] = "https://covid19.moh.gov.sa/"

    df = df[df.total_vaccinations > 0].sort_values("date")

    # The data contains an error that creates a negative change.
    df = df[df.date != "2021-03-03"]

    df.to_csv(paths.tmp_vax_out("Saudi Arabia"), index=False)
Esempio n. 8
0
 def read(self) -> pd.DataFrame:
     """Read the national timeseries; keep only the vaccination columns."""
     expected_columns = [
         "date",
         "change_cases",
         "change_fatalities",
         "change_tests",
         "change_hospitalizations",
         "change_criticals",
         "change_recoveries",
         "change_vaccinations",
         "change_vaccinated",
         "change_boosters_1",
         "change_boosters_2",
         "change_vaccines_distributed",
         "total_cases",
         "total_fatalities",
         "total_tests",
         "total_hospitalizations",
         "total_criticals",
         "total_recoveries",
         "total_vaccinations",
         "total_vaccinated",
         "total_boosters_1",
         "total_boosters_2",
         "total_vaccines_distributed",
     ]
     kept_columns = ["date", "total_vaccinations", "total_vaccinated", "total_boosters_1", "total_boosters_2"]
     payload = request_json(self.source_url)
     df = pd.DataFrame.from_records(payload["data"])
     # Fail fast if the upstream schema changes.
     check_known_columns(df, expected_columns)
     return df[kept_columns]
Esempio n. 9
0
 def get_data(self) -> pd.DataFrame:
     """Build a (Date, Cumulative total) testing series from the ArcGIS feed.

     Steps: download features, convert epoch-ms timestamps to datetimes,
     de-duplicate same-day reports (see KLUDGE below), then reduce to one
     row per date and per cumulative total before enforcing monotonicity.
     """
     json_data = request_json(DATA_URL, params=PARAMS)
     # One row per feature; each feature's "attributes" dict becomes a row.
     df = pd.DataFrame(
         [feat["attributes"] for feat in json_data["features"]])
     # "reportdt" arrives as epoch milliseconds; convert to naive UTC datetimes.
     df["reportdt"] = df["reportdt"].astype(int).apply(
         lambda dt: datetime.datetime.utcfromtimestamp(dt / 1000))
     df = df.rename(columns={"totalTests": "Cumulative total"})
     df["Cumulative total"] = df["Cumulative total"].astype(int)
     # KLUDGE: there are a few days with two reports on the same day (but at
     # different times, like 10am vs 10pm). Upon inspection, it appears that the
     # latter reports (e.g. the 10pm reports) actually correspond to official cumulative
     # totals for the subsequent day (as determined by comparing to official updates
     # published on Twitter and Facebook). So I increment the date of these latter
     # reports by one.
     df = df.sort_values("reportdt")
     # After sorting, the second report of a duplicated day is the "later" one.
     duplicate_idx = df.index[df["reportdt"].dt.date.duplicated(
         keep="first")]
     for idx in duplicate_idx:
         # Shift the later same-day report forward to the next day.
         df.loc[idx,
                "reportdt"] = df.loc[idx, "reportdt"] + datetime.timedelta(
                    days=1)
     df["Date"] = df["reportdt"].dt.strftime("%Y-%m-%d")
     df = df[["Date", "Cumulative total"]]
     df = df[df["Cumulative total"] > 0]
     # Keep the earliest date for each total, then the smallest total per date;
     # order of these two groupbys matters.
     df = df.groupby("Cumulative total", as_index=False).min()
     df = df.groupby("Date", as_index=False).min()
     # manual fix: drop incorrect data point on 2021-10-30
     df = df.drop(index=df[df["Date"] == "2021-10-30"].index.values)
     df = make_monotonic(df)
     return df
Esempio n. 10
0
 def read(self) -> pd.DataFrame:
     """Read the dose timeseries and verify the upstream schema."""
     expected_columns = [
         "Day_Date",
         "vaccinated",
         "vaccinated_cum",
         "vaccinated_population_perc",
         "vaccinated_seconde_dose",
         "vaccinated_seconde_dose_cum",
         "vaccinated_seconde_dose_population_perc",
         "vaccinated_third_dose",
         "vaccinated_third_dose_cum",
         "vaccinated_third_dose_population_perc",
         "vaccinated_fourth_dose_population_perc",
         "vaccinated_fourth_dose",
         "vaccinated_validity_perc",
         "vaccinated_expired_perc",
         "not_vaccinated_perc",
         "vaccinated_fourth_dose_cum",
     ]
     payload = request_json(self.source_url)
     df = pd.DataFrame.from_records(payload)
     # Fail fast if the upstream schema changes.
     check_known_columns(df, expected_columns)
     return df
Esempio n. 11
0
 def read(self) -> pd.DataFrame:
     """Read historical dose totals keyed by date."""
     payload = request_json(self.source_url)
     # Dict keys (dates) become the index, then an "index" column.
     df = pd.DataFrame.from_dict(
         payload["historicalData"],
         orient="index",
         columns=["vaccines", "numberTotalDosesAdministered"],
     )
     df = df.reset_index().dropna()
     return df.sort_values(by="index")
Esempio n. 12
0
 def read(self) -> pd.DataFrame:
     """Read historical data, verify the schema, keep the dose columns."""
     expected_columns = [
         "parsedOn",
         "parsedOnString",
         "fileName",
         "complete",
         "averageAge",
         "numberInfected",
         "numberCured",
         "numberDeceased",
         "percentageOfWomen",
         "percentageOfMen",
         "percentageOfChildren",
         "numberTotalDosesAdministered",
         "distributionByAge",
         "countyInfectionsNumbers",
         "incidence",
         "large_cities_incidence",
         "small_cities_incidence",
         "vaccines",
     ]
     payload = request_json(self.source_url)
     df = pd.DataFrame.from_dict(payload["historicalData"], orient="index")
     # Fail fast if the upstream schema changes.
     check_known_columns(df, expected_columns)
     kept = df[["vaccines", "numberTotalDosesAdministered"]]
     # Dict keys (dates) become the "index" column after reset_index.
     return kept.reset_index().dropna().sort_values(by="index")
Esempio n. 13
0
 def read(self) -> pd.DataFrame:
     """Reads data from source."""
     raw = request_json(self.source_url, params=self.params)
     # Flatten the "features" records; drop rows missing the cumulative count.
     flattened = pd.json_normalize(raw, record_path=["features"])
     return flattened.dropna(subset=["attributes.cumulative_test"])
Esempio n. 14
0
 def _get_api_value(self, query: str):
     """POST the given JSON query string and return the total hit count."""
     payload = json.loads(query)
     response = request_json(
         self.source_url,
         json=payload,
         headers=self.headers,
         request_method="post",
     )
     return int(response["hits"]["total"])
Esempio n. 15
0
 def read(self) -> pd.DataFrame:
     """Read test entries and keep only the Liechtenstein (FL) region."""
     payload = request_json(self.source_url)
     kept_columns = [
         "datum", "entries", "entries_pos", "nachweismethode", "geoRegion"
     ]
     df = pd.DataFrame(payload)[kept_columns]
     return df[df.geoRegion == "FL"]
Esempio n. 16
0
 def read(self) -> pd.DataFrame:
     """Read the dose timeseries, keeping only the known metric columns."""
     payload = request_json(self.source_url)
     kept_columns = [
         "date",
         "total_vaccinations",
         "total_vaccinated",
         "total_boosters_1",
     ]
     return pd.DataFrame.from_records(payload["data"], columns=kept_columns)
Esempio n. 17
0
def get_api_value(source: str, query: str, headers: dict):
    """POST a JSON query to ``source`` and return the total hit count."""
    payload = json.loads(query)
    response = request_json(
        source, json=payload, headers=headers, request_method="post"
    )
    return int(response["hits"]["total"])
Esempio n. 18
0
 def read(self) -> pd.DataFrame:
     """Read sample counts per report time, with cleaned dates."""
     payload = request_json(self.source_url)
     rows = [row["value"] for row in payload["rows"]]
     # Load only columns needed.
     df = pd.DataFrame.from_records(rows, columns=["report_time", "samples"])
     # Clean date.
     df["report_time"] = clean_date_series(
         df["report_time"], "%Y-%m-%dT%H:%M:%S.%f%z"
     )
     return df
Esempio n. 19
0
 def _read_antigens(self):
     """Read the antigen testing table from the Infogram connector."""
     url = "https://atlas.jifo.co/api/connectors/425b93dc-c055-477c-b81a-5d4d9a1275f7"
     # The fifth table ("data"[4]) holds antigen data; row 0 is the header.
     table = request_json(url)["data"][4]
     df = pd.DataFrame.from_records(table[1:], columns=table[0])
     # Clean: the date column has an empty-string header in the source table.
     df = df.assign(Date=clean_date_series(df[""], "%d/%m/%Y"))
     df["Positivas"] = df["Positivas"].apply(clean_count)
     df["Total Px Ag"] = df["Total Px Ag"].apply(clean_count)
     return df
Esempio n. 20
0
 def read(self) -> pd.DataFrame:
     """Read per-region test counts and aggregate them per day."""
     payload = request_json(self.source_url)
     kept_columns = [
         "Date_of_statistics",
         "Tested_with_result",
         "Security_region_name",
         "Tested_positive",
     ]
     df = pd.DataFrame.from_records(payload, columns=kept_columns)
     # Sum over all security regions to get daily totals.
     return df.groupby("Date_of_statistics").sum().reset_index()
Esempio n. 21
0
 def _read_art(self):
     """Read weekly ART testing records, correcting known date typos."""
     url = f"{self.base_url}1ee4d904-b17e-41de-a731-854578b036e6"
     records = request_json(url)["result"]["records"]
     df = pd.DataFrame.from_records(records).drop(columns=["_id"])
     # Correct errors: two source rows are mis-dated 2022 but belong to 2021.
     corrections = {
         "14/12/2022": "14/12/2021",
         "28/12/2022": "28/12/2021",
     }
     for wrong, right in corrections.items():
         df.loc[df[df["week_of"] == wrong].index.values, "week_of"] = right
     df["week_of"] = clean_date_series(df["week_of"], "%d/%m/%Y")
     return df
Esempio n. 22
0
    def read(self):
        """Return the latest vaccination totals with the report's edit date."""
        attributes = request_json(self.source_url)["features"][0]["attributes"]
        # "EditDate" is an epoch-milliseconds timestamp.
        report_date = clean_date(datetime.fromtimestamp(attributes["EditDate"] / 1000))

        return pd.Series({
            "total_vaccinations": attributes["Vaccine_total"],
            # NOTE(review): the last-24h counter is mapped to
            # people_fully_vaccinated — looks suspicious; confirm upstream.
            "people_fully_vaccinated": attributes["Vaccine_total_last24"],
            "date": report_date,
        })
Esempio n. 23
0
def read(source: str) -> pd.Series:
    """Return first/second-dose counters parsed from the source feed.

    Fix: if either label was absent from the feed, the original code
    crashed with an opaque ``NameError``; now a descriptive ``ValueError``
    is raised instead.

    Raises:
        ValueError: if either dose counter is missing from the feed.
    """
    data = request_json(source)
    dose1_only = None
    people_fully_vaccinated = None
    for count in data:
        if count[0] == "2nd Vaccine taken":
            people_fully_vaccinated = count[1]
        if count[0] == "1st Vaccine taken":
            dose1_only = count[1]

    if dose1_only is None or people_fully_vaccinated is None:
        raise ValueError("Dose counters not found in source data")

    # people_vaccinated = at-least-one-dose = first-dose-only + fully vaccinated.
    people_vaccinated = dose1_only + people_fully_vaccinated

    return pd.Series({
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    })
Esempio n. 24
0
def read(source: str) -> pd.DataFrame:
    """Build a per-day dose table from the source feed."""
    data = request_json(source)
    records = []
    for entry in data:
        total_cumulative = entry["totalDose"]["cumulative"]
        records.append({
            "date": entry["date"],
            "people_vaccinated": entry["firstDose"]["cumulative"]["total"],
            "people_fully_vaccinated": entry["secondDose"]["cumulative"]["total"],
            "total_vaccinations": total_cumulative["total"],
            "total_pfizer": total_cumulative["biontech"],
            "total_sinovac": total_cumulative["sinovac"],
        })
    return pd.DataFrame.from_dict(records)
Esempio n. 25
0
def read(source: str) -> pd.Series:
    """Read Croatian vaccination counters from the source feed."""
    data = request_json(source)

    # The feed's "Datum" is shifted back one day before reporting.
    report_date = str((pd.to_datetime(data["Datum"]) - timedelta(days=1)).date())

    return pd.Series(
        data={
            "total_vaccinations": int(data["CijepljenjeBrUtrosenihDoza"]),
            "people_vaccinated": int(data["CijepljeniJednomDozom"]),
            "people_fully_vaccinated": int(data["CijepljeniDvijeDoze"]),
            "date": report_date,
        })
Esempio n. 26
0
 def read(self) -> pd.DataFrame:
     """Read per-country vaccination attributes from the feature service."""
     kept_columns = [
         "ADM0_SOVRN",
         "ISO_3_CODE",
         "TotAmtAdmi",
         "VacAd1Dose",
         "VacAd2Dose",
         "FullyVacc",
         "VaccApprov",
     ]
     payload = request_json(self.source_url)
     attributes = [feature["attributes"] for feature in payload["features"]]
     return pd.DataFrame(attributes, columns=kept_columns)
Esempio n. 27
0
 def _api_request(self):
     """Query today's aggregated dose statistics from the ArcGIS endpoint."""
     date_low = localdatenow(self.timezone)
     date_up = localdatenow(self.timezone, sum_days=1)
     # Server-side aggregation. The source field name contains a typo
     # ('adminsitradas') that must be preserved, and dose_2 uses 'max'
     # while the others use 'sum' — NOTE(review): confirm that is intended.
     out_statistics = (
         "[{'onStatisticField':'total_dosis_adminsitradas','outStatisticFieldName':'total_vaccinations','statisticType':'sum'},"
         "{'onStatisticField':'total_primera_dosis','outStatisticFieldName':'dose_1','statisticType':'sum'},"
         "{'onStatisticField':'total_segunda_dosis','outStatisticFieldName':'dose_2','statisticType':'max'}]"
     )
     # Local-day window shifted by 5 hours — presumably to match the
     # service's timestamp offset; verify against the source.
     where_clause = f"fecha BETWEEN timestamp '{date_low} 05:00:00' AND timestamp '{date_up} 04:59:59'"
     params = {
         "f": "json",
         "outFields": "*",
         "outStatistics": out_statistics,
         "returnGeometry": "false",
         "where": where_clause,
     }
     return request_json(self.source_url, params=params)
Esempio n. 28
0
 def read(self):
     """Read per-report vaccination attributes and verify the schema."""
     expected_columns = [
         "Reportdt",
         "Total_Vaccinations",
         "Total_Individuals",
         "LastValue",
         "ObjectId",
         "Elderly",
         "FirstDose",
         "SecondDose",
         "BoosterDose",
     ]
     payload = request_json(self.source_url)
     attributes = (feature["attributes"] for feature in payload["features"])
     df = pd.DataFrame.from_records(attributes)
     # Fail fast if the upstream schema changes.
     check_known_columns(df, expected_columns)
     return df
Esempio n. 29
0
 def extract(self) -> dict:
     """Return the "World" region's distributions entry from the feed."""
     payload = request_json(self.source_url)
     world_entries = [r for r in payload["regions"] if r["region"] == "World"]
     # Indexing [0] (not next()) keeps the original IndexError if absent.
     return world_entries[0]["distributions"]
Esempio n. 30
0
 def read(self) -> pd.DataFrame:
     """Query the distribution-program layer for the columns to rename."""
     # Comma-separate the source column names (%2C is a url-encoded comma).
     columns = "%2C".join(self.columns_rename.keys())
     url = f"https://services9.arcgis.com/DnERH4rcjw7NU6lv/arcgis/rest/services/Vaccine_Distribution_Program/FeatureServer/2/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields={columns}&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token="
     payload = request_json(url)
     return pd.DataFrame.from_records(
         feature["attributes"] for feature in payload["features"]
     )