Code Example #1
def load(self, df: pd.DataFrame, output_path: str) -> None:
    # Export data: upload to S3 if given an S3 URI, otherwise write a local CSV
    if output_path.startswith("s3://"):
        obj_to_s3(df, s3_path=output_path, public=False)
    else:
        df.to_csv(output_path, index=False)
Code Example #2
def create_subnational():
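    """Build the subnational cases/deaths dataset and export it to S3 as a zipped CSV."""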
    global_cases = clean_global_subnational("confirmed")
    global_deaths = clean_global_subnational("deaths")
    us_cases = clean_us_subnational("confirmed")
    us_deaths = clean_us_subnational("deaths")

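    # Merge cases with deaths for the global and US sources, then stack the two frames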
    df = pd.concat([
        pd.merge(global_cases,
                 global_deaths,
                 on=["location1", "location2", "location3", "date"],
                 how="outer"),
        pd.merge(us_cases,
                 us_deaths,
                 on=["location1", "location2", "location3", "date"],
                 how="outer"),
    ]).sort_values(["location1", "location2", "location3", "date"])[[
        "location1",
        "location2",
        "location3",
        "date",
        "total_cases",
        "new_cases",
        "new_cases_smoothed",
        "total_deaths",
        "new_deaths",
        "new_deaths_smoothed",
    ]]
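    # Keep only rows with at least one confirmed case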
    df = df[df.total_cases > 0]
    filename = "subnational_cases_deaths"
    compression = {"method": "zip", "archive_name": f"{filename}.csv"}
    # df.to_csv(os.path.join(OUTPUT_PATH, f"{filename}.zip"), index=False, compression=compression)
    obj_to_s3(df,
              s3_path="s3://covid-19/public/jhu/{filename}.zip",
              compression=compression,
              public=True)
Code Example #3
File: public.py Project: LimLim0a0/covid-19-data
def create_latest(df):
    """Export dataset as CSV, XLSX and JSON (latest data points)."""
    df = df[df.date >= str(date.today() - timedelta(weeks=2))]
    df = df.sort_values("date")

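    # Forward-fill each location and keep only its most recent row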
    latest = [
        df[df.location == loc].ffill().tail(1).round(3)
        for loc in set(df.location)
    ]
    latest = pd.concat(latest)
    latest = latest.sort_values("location").rename(
        columns={"date": "last_updated_date"})

    print("Writing latest version…")
    # CSV
    latest.to_csv(os.path.join(DATA_DIR, "latest", "owid-covid-latest.csv"),
                  index=False)
    S3().upload_to_s3(
        os.path.join(DATA_DIR, "latest", "owid-covid-latest.csv"),
        "s3://covid-19/public/latest/owid-covid-latest.csv",
        public=True,
    )
    # XLSX
    obj_to_s3(latest,
              s3_path="s3://covid-19/public/latest/owid-covid-latest.xlsx",
              public=True)
    # JSON
    latest.dropna(subset=["iso_code"]).set_index("iso_code").to_json(
        os.path.join(DATA_DIR, "latest", "owid-covid-latest.json"),
        orient="index")
    S3().upload_to_s3(
        os.path.join(DATA_DIR, "latest", "owid-covid-latest.json"),
        "s3://covid-19/public/latest/owid-covid-latest.json",
        public=True,
    )
Code Example #4
def _export_log_info(df_exec, t_sec_1, t_sec_2):
    # print(len(df_new), len(MODULES_NAME), len(df_new) == len(MODULES_NAME))
    if len(df_exec) == len(MODULES_NAME):
        print("EXPORTING LOG DETAILS")
        details = system_details()
        date_now = localdate(force_today=True)
        machine = details["id"]
        # Export timings per country
        df_exec = df_exec.reset_index().assign(date=date_now, machine=machine)
        df = obj_from_s3(LOG_GET_COUNTRIES)
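        # Drop any existing entry for this machine and date before appending the new timings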
        df = df[df.date + df.machine != date_now + machine]
        df = pd.concat([df, df_exec])
        obj_to_s3(df, LOG_GET_COUNTRIES)
        # Export machine info
        data = obj_from_s3(LOG_MACHINES)
        if machine not in data:
            data = {**data, machine: details["info"]}
            obj_to_s3(data, LOG_MACHINES)
        # Export overall timing
        report = {
            "machine": machine,
            "date": date_now,
            "t_sec": t_sec_1,
            "t_sec_retry": t_sec_2
        }
        df_new = pd.DataFrame([report])
        df = obj_from_s3(LOG_GET_GLOBAL)
        df = df[df.date + df.machine != date_now + machine]
        df = pd.concat([df, df_new])
        obj_to_s3(df, LOG_GET_GLOBAL)
Code Example #5
File: public.py Project: LimLim0a0/covid-19-data
def create_dataset(df, macro_variables):
    """Export dataset as CSV, XLSX and JSON (complete time series)."""
    print("Writing to CSV…")
    filename = os.path.join(DATA_DIR, "owid-covid-data.csv")
    df.to_csv(filename, index=False)
    S3().upload_to_s3(filename,
                      "s3://covid-19/public/owid-covid-data.csv",
                      public=True)

    print("Writing to XLSX…")
    # filename = os.path.join(DATA_DIR, "owid-covid-data.xlsx")
    # all_covid.to_excel(os.path.join(DATA_DIR, "owid-covid-data.xlsx"), index=False, engine="xlsxwriter")
    # upload_to_s3(filename, "public/owid-covid-data.xlsx", public=True)
    obj_to_s3(df,
              s3_path="s3://covid-19/public/owid-covid-data.xlsx",
              public=True)

    print("Writing to JSON…")
    data = df_to_dict(
        df,
        macro_variables.keys(),
        valid_json=True,
    )
    obj_to_s3(data, "s3://covid-19/public/owid-covid-data.json", public=True)
Code Example #6
def main():
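    # Read each country's data and upload it to S3 as a CSV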
    for country in countries:
        logger.info(f"VAX - ICE - {country.location}")
        df = country.read()
        obj_to_s3(df, f"{PATH_ICE}/{country.location}.csv")