def normalize(df):
    """Clean up a case-count DataFrame in place and return it.

    Steps, in order:

    1. Missing values in the text columns (``country``, ``countryCode``,
       ``province``, ``provinceCode``, ``city``, ``cityCode``) become ``""``.
    2. Missing values in the count columns (``confirmed``, ``suspected``,
       ``cured``, ``dead``) become ``0`` and the columns are cast to ``int``.
    3. Country, province and city codes/names are recomputed through the
       ``get_country_code`` / ``get_china_*`` lookup helpers.
    4. Rows duplicated on (``date``, ``country``, ``province``, ``city``) are
       dropped, keeping the last occurrence.
    5. Rows are sorted by date, then country/province/city code, then city.

    Args:
        df: pandas DataFrame containing all of the columns named above plus
            ``date``. It is modified in place.

    Returns:
        The same DataFrame object, after normalization.
    """
    text_columns = (
        "country", "countryCode",
        "province", "provinceCode",
        "city", "cityCode",
    )
    count_columns = ("confirmed", "suspected", "cured", "dead")

    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: the in-place form operates on a
    # temporary Series, which warns on pandas 2.2 and no longer updates ``df``
    # once Copy-on-Write is enabled.
    for column in text_columns:
        df[column] = df[column].fillna("")
    for column in count_columns:
        df[column] = df[column].fillna(0).astype(int)

    # Fix up the data. Order matters: each step reads columns written by the
    # steps before it (province code -> province name -> city code -> city).
    df["countryCode"] = df["country"].map(get_country_code)
    df["provinceCode"] = df.apply(
        lambda x: get_china_province_code(x.province, x.provinceCode), axis=1)
    df["province"] = df.apply(
        lambda x: get_china_area_name(x.provinceCode, x.province), axis=1)
    df["cityCode"] = df.apply(
        lambda x: get_china_city_code(x.provinceCode, x.city, x.cityCode),
        axis=1)
    df["city"] = df.apply(
        lambda x: get_china_area_name(x.cityCode, x.city), axis=1)

    # When the same area appears twice for one date, the later row wins.
    df.drop_duplicates(
        subset=["date", "country", "province", "city"],
        keep="last",
        inplace=True)
    df.sort_values(
        ["date", "countryCode", "provinceCode", "cityCode", "city"],
        inplace=True)
    return df
# Append the newly collected records to the existing data set.
df = pd.concat([df, pd.DataFrame(data_list)], sort=False)

# Replace missing values. The filled column is assigned back rather than
# calling ``df[col].fillna(..., inplace=True)``: the in-place form operates on
# a temporary Series, which warns on pandas 2.2 and no longer updates ``df``
# once Copy-on-Write is enabled.
for column in ("country", "countryCode", "province", "provinceCode",
               "city", "cityCode"):
    df[column] = df[column].fillna("")
for column in ("confirmed", "suspected", "cured", "dead"):
    df[column] = df[column].fillna(0).astype(int)

# fix data
# Order matters: each step reads columns written by the steps before it
# (province code -> province name -> city code -> city name).
df["countryCode"] = df["country"].map(get_country_code)
df["provinceCode"] = df.apply(
    lambda x: get_china_province_code(x.province, x.provinceCode), axis=1)
df["province"] = df.apply(
    lambda x: get_china_area_name(x.provinceCode, x.province), axis=1)
df["cityCode"] = df.apply(
    lambda x: get_china_city_code(x.provinceCode, x.city, x.cityCode), axis=1)
df["city"] = df.apply(
    lambda x: get_china_area_name(x.cityCode, x.city), axis=1)

# When the same area appears twice for one date, the later row wins.
df.drop_duplicates(
    subset=["date", "country", "province", "city"], keep="last", inplace=True)
df.sort_values(["date", "countryCode", "provinceCode", "cityCode", "city"],
               inplace=True)

# Export the cleaned data in all three output formats.
df.to_csv(csv_file, index=False, encoding='utf-8')
df.to_json(json_file, orient="records", force_ascii=False)
df.to_excel(xlsx_file, index=False)
print(f"""{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} : Data update completed. """)
def _merge_detail_frames(detail_lists, key):
    """Outer-merge the non-empty record lists in *detail_lists* on *key*.

    Every truthy entry of *detail_lists* is turned into a DataFrame. Columns
    other than *key* that collide during a merge keep their name on the left
    side and receive an ``_<n>`` suffix on the right side.

    Returns:
        The merged DataFrame, or ``None`` when no entry is truthy.
    """
    merged = None
    frames = [pd.DataFrame(records) for records in detail_lists if records]
    for index, frame in enumerate(frames):
        if merged is None:
            merged = frame
        else:
            merged = pd.merge(merged, frame, on=key, how="outer",
                              suffixes=("", f"_{index}"), sort=False)
    return merged


def parse_report(report):
    """Convert one report mapping into a DataFrame of case counts.

    Keys read from *report* (English gloss in parentheses):

    * ``"时间"`` (time) - required; stored as a string in the ``date`` column.
    * ``"省"`` (province) - optional. When present, detail entries are keyed
      by ``city``; otherwise they are keyed by ``province``.
    * ``"确诊"``, ``"疑似"``, ``"治愈"``, ``"死亡"`` (confirmed, suspected,
      cured, dead) - totals, parsed with ``parse_int``.
    * ``"确诊详情"``, ``"疑似详情"``, ``"治愈详情"``, ``"死亡详情"`` - per-area
      details for the same four counts, parsed with ``parse_list``.
    * ``"国外确诊详情"``, ``"国外治愈详情"`` (foreign confirmed / cured
      details) - per-country details, parsed with ``parse_list``.

    The result holds the per-area detail rows (outer-merged across the four
    counts), followed by one row with the report totals, followed by the
    per-country rows if there are any. Domestic rows get ``country="中国"`` and
    ``countryCode="CN"``.

    Args:
        report: mapping supporting ``[]`` and ``.get`` with the keys above.

    Returns:
        A DataFrame with the columns ``date``, ``country``, ``countryCode``,
        ``province``, ``provinceCode``, ``city``, ``cityCode``, ``confirmed``,
        ``suspected``, ``cured`` and ``dead``, sorted by date, country code,
        province code, city code and city.

    Raises:
        KeyError: if *report* has no ``"时间"`` entry.
    """
    date = str(report["时间"])
    province = report.get("省", "")
    confirmed = parse_int(report.get("确诊"))
    suspected = parse_int(report.get("疑似"))
    cured = parse_int(report.get("治愈"))
    dead = parse_int(report.get("死亡"))

    # A report that names a province lists cities; otherwise it lists provinces.
    area_key = "city" if province else "province"
    confirmed_list = parse_list(report.get("确诊详情"), [area_key, "confirmed"])
    suspected_list = parse_list(report.get("疑似详情"), [area_key, "suspected"])
    cured_list = parse_list(report.get("治愈详情"), [area_key, "cured"])
    dead_list = parse_list(report.get("死亡详情"), [area_key, "dead"])
    foreign_confirmed_list = parse_list(report.get("国外确诊详情"),
                                        ["country", "confirmed"])
    foreign_cured_list = parse_list(report.get("国外治愈详情"),
                                    ["country", "cured"])

    province_code = get_china_province_code(province) if province else ""
    province = (get_china_area_name(province_code, province)
                if province_code else "")

    # Row carrying the totals of the report itself.
    totals = {
        "provinceCode": province_code,
        "province": province,
        "confirmed": confirmed,
        "suspected": suspected,
        "cured": cured,
        "dead": dead,
    }

    # Attach area codes and canonical names to every detail record (the
    # dictionaries returned by parse_list are updated in place).
    # NOTE(review): if a province was given but its code lookup came back
    # falsy, ``province`` is "" here while the records are keyed by "city",
    # so the else-branch would look up a missing "province" key - confirm
    # whether that input can occur.
    domestic_lists = [confirmed_list, suspected_list, cured_list, dead_list]
    for records in domestic_lists:
        if not records:
            continue
        for record in records:
            if province:
                record["provinceCode"] = province_code
                record["province"] = province
                record["cityCode"] = get_china_city_code(
                    province_code, record["city"])
                record["city"] = get_china_area_name(
                    record["cityCode"], record["city"])
            else:
                record["provinceCode"] = get_china_province_code(
                    record["province"])
                record["province"] = get_china_area_name(
                    record["provinceCode"], record["province"])

    columns = [
        "date", "country", "countryCode", "province", "provinceCode",
        "city", "cityCode", "confirmed", "suspected", "cured", "dead",
    ]
    totals_df = pd.DataFrame([totals], columns=columns)
    details_df = _merge_detail_frames(domestic_lists, area_key)
    if details_df is None:
        df = totals_df
    else:
        # Keep only the output columns (dropping the suffixed merge leftovers)
        # and put the totals row after the detail rows. pd.concat replaces
        # DataFrame.append, which was removed in pandas 2.0.
        df = pd.concat(
            [pd.DataFrame(details_df, columns=columns), totals_df],
            sort=False)

    df["country"] = "中国"
    df["countryCode"] = "CN"
    # Assign the filled columns back: calling fillna(inplace=True) on
    # ``df[col]`` stops updating ``df`` once Copy-on-Write is enabled.
    df["province"] = df["province"].fillna(province)
    df["city"] = df["city"].fillna("")
    # Both code columns are recomputed from the names for every row, so there
    # is no need to fill their missing values first.
    df["provinceCode"] = df["province"].map(get_china_province_code)
    df["cityCode"] = df.apply(
        lambda x: get_china_city_code(x.provinceCode, x.city), axis=1)

    foreign_lists = [foreign_confirmed_list, foreign_cured_list]
    for records in foreign_lists:
        if not records:
            continue
        for record in records:
            record["countryCode"] = get_country_code(record["country"])
    foreign_df = _merge_detail_frames(foreign_lists, "country")
    if foreign_df is not None:
        df = pd.concat([df, pd.DataFrame(foreign_df, columns=columns)],
                       sort=False)

    df["date"] = date
    df.sort_values(
        ["date", "countryCode", "provinceCode", "cityCode", "city"],
        inplace=True)
    return df