def create_timeseries_jhu(data, lookup_table, value_name):
    """Melt a wide JHU table into a long time series and attach ISO metadata.

    Parameters
    ----------
    data : pd.DataFrame
        Wide-format frame with a "region" column and one column per date.
    lookup_table : pd.DataFrame
        JHU lookup frame providing "iso2", "iso3", "code3", "Lat", "Lon",
        keyed by "region".
    value_name : str
        Name of the melted value column (e.g. "confirmed" or "deaths").

    Returns
    -------
    pd.DataFrame or None
        Long-format frame joined with per-region metadata, or None on failure.
    """
    try:
        logger.info('timeseries: ' + value_name)
        id_vars = "region"
        var_name = "date"
        timeseries = pd.melt(data, id_vars=id_vars, var_name=var_name,
                             value_name=value_name)
        # The former column headers are date strings; parse to datetimes.
        timeseries.loc[:, var_name] = pd.to_datetime(
            timeseries.loc[:, var_name])
        # first() collapses duplicate lookup rows to one metadata row per
        # region; the inner join drops regions missing from the lookup.
        timeseries = pd.merge(
            lookup_table[[
                "iso2", "iso3", "code3", "Lat", "Lon", id_vars
            ]].groupby(id_vars).first(),
            timeseries,
            on=id_vars,
            how="inner",
        )
        return timeseries
    except Exception:
        # Narrowed from BaseException so KeyboardInterrupt/SystemExit are
        # not swallowed; data errors still degrade to a logged None.
        logger.error('failed to create timeseries')
        return None
def countries_geojson(self):
    """Download and parse the countries GeoJSON configured in the parser.

    Reads the "mapbox_countries_url" option from the "urls" section of
    ``self.parser`` and returns the parsed GeoJSON as a dict, or None if
    the download or parsing fails.
    """
    logger.info("geojson")
    error_msg = "cannot load geojson data, no url provided"
    try:
        with urlopen(self.parser.get("urls", "mapbox_countries_url")) as response:
            return json.load(response)
    except Exception:
        # Narrowed from BaseException; covers a missing config option,
        # network errors, and malformed JSON alike.
        logger.error(error_msg)
        return None
def read_prepare_data(url):
    """Load a JHU CSV by config key and aggregate it to one row per region.

    NOTE(review): this free-standing copy references ``self`` and
    ``error_msg``, neither of which is defined in this scope — it looks
    like a duplicate of the helper nested inside ``load_jhu`` and would
    raise NameError if called stand-alone.  Confirm whether it is dead
    code that can be removed.
    """
    try:
        # logger.info(url)
        data_raw = pd.read_csv(self.parser.get("urls", url))
        data_raw.rename(columns={"Country/Region": "region"}, inplace=True)
        # Sum province rows into one national row; drop coordinate columns
        # so they are not summed into nonsense values.
        data = (data_raw.groupby("region").sum().drop(
            columns=["Lat", "Long"]).reset_index())
        # NOTE(review): rstrip('_url') strips a trailing *character set*
        # ('_', 'u', 'r', 'l'), not the literal suffix — it happens to work
        # for the current config keys but is fragile.
        logger.info('length {}: {}'.format(url.rstrip('_url'), len(data)))
        return data
    except BaseException:
        logger.error('prepare_data')
        logger.error(error_msg)
        return None
def post(self, cid):
    """Create a new student in class *cid* from the submitted form data.

    Expects form fields ``name``, ``gender`` and ``birthday``; responds
    with 200 on success, 400 when the birthday cannot be parsed.
    """
    logger.debug(f"{str(request).replace(request.url_root, '/')}")
    form = request.form

    student = Student()
    student.cid = int(cid)
    student.name = form.get('name')
    # Chinese gender label is stored as an integer flag (male -> 1).
    student.gender = 1 if form.get('gender') == '男' else 0

    try:
        student.birthday = DT.date_str2date(form.get('birthday'))
        db.session.add(student)
        db.session.commit()
        msg = f'添加成功!'
        logger.success(msg)
        status_code = 200
        return make_response(
            jsonify(dict(status_code=status_code, msg=msg)),
            status_code
        )
    except Exception as e:
        msg = f'error:{e} \n请按照 2001-01-01的格式输入生日'
        logger.error(msg)
        status_code = 400
        return make_response(
            jsonify(dict(status_code=status_code, msg=msg)),
            status_code
        )
def read_geonames_country_info(self):
    """Scrape the geonames country table and normalise its columns.

    Fetches the HTML table configured as "geonames_countries_url",
    renames its columns to the dashboard's schema, and expands two-letter
    continent codes to display names.

    Returns
    -------
    pd.DataFrame or None
        Normalised country info, or None when the download/parse fails.
    """
    logger.info("geonames")
    error_msg = "cannot load geonames data, no url provided"
    # Continent code -> display name; data-driven replacement instead of
    # six near-identical .loc assignments.
    continent_names = {
        "EU": "Europe",
        "NA": "North-A.",
        "SA": "South-A.",
        "AS": "Asia",
        "OC": "Oceania",
        "AF": "Africa",
    }
    try:
        res = requests.get(
            self.parser.get("urls", "geonames_countries_url"))
        soup = BeautifulSoup(res.content, "lxml")
        table = soup.find_all("table", id="countries")
        # keep_default_na=False: "NA" (North America) must stay a string,
        # not become NaN.
        country_info = pd.read_html(str(table), keep_default_na=False)[0]
        country_info.rename(
            columns={
                "ISO-3166alpha2": "iso_alpha2",
                "ISO-3166alpha3": "iso_alpha",
                "ISO-3166numeric": "iso_num",
                "Country": "region",
                "Population": "population",
                "Continent": "continent",
                "Area in km²": "area",
            },
            inplace=True,
        )
        for code, name in continent_names.items():
            country_info.loc[country_info["continent"] == code,
                             "continent"] = name
        return country_info
    except Exception:
        # Narrowed from BaseException so Ctrl-C is not swallowed.
        logger.error(error_msg)
        return None
def load_jhu(self):
    """Load JHU confirmed/deaths time series and merge them into one frame.

    Downloads the lookup table and the two CSV time series named in the
    "urls" config section, reshapes each to long format, and inner-joins
    them on (iso3, date).

    Returns
    -------
    pd.DataFrame or None
        Columns: date, region, iso3, Lat, Lon, deaths, cases — or None
        when any download/merge step fails.
    """
    logger.info("load jhu")
    error_msg = "cannot load JHU data, no url provided"

    def read_prepare_data(url):
        # Load one JHU CSV (by config key) and sum provinces per region.
        try:
            data_raw = pd.read_csv(self.parser.get("urls", url))
            data_raw.rename(columns={"Country/Region": "region"},
                            inplace=True)
            data = (data_raw.groupby("region").sum().drop(
                columns=["Lat", "Long"]).reset_index())
            # Bug fix: rstrip('_url') strips a trailing character *set*
            # ('_','u','r','l'), not the literal suffix; strip the exact
            # '_url' suffix instead.
            label = url[:-len('_url')] if url.endswith('_url') else url
            logger.info('length {}: {}'.format(label, len(data)))
            return data
        except Exception:
            logger.error('prepare_data')
            logger.error(error_msg)
            return None

    def create_timeseries_jhu(data, lookup_table, value_name):
        # Melt wide per-date columns into rows and attach ISO metadata.
        try:
            logger.info('timeseries: ' + value_name)
            id_vars = "region"
            var_name = "date"
            timeseries = pd.melt(data, id_vars=id_vars, var_name=var_name,
                                 value_name=value_name)
            timeseries.loc[:, var_name] = pd.to_datetime(
                timeseries.loc[:, var_name])
            # One metadata row per region; inner join drops regions
            # missing from the lookup table.
            timeseries = pd.merge(
                lookup_table[[
                    "iso2", "iso3", "code3", "Lat", "Lon", id_vars
                ]].groupby(id_vars).first(),
                timeseries,
                on=id_vars,
                how="inner",
            )
            return timeseries
        except Exception:
            logger.error('failed to create timeseries')
            return None

    try:
        lookup_table = pd.read_csv(
            self.parser.get("urls", "jhu_lookup_url"))
        lookup_table.rename(columns={
            "Country_Region": "region",
            "Long_": "Lon"
        }, inplace=True)
        logger.info('length data ({}): {}'.format('lookup table',
                                                  len(lookup_table)))
    except Exception:
        logger.error('lookup table')
        logger.error(error_msg)
        return None

    # Guard clause replaces the original if/else pyramid.
    if lookup_table.empty:
        logger.error(error_msg)
        return None

    confirmed_data = read_prepare_data("jhu_confirmed_url")
    deaths_data = read_prepare_data("jhu_deaths_url")
    confirmed = create_timeseries_jhu(confirmed_data, lookup_table,
                                      "confirmed")
    deaths = create_timeseries_jhu(deaths_data, lookup_table, "deaths")
    # Robustness: the helpers return None on failure; bail out instead of
    # raising TypeError on the subscript/merge below.
    if confirmed is None or deaths is None:
        logger.error(error_msg)
        return None

    data = pd.merge(
        deaths[["date", "region", "iso3", "Lat", "Lon", "deaths"]],
        confirmed[["date", "confirmed", "iso3"]],
        on=["iso3", "date"],
        how="inner",
    )
    data.rename(columns={"confirmed": "cases"}, inplace=True)
    return data