def _get_population_by_state(config, country="br"):
    """Build a state-level population table from the per-city spreadsheet.

    Downloads the city population sheet, fixes place ids and date dtypes,
    then aggregates population per state (indexed by ``state_num_id``).
    """
    # adapted from endpoints.get_health
    # download population (by city) spreadsheet
    pop = download_from_drive(config[country]["drive_paths"]["cities_population"])

    # Fix for default places ids - before "health_system_region"
    places_ids = get_places_id.now(config).assign(
        city_id=lambda d: d["city_id"].astype(int)
    )
    pop = pop.drop(columns=["city_name", "state_name"]).merge(
        places_ids, on=["city_id", "state_id"]
    )

    # Fix date types: every "*last_updated*" column becomes a datetime.
    for col in [c for c in pop.columns if "last_updated" in c]:
        pop[col] = pd.to_datetime(pop[col])

    # adapted from endpoints.get_states_farolcovid_main.now()
    # sum all cities in state
    state_keys = [
        "country_iso",
        "country_name",
        "state_num_id",
        "state_id",
        "state_name",
    ]
    pop = (
        pop.groupby(state_keys)
        .agg({"population": "sum"})
        .reset_index()
        .sort_values("state_num_id")
        .set_index("state_num_id")
    )
    return pop
def now(config):
    """Return stratified health-region parameters joined with their city ids."""
    # Parameters used on health region level for now!
    region_params = get_health_region_parameters.gen_stratified_parameters(
        config, "health_region_id"
    )
    region_to_city = get_places_id.now(config)[["health_region_id", "city_id"]]
    return region_params.merge(region_to_city, on="health_region_id")
def now(config):
    """Attach canonical place ids to the InLoco city mobility table.

    State names are normalized by fuzzy matching against the places table;
    cities are matched on the concatenated "state_name + city_name" key,
    with hard-coded renames from config taking precedence.
    """
    # Get places ids
    places = get_places_id.now(config)
    inloco = get_googledrive_df(os.getenv("INLOCO_CITIES_ID"))
    time.sleep(2)

    # Get states closest matches
    state_matcher = fuzzyset.FuzzySet()
    for name in places["state_name"].unique():
        state_matcher.add(name)
    inloco["state_name"] = inloco["state_name"].apply(
        lambda x: state_matcher.get(x)[0][1]
    )

    # Get cities closest matches by state+city name
    city_matcher = fuzzyset.FuzzySet()
    places["state_city"] = places["state_name"] + places["city_name"]
    for key in places["state_city"].drop_duplicates():
        city_matcher.add(key)

    # Cities with changed names
    renamed = {
        v["state_name"] + name: v["state_name"] + v["correct_name"]
        for name, v in config["br"]["inloco"]["replace"].items()
    }

    def _best_match(name):
        # Configured renames win with a perfect score; otherwise take the
        # top fuzzy candidate as (score, matched_key).
        if name in renamed:
            return (1, renamed[name])
        top = city_matcher.get(name)[0]
        return (top[0], top[1])

    inloco["state_city"] = inloco["state_name"] + inloco["city_name"]
    inloco["state_city_match"], inloco["state_city"] = zip(
        *inloco["state_city"].apply(_best_match)
    )

    # Merge to get places ids
    del inloco["state_name"], inloco["city_name"]
    inloco = inloco.merge(
        places[
            [
                "state_city",
                "state_num_id",
                "state_name",
                "health_region_name",
                "health_region_id",
                "city_name",
                "city_id",
            ]
        ].drop_duplicates(),
        on=["state_city"],
        how="left",
    )
    del inloco["state_city"]
    return inloco
def now(config):
    """Join the InLoco state-level mobility data with state identifiers."""
    mobility = get_googledrive_df(os.getenv("INLOCO_STATES_ID"))
    time.sleep(2)

    states = get_places_id.now(config)[["state_id", "state_name", "state_num_id"]]
    states = states.drop_duplicates().sort_values(by=["state_name"])

    return mobility.merge(states, on="state_name")
def now(config, country="br"):
    """Load resource data, fix place ids and dtypes, and tag the data source."""
    df = _read_df_data(country, config)

    places_ids = get_places_id.now(config).assign(
        city_id=lambda d: d["city_id"].astype(int)
    )
    # Fix for default places ids - before "health_system_region"
    df = df.drop(["city_name", "state_name"], axis=1).merge(
        places_ids, on=["city_id", "state_id"]
    )

    # Fix date types: every "*last_updated*" column becomes a datetime.
    time_cols = [c for c in df.columns if "last_updated" in c]
    df[time_cols] = df[time_cols].apply(pd.to_datetime)

    resource_cols = ["number_beds", "number_ventilators", "number_icu_beds"]
    df[resource_cols] = df[resource_cols].fillna(0)

    # Add DataSUS author
    for resource in resource_cols:
        df["author_" + resource] = config[country]["cnes"]["source"]

    return df
def now(config, country="br"):
    """Build the state-level COVID cases table.

    Pipeline: download Brasil.io cases, clean and deduplicate per state,
    attach place ids and CNES population, correct negative daily values,
    compute infectious-period cases, moving-average indicators, and
    notification-rate-adjusted active cases.

    NOTE(review): only implemented for ``country == "br"``; for any other
    value ``df`` is never bound and the final return raises — confirm
    whether other countries are expected here.
    """
    if country == "br":
        # Infectious period (days) = severe + critical durations from the
        # SEIR parameters in config.
        infectious_period = (
            config["br"]["seir_parameters"]["severe_duration"]
            + config["br"]["seir_parameters"]["critical_duration"]
        )

        # Get data & clean table
        df = (
            download_brasilio_table(config["br"]["cases"]["url"])
            .query("place_type == 'state'")
            .fillna(0)
            .rename(columns=config["br"]["cases"]["rename"])
            .assign(last_updated=lambda x: pd.to_datetime(x["last_updated"]))
            .sort_values(["state_id", "last_updated"])
            .groupby("state_id")
            .apply(lambda group: get_until_last(group))
            .reset_index(drop=True)
            .drop(columns="estimated_population_2019")
        )

        # Fix places_ids by city_id => Get health_region_id
        df = df.merge(
            get_places_id.now(config).assign(
                state_num_id=lambda df: df["state_num_id"].astype(int),
            )[
                [
                    "state_name",
                    "state_id",
                    "state_num_id",
                ]
            ].drop_duplicates(),
            on="state_id",
        )

        cols = [
            "state_num_id",
            "state_id",
            "state_name",
            "last_updated",
            "confirmed_cases",
            "deaths",
            "daily_cases",
            "new_deaths",
        ]

        # Add population data from CNES (summed from cities to state level)
        df = df[cols].merge(
            get_health.now(config)
            .assign(
                state_num_id=lambda df: df["state_num_id"].astype(int),
            )
            .groupby("state_num_id")["population"]
            .sum()
            .reset_index(),
            on="state_num_id",
        )

        # Transform cases data
        df = (
            df.groupby(
                [
                    "state_num_id",
                    "state_id",
                    "state_name",
                ]
            )
            .apply(correct_negatives)  # correct negative values
            .pipe(
                get_infectious_period_cases,
                infectious_period,
                config["br"]["cases"],
                "state_num_id",
            )  # get infectious period cases
            .rename(columns=config["br"]["cases"]["rename"])
        )

        # Get indicators of mavg & growth
        df = get_mavg_indicators(df, "daily_cases", place_id="state_num_id")
        df = get_mavg_indicators(df, "new_deaths", place_id="state_num_id")

        # Get notification rates & active cases on date.
        # active_cases stays NaN where no notification rate is available.
        df = df.merge(
            get_notification_rate.now(df, "state_num_id"),
            on=["state_num_id", "last_updated"],
            how="left",
        ).assign(
            active_cases=lambda x: np.where(
                x["notification_rate"].isnull(),
                np.nan,
                # round(x["infectious_period_cases"], 0),
                round(x["infectious_period_cases"] / x["notification_rate"], 0),
            )
        )
    return df
def now(config):
    """Scrape CNES (DataSUS TabNet) bed and ventilator counts per city.

    Scrapes three TabNet reports (ward beds, complementary/ICU beds,
    ventilators) with a headless Chrome driver, joins them, normalizes the
    numbers, and merges in place ids and city population.

    Parameters
    ----------
    config : dict
        Project configuration; uses ``config["br"]["drive_paths"]``.

    Returns
    -------
    pandas.DataFrame
        One row per city with ``number_beds``, ``number_icu_beds``,
        ``number_covid_icu_beds``, ``number_ventilators``, their
        ``last_updated_*`` dates and ``author_*`` columns.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1420,1080")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    # chromedriver is installed via the Dockerfile
    driver = webdriver.Chrome(chrome_options=chrome_options)

    try:
        # Ward beds by specialty for every city
        # logger.info("Baixando dados de leitos")
        urlleitos = "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/leiintbr.def"
        df_leitos, updatedate = get_leitos(driver, urlleitos)

        # Last update date of the CNES data
        updatedate = get_date(updatedate)

        # Complementary (ICU) beds for every city
        # logger.info("Baixando dados de leitos UTI")
        urlleitoscomp = (
            "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/leiutibr.def"
        )
        df_leitos_comp = get_urlleitoscomp(driver, urlleitoscomp)

        # Ventilators per city
        # logger.info("Baixando dados de respiradores")
        urlresp = "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/equipobr.def"
        df_respiradores = get_respiradores(driver, urlresp)
    finally:
        # FIX: the driver was never released before — leaked a Chrome
        # process on every call and on any scrape failure.
        driver.quit()

    # Join the three scraped tables
    df_cnes = df_leitos.merge(df_leitos_comp, how="left", on=["city_id", "city_name"])
    df_cnes = df_cnes.merge(df_respiradores, how="left", on=["city_id", "city_name"])
    logger.info("Une dados de leitos, leitos UTI e respiradores")

    # TabNet uses "-" for missing values; normalize everything to 0.
    df_cnes = df_cnes.replace({"-": 0}, regex=True)
    df_cnes = df_cnes.replace(np.nan, 0, regex=True)

    # Fix column dtypes (values arrive as strings from the scraper)
    resources = [
        "cirurgico_tot",
        "clinico_tot",
        "hospital_dia_tot",
        "UTI_adulto_I_tot",
        "UTI_adulto_II_tot",
        "UTI_adulto_III_tot",
        "UTI_adulto_II_COVID_SUS",
        "UTI_adulto_II_COVID_nao_SUS",
        "UTI_pediatrica_II_COVID_SUS",
        "UTI_pediatrica_II_COVID_nao_SUS",
        "number_ventilators",
    ]
    for col in resources:
        # FIX: fill NaN *before* the int cast — the original chained
        # .fillna(0) after .astype(int), where it could never apply.
        df_cnes[col] = df_cnes[col].astype(str).astype(float).fillna(0).astype(int)

    # Total ward beds
    df_cnes["number_beds"] = (
        df_cnes["cirurgico_tot"] + df_cnes["clinico_tot"] + df_cnes["hospital_dia_tot"]
    )
    # Total ICU beds
    df_cnes["number_icu_beds"] = (
        df_cnes["UTI_adulto_I_tot"]
        + df_cnes["UTI_adulto_II_tot"]
        + df_cnes["UTI_adulto_III_tot"]
    )
    # Total COVID ICU beds
    df_cnes["number_covid_icu_beds"] = (
        df_cnes["UTI_adulto_II_COVID_SUS"]
        + df_cnes["UTI_adulto_II_COVID_nao_SUS"]
        + df_cnes["UTI_pediatrica_II_COVID_SUS"]
        + df_cnes["UTI_pediatrica_II_COVID_nao_SUS"]
    )

    # Merge with place ids. CNES reports 6-digit IBGE city codes, so keep
    # the 7-digit id aside and match on the truncated code.
    places_ids = get_places_id.now(config)
    places_ids["city_id_7d"] = places_ids["city_id"]
    # FIX: dropped a redundant self-assignment of city_id present here.
    places_ids["city_id"] = places_ids["city_id"].astype(str).str[:-1]
    df_cnes = places_ids.merge(df_cnes, how="left", on=["city_id"], suffixes=["", "_y"])
    df_cnes["city_id"] = df_cnes["city_id_7d"]
    df_cnes = df_cnes.drop(columns="city_id_7d")

    # Merge with population data
    df_pop = download_from_drive(config["br"]["drive_paths"]["cities_population"])[
        [
            "country_iso",
            "country_name",
            "state_id",
            "state_name",
            "city_id",
            "city_name",
            "population",
        ]
    ]
    df_cnes = pd.merge(df_cnes, df_pop, on="city_id", how="left", suffixes=("", "_y"))

    # Drop intermediate and duplicated merge columns
    df_cnes = df_cnes.drop(
        [
            "state_name_y",
            "UTI_pediatrica_II_COVID_nao_SUS",
            "city_name_y",
            "pediatrico_tot",
            "UTI_adulto_II_COVID_SUS",
            "UTI_pediatrica_II_COVID_SUS",
            "UTI_adulto_II_COVID_nao_SUS",
            "state_id_y",
            "cirurgico_tot",
            "clinico_tot",
            "hospital_dia_tot",
            "UTI_adulto_I_tot",
            "UTI_adulto_II_tot",
            "UTI_adulto_III_tot",
        ],
        axis=1,
    )

    # Cities absent from the scrape get zero resources
    resources = [
        "number_icu_beds",
        "number_beds",
        "number_covid_icu_beds",
        "number_ventilators",
    ]
    df_cnes[resources] = df_cnes[resources].fillna(0)

    # Stamp the CNES update date and the data source on each resource
    for resource in resources:
        df_cnes["last_updated_" + resource] = updatedate
    for resource in ["number_beds", "number_ventilators", "number_icu_beds"]:
        df_cnes["author_" + resource] = "DataSUS"

    return df_cnes