def get_models(regions, targets, duration) -> dict:
    """
    Process every region and gather the results in a single dictionary.

    Each region is handed to ``process_region`` together with ``targets``
    and ``duration`` while a Streamlit spinner naming the region is shown.
    The mapping returned for a region is merged into the output under
    ``(region, key)`` tuples, so entries from different regions never
    collide.
    """
    collected = {}
    for reg in regions:
        message = _("Processing {name}").format(name=reg.name)
        with st.spinner(message):
            per_region = process_region(reg, targets, duration)
            for key, model in per_region.items():
                collected[(reg, key)] = model
    return collected
def __get_models(self) -> dict:
    """
    Process each region selected in ``self.user_inputs`` and gather the results.

    A Streamlit spinner naming the region is shown while it is processed.
    The mapping returned by ``self.__process_region`` is merged into the
    output under ``(region, key)`` tuples.
    """
    collected = {}
    for reg in self.user_inputs["regions"]:
        message = _("Processing {name}").format(name=reg.name)
        with st.spinner(message):
            per_region = self.__process_region(reg)
            for key, model in per_region.items():
                collected[(reg, key)] = model
    return collected
def sari_br_state_dataframe(region: Region) -> pd.DataFrame:
    """
    Return the table of SARI hospital vigilance for the given region.

    The raw CSV bytes are obtained from ``sari_br_state_content`` and only
    the header plus the last 1000 data rows are parsed. The result has its
    columns renamed through ``RENAME``, categorical columns cast to their
    category dtypes, and the notification timestamp split into
    ``notification_date`` (date part) and ``notification_time`` (time part).
    The index is named ``"id"``.

    Raises:
        IndexError: if the downloaded content has no lines at all.
    """
    region = mundi.region(region)
    content = sari_br_state_content(region.id)

    # Keep the header and at most the 1000 trailing data rows. The tail is
    # sliced from the data rows only (lines[1:]); slicing the full list would
    # repeat the header as a data row whenever there are <= 1000 lines.
    lines = content.splitlines()
    content = lines[0] + b"\n" + b"\n".join(lines[1:][-1000:])
    fd = io.BytesIO(content)

    date_columns = [
        "dataNotificacao",
        "dataInicioSintomas",
        "dataNascimento",
        "dataEncerramento",
        "dataTeste",
    ]
    category_dtypes = {
        "status": Status.categories,
        "gender": Gender.categories,
        "evolution": Evolution.categories,
        "test_status": Test.categories,
    }

    def time_part(x):
        # Missing values are passed through untouched.
        return x if pd.isna(x) else x.time()

    def date_part(x):
        # Missing values are passed through untouched.
        return x if pd.isna(x) else x.date()

    with st.spinner(f"Converting to CSV ({region.name})"):
        chunks = []
        for df in pd.read_csv(
            fd,
            index_col=0,
            sep=";",
            parse_dates=date_columns,
            dtype=DTYPES,
            converters=CONVERTERS,
            engine="c",
            chunksize=1000,
            encoding="latin1",
        ):
            df: pd.DataFrame = (
                df.astype(DTYPES)
                .rename(columns=RENAME)
                .astype(category_dtypes)
            )
            # The time must be extracted before the column is reduced to a date.
            df["notification_time"] = df["notification_date"].apply(time_part)
            df["notification_date"] = df["notification_date"].apply(date_part)
            df.index.name = "id"
            chunks.append(df)

    return pd.concat(chunks)
def get_dataframe(self, days, columns, info_cols=()):
    """
    Run the simulations for every selected region and tabulate the results.

    Args:
        days: Iterable of day offsets. Consecutive pairs of ``[0, *days]``
            define the time windows and ``max(days)`` is the duration passed
            to the simulation.
        columns: Names of the columns extracted from the simulation group.
        info_cols: Optional region information columns, looked up through
            the ``.mundi`` accessor, to place in front of the simulation
            data.

    Returns:
        A DataFrame with one row per region id. Columns are grouped by a
        "<N> days" label per window and hold, as integers, the maximum of
        each requested column within that window. When ``info_cols`` is
        given, the info columns are prepended and the rows are sorted by
        the first column; otherwise the rows are sorted by index.

    Raises:
        ValueError: if ``days`` is empty (from ``max``).
    """
    regions = self.user_inputs["regions"]
    steps = len(regions)
    duration = max(days)
    days_ranges = np.array([0, *days])
    columns = list(columns)
    # Maps the positional labels of the freshly built frame to the requested names.
    renames = dict(zip(itertools.count(), columns))

    progress_bar = st.progress(0)
    with st.spinner(_("Running simulations")):
        rows = {}
        for i, region in enumerate(regions):
            base, group = self.__run_simulations(region, duration)
            # Region i is finished at this point, so report i + 1 completed steps.
            progress_bar.progress(int(100 * (i + 1) / steps))

            cols = {}
            for a, b in sk.window(2, days_ranges):
                day = b
                a += base.time + 1
                # NOTE(review): the stop is computed from the already shifted
                # start, i.e. stop = b + original a + base.time. For windows
                # after the first this reaches beyond `day`; confirm intended.
                b += a - 1
                name = _("{} days").format(day)
                cols[name] = (
                    pd.DataFrame(group[columns, a:b].max(0))
                    .T.rename(columns=renames)
                    .rename(index={0: region.id})
                    .astype(int)
                )

            rows[region.id] = pd.concat(
                list(cols.values()),
                axis=1,
                names=[_("days")],
                keys=list(cols),
            )
    progress_bar.empty()

    cols_data = pd.concat(list(rows.values()))
    cols_data.index = rows.keys()

    if info_cols:
        extra_info = cols_data.mundi[info_cols]
        extra_info = extra_info.astype(object)  # streamlit bug?
        extra_info.columns = pd.MultiIndex.from_tuples(
            ("", "info", x) for x in extra_info.columns
        )
        # Return the merged frame; previously it was built and then discarded,
        # so the info columns never reached the caller.
        data = pd.concat([extra_info, cols_data], axis=1)
        return data.sort_values(data.columns[0])
    return cols_data.sort_index()