def subset(self, country, province=None, start_date=None, end_date=None, population=None):
    """
    Return the records of the area, limited to rows with Recovered > 0.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020
        population (int or None): population value, forwarded to the susceptible calculation

    Raises:
        SubsetNotFoundError: no records were found for the area/dates,
            or none of them has Recovered > 0

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pd.Timestamp): Observation date
                - Confirmed (int): the number of confirmed cases
                - Infected (int): the number of currently infected cases
                - Fatal (int): the number of fatal cases
                - Recovered (int): the number of recovered cases (> 0)
                - Susceptible (int): the number of susceptible cases, if calculated

    Note:
        The susceptible column is produced by self._calculate_susceptible(),
        which receives @population.
    """
    alias = self.ensure_country_name(country)
    # Arguments shared by both possible errors
    error_kwargs = {
        "country": country,
        "country_alias": alias,
        "province": province,
        "start_date": start_date,
        "end_date": end_date,
    }
    # Records of the area within the date range
    area_df = self._subset(
        country=country, province=province, start_date=start_date, end_date=end_date)
    if area_df.empty:
        raise SubsetNotFoundError(**error_kwargs)
    # Add Susceptible, then keep only the rows with positive Recovered
    with_susceptible = self._calculate_susceptible(area_df, population)
    has_recovered = with_susceptible[self.R] > 0
    selected = with_susceptible.loc[has_recovered, :].reset_index(drop=True)
    if selected.empty:
        raise SubsetNotFoundError(**error_kwargs, message="with 'Recovered > 0'") from None
    return selected
def _subset_by_area(self, country, province=None): """ Return subset for the country/province. Args: country (str): country name province (str or None): province name or None (country level data) Returns: pandas.DataFrame: subset for the country/province, columns are not changed Raises: SubsetNotFoundError: no records were found for the condition """ # Country level if province is None or province == self.UNKNOWN: df = self.layer(country=None) country_alias = self.ensure_country_name(country) df = df.loc[df[self.COUNTRY] == country_alias] return df.reset_index(drop=True) # Province level df = self.layer(country=country) df = df.loc[df[self.PROVINCE] == province] if df.empty: raise SubsetNotFoundError(country=country) return df.reset_index(drop=True)
def subset(self, country, **kwargs):
    """
    Return the records of a country, one row per date.

    Args:
        country (str): country name or ISO 3166-1 alpha-3, like JPN
        kwargs: accepted for compatibility and ignored

    Raises:
        covsirphy.SubsetNotFoundError: no records were found

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pd.Timestamp): Observation date
                - the other columns listed in self.OXCGRT_COLS_WITHOUT_COUNTRY
    """
    alias = self.ensure_country_name(country)
    try:
        records = super().subset(country=alias)
    except SubsetNotFoundError:
        # Report the name given by the caller together with the resolved one
        raise SubsetNotFoundError(country=country, country_alias=alias) from None
    # Keep the last record of each date
    per_date = records.groupby(self.DATE).last().reset_index()
    return per_date.loc[:, self.OXCGRT_COLS_WITHOUT_COUNTRY]
def value(self, country, province=None, date=None):
    """
    Return the population value registered for the place.

    Args:
        country (str): country name or ISO3 code
        province (str): province name
        date (str or None): observation date, like 01Jun2020

    Raises:
        SubsetNotFoundError: self.subset() raised KeyError for the place/date

    Returns:
        int: population in the place

    Note:
        @date is forwarded to self.subset() as both the start and the end date.
        The value of the last record in date order is returned.
    """
    alias = self.ensure_country_name(country)
    try:
        records = self.subset(
            country=country, province=province, start_date=date, end_date=date)
    except KeyError:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province, date=date)
    ordered = records.sort_values(self.DATE)
    # Label of the newest record after sorting by date
    newest_label = ordered.index[-1]
    newest_values = ordered.loc[newest_label, [self.N]].values
    return int(newest_values[0])
def ensure_country_name(self, country):
    """
    Resolve the argument to a country name registered in the cleaned dataset.

    Args:
        country (str): country name

    Raises:
        SubsetNotFoundError: neither the argument nor its converted name is registered

    Returns:
        str: country name registered in the dataset
    """
    cleaned = self._ensure_dataframe(
        self._cleaned_df, name="the cleaned dataset", columns=[self.COUNTRY])
    registered = set(cleaned[self.COUNTRY].unique())
    # Already a registered name: nothing to convert
    if country in registered:
        return country
    # Convert to a short name, then apply the dataset-specific spellings
    short_name = coco.convert(country, to="name_short", not_found=None)
    dataset_spellings = {
        "Congo Republic": "Republic of the Congo",
        "DR Congo": "Democratic Republic of the Congo",
        "UK": "United Kingdom",
        "Vatican": "Holy See",
    }
    candidate = dataset_spellings.get(short_name, short_name)
    if candidate not in registered:
        raise SubsetNotFoundError(country=country, country_alias=candidate)
    return candidate
def retrieve(self, country):
    """
    Return the dataset of the country, using the raw dataset when possible.

    When the country is not in the raw dataset, the records are retrieved with
    self._retrieve_from_server(), appended to the raw dataset and the raw
    dataset is saved to self._filename as CSV.

    Args:
        country (str): country name

    Raises:
        SubsetNotFoundError: the records could not be retrieved from the server

    Returns:
        pandas.DataFrame: retrieved data
            Index
                reset index
            Columns
                - Country (pandas.Category): country name
                - Year (int): year
                - Sex (str): Female/Male
                - Age (int): age
                - Population (int): population value
    """
    is_registered = (not self._raw.empty) and country in self._raw[self.COUNTRY].unique()
    if is_registered:
        registered = self._raw.copy()
        df = registered.loc[registered[self.COUNTRY] == country, :].reset_index(drop=True)
    else:
        # Retrieve from World Bank Open Data
        try:
            df = self._retrieve_from_server(country)
        except SubsetNotFoundError:
            raise SubsetNotFoundError(country=country) from None
        # Register the new records and save the raw dataset
        self._raw = pd.concat([self._raw, df], ignore_index=True, axis=0)
        self._raw.to_csv(self._filename, index=False)
    # Data types
    category_columns = [self.COUNTRY, self.SEX]
    integer_columns = [self.AGE, self.N]
    df[category_columns] = df[category_columns].astype("category")
    df[integer_columns] = df[integer_columns].astype(np.int64)
    return df
def _colored_map_country(self, country, variable, title, date, **kwargs): """ Create country-specific colored map to show the values at province level. Args: country (str): country name variable (str): variable name to show title (str): title of the figure date (str or None): date of the records or None (the last value) kwargs: arguments of covsirphy.ColoredMap() and covsirphy.ColoredMap.plot() """ df = self._cleaned_df.copy() country_alias = self.ensure_country_name(country) # Check variable name if variable not in df.columns: candidates = [col for col in df.columns if col not in self.AREA_ABBR_COLS] raise UnExpectedValueError(name="variable", value=variable, candidates=candidates) # Select country-specific data self._ensure_dataframe(df, name="cleaned dataset", columns=[self.COUNTRY, self.PROVINCE]) df = df.loc[df[self.COUNTRY] == country_alias] df = df.loc[df[self.PROVINCE] != self.UNKNOWN] if df.empty: raise SubsetNotFoundError( country=country, country_alias=country_alias, message="at province level") # Select date if date is not None: self._ensure_dataframe(df, name="cleaned dataset", columns=[self.DATE]) df = df.loc[df[self.DATE] == pd.to_datetime(date)] df = df.groupby(self.PROVINCE).last().reset_index() # Plotting df[self.COUNTRY] = country_alias df.rename(columns={variable: "Value"}, inplace=True) self._colored_map(title=title, data=df, level=self.PROVINCE, **kwargs)
def subset(self, country, province=None, start_date=None, end_date=None):
    """
    Return the records of the country/province between the start and end dates.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                without ISO3, Country, Province column

    Raises:
        SubsetNotFoundError: no records were found for the condition
    """
    # errors="coerce": an unknown country is reported by the area selection below
    alias = self.ensure_country_name(country, errors="coerce")
    try:
        area_df = self._subset_by_area(country=country, province=province)
    except SubsetNotFoundError:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province) from None
    area_columns = [self.COUNTRY, self.ISO3, self.PROVINCE]
    area_df = area_df.drop(area_columns, axis=1, errors="ignore")
    # No date condition: all records of the area
    if start_date is None and end_date is None:
        return area_df.reset_index(drop=True)
    # Date condition, open ends default to the first/last registered date
    area_df = self._ensure_dataframe(
        area_df, name="the cleaned dataset", columns=[self.DATE])
    dates = area_df[self.DATE].copy()
    first = self.date_obj(date_str=start_date, default=dates.min())
    last = self.date_obj(date_str=end_date, default=dates.max())
    in_range = (dates >= first) & (dates <= last)
    selected = area_df.loc[in_range, :]
    if selected.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date) from None
    return selected.reset_index(drop=True)
def _retrieve_from_server(self, country):
    """
    Retrieve the population pyramid dataset of the country from the server.

    Args:
        country (str): country name

    Raises:
        SubsetNotFoundError: wbdata raised RuntimeError for the country

    Returns:
        pandas.DataFrame: retrieved data
            Index
                reset index
            Columns
                - Country (object): country name
                - Year (int): year
                - Sex (object): Female/Male
                - Age (object): age
                - Population (object): population value

    Note:
        The server provides values per age group. The value of each group is
        divided equally among the ages of the group and truncated to integer.
    """
    if self.verbose:
        print(
            f"Retrieving population pyramid dataset ({country}) from https://data.worldbank.org/"
        )
    # Retrieve from World Bank Open Data
    iso3_code = coco.convert(country, to="ISO3", not_found=None)
    try:
        df = wbdata.get_dataframe(self.INDICATOR_DICT, country=iso3_code, convert_date=True)
    except RuntimeError:
        raise SubsetNotFoundError(country=country) from None
    # Preprocessing (-> Country, Population, Min, Max, Sex, Year)
    df = df.stack().reset_index()
    df.insert(0, self.COUNTRY, country)
    df.columns = [self.COUNTRY, "Date", "Attribute", self.N]
    # Attribute has the form "<min age>-<max age or UP>-<sex code>"
    df2 = df["Attribute"].str.split("-", expand=True)
    df2.columns = ["Min", "Max", self.SEX]
    df = pd.concat([df.drop("Attribute", axis=1), df2], axis=1)
    df["Max"] = df["Max"].replace("UP", self.ELDEST)
    for col in [self.N, "Min", "Max"]:
        df[col] = pd.to_numeric(df[col], downcast="integer")
    # Assign the result: an in-place replace on a selected column does not
    # update the frame when pandas Copy-on-Write is active
    df[self.SEX] = df[self.SEX].replace({"FE": "Female", "MA": "Male"})
    df[self.YEAR] = df["Date"].dt.year
    df = df.drop("Date", axis=1)
    # Preprocessing (-> Country, Year, Sex, Age, Population)
    # Rows are accessed by label (positional access with integer keys is deprecated);
    # int() avoids overflow of the down-casted integer type when adding 1
    df[self.AGE] = df[["Min", "Max"]].apply(
        lambda row: range(int(row["Min"]), int(row["Max"]) + 1), axis=1)
    # Divide the value of the age group equally among its ages
    df[self.N] = df[self.N] / (df["Max"] - df["Min"] + 1)
    df = df.explode(self.AGE).reset_index(drop=True)
    df[self.N] = df[self.N].astype(np.int64)
    return df.loc[:, self.PYRAMID_COLS]
def records(self, country, province=None, start_date=None, end_date=None, auto_complement=True, **kwargs):
    """
    Return the records of the area, complemented when requested and available.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020
        auto_complement (bool): if True, self.subset_complement() is tried first
        kwargs: the other arguments of self.subset_complement()

    Raises:
        SubsetNotFoundError: no records were found for the condition

    Returns:
        tuple(pandas.DataFrame, object):
            pandas.DataFrame: records of the area with a reset index
            object: the second value returned by self.subset_complement(),
                or False when the records come from self.subset()

    Note:
        self.subset() is used when @auto_complement is False, when
        self.subset_complement() raises NotImplementedError, or when it
        returns an empty dataframe.
    """
    alias = self.ensure_country_name(country)
    area_kwargs = dict(
        country=country, province=province, start_date=start_date, end_date=end_date)
    if auto_complement:
        try:
            complemented, kind = self.subset_complement(**area_kwargs, **kwargs)
        except NotImplementedError:
            # Complement is not available: use the plain subset below
            pass
        else:
            if not complemented.empty:
                return (complemented, kind)
    try:
        plain = self.subset(**area_kwargs)
    except SubsetNotFoundError:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date) from None
    return (plain, False)
def subset(self, country, product=None, start_date=None, end_date=None):
    """
    Return daily records of the country/product between the start and end dates.

    The selected records are summed per day. Days with a total of zero
    (including days without records) take the value of the previous day;
    leading zeros remain zero.

    Args:
        country (str or None): country name or ISO3 code
        product (str or None): product name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020

    Raises:
        SubsetNotFoundError: no records were found for the condition

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pandas.TimeStamp): observation date
                - Vaccinations (int): the number of vaccinations
    """
    df = self._cleaned_df.copy()
    # Subset by country
    country_alias = self.ensure_country_name(country)
    df = df.loc[df[self.COUNTRY] == country_alias]
    # Subset by product name
    if product is not None:
        df = df.loc[df[self.PRODUCT] == product]
    # Subset with start date
    if start_date is not None:
        df = df.loc[df[self.DATE] >= self.date_obj(start_date)]
    # Subset with end date
    if end_date is not None:
        df = df.loc[df[self.DATE] <= self.date_obj(end_date)]
    # Check records were found before doing any calculation
    if df.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=country_alias, province=product,
            start_date=start_date, end_date=end_date)
    # Resampling: only numeric columns are summed, because recent pandas
    # versions would otherwise concatenate the strings of Country/Product
    numeric_df = df.set_index(self.DATE).select_dtypes(include="number")
    daily = numeric_df.resample("D").sum().reset_index()
    # Fill in the blanks: zero -> previous value, leading zeros stay zero.
    # mask()/ffill() are used instead of replace(0, None) and
    # fillna(method="ffill"), which are version-dependent/deprecated in pandas
    totals = daily[self.VAC]
    daily[self.VAC] = totals.mask(totals == 0).ffill().fillna(0).astype(totals.dtype)
    return daily
def subset(self, country, province=None, start_date=None, end_date=None, dataset="COVID-19 Data Hub"):
    """
    Return the records of the country/province between the start and end dates.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020
        dataset (str): 'COVID-19 Data Hub' or 'Our World In Data'

    Raises:
        SubsetNotFoundError: no records were found in the date range

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pd.TimeStamp): Observation date
                - Tests (int): the number of total tests performed
                - Confirmed (int): the number of confirmed cases
    """
    alias = self.ensure_country_name(country)
    area_df = self._subset_by_area(country=alias, province=province, dataset=dataset)
    area_df = area_df.drop([self.COUNTRY, self.ISO3, self.PROVINCE], axis=1)
    # No date condition: all records of the area
    if start_date is None and end_date is None:
        return area_df.reset_index(drop=True)
    # Date condition, open ends default to the first/last registered date
    dates = area_df[self.DATE].copy()
    first = self.date_obj(date_str=start_date, default=dates.min())
    last = self.date_obj(date_str=end_date, default=dates.max())
    in_range = (dates >= first) & (dates <= last)
    selected = area_df.loc[in_range, :]
    if selected.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date)
    return selected.reset_index(drop=True)
def layer(self, country=None):
    """
    Return the cleaned data at country level or at province level of a country.

    Args:
        country (str or None): country name or None (country level data or country-specific dataset)

    Returns:
        pandas.DataFrame:
            Index
                reset index
            Columns
                - Country (str): country names
                - Province (str): province names (or removed when country level data)
                - any other columns of the cleaned data

    Raises:
        SubsetNotFoundError: no records were found for the country (when @country is not None)
        KeyError: @country was None, but country names were not registered in the dataset

    Note:
        When @country is None, country level data will be returned.
        When @country is a country name, province level data in the selected country will be returned.
    """
    cleaned = self._cleaned_df.copy()
    self._ensure_dataframe(cleaned, name="the cleaned dataset", columns=[self.COUNTRY])
    # Without a province column, all records are regarded as country level
    if self.PROVINCE not in cleaned:
        cleaned[self.PROVINCE] = self.UNKNOWN
    cleaned[self.AREA_COLUMNS] = cleaned[self.AREA_COLUMNS].astype(str)
    if country is None:
        # Country level data
        country_level = cleaned.loc[cleaned[self.PROVINCE] == self.UNKNOWN]
        return country_level.drop(self.PROVINCE, axis=1).reset_index(drop=True)
    # Province level data at the selected country
    alias = self.ensure_country_name(country, errors="coerce")
    in_country = cleaned.loc[cleaned[self.COUNTRY] == alias]
    if in_country.empty:
        raise SubsetNotFoundError(country=country, country_alias=alias) from None
    province_level = in_country.loc[in_country[self.PROVINCE] != self.UNKNOWN]
    return province_level.reset_index(drop=True)
def specialized(self, model=None, country=None, province=None):
    """
    Return the registered dimensional records of the area.

    Args:
        model (cs.ModelBase or None): the first ODE model
        country (str or None): country name
        province (str or None): province name

    Raises:
        SubsetNotFoundError: no records are registered for the area

    Note:
        The area names are decided by self._model_to_area().
        If country is None, the name of the model will be used.
        If province is None, '-' will be used.
    """
    area = self._model_to_area(model=model, country=country, province=province)
    country_name, province_name = area
    try:
        by_province = self._specialized_dict[country_name]
        registered = by_province[province_name]
    except KeyError:
        raise SubsetNotFoundError(country=country_name, province=province_name)
    return registered
def non_dim(self, model=None, country=None, province=None):
    """
    Return the registered non-dimensional data of the area.

    Args:
        model (cs.ModelBase or None): the first ODE model
        country (str or None): country name
        province (str or None): province name

    Raises:
        SubsetNotFoundError: no data is registered for the area

    Note:
        The area names are decided by self._model_to_area().
        If country is None, the name of the model will be used.
        If province is None, '-' will be used.
    """
    area = self._model_to_area(model=model, country=country, province=province)
    country_name, province_name = area
    try:
        by_province = self.nondim_dict[country_name]
        registered = by_province[province_name]
    except KeyError:
        raise SubsetNotFoundError(country=country_name, province=province_name)
    return registered
def subset(self, country, province=None, start_date=None, end_date=None):
    """
    Return the records of the country/province with the daily number of tests.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020

    Raises:
        SubsetNotFoundError: no records were found in the date range

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pd.Timestamp): Observation date
                - Tests (int): the number of total tests performed
                - Tests_diff (int): daily number of tests on date
                - Confirmed (int): the number of confirmed cases
    """
    alias = self.ensure_country_name(country)
    records = self._subset_select(country=alias, province=province or self.UNKNOWN)
    # Tests_diff: difference from the previous record, the first record and
    # negative differences are regarded as zero
    daily_tests = records[self.TESTS].diff().fillna(0).clip(lower=0)
    records[self.T_DIFF] = daily_tests.astype(np.int64)
    records = records.loc[:, [self.DATE, self.TESTS, self.T_DIFF, self.C]]
    # No date condition: all records of the area
    if start_date is None and end_date is None:
        return records.reset_index(drop=True)
    # Date condition, open ends default to the first/last registered date
    dates = records[self.DATE].copy()
    first = self._ensure_date(start_date, default=dates.min())
    last = self._ensure_date(end_date, default=dates.max())
    in_range = (dates >= first) & (dates <= last)
    selected = records.loc[in_range, :]
    if selected.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date)
    return selected.reset_index(drop=True)
def subset(self, country, product=None, start_date=None, end_date=None):
    """
    Return the records of the country/product between the start and end dates.

    Args:
        country (str or None): country name or ISO3 code
        product (str or None): product name
        start_date (str or None): start date, like 22Jan2020
        end_date (str or None): end date, like 01Feb2020

    Raises:
        SubsetNotFoundError: no records were found for the condition

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pandas.TimeStamp): observation date
                - Vaccinations (int): the number of vaccinations
                - Vaccinated_once (int): cumulative number of people who received at least one vaccine dose
                - Vaccinated_full (int): cumulative number of people who received all doses prescribed by the protocol
    """
    cleaned = self._cleaned_df.copy()
    alias = self.ensure_country_name(country)
    # Combine the conditions: country, then product and dates when specified
    condition = cleaned[self.COUNTRY] == alias
    if product is not None:
        condition = condition & (cleaned[self.PRODUCT] == product)
    if start_date is not None:
        condition = condition & (cleaned[self.DATE] >= self._ensure_date(start_date))
    if end_date is not None:
        condition = condition & (cleaned[self.DATE] <= self._ensure_date(end_date))
    selected = cleaned.loc[condition]
    if selected.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=product,
            start_date=start_date, end_date=end_date)
    return selected.loc[:, self.VAC_SUBSET_COLS].reset_index(drop=True)
def subset(self, country, province=None):
    """
    Return the records of the country/province.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name

    Raises:
        SubsetNotFoundError: no records were found for the country/province

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Hospitalized_date (pandas.TimeStamp or NT)
                - Confirmation_date (pandas.TimeStamp or NT)
                - Outcome_date (pandas.TimeStamp or NT)
                - Confirmed (bool)
                - Infected (bool)
                - Recovered (bool)
                - Fatal (bool)
                - Symtoms (str)
                - Chronic_disease (str)
                - Age (int or None)
                - Sex (str)
    """
    df = self._cleaned_df.copy()
    # Resolve the country name once and keep the argument as given by the
    # caller, so that the error reports both the original and resolved names
    country_alias = self.ensure_country_name(country)
    df = df.loc[df[self.COUNTRY] == country_alias]
    # Subset by province name
    if province not in (None, self.UNKNOWN):
        df = df.loc[df[self.PROVINCE] == province]
    # Check records are registered
    if df.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=country_alias, province=province)
    df = df.drop([self.COUNTRY, self.PROVINCE], axis=1)
    return df.reset_index(drop=True)
def ensure_country_name(self, country, errors="raise"):
    """
    Ensure that the country name is correct.
    If not, the correct country name will be found.

    Args:
        country (str): country name
        errors (str): 'raise' or 'coerce'

    Returns:
        str or None: country name registered in the dataset,
            or None when it was not found and @errors is not 'raise'

    Raises:
        SubsetNotFoundError: no records were found for the country and @errors is 'raise'
    """
    df = self._cleaned_df.copy()
    self._ensure_dataframe(df, name="the cleaned dataset", columns=[self.COUNTRY])
    selectable_set = set(df[self.COUNTRY].unique())
    # return country name as-is if selectable
    if country in selectable_set:
        return country
    # Convert country name.
    # FutureWarning is silenced only during the conversion, so that the
    # warning filters of the calling process are left unchanged
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        converted = coco.convert(country, to="name_short", not_found=None)
    # Additional abbr
    abbr_dict = {
        "Congo Republic": "Republic of the Congo",
        "DR Congo": "Democratic Republic of the Congo",
        "UK": "United Kingdom",
        "Vatican": "Holy See",
    }
    name = abbr_dict.get(converted, converted)
    # Return the name if registered in the dataset
    if name in selectable_set:
        return name
    if errors == "raise":
        raise SubsetNotFoundError(country=country, country_alias=name)
    # 'coerce': the country is not registered
    return None
def subset(self, country, **kwargs):
    """
    Return the records of a country, one row per date.

    Args:
        country (str): country name or ISO 3166-1 alpha-3, like JPN
        kwargs: accepted for compatibility and ignored

    Raises:
        covsirphy.SubsetNotFoundError: no records were found

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pandas.Timestamp): Observation date
                - School_closing
                - Workplace_closing
                - Cancel_events
                - Gatherings_restrictions
                - Transport_closing
                - Stay_home_restrictions
                - Internal_movement_restrictions
                - International_movement_restrictions
                - Information_campaigns
                - Testing_policy
                - Contact_tracing
                - Stringency_index
    """
    alias = self.ensure_country_name(country)
    try:
        records = super().subset(country=alias)
    except SubsetNotFoundError:
        # Report the name given by the caller together with the resolved one
        raise SubsetNotFoundError(country=country, country_alias=alias) from None
    # Keep the last record of each date
    per_date = records.groupby(self.DATE).last().reset_index()
    return per_date.loc[:, self.SUBSET_COLS]
def records(self, country, province=None, start_date=None, end_date=None, population=None, auto_complement=True, **kwargs):
    """
    Return JHU-style records of the area from the start date to the end date.
    Records with Recovered > 0 will be selected.

    Args:
        country(str): country name or ISO3 code
        province(str or None): province name
        start_date(str or None): start date, like 22Jan2020
        end_date(str or None): end date, like 01Feb2020
        population(int or None): population value
        auto_complement (bool): if True, self.subset_complement() is tried first
        kwargs: the other arguments of JHUData.subset_complement()

    Raises:
        SubsetNotFoundError: self.subset() raised ValueError for the condition

    Returns:
        tuple(pandas.DataFrame, bool):
            pandas.DataFrame:
                Index
                    reset index
                Columns
                    - Date(pd.TimeStamp): Observation date
                    - Confirmed(int): the number of confirmed cases
                    - Infected(int): the number of currently infected cases
                    - Fatal(int): the number of fatal cases
                    - Recovered (int): the number of recovered cases ( > 0)
                    - Susceptible(int): the number of susceptible cases, if calculated
            str or bool: kind of complement or False

    Note:
        - If @population is not None, the number of susceptible cases will be calculated.
        - self.subset() is used when @auto_complement is False or when
          self.subset_complement() returns an empty dataframe.
    """
    alias = self.ensure_country_name(country)
    area_kwargs = dict(
        country=country, province=province,
        start_date=start_date, end_date=end_date, population=population)
    if auto_complement:
        complemented, kind = self.subset_complement(**area_kwargs, **kwargs)
        if not complemented.empty:
            return (complemented, kind)
    # Not complemented: records as registered
    try:
        plain = self.subset(**area_kwargs)
    except ValueError:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date,
            message="with 'Recovered > 0'") from None
    return (plain, False)
def subset_complement(self, country, province=None, start_date=None, end_date=None, population=None, **kwargs):
    """
    Return the records of the area after running the complement handler.
    Records with Recovered > 0 will be selected.

    Args:
        country(str): country name or ISO3 code
        province(str or None): province name
        start_date(str or None): start date, like 22Jan2020
        end_date(str or None): end date, like 01Feb2020
        population(int or None): population value
        kwargs: keyword arguments of JHUDataComplementHandler(), control factors of complement

    Raises:
        SubsetNotFoundError: no records were found for the area/dates

    Returns:
        tuple(pandas.DataFrame, str or bool):
            pandas.DataFrame:
                Index
                    reset index
                Columns
                    - Date(pd.TimeStamp): Observation date
                    - Confirmed(int): the number of confirmed cases
                    - Infected(int): the number of currently infected cases
                    - Fatal(int): the number of fatal cases
                    - Recovered (int): the number of recovered cases ( > 0)
                    - Susceptible(int): the number of susceptible cases, if calculated
            str or bool: kind of complement or False

    Note:
        If @population is not None, the number of susceptible cases will be calculated.
    """
    alias = self.ensure_country_name(country)
    # Records of the area within the date range
    area_df = self._subset(
        country=country, province=province, start_date=start_date, end_date=end_date)
    if area_df.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=alias, province=province,
            start_date=start_date, end_date=end_date) from None
    # The recovery period is calculated only when not available yet
    if not self._recovery_period:
        self._recovery_period = self.calculate_recovery_period()
    # Complement, if necessary
    handler = JHUDataComplementHandler(recovery_period=self._recovery_period, **kwargs)
    complemented, status, _ = handler.run(area_df)
    # Calculate Susceptible
    complemented = self._calculate_susceptible(complemented, population)
    # Select records where Recovered > 0
    has_recovered = complemented[self.R] > 0
    selected = complemented.loc[has_recovered, :].reset_index(drop=True)
    # Kind of complement, or False when the status is falsy
    return (selected, status or False)
def show_complement(self, country=None, province=None, start_date=None, end_date=None, **kwargs):
    """
    Show which kinds of complement are applied to the JHU subset of each area.
    This is to monitor effectivity and safety of complement for each country
    (if country/countries specified) or for all countries.

    Args:
        country (str or list[str] or None): country/countries name or None (all countries)
        province(str or None): province name
        start_date(str or None): start date, like 22Jan2020
        end_date(str or None): end date, like 01Feb2020
        kwargs: keyword arguments of JHUDataComplementHandler(), control factors of complement

    Raises:
        ValueError: @province was specified when @country is not a string
        covsirphy.SubsetNotFoundError: No records were registered for the area/dates

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - country (str): country name
                - province (str): province name
                - Monotonic_confirmed (bool): True if applied for confirmed cases or False otherwise
                - Monotonic_fatal (bool): True if applied for fatal cases or False otherwise
                - Monotonic_recovered (bool): True if applied for recovered or False otherwise
                - Full_recovered (bool): True if applied for recovered or False otherwise
                - Partial_recovered (bool): True if applied for recovered or False otherwise
    """
    # The recovery period is calculated only when not available yet
    if not self._recovery_period:
        self._recovery_period = self.calculate_recovery_period()
    # Area name: None means all registered countries except "Others"
    if country is None:
        country = [
            name for name in self._cleaned_df[self.COUNTRY].unique() if name != "Others"]
    province = province or self.UNKNOWN
    if not isinstance(country, str) and province != self.UNKNOWN:
        raise ValueError(
            "@province cannot be specified when @country is not a string.")
    countries = country if isinstance(country, list) else [country]
    # Create complement handler
    handler = JHUDataComplementHandler(recovery_period=self._recovery_period, **kwargs)
    # One row per country, indexed by country name while filling
    summary_columns = [
        self.COUNTRY, self.PROVINCE, *JHUDataComplementHandler.SHOW_COMPLEMENT_FULL_COLS]
    summary = pd.DataFrame(columns=summary_columns).set_index(self.COUNTRY)
    for name in countries:
        area_df = self._subset(
            country=name, province=province, start_date=start_date, end_date=end_date)
        if area_df.empty:
            raise SubsetNotFoundError(
                country=name, province=province, start_date=start_date, end_date=end_date)
        # Only the last value returned by the handler is used here
        *_, applied = handler.run(area_df)
        flags = pd.Series(applied.values(), dtype=bool).values
        summary.loc[name] = [province, *flags]
    return summary.reset_index()