def _colored_map_country(self, country, variable, title, date, **kwargs):
    """
    Create country-specific colored map to show the values at province level.

    Args:
        country (str): country name
        variable (str): variable name to show
        title (str): title of the figure
        date (str or None): date of the records or None (the last value)
        kwargs: arguments of covsirphy.ColoredMap() and covsirphy.ColoredMap.plot()

    Raises:
        UnExpectedValueError: @variable is not a column of the cleaned dataset
        SubsetNotFoundError: the country has no records other than the ones
            whose province is the unknown marker

    Note:
        The selected variable is passed to the plotting method as the "Value" column.
    """
    df = self._cleaned_df.copy()
    country_alias = self.ensure_country_name(country)
    # Check variable name
    if variable not in df.columns:
        candidates = [col for col in df.columns if col not in self.AREA_ABBR_COLS]
        raise UnExpectedValueError(name="variable", value=variable, candidates=candidates)
    # Select country-specific data (province-level records only)
    self._ensure_dataframe(df, name="cleaned dataset", columns=[self.COUNTRY, self.PROVINCE])
    df = df.loc[df[self.COUNTRY] == country_alias]
    df = df.loc[df[self.PROVINCE] != self.UNKNOWN]
    if df.empty:
        raise SubsetNotFoundError(
            country=country, country_alias=country_alias, message="at province level")
    # Select date
    if date is not None:
        self._ensure_dataframe(df, name="cleaned dataset", columns=[self.DATE])
        df = df.loc[df[self.DATE] == pd.to_datetime(date)]
    # Keep the last record of each province.
    # observed=True: when the province column is categorical, the categories of
    # the records filtered out above (other countries, unknown province) must not
    # come back as all-NaN rows. It has no effect on non-categorical columns.
    df = df.groupby(self.PROVINCE, observed=True).last().reset_index()
    # Plotting
    df[self.COUNTRY] = country_alias
    df.rename(columns={variable: "Value"}, inplace=True)
    self._colored_map(title=title, data=df, level=self.PROVINCE, **kwargs)
def _colored_map_global(self, variable, title, date, **kwargs):
    """
    Create global colored map to show the values at country level.

    Args:
        variable (str): variable name to show
        title (str): title of the figure
        date (str or None): date of the records or None (the last value)
        kwargs: arguments of ColoredMap() and ColoredMap.plot()

    Raises:
        UnExpectedValueError: @variable is not a column of the cleaned dataset

    Note:
        Records whose province is "Greenland" are re-labelled as a country
        ("GRL" / "Greenland") before the country-level records are selected.
        The selected variable is passed to the plotting method as the "Value" column.
    """
    df = self._cleaned_df.copy()
    # Check variable name
    if variable not in df.columns:
        candidates = [col for col in df.columns if col not in self.AREA_ABBR_COLS]
        raise UnExpectedValueError(name="variable", value=variable, candidates=candidates)
    # Remove cruise ships
    df = df.loc[df[self.COUNTRY] != self.OTHERS]
    if self.PROVINCE in df.columns:
        # Recognize province as a region/country.
        # The replacement labels are listed in the same order as AREA_ABBR_COLS.
        greenland = ["GRL", "Greenland", self.UNKNOWN]
        for col, label in zip(self.AREA_ABBR_COLS, greenland):
            dtype = df[col].dtype
            # add_categories() raises ValueError for an existing category and
            # .cat is unavailable on non-categorical columns, so register the
            # label only when it is really missing from a categorical column.
            if isinstance(dtype, pd.CategoricalDtype) and label not in dtype.categories:
                df[col] = df[col].cat.add_categories([label])
        df.loc[df[self.PROVINCE] == "Greenland", self.AREA_ABBR_COLS] = greenland
        # Select country level data
        df = df.loc[df[self.PROVINCE] == self.UNKNOWN]
    # Select date
    if date is not None:
        self._ensure_dataframe(df, name="cleaned dataset", columns=[self.DATE])
        df = df.loc[df[self.DATE] == pd.to_datetime(date)]
    # Group by plain strings so that unused categories do not produce empty groups
    df[self.COUNTRY] = df[self.COUNTRY].astype(str)
    df = df.groupby(self.COUNTRY).last().reset_index()
    # Plotting
    df.rename(columns={variable: "Value"}, inplace=True)
    self._colored_map(title=title, data=df, level=self.COUNTRY, **kwargs)
def _register_extras(self, extras):
    """
    Verify the extra datasets and register them by dataset name.

    Args:
        extras (list[covsirphy.CleaningBase]): extra datasets

    Raises:
        TypeError: non-data cleaning instance was included as an extra dataset
        UnExpectedValueError: instance of un-expected data cleaning class was included as an extra dataset

    Note:
        All datasets are verified before any of them is registered.
        When two datasets match the same registered class, the later one is kept.
    """
    self._ensure_list(extras, name="extras")
    # Verification pass
    for position, dataset in enumerate(extras, start=1):
        label = f"{self.num2str(position)} extra dataset"
        # Must be a data cleaning instance
        self._ensure_instance(dataset, CleaningBase, name=label)
        # Must be an instance of one of the acceptable classes
        if not isinstance(dataset, tuple(self.EXTRA_DICT.values())):
            raise UnExpectedValueError(
                name=label, value=type(dataset), candidates=list(self.EXTRA_DICT.keys()))
    # Registration pass: store each dataset under every name whose class it matches
    registered_classes = list(self.EXTRA_DICT.items())
    for dataset in extras:
        for key, accepted_class in registered_classes:
            if isinstance(dataset, accepted_class):
                self._data_dict[key] = dataset
def _ensure_selectable(self, target, candidates, name="target"):
    """
    Ensure that the target is one of the candidates.

    Args:
        target (object): target to check
        candidates (list[object]): list of candidates
        name (str): name of the target, used in the error

    Raises:
        UnExpectedValueError: @target is not included in @candidates

    Returns:
        object: @target itself
    """
    self._ensure_list(candidates, name="candidates")
    if target not in candidates:
        raise UnExpectedValueError(name=name, value=target, candidates=candidates)
    return target
def score(self, metric=None, metrics="RMSLE"):
    """
    Calculate score with specified metric.

    Args:
        metric (str or None): ME, MAE, MSE, MSLE, MAPE, RMSE, RMSLE, R2 or None (use @metrics)
        metrics (str): alias of @metric

    Raises:
        UnExpectedValueError: un-expected metric was applied
        ValueError: ME was selected as metric when the targets have multiple columns

    Returns:
        float: score with the metric

    Note:
        ME: maximum residual error
        MAE: mean absolute error
        MSE: mean square error
        MSLE: mean squared logarithmic error
        MAPE: mean absolute percentage error
        RMSE: root mean squared error
        RMSLE: root mean squared logarithmic error
        R2: the coefficient of determination

    Note:
        When @metric is None, @metrics will be used as @metric. Default value is "RMSLE".
        Metric names are case-insensitive.
    """
    metric = (metric or metrics).upper()
    registry = self._METRICS_DICT
    # Reject unknown metric names
    if metric not in registry:
        raise UnExpectedValueError("metric", metric, candidates=list(registry.keys()))
    # The first element of each registry entry is the scoring function
    scoring_func = registry[metric][0]
    try:
        value = scoring_func(self._true, self._pred)
        return float(value)
    except ValueError:
        # Multioutput not supported
        raise ValueError(
            f"When the targets have multiple columns, we cannot select {metric}."
        ) from None
def smaller_is_better(cls, metric=None, metrics="RMSLE"):
    """
    Whether smaller value of the metric is better or not.

    Args:
        metric (str or None): ME, MAE, MSE, MSLE, MAPE, RMSE, RMSLE, R2 or None (use @metrics)
        metrics (str): alias of @metric

    Raises:
        UnExpectedValueError: un-expected metric was applied

    Returns:
        bool: whether smaller value is better or not

    Note:
        When @metric is None, @metrics will be used as @metric.
        Metric names are case-insensitive.
    """
    metric = (metric or metrics).upper()
    registry = cls._METRICS_DICT
    # Reject unknown metric names
    if metric not in registry:
        raise UnExpectedValueError("metric", metric, candidates=list(registry.keys()))
    # The second element of each registry entry is the smaller-is-better flag
    _, flag = registry[metric][0], registry[metric][1]
    return flag