def test_fit_data(self):
    """Test fit_data function

    A four-year synthetic statistics table is fitted twice against the
    predictors ``gmt`` and ``esoi``: once with ``intensity_mean`` as the
    explained variable (default settings) and once with ``eventcount``
    using ``poisson=True``. In both cases ``fit_data`` is expected to
    return a sequence whose first two entries expose a ``params`` mapping.
    """
    haz_stats = pd.DataFrame({
        "year": [1999, 2000, 2001, 2002],
        "eventcount": [14, 12, 16, 11],
        "intensity_mean": [9, 13, 11, 11],
        "gmt": [4, 6, 5, 5],
        "esoi": [1, -1, 1, -1],
        "dummy": [0, 1, 2, 0],
    })

    # Default fit: intensity_mean equals 2 * gmt + 1 exactly in the data above.
    sm_results = stats.fit_data(haz_stats, "intensity_mean", ["gmt", "esoi"])
    # The first result carries both predictors ...
    self.assertIn("gmt", sm_results[0].params)
    self.assertIn("esoi", sm_results[0].params)
    # ... while the second is expected to keep only gmt (presumably the
    # reduced model after dropping esoi; confirm against stats.fit_data).
    self.assertIn("gmt", sm_results[1].params)
    self.assertNotIn("esoi", sm_results[1].params)
    self.assertAlmostEqual(sm_results[1].params['gmt'], 2)
    self.assertAlmostEqual(sm_results[1].params['const'], 1)

    # Poisson fit of the event counts: both predictors stay in both results.
    sm_results = stats.fit_data(haz_stats, "eventcount", ["gmt", "esoi"], poisson=True)
    self.assertIn("gmt", sm_results[0].params)
    self.assertIn("esoi", sm_results[0].params)
    self.assertIn("gmt", sm_results[1].params)
    self.assertIn("esoi", sm_results[1].params)
    # Coefficients are only compared to one decimal place.
    self.assertAlmostEqual(sm_results[1].params['gmt'], 0.1, places=1)
    self.assertAlmostEqual(sm_results[1].params['esoi'], 0.2, places=1)
    self.assertAlmostEqual(sm_results[1].params['const'], 2, places=1)
def calibrate_statistics(self, climate_indices):
    """Fit the hazard statistics against the supplied climate indices

    Columns from a previous calibration are dropped first. Each climate
    index is then averaged over a season and inner-joined onto the internal
    statistics by year, so the statistics are truncated to the years covered
    by all indices. Rows with missing values in any explained or index
    column are removed, and one fit per explained variable is stored in
    ``self.fit_info``.

    Parameters
    ----------
    climate_indices : list of DataFrames { year, month, ... }
        Yearly or monthly time series of GMT, ESOI etc.
    """
    # Discard index columns left over from an earlier calibration.
    if len(self.ci_cols) > 0:
        self.stats = self.stats.drop(labels=self.ci_cols, axis=1)
    self.ci_cols = []

    for index_df in climate_indices:
        # Every column other than the time columns is treated as an index.
        index_names = index_df.columns.values.tolist()
        for time_col in ("year", "month"):
            index_names.remove(time_col)
        self.ci_cols.extend(index_names)

        # PDO is averaged over its own season; all others use the region's.
        if "pdo" in index_names:
            season = const.PDO_SEASON
        else:
            season = self.region.season
        seasonal_mean = stats.seasonal_average(index_df, season)
        self.stats = pd.merge(self.stats, seasonal_mean, on="year", how="inner", sort=True)

    required_cols = self.explaineds + self.ci_cols
    self.stats = self.stats.dropna(axis=0, how="any", subset=required_cols)

    self.fit_info = {}
    for target in self.explaineds:
        # Only the event count is fitted with the poisson option enabled.
        use_poisson = target == 'eventcount'
        self.fit_info[target] = stats.fit_data(
            self.stats, target, self.ci_cols, poisson=use_poisson)