def test_compute_regression_sensor_intercept(self):
    """Compare the with-intercept sensor to coefficients from lm(y~x)."""
    days = [date(2020, 1, day) for day in range(1, 8)]
    covariate = LocationSeries(data=dict(zip(days, [1, 3, 5, 6, 7, 9, 12])))
    response = LocationSeries(
        data=dict(zip(days, [10, 16, 22, 29, 28, 35, 42])))

    # Intercept plus slope times the covariate value stored for Jan 6 (9).
    expected = 6.586207 + 3.275862 * 9
    actual = compute_regression_sensor(date(2020, 1, 6), covariate, response,
                                       True)
    assert np.isclose(actual, expected)
def test_compute_regression_sensor_insufficient_data(self):
    """Expect NaN from the regression sensor for both requested days."""
    days = [date(2020, 1, day) for day in range(1, 8)]
    # The covariate has a NaN on Jan 3; the response is complete.
    covariate = LocationSeries(
        data=dict(zip(days, [1, 3, np.nan, 6, 7, 9, 12])))
    response = LocationSeries(
        data=dict(zip(days, [10, 16, 22, 29, 28, 35, 42])))

    for target_day in (date(2020, 1, 1), date(2020, 1, 6)):
        sensor = compute_regression_sensor(target_day, covariate, response,
                                           False)
        assert np.isnan(sensor)
def test_add_data(self):
    """A series grown with add_data equals one built with both entries."""
    series = LocationSeries(data={date(2020, 1, 1): 2})
    series.add_data(date(2020, 1, 3), 4)

    expected = LocationSeries(data={
        date(2020, 1, 1): 2,
        date(2020, 1, 3): 4,
    })
    assert series == expected
def test_compute_sensors_covariates(self,
                                    mock_get_indicator_data,
                                    mock_compute_ar_sensor,
                                    mock_compute_regression_sensor):
    """Expect the ground truth sensor and the non-NaN regression sensor."""
    sensors = [
        SensorConfig("a", "b", "c", 1),
        SensorConfig("x", "y", "z", 2),
    ]
    ground_truth_sensor = SensorConfig("i", "j", "k", 3)
    ground_truth = [LocationSeries("ca", "state")]

    mock_get_indicator_data.return_value = {
        ("a", "b", "state", "ca"): ["placeholder"],
        ("x", "y", "state", "ca"): ["placeholder"],
    }
    mock_compute_ar_sensor.return_value = 1.5
    # The second regression value is NaN, so the 2nd sensor is skipped.
    mock_compute_regression_sensor.side_effect = [2.5, np.nan]

    result = compute_sensors(date(2020, 5, 5), sensors, ground_truth_sensor,
                             ground_truth, False)
    expected = {
        SensorConfig("i", "j", "k", 3): [
            LocationSeries("ca", "state", {date(2020, 5, 2): 1.5})
        ],
        SensorConfig("a", "b", "c", 1): [
            LocationSeries("ca", "state", {date(2020, 5, 4): 2.5})
        ],
    }
    assert result == expected
def test_get_data_range_mean_impute(self):
    """Check "mean" imputation over the full range and a shorter range."""
    first, second, third = (date(2020, 1, day) for day in (1, 2, 3))
    series = LocationSeries(data={first: 7, second: np.nan, third: 9})

    full_range = series.get_data_range(first, third, "mean")
    assert full_range == [7, 8.0, 9]

    # Only Jan 1 has a value in this window, so the gap takes that value.
    short_range = series.get_data_range(first, second, "mean")
    assert short_range == [7, 7]
def test_get_data_range_no_impute(self):
    """Check that get_data_range with no imputation leaves the NaN in place.

    The expected lists contain NaN. Because NaN != NaN, a plain ``==``
    between two lists only succeeds when both sides hold the very same NaN
    object, which ties the test to an implementation detail.
    ``np.testing.assert_equal`` walks the lists element by element and treats
    NaN as equal to NaN, so the check no longer depends on object identity.
    """
    test_ls = LocationSeries(data={
        date(2020, 1, 1): 7,
        date(2020, 1, 2): np.nan,
        date(2020, 1, 3): 9,
    })

    # Full stored range: the missing middle day stays NaN.
    np.testing.assert_equal(
        test_ls.get_data_range(date(2020, 1, 1), date(2020, 1, 3), None),
        [7, np.nan, 9])
    # Shorter range ending on the missing day.
    np.testing.assert_equal(
        test_ls.get_data_range(date(2020, 1, 1), date(2020, 1, 2), None),
        [7, np.nan])
def test_historical_sensors_no_data(self, mock_historical):
    """Expect an empty dict when every mocked lookup yields an empty series."""
    mock_historical.return_value = (LocationSeries(), [])

    sensor_specs = [("i", "j", "k", 3), ("a", "b", "c", 1), ("x", "y", "z", 2)]
    sensors = [SensorConfig(*spec) for spec in sensor_specs]
    ground_truth = [LocationSeries("ca", "state")]

    result = historical_sensors(None, None, sensors, ground_truth)
    assert result == {}
def test_get_data_range_invalid_impute(self):
    """An unrecognized imputation method must raise ValueError."""
    start, gap, end = (date(2020, 1, day) for day in (1, 2, 3))
    series = LocationSeries(data={start: 7, gap: np.nan, end: 9})

    expected_message = "Invalid imputation method. Must be None or 'mean'"
    with pytest.raises(ValueError, match=expected_message):
        series.get_data_range(start, end, "fakeimpute")
def test_compute_ar_sensor_regularize(self, random_normal):
    """Compare compute_ar_sensor(..., 2, 1) with lm.ridge coefficients.

    Coefficients were verified with lm.ridge(y~x1+x2, lambda=1*12/11), where
    x1 and x2 were constructed by hand; lambda is scaled since lm.ridge does
    some scaling by n/(n-1).
    """
    # Make the patched random_normal mock return 0.
    random_normal.return_value = 0

    observations = [
        -4.27815483, -4.83962077, -4.09548122, -3.86647783,
        -2.64494168, -3.99573135, -3.4824841, -2.77490127,
        -3.64162355, -2.5762891, -2.46793048, -3.20454941,
        -1.77057154, -0.02058535, 0.81182691, 0.32741982,
    ]
    values = LocationSeries(data={
        date(2020, 1, day): obs
        for day, obs in enumerate(observations, start=1)
    })

    # Each term scales a centered observation (Jan 13, then Jan 14) by its
    # coefficient and divisor, exactly as in the reference expression.
    intercept = -2.8784639
    term_jan13 = 0.2315984 * (-1.77057154 - -3.48901547) / 0.7637391
    term_jan14 = 0.5143709 * (-0.02058535 - -3.28005019) / 0.8645852
    expected = intercept + term_jan13 + term_jan14

    actual = compute_ar_sensor(date(2020, 1, 15), values, 2, 1)
    assert np.isclose(actual, expected)
def test__export_to_csv(self):
    """Export a single value and verify the written file and its contents."""
    sensor = SensorConfig(source="src", signal="sig", name="test", lag=4)
    value = LocationSeries("ca", "state", {date(2020, 1, 1): 1.5})
    expected_df = pd.DataFrame({
        "sensor_name": ["test"],
        "geo_value": ["ca"],
        "value": [1.5],
    })

    with tempfile.TemporaryDirectory() as tmpdir:
        out_files = _export_to_csv(value, sensor, date(2020, 1, 5),
                                   receiving_dir=tmpdir)
        assert len(out_files) == 1

        out_file = out_files[0]
        assert os.path.isfile(out_file)
        assert out_file.endswith("issue_20200105/src/20200101_state_sig.csv")

        # Read the file back while the temporary directory still exists.
        written_df = pd.read_csv(out_file)
        pd.testing.assert_frame_equal(written_df, expected_df)
def test_compute_ar_sensor_seed(self):
    """Fifty calls with identical arguments must produce one distinct value."""
    observations = [
        -4.27815483, -4.83962077, -4.09548122, -3.86647783,
        -2.64494168, -3.99573135, -3.4824841, -2.77490127,
        -3.64162355, -2.5762891, -2.46793048, -3.20454941,
        -1.77057154, -0.02058535, 0.81182691, 0.32741982,
    ]
    values = LocationSeries(data={
        date(2020, 1, day): obs
        for day, obs in enumerate(observations, start=1)
    })

    distinct_results = {
        compute_ar_sensor(date(2020, 1, 15), values, 1, 0)
        for _ in range(50)
    }
    assert len(distinct_results) == 1
def test_compute_ar_sensor_no_regularize(self, random_normal):
    """Check results with last argument 0 against ar.ols output.

    Expected values were verified with
    ar.ols(x, FALSE, ar_size, intercept=TRUE, demean=FALSE).
    """
    # Make the patched random_normal mock return 0.
    random_normal.return_value = 0

    observations = [
        -4.27815483, -4.83962077, -4.09548122, -3.86647783,
        -2.64494168, -3.99573135, -3.4824841, -2.77490127,
        -3.64162355, -2.5762891, -2.46793048, -3.20454941,
        -1.77057154, -0.02058535, 0.81182691, 0.32741982,
    ]
    values = LocationSeries(data={
        date(2020, 1, day): obs
        for day, obs in enumerate(observations, start=1)
    })
    target_day = date(2020, 1, 15)

    # Third argument 1: intercept plus one coefficient times the Jan 14 value.
    one_term = compute_ar_sensor(target_day, values, 1, 0)
    assert np.isclose(one_term, -0.09105891 + 0.87530957 * -0.02058535)

    # Third argument 2: the Jan 13 value enters as a second term.
    two_terms = compute_ar_sensor(target_day, values, 2, 0)
    assert np.isclose(
        two_terms,
        0.31865395 + 0.64751725 * -0.02058535 + 0.30760218 * -1.77057154)
def test_compute_ar_sensor_insufficient_data(self):
    """With only two observations, expect NaN for both requested days."""
    values = LocationSeries(data={
        date(2020, 1, 1): -4.27815483,
        date(2020, 1, 2): -4.83962077,
    })

    for target_day in (date(2020, 1, 2), date(2020, 1, 7)):
        sensor = compute_ar_sensor(target_day, values, 1, 0)
        assert np.isnan(sensor)
def test_compute_sensors_no_covariates(self,
                                       mock_get_indicator_data,
                                       mock_compute_ar_sensor):
    """Expect only the ground truth sensor when no indicator data exists."""
    sensors = [
        SensorConfig("a", "b", "c", 1),
        SensorConfig("x", "y", "z", 2),
    ]
    ground_truth_sensor = SensorConfig("i", "j", "k", 3)
    ground_truth = [LocationSeries("ca", "state")]

    # The mocked indicator lookup returns nothing for the other sensors.
    mock_get_indicator_data.return_value = {}
    mock_compute_ar_sensor.return_value = 1.5

    result = compute_sensors(date(2020, 5, 5), sensors, ground_truth_sensor,
                             ground_truth, False)
    expected = {
        SensorConfig("i", "j", "k", 3): [
            LocationSeries("ca", "state", {date(2020, 5, 2): 1.5})
        ],
    }
    assert result == expected
def test_no_results(self, mock_epidata):
    """A -2 result gives an empty series plus all four requested dates."""
    mock_epidata.return_value = {"result": -2}

    sensor = SensorConfig(None, None, None, None)
    output = get_historical_sensor_data(sensor, None, None,
                                        date(2020, 1, 1), date(2020, 1, 4))

    all_days = [date(2020, 1, day) for day in range(1, 5)]
    assert output == (LocationSeries(None, None), all_days)
def test_no_results(self, mock_epidata):
    """When every mocked response has result -2, expect an empty dict."""
    mock_epidata.return_value = [
        ({"result": -2},
         {"data_source": "src1", "signals": "sig1",
          "geo_type": "state", "geo_value": "ca"}),
        ({"result": -2},
         {"data_source": "src1", "signals": "sig1",
          "geo_type": "county", "geo_value": "01001"}),
    ]

    sensors = [
        SensorConfig("src1", "sig1", None, None),
        SensorConfig("src2", "sig2", None, None),
    ]
    locations = [LocationSeries("ca", "state")]
    output = get_indicator_data(sensors, locations, date(2020, 1, 1))
    assert output == {}

    # One query per sensor; only the source and signal differ between them.
    expected_queries = [
        {
            "source": "covidcast",
            "data_source": data_source,
            "signals": signal,
            "time_type": "day",
            "geo_type": "state",
            "geo_value": "ca",
            "time_values": f"{EPIDATA_START_DATE}-20200101",
            "as_of": "20200101",
        }
        for data_source, signal in (("src1", "sig1"), ("src2", "sig2"))
    ]
    mock_epidata.assert_called_once_with(expected_queries)
def test_error(self, mock_epidata):
    """A -3 result must raise an error carrying the response message."""
    failure = {"result": -3, "message": "test failure"}
    mock_epidata.return_value = [(failure, {})]

    sensors = [SensorConfig(None, None, None, None)]
    locations = [LocationSeries(None, None)]
    with pytest.raises(Exception,
                       match="Bad result from Epidata: test failure"):
        get_indicator_data(sensors, locations, date(2020, 1, 1))

    # The query must still have been issued exactly once before the failure.
    expected_query = {
        "source": "covidcast",
        "data_source": None,
        "signals": None,
        "time_type": "day",
        "geo_type": None,
        "geo_value": None,
        "time_values": f"{EPIDATA_START_DATE}-20200101",
        "as_of": "20200101",
    }
    mock_epidata.assert_called_once_with([expected_query])
def test_get_data_range_out_of_bounds(self):
    """Ranges reaching outside the stored dates must raise ValueError."""
    series = LocationSeries(data={
        date(2020, 1, 1): 7,
        date(2020, 1, 2): 8,
        date(2020, 1, 3): 9,
    })
    expected_message = ("Data range must be within existing dates "
                        "2020-01-01 to 2020-01-03")

    # First range starts a day too early; second ends a day too late.
    out_of_bounds_ranges = [
        (date(2019, 12, 31), date(2020, 1, 3)),
        (date(2020, 1, 1), date(2020, 1, 4)),
    ]
    for start, end in out_of_bounds_ranges:
        with pytest.raises(ValueError, match=expected_message):
            series.get_data_range(start, end)
def test_results(self, mock_epidata):
    """Only the non-NaN row is kept; the other three dates are listed."""
    mock_epidata.return_value = {
        "result": 1,
        "epidata": [
            {"time_value": 20200101, "value": 1},
            {"time_value": 20200102, "value": np.nan},
        ],
    }

    sensor = SensorConfig(None, None, None, None)
    output = get_historical_sensor_data(sensor, None, None,
                                        date(2020, 1, 1), date(2020, 1, 4))

    expected_series = LocationSeries(None, None, {date(2020, 1, 1): 1})
    remaining_days = [date(2020, 1, day) for day in (2, 3, 4)]
    assert output == (expected_series, remaining_days)
def test_historical_sensors_some_data(self, mock_historical):
    """Expect entries for the first two sensors; the third lookup is empty."""
    mock_historical.side_effect = [
        (LocationSeries(data={date(2020, 1, 1): 2}), []),
        (LocationSeries(data={date(2020, 1, 3): 4}), []),
        (LocationSeries(), []),
    ]

    sensor_specs = [("i", "j", "k", 3), ("a", "b", "c", 1), ("x", "y", "z", 2)]
    sensors = [SensorConfig(*spec) for spec in sensor_specs]
    ground_truth = [LocationSeries("ca", "state")]

    result = historical_sensors(None, None, sensors, ground_truth)
    expected = {
        SensorConfig("i", "j", "k", 3): [
            LocationSeries(data={date(2020, 1, 1): 2})
        ],
        SensorConfig("a", "b", "c", 1): [
            LocationSeries(data={date(2020, 1, 3): 4})
        ],
    }
    assert result == expected
def test_compute_regression_sensor_no_data(self):
    """Two empty series must give a NaN sensor value."""
    empty_covariate = LocationSeries()
    empty_response = LocationSeries()

    sensor = compute_regression_sensor(date(2020, 1, 16), empty_covariate,
                                       empty_response, False)
    assert np.isnan(sensor)
def test_no_data(self):
    """Reading dates or values of an empty series raises a "No data" error."""
    empty_series = LocationSeries()

    for attribute in ("dates", "values"):
        with pytest.raises(ValueError, match="No data"):
            # The attribute access itself is what should raise.
            getattr(empty_series, attribute)
def test_compute_ar_sensor_out_of_range(self):
    """A target day after the last stored observation must give NaN."""
    days = [date(2020, 1, 1), date(2020, 1, 2)]
    values = LocationSeries(data=dict(zip(days, [-4.27815483, -4.83962077])))

    sensor = compute_ar_sensor(date(2020, 1, 7), values, 1, 0)
    assert np.isnan(sensor)