예제 #1
0
 def test_compute_regression_sensor_intercept(self):
     """Check the regression sensor value when the fourth argument is True.

     Reference coefficients were verified with R's lm(y~x). They match an
     ordinary least squares fit of the Jan 1-5 pairs, evaluated at the
     Jan 6 covariate value (9).
     """
     covariate_values = [1, 3, 5, 6, 7, 9, 12]
     response_values = [10, 16, 22, 29, 28, 35, 42]
     covariate = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in enumerate(covariate_values, start=1)
     })
     response = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in enumerate(response_values, start=1)
     })
     predicted = compute_regression_sensor(date(2020, 1, 6), covariate,
                                           response, True)
     expected = 6.586207 + 3.275862 * 9
     assert np.isclose(predicted, expected)
예제 #2
0
 def test_compute_regression_sensor_insufficient_data(self):
     """Expect NaN on Jan 1 and on Jan 6 when the covariate holds a NaN."""
     # The covariate is missing (NaN) on Jan 3; the response is complete.
     covariate_values = [1, 3, np.nan, 6, 7, 9, 12]
     response_values = [10, 16, 22, 29, 28, 35, 42]
     covariate = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in enumerate(covariate_values, start=1)
     })
     response = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in enumerate(response_values, start=1)
     })
     for sensor_date in (date(2020, 1, 1), date(2020, 1, 6)):
         estimate = compute_regression_sensor(sensor_date, covariate,
                                              response, False)
         assert np.isnan(estimate)
 def test_add_data(self):
     """Check that add_data yields a series equal to one built with both entries."""
     series = LocationSeries(data={date(2020, 1, 1): 2})
     series.add_data(date(2020, 1, 3), 4)
     expected = LocationSeries(data={
         date(2020, 1, 1): 2,
         date(2020, 1, 3): 4,
     })
     assert series == expected
예제 #4
0
 def test_compute_sensors_covariates(self, mock_get_indicator_data,
                                     mock_compute_ar_sensor,
                                     mock_compute_regression_sensor):
     """Test ground truth sensor and non-NaN regression sensors are returned."""
     # Placeholder entries for the ("a", "b") and ("x", "y") sensors.
     mock_get_indicator_data.return_value = {
         (source, signal, "state", "ca"): ["placeholder"]
         for source, signal in (("a", "b"), ("x", "y"))
     }
     mock_compute_ar_sensor.return_value = 1.5
     # The second regression value is NaN, which means the 2nd sensor is
     # skipped in the expected output below.
     mock_compute_regression_sensor.side_effect = [2.5, np.nan]
     covariate_sensors = [
         SensorConfig("a", "b", "c", 1),
         SensorConfig("x", "y", "z", 2),
     ]
     ground_truth_sensor = SensorConfig("i", "j", "k", 3)
     ground_truth = [LocationSeries("ca", "state")]

     computed = compute_sensors(date(2020, 5, 5), covariate_sensors,
                                ground_truth_sensor, ground_truth, False)
     expected = {
         SensorConfig("i", "j", "k", 3):
         [LocationSeries("ca", "state", {date(2020, 5, 2): 1.5})],
         SensorConfig("a", "b", "c", 1):
         [LocationSeries("ca", "state", {date(2020, 5, 4): 2.5})],
     }
     assert computed == expected
 def test_get_data_range_mean_impute(self):
     """Check get_data_range output when "mean" imputation is requested."""
     series = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in ((1, 7), (2, np.nan), (3, 9))
     })
     first_day = date(2020, 1, 1)

     # Full range: the NaN on Jan 2 is expected to come back as 8.0.
     full_range = series.get_data_range(first_day, date(2020, 1, 3), "mean")
     assert full_range == [7, 8.0, 9]

     # Range ending on the NaN day: the expected fill value is 7.
     partial_range = series.get_data_range(first_day, date(2020, 1, 2),
                                           "mean")
     assert partial_range == [7, 7]
 def test_get_data_range_no_impute(self):
     """Check that get_data_range leaves NaN values in place when impute is None.

     NaN never compares equal to itself, so a plain ``==`` between lists
     that contain NaN only succeeds when both lists hold the very same NaN
     object (list comparison checks identity before equality). The
     NaN-aware ``np.testing.assert_array_equal`` is used instead so the
     test does not depend on which NaN object the series hands back.
     """
     test_ls = LocationSeries(data={
         date(2020, 1, 1): 7,
         date(2020, 1, 2): np.nan,
         date(2020, 1, 3): 9
     })
     full_range = test_ls.get_data_range(date(2020, 1, 1), date(2020, 1, 3),
                                         None)
     np.testing.assert_array_equal(full_range, [7, np.nan, 9])

     partial_range = test_ls.get_data_range(date(2020, 1, 1),
                                            date(2020, 1, 2), None)
     np.testing.assert_array_equal(partial_range, [7, np.nan])
예제 #7
0
 def test_historical_sensors_no_data(self, mock_historical):
     """Test that an empty dict comes back when no sensor has data."""
     # Every lookup yields an empty series and an empty second element.
     mock_historical.return_value = (LocationSeries(), [])
     sensors = [
         SensorConfig(*fields)
         for fields in (("i", "j", "k", 3), ("a", "b", "c", 1),
                        ("x", "y", "z", 2))
     ]
     ground_truth = [LocationSeries("ca", "state")]
     result = historical_sensors(None, None, sensors, ground_truth)
     assert result == {}
 def test_get_data_range_invalid_impute(self):
     """Check that an unrecognized imputation method raises ValueError."""
     series = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in ((1, 7), (2, np.nan), (3, 9))
     })
     expected_error = "Invalid imputation method. Must be None or 'mean'"
     start, end = date(2020, 1, 1), date(2020, 1, 3)
     with pytest.raises(ValueError, match=expected_error):
         series.get_data_range(start, end, "fakeimpute")
예제 #9
0
    def test_compute_ar_sensor_regularize(self, random_normal):
        """Check the AR sensor value with arguments 2 and 1 after the series.

        Coefficients were verified with lm.ridge(y~x1+x2, lambda=1*12/11),
        where x1 and x2 were constructed by hand. The lambda is scaled
        because lm.ridge does some scaling by n/(n-1).
        """
        random_normal.return_value = 0
        observations = [
            -4.27815483, -4.83962077, -4.09548122, -3.86647783,
            -2.64494168, -3.99573135, -3.4824841, -2.77490127,
            -3.64162355, -2.5762891, -2.46793048, -3.20454941,
            -1.77057154, -0.02058535, 0.81182691, 0.32741982,
        ]
        # One observation per day, Jan 1 through Jan 16.
        values = LocationSeries(data={
            date(2020, 1, day): observation
            for day, observation in enumerate(observations, start=1)
        })
        estimate = compute_ar_sensor(date(2020, 1, 15), values, 2, 1)
        expected = (
            -2.8784639 + 0.2315984 * (-1.77057154 - -3.48901547) / 0.7637391 +
            0.5143709 * (-0.02058535 - -3.28005019) / 0.8645852)
        assert np.isclose(estimate, expected)
예제 #10
0
 def test__export_to_csv(self):
     """Test that export writes one CSV with the expected path and contents."""
     sensor = SensorConfig(source="src", signal="sig", name="test", lag=4)
     series = LocationSeries("ca", "state", {date(2020, 1, 1): 1.5})
     with tempfile.TemporaryDirectory() as export_dir:
         written = _export_to_csv(series,
                                  sensor,
                                  date(2020, 1, 5),
                                  receiving_dir=export_dir)
         assert len(written) == 1
         csv_path = written[0]
         assert os.path.isfile(csv_path)
         expected_suffix = "issue_20200105/src/20200101_state_sig.csv"
         assert csv_path.endswith(expected_suffix)

         # Read the file back and compare it with the single expected row.
         actual_frame = pd.read_csv(csv_path)
         expected_frame = pd.DataFrame({
             "sensor_name": ["test"],
             "geo_value": ["ca"],
             "value": [1.5]
         })
         pd.testing.assert_frame_equal(actual_frame, expected_frame)
예제 #11
0
 def test_compute_ar_sensor_seed(self):
     """Test that 50 repeated runs all produce the same result."""
     observations = [
         -4.27815483, -4.83962077, -4.09548122, -3.86647783,
         -2.64494168, -3.99573135, -3.4824841, -2.77490127,
         -3.64162355, -2.5762891, -2.46793048, -3.20454941,
         -1.77057154, -0.02058535, 0.81182691, 0.32741982,
     ]
     # One observation per day, Jan 1 through Jan 16.
     values = LocationSeries(data={
         date(2020, 1, day): observation
         for day, observation in enumerate(observations, start=1)
     })
     # Collapsing the results into a set leaves one element only if every
     # run returned the same value.
     distinct_results = {
         compute_ar_sensor(date(2020, 1, 15), values, 1, 0)
         for _ in range(50)
     }
     assert len(distinct_results) == 1
예제 #12
0
 def test_compute_ar_sensor_no_regularize(self, random_normal):
     """Check AR sensor values when the last argument is 0.

     Verified with ar.ols(x, FALSE, ar_size, intercept=TRUE, demean=FALSE).
     """
     random_normal.return_value = 0
     observations = [
         -4.27815483, -4.83962077, -4.09548122, -3.86647783,
         -2.64494168, -3.99573135, -3.4824841, -2.77490127,
         -3.64162355, -2.5762891, -2.46793048, -3.20454941,
         -1.77057154, -0.02058535, 0.81182691, 0.32741982,
     ]
     # One observation per day, Jan 1 through Jan 16.
     values = LocationSeries(data={
         date(2020, 1, day): observation
         for day, observation in enumerate(observations, start=1)
     })
     sensor_date = date(2020, 1, 15)

     # Third argument 1: intercept plus one coefficient times the Jan 14 value.
     one_lag = compute_ar_sensor(sensor_date, values, 1, 0)
     assert np.isclose(one_lag, -0.09105891 + 0.87530957 * -0.02058535)

     # Third argument 2: adds a second coefficient times the Jan 13 value.
     two_lags = compute_ar_sensor(sensor_date, values, 2, 0)
     expected_two_lags = (
         0.31865395 + 0.64751725 * -0.02058535 + 0.30760218 * -1.77057154)
     assert np.isclose(two_lags, expected_two_lags)
예제 #13
0
 def test_compute_ar_sensor_insufficient_data(self):
     """Expect NaN from the AR sensor when the series has only two points."""
     values = LocationSeries(data={
         date(2020, 1, 1): -4.27815483,
         date(2020, 1, 2): -4.83962077,
     })
     for sensor_date in (date(2020, 1, 2), date(2020, 1, 7)):
         estimate = compute_ar_sensor(sensor_date, values, 1, 0)
         assert np.isnan(estimate)
예제 #14
0
 def test_compute_sensors_no_covariates(self, mock_get_indicator_data,
                                        mock_compute_ar_sensor):
     """Test only the ground truth sensor is returned when no indicator data exists."""
     # No indicator data at all, so only the mocked AR value can appear.
     mock_get_indicator_data.return_value = {}
     mock_compute_ar_sensor.return_value = 1.5
     covariate_sensors = [
         SensorConfig("a", "b", "c", 1),
         SensorConfig("x", "y", "z", 2),
     ]
     ground_truth_sensor = SensorConfig("i", "j", "k", 3)
     ground_truth = [LocationSeries("ca", "state")]

     computed = compute_sensors(date(2020, 5, 5), covariate_sensors,
                                ground_truth_sensor, ground_truth, False)
     expected = {
         SensorConfig("i", "j", "k", 3):
         [LocationSeries("ca", "state", {date(2020, 5, 2): 1.5})],
     }
     assert computed == expected
    def test_no_results(self, mock_epidata):
        """Check the output when the mocked response has result -2.

        The expected output is an empty series paired with all four dates
        of the requested Jan 1-4 range.
        """
        mock_epidata.return_value = {"result": -2}
        sensor = SensorConfig(None, None, None, None)
        test_output = get_historical_sensor_data(sensor, None, None,
                                                 date(2020, 1, 1),
                                                 date(2020, 1, 4))

        expected_dates = [date(2020, 1, day) for day in range(1, 5)]
        expected = (LocationSeries(None, None), expected_dates)
        assert test_output == expected
 def test_no_results(self, mock_epidata):
     """Check get_indicator_data returns {} when every response has result -2."""
     # Two (response, parameters) pairs, both carrying result -2.
     mock_epidata.return_value = [
         ({
             "result": -2
         }, {
             "data_source": "src1",
             "signals": "sig1",
             "geo_type": geo_type,
             "geo_value": geo_value
         })
         for geo_type, geo_value in (("state", "ca"), ("county", "01001"))
     ]
     sensors = [
         SensorConfig("src1", "sig1", None, None),
         SensorConfig("src2", "sig2", None, None),
     ]
     locations = [LocationSeries("ca", "state")]
     test_output = get_indicator_data(sensors, locations, date(2020, 1, 1))
     assert test_output == {}

     # One query per sensor is expected, all for the single "ca" state
     # location.
     expected_queries = [
         {
             "source": "covidcast",
             "data_source": data_source,
             "signals": signal,
             "time_type": "day",
             "geo_type": "state",
             "geo_value": "ca",
             "time_values": f"{EPIDATA_START_DATE}-20200101",
             "as_of": "20200101"
         }
         for data_source, signal in (("src1", "sig1"), ("src2", "sig2"))
     ]
     mock_epidata.assert_called_once_with(expected_queries)
 def test_error(self, mock_epidata):
     """Check that a result of -3 raises with the message from the response."""
     failure_response = {"result": -3, "message": "test failure"}
     mock_epidata.return_value = [(failure_response, {})]
     sensors = [SensorConfig(None, None, None, None)]
     locations = [LocationSeries(None, None)]
     with pytest.raises(Exception,
                        match="Bad result from Epidata: test failure"):
         get_indicator_data(sensors, locations, date(2020, 1, 1))

     # The single query should still have been issued before the failure.
     expected_query = {
         "source": "covidcast",
         "data_source": None,
         "signals": None,
         "time_type": "day",
         "geo_type": None,
         "geo_value": None,
         "time_values": f"{EPIDATA_START_DATE}-20200101",
         "as_of": "20200101"
     }
     mock_epidata.assert_called_once_with([expected_query])
 def test_get_data_range_out_of_bounds(self):
     """Check that ranges reaching outside the stored dates raise ValueError."""
     series = LocationSeries(data={
         date(2020, 1, day): value
         for day, value in ((1, 7), (2, 8), (3, 9))
     })
     # First range starts a day too early; second ends a day too late.
     out_of_bounds_ranges = (
         (date(2019, 12, 31), date(2020, 1, 3)),
         (date(2020, 1, 1), date(2020, 1, 4)),
     )
     for start, end in out_of_bounds_ranges:
         with pytest.raises(ValueError,
                            match="Data range must be within existing dates "
                            "2020-01-01 to 2020-01-03"):
             series.get_data_range(start, end)
    def test_results(self, mock_epidata):
        """Check the output when the response has one value and one NaN.

        The expected output keeps only the Jan 1 value in the series. Jan 2
        (NaN in the response) is expected in the date list along with
        Jan 3 and Jan 4, which are absent from the response.
        """
        epidata_rows = [
            {"time_value": 20200101, "value": 1},
            {"time_value": 20200102, "value": np.nan},
        ]
        mock_epidata.return_value = {"result": 1, "epidata": epidata_rows}
        sensor = SensorConfig(None, None, None, None)
        test_output = get_historical_sensor_data(sensor, None, None,
                                                 date(2020, 1, 1),
                                                 date(2020, 1, 4))

        expected_series = LocationSeries(None, None, {date(2020, 1, 1): 1})
        expected_dates = [date(2020, 1, day) for day in range(2, 5)]
        assert test_output == (expected_series, expected_dates)
예제 #20
0
 def test_historical_sensors_some_data(self, mock_historical):
     """Test that only the first two sensors, which have data, are returned."""
     # Successive calls return data, data, then an empty series.
     mock_historical.side_effect = [
         (LocationSeries(data={date(2020, 1, 1): 2}), []),
         (LocationSeries(data={date(2020, 1, 3): 4}), []),
         (LocationSeries(), []),
     ]
     sensors = [
         SensorConfig(*fields)
         for fields in (("i", "j", "k", 3), ("a", "b", "c", 1),
                        ("x", "y", "z", 2))
     ]
     ground_truth = [LocationSeries("ca", "state")]

     result = historical_sensors(None, None, sensors, ground_truth)
     expected = {
         SensorConfig("i", "j", "k", 3):
         [LocationSeries(data={date(2020, 1, 1): 2})],
         SensorConfig("a", "b", "c", 1):
         [LocationSeries(data={date(2020, 1, 3): 4})],
     }
     assert result == expected
예제 #21
0
 def test_compute_regression_sensor_no_data(self):
     """Expect NaN when both the covariate and response series are empty."""
     empty_covariate = LocationSeries()
     empty_response = LocationSeries()
     estimate = compute_regression_sensor(date(2020, 1, 16), empty_covariate,
                                          empty_response, False)
     assert np.isnan(estimate)
 def test_no_data(self):
     """Check that reading dates or values of an empty series raises ValueError."""
     empty_series = LocationSeries()
     for attribute in ("dates", "values"):
         with pytest.raises(ValueError, match="No data"):
             getattr(empty_series, attribute)
예제 #23
0
 def test_compute_ar_sensor_out_of_range(self):
     """Expect NaN for a sensor date (Jan 7) well past the last data point (Jan 2)."""
     observations = (-4.27815483, -4.83962077)
     values = LocationSeries(data={
         date(2020, 1, day): observation
         for day, observation in enumerate(observations, start=1)
     })
     estimate = compute_ar_sensor(date(2020, 1, 7), values, 1, 0)
     assert np.isnan(estimate)