def test_read(self, mock_read_json, mock_open):
        """Check the calls made by ``CleanIntensiveCareDataset._read``.

        After reading ``"test.csv"``, ``mock_open`` must have been called once
        with that path in ``"r"`` mode and ``mock_read_json`` once with a
        single (unchecked) argument.
        """
        # pylint: disable=protected-access
        source_path = "test.csv"
        cleaner = CleanIntensiveCareDataset(None, Store())

        cleaner._read(source_path)

        mock_open.assert_called_once_with(source_path, "r")
        mock_read_json.assert_called_once_with(mock.ANY)
    def test_run(self, mock_write, mock_get):
        """Run the intensive-care getter and check the fetch and write calls.

        ``mock_get`` replays ``self.intensive_care_response`` one item per
        call. The three configured intensive-care URLs are expected to be
        requested in order, and five JSON payloads written under ``raw/``.
        """
        payloads = self.intensive_care_response
        mock_get.side_effect = payloads

        getter = GetIntensiveCareDataset(
            self.config["collector"], Client(), Store()
        )
        getter(output_folder="raw")

        urls = self.config["collector"]["urls"]["intensive_care"]
        mock_get.assert_has_calls([mock.call(urls[idx]) for idx in (0, 1, 2)])

        # The first payload is written whole; the second and third are each
        # indexed into two separate output files.
        ic_count = json.loads(payloads[0])
        new_intake = json.loads(payloads[1])
        outcomes = json.loads(payloads[2])
        expected_writes = (
            (ic_count, "raw/1970-01-01-ic-count.json"),
            (new_intake[0], "raw/1970-01-01-new-intake-confirmed.json"),
            (new_intake[1], "raw/1970-01-01-new-intake-suspicious.json"),
            (outcomes[0], "raw/1970-01-01-died-cumulative.json"),
            (outcomes[1], "raw/1970-01-01-survived-cumulative.json"),
        )
        mock_write.assert_has_calls(
            [mock.call(data, path) for data, path in expected_writes]
        )
    def test_write(self, mock_to_csv, mock_open):
        """Check the calls made by ``CleanIntensiveCareDataset._write``.

        Writing a 3x3 zero frame to ``"test.csv"`` must call ``mock_open``
        once with ``"w"`` mode and ``mock_to_csv`` once with ``index=False``.
        """
        # pylint: disable=protected-access
        target_path = "test.csv"
        zeros = pd.DataFrame(np.zeros(shape=(3, 3)))
        cleaner = CleanIntensiveCareDataset(None, Store())

        cleaner._write(zeros, target_path, index=False)

        mock_open.assert_called_once_with(target_path, "w")
        mock_to_csv.assert_called_once_with(mock.ANY, index=False)
    def test_read(self, mock_read_csv, mock_open):
        """Check the calls made by ``MergeIntensiveCareDataset._read``.

        Reading ``"test.csv"`` with ``delimiter=","`` must call ``mock_open``
        once in ``"r"`` mode and pass the delimiter on to ``mock_read_csv``.
        """
        # pylint: disable=protected-access
        source_path = "test.csv"
        merger = MergeIntensiveCareDataset(None, Store())

        merger._read(source_path, delimiter=",")

        mock_open.assert_called_once_with(source_path, "r")
        mock_read_csv.assert_called_once_with(mock.ANY, delimiter=",")
    def test_run_valid_input(self, mock_run):
        """Call the clean task with valid folders and check what ``run`` gets.

        ``mock_run`` must be called exactly once with the keyword arguments
        collected into a single dict.
        """
        cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())

        cleaner(input_folder="raw", output_folder="processed")

        expected_inputs = {"input_folder": "raw", "output_folder": "processed"}
        mock_run.assert_called_once_with(expected_inputs)
コード例 #6
0
    def test_run(self, mock_write, mock_read, mock_list):
        """Run the national clean task on a single mocked raw file.

        The written frame is expected to hold the three input columns plus a
        ``Datum`` column of ``"1970-01-01"``, and to go to
        ``interim/1970-01-01.csv``.
        """
        raw_file = "raw/1970-01-01.csv"
        counts = {"PositiefGetest": [1000], "Opgenomen": [2000], "Overleden": [3000]}
        mock_list.return_value = [raw_file]
        mock_read.return_value = pd.DataFrame(counts)

        cleaner = CleanNationalDataset(self.config["collector"], Store())
        cleaner(input_folder="raw", output_folder="interim")

        mock_list.assert_called_once_with("raw/*.csv")
        mock_read.assert_called_once_with(raw_file)
        mock_write.assert_called_once_with(
            mock.ANY, "interim/1970-01-01.csv", index=False
        )

        # Same columns as the input, in the same order, with "Datum" appended.
        expected = pd.DataFrame({**counts, "Datum": ["1970-01-01"]})
        written = mock_write.call_args.args[0]
        pd.testing.assert_frame_equal(written, expected, check_dtype=False)
    def test_run_valid_input(self, mock_run):
        """Call the municipality merge task with valid inputs.

        ``mock_run`` must be called exactly once with the keyword arguments
        collected into a single dict.
        """
        merger = MergeMunicipalityDataset(self.config["collector"], Store())

        merger(name="test", input_folder="interim", output_folder="processed")

        expected_inputs = {
            "name": "test",
            "input_folder": "interim",
            "output_folder": "processed",
        }
        mock_run.assert_called_once_with(expected_inputs)
    def test_run_invalid_input(self, mock_run, inputs, messages):
        """Call the clean task with invalid inputs and check the errors raised.

        Args:
            mock_run: Mock standing in for the task's ``run``; it must not be
                called when validation fails.
            inputs: Keyword arguments passed to the task.
            messages: Expected error messages, in the order they are reported.

        The reported messages are compared as a whole list. A missing or
        surplus validation error therefore fails the test with a readable
        diff, where an index-based loop would pass silently on too few errors
        and raise ``IndexError`` on too many.
        """
        task = CleanIntensiveCareDataset(self.config["collector"], Store())
        # A distinct name keeps the context variable from being shadowed
        # by a loop variable further down.
        with pytest.raises(ValidationError) as exc_info:
            task(**inputs)

        mock_run.assert_not_called()

        reported = [err.message for err in exc_info.value.errors]
        assert reported == list(messages)
    def test_run(self, mock_write, mock_read, mock_list):
        """Run the intensive-care merge task on two mocked interim files.

        Both frames cover the same two dates. The second frame carries only
        the two cumulative outcome columns, with different values. The merged
        frame is expected at ``processed/test.csv``.
        """
        interim_files = (
            "interim/1970-01-02-file-1.csv",
            "interim/1970-01-02-file-2.csv",
        )
        full_frame = pd.DataFrame({
            "Datum": ["1970-01-01", "1970-01-02"],
            "NieuwOpgenomen": [100, 200],
            "OverledenCumulatief": [100, 200],
            "OverleeftCumulatief": [100, 200],
            "Opgenomen": [100, 200],
            "OpgenomenCumulatief": [100, 200],
            "IntensiveCare": [100, 200],
            "IntensiveCareCumulatief": [100, 200],
        })
        outcome_frame = pd.DataFrame({
            "Datum": ["1970-01-01", "1970-01-02"],
            "OverledenCumulatief": [200, 300],
            "OverleeftCumulatief": [200, 100],
        })
        mock_list.return_value = list(interim_files)
        mock_read.side_effect = [full_frame, outcome_frame]

        merger = MergeIntensiveCareDataset(self.config["collector"], Store())
        merger(name="test", input_folder="interim", output_folder="processed")

        mock_list.assert_called_once_with("interim/*.csv")
        mock_read.assert_has_calls([mock.call(path) for path in interim_files])
        mock_write.assert_called_once_with(
            mock.ANY, "processed/test.csv", index=False
        )

        expected = pd.DataFrame({
            "Datum": ["1970-01-01", "1970-01-02"],
            "NieuwOpgenomen": [100, 200],
            "Opgenomen": [100, 200],
            "OpgenomenCumulatief": [100, 200],
            "IntensiveCare": [100, 200],
            "IntensiveCareCumulatief": [100, 200],
            "OverledenCumulatief": [200, 300],
            "OverleeftCumulatief": [200, 200],
        })
        merged = mock_write.call_args.args[0]
        pd.testing.assert_frame_equal(merged, expected, check_dtype=False)
    def test_run_old_format(self, mock_write, mock_read, mock_list):
        """Run the intensive-care clean task on one single-frame raw JSON file.

        The input uses English camelCase column names. The written frame is
        expected to hold the same values under Dutch column names, saved as
        ``interim/1970-01-01.csv``.
        """
        raw_file = "raw/1970-01-01.json"
        raw_frame = pd.DataFrame(
            {
                "date": ["1970-01-01"],
                "newIntake": [100],
                "diedCumulative": [200],
                "survivedCumulative": [300],
                "intakeCount": [400],
                "intakeCumulative": [500],
                "icCount": [600],
                "icCumulative": [700],
            },
        )
        mock_list.return_value = [raw_file]
        mock_read.side_effect = [raw_frame]

        cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())
        cleaner(input_folder="raw", output_folder="interim")

        mock_list.assert_called_once_with("raw/*.json")
        mock_read.assert_called_once_with(raw_file)
        mock_write.assert_called_once_with(
            mock.ANY, "interim/1970-01-01.csv", index=False
        )

        expected = pd.DataFrame(
            {
                "Datum": ["1970-01-01"],
                "NieuwOpgenomen": [100],
                "OverledenCumulatief": [200],
                "OverleeftCumulatief": [300],
                "Opgenomen": [400],
                "OpgenomenCumulatief": [500],
                "IntensiveCare": [600],
                "IntensiveCareCumulatief": [700],
            }
        )
        written = mock_write.call_args_list[0].args[0]
        pd.testing.assert_frame_equal(written, expected, check_dtype=False)
    def test_run_new_format(
        self, mock_write, mock_read, mock_list, file, input_dataset, output_dataset,
    ):
        """Run the intensive-care clean task on one supplied raw file.

        ``file``, ``input_dataset`` and ``output_dataset`` are supplied from
        outside this method. The output name is ``file`` without its first
        four and last five characters; this presumably strips a ``raw/``
        prefix and a ``.json`` suffix (confirm against the supplied values).
        """
        mock_list.return_value = [file]
        mock_read.return_value = pd.DataFrame(input_dataset)

        cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())
        cleaner(input_folder="raw", output_folder="interim")

        mock_list.assert_called_once_with("raw/*.json")
        mock_read.assert_called_once_with(file)

        stem = file[4:-5]
        mock_write.assert_called_once_with(
            mock.ANY, f"interim/{stem}.csv", index=False
        )

        written = mock_write.call_args_list[0].args[0]
        expected = pd.DataFrame(output_dataset)
        pd.testing.assert_frame_equal(written, expected, check_dtype=False)
コード例 #12
0
    def test_run(
        self,
        mock_write,
        mock_read,
        mock_list,
        input_date,
        input_dataset,
        output_dataset,
    ):
        """Run the municipality clean task on one supplied raw CSV file.

        ``mock_read`` returns a one-row municipality lookup frame first, then
        the frame built from ``input_dataset``. Reads are expected for
        ``external/gemeenten.csv`` and then the raw file, and the result is
        written to ``interim/<input_date>.csv``.
        """
        raw_path = f"raw/{input_date}.csv"
        municipalities = pd.DataFrame({
            "Gemeentecode": [1],
            "Gemeente": ["gemeente 1"],
            "Provinciecode": [2],
            "Provincie": ["provincie 2"],
        })
        mock_list.return_value = [raw_path]
        mock_read.side_effect = [municipalities, pd.DataFrame(input_dataset)]

        cleaner = CleanMunicipalityDataset(self.config["collector"], Store())
        cleaner(input_folder="raw", output_folder="interim")

        mock_list.assert_called_once_with("raw/*.csv")
        mock_read.assert_has_calls(
            [mock.call("external/gemeenten.csv"), mock.call(raw_path)]
        )
        mock_write.assert_called_once_with(
            mock.ANY, f"interim/{input_date}.csv", index=False
        )

        written = mock_write.call_args.args[0]
        expected = pd.DataFrame(output_dataset)
        pd.testing.assert_frame_equal(written, expected, check_dtype=False)
コード例 #13
0
    def test_run(self, mock_write, mock_get):
        """Run the national getter and check the fetch and write calls.

        ``mock_get`` replays ``self.national_response``. The configured
        ``cases`` and ``hospitalized`` URLs are expected to be requested in
        that order, and a single one-row frame written to
        ``raw/1970-01-01.csv``.
        """
        mock_get.side_effect = self.national_response

        getter = GetNationalDataset(self.config["collector"], Client(), Store())
        getter(output_folder="raw")

        national_urls = self.config["collector"]["urls"]["national"]
        mock_get.assert_has_calls(
            [mock.call(national_urls[key]) for key in ("cases", "hospitalized")]
        )
        mock_write.assert_called_once_with(
            mock.ANY, "raw/1970-01-01.csv", index=False
        )

        expected = pd.DataFrame({
            "PositiefGetest": [1000],
            "Opgenomen": [2000],
            "Overleden": [3000]
        })
        written = mock_write.call_args.args[0]
        pd.testing.assert_frame_equal(written, expected, check_dtype=False)
    def test_write(self, mock_open):
        """Check that ``GetIntensiveCareDataset._write`` opens the target file.

        Writing a small dict to ``"test.csv"`` must call ``mock_open`` once
        with that path in ``"w"`` mode.
        """
        # pylint: disable=protected-access
        target_path = "test.csv"
        getter = GetIntensiveCareDataset(None, Client(), Store())

        getter._write({"key": "value"}, target_path)

        mock_open.assert_called_once_with(target_path, "w")
    def test_run_valid_input(self, mock_run):
        """Call the intensive-care getter with a valid output folder.

        ``mock_run`` must be called exactly once with the keyword argument
        collected into a single dict.
        """
        getter = GetIntensiveCareDataset(
            self.config["collector"], Client(), Store()
        )

        getter(output_folder="raw")

        expected_inputs = {"output_folder": "raw"}
        mock_run.assert_called_once_with(expected_inputs)
    def test_run(self, mock_write, mock_read, mock_list):
        """Run the municipality merge task on three mocked interim files.

        Each mocked file yields a one-row frame for the same municipality on
        consecutive dates. The merged three-row frame is expected at
        ``processed/test.csv``.
        """

        def interim_frame(metric, value, date):
            # One-row frame; only the metric column name, its value and the
            # date differ between the three inputs.
            return pd.DataFrame(
                {
                    "Gemeentecode": [1],
                    metric: [value],
                    "Gemeente": ["gemeente 1"],
                    "Provinciecode": [2],
                    "Provincie": ["provincie 2"],
                    "Datum": [date],
                }
            )

        interim_files = (
            "interim/1970-01-01.csv",
            "interim/1970-01-02.csv",
            "interim/1970-01-03.csv",
        )
        mock_list.return_value = list(interim_files)
        # NOTE(review): the second frame supplies "Opgenomen", yet the expected
        # output below lists its value (150) under "PositiefGetest" -- confirm
        # this matches the behavior intended for the merge task.
        mock_read.side_effect = [
            interim_frame("PositiefGetest", 100, "1970-01-01"),
            interim_frame("Opgenomen", 150, "1970-01-02"),
            interim_frame("PositiefGetest", 200, "1970-01-03"),
        ]

        merger = MergeMunicipalityDataset(self.config["collector"], Store())
        merger(name="test", input_folder="interim", output_folder="processed")

        mock_list.assert_called_once_with("interim/*.csv")
        mock_read.assert_has_calls([mock.call(path) for path in interim_files])
        mock_write.assert_called_once_with(mock.ANY, "processed/test.csv", index=False)

        expected = pd.DataFrame(
            {
                "Gemeentecode": [1] * 3,
                "PositiefGetest": [100, 150, 200],
                "Gemeente": ["gemeente 1"] * 3,
                "Provinciecode": [2] * 3,
                "Provincie": ["provincie 2"] * 3,
                "Datum": ["1970-01-01", "1970-01-02", "1970-01-03"],
            }
        )
        merged = mock_write.call_args.args[0]
        pd.testing.assert_frame_equal(merged, expected, check_dtype=False)