def test_read(self, mock_read_json, mock_open):
    """Check that ``_read`` opens the path in read mode and calls the reader mock.

    ``mock_read_json`` and ``mock_open`` are injected mocks; what they patch
    is decided by decorators that are not part of this block.
    """
    path = "test.csv"
    cleaner = CleanIntensiveCareDataset(None, Store())

    cleaner._read(path)  # pylint: disable=protected-access

    # The reader receives whatever handle was opened, hence ``mock.ANY``.
    mock_read_json.assert_called_once_with(mock.ANY)
    mock_open.assert_called_once_with(path, "r")
def test_run(self, mock_write, mock_get):
    """Run the intensive-care collector and check its GET and write calls.

    The GET mock yields the canned payloads from
    ``self.intensive_care_response`` in order. The test expects one GET for
    each of the first three configured intensive-care URLs, and write calls
    carrying the decoded payloads (or single elements of them) together with
    dated target paths under ``raw/``.
    """
    payloads = self.intensive_care_response
    mock_get.side_effect = payloads

    collector = GetIntensiveCareDataset(self.config["collector"], Client(), Store())
    collector(output_folder="raw")

    urls = self.config["collector"]["urls"]["intensive_care"]
    mock_get.assert_has_calls([mock.call(urls[idx]) for idx in range(3)])

    # Decode each canned payload once; the second and third are indexed
    # because one element of each is written to its own file.
    ic_count, intake, outcome = (json.loads(payloads[idx]) for idx in range(3))
    expected_writes = [
        mock.call(ic_count, "raw/1970-01-01-ic-count.json"),
        mock.call(intake[0], "raw/1970-01-01-new-intake-confirmed.json"),
        mock.call(intake[1], "raw/1970-01-01-new-intake-suspicious.json"),
        mock.call(outcome[0], "raw/1970-01-01-died-cumulative.json"),
        mock.call(outcome[1], "raw/1970-01-01-survived-cumulative.json"),
    ]
    mock_write.assert_has_calls(expected_writes)
def test_write(self, mock_to_csv, mock_open):
    """Check that ``_write`` opens the path in write mode and forwards ``index``.

    A 3x3 frame of zeros serves as the payload; only the calls made on the
    injected mocks are inspected.
    """
    frame = pd.DataFrame(np.zeros(shape=(3, 3)))
    cleaner = CleanIntensiveCareDataset(None, Store())

    cleaner._write(frame, "test.csv", index=False)  # pylint: disable=protected-access

    mock_to_csv.assert_called_once_with(mock.ANY, index=False)
    mock_open.assert_called_once_with("test.csv", "w")
def test_read(self, mock_read_csv, mock_open):
    """Check that ``_read`` opens the path in read mode and forwards ``delimiter``.

    ``mock_read_csv`` and ``mock_open`` are injected mocks; what they patch
    is decided by decorators that are not part of this block.
    """
    path = "test.csv"
    merger = MergeIntensiveCareDataset(None, Store())

    merger._read(path, delimiter=",")  # pylint: disable=protected-access

    # The reader receives whatever handle was opened, hence ``mock.ANY``.
    mock_read_csv.assert_called_once_with(mock.ANY, delimiter=",")
    mock_open.assert_called_once_with(path, "r")
def test_run_valid_input(self, mock_run):
    """Check that valid keyword arguments reach ``mock_run`` as a single dict."""
    arguments = {"input_folder": "raw", "output_folder": "processed"}
    cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())

    cleaner(**arguments)

    mock_run.assert_called_once_with(
        {"input_folder": "raw", "output_folder": "processed"}
    )
def test_run(self, mock_write, mock_read, mock_list):
    """Clean one national file and check that a ``Datum`` column is written.

    The listing mock reports a single raw file and the read mock returns a
    one-row frame of counts. The frame handed to the write mock is expected
    to hold the same counts plus ``Datum`` set to ``1970-01-01``.
    """
    source = "raw/1970-01-01.csv"
    counts = {"PositiefGetest": [1000], "Opgenomen": [2000], "Overleden": [3000]}
    mock_list.return_value = [source]
    mock_read.return_value = pd.DataFrame(counts)

    cleaner = CleanNationalDataset(self.config["collector"], Store())
    cleaner(input_folder="raw", output_folder="interim")

    mock_list.assert_called_once_with("raw/*.csv")
    mock_read.assert_called_once_with(source)
    mock_write.assert_called_once_with(
        mock.ANY, "interim/1970-01-01.csv", index=False
    )

    # Compare the frame that was actually passed to the write mock.
    written = mock_write.call_args.args[0]
    expected = pd.DataFrame({**counts, "Datum": ["1970-01-01"]})
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_run_valid_input(self, mock_run):
    """Check that valid keyword arguments reach ``mock_run`` as a single dict."""
    arguments = {
        "name": "test",
        "input_folder": "interim",
        "output_folder": "processed",
    }
    merger = MergeMunicipalityDataset(self.config["collector"], Store())

    merger(**arguments)

    mock_run.assert_called_once_with(
        {"name": "test", "input_folder": "interim", "output_folder": "processed"}
    )
def test_run_invalid_input(self, mock_run, inputs, messages):
    """Check that invalid inputs raise ``ValidationError`` and skip ``mock_run``.

    Each error reported on the raised exception is compared, by position,
    with the matching entry of ``messages``.
    """
    cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())

    with pytest.raises(ValidationError) as raised:
        cleaner(**inputs)

    mock_run.assert_not_called()

    # NOTE(review): only as many entries of ``messages`` as there are reported
    # errors get compared; surplus expected messages go unchecked.
    for position, failure in enumerate(raised.value.errors):
        assert failure.message == messages[position]
def test_run(self, mock_write, mock_read, mock_list):
    """Merge two interim intensive-care files and check the combined frame.

    The first file carries every column; the second carries only ``Datum``
    and the two cumulative outcome columns. The expected output keeps the
    remaining columns of the first file and places the outcome columns last.

    NOTE(review): the expected ``OverleeftCumulatief`` is ``[200, 200]``
    although the second input holds ``[200, 100]`` - presumably the task
    keeps cumulative series non-decreasing; confirm against the task.
    """
    files = [
        "interim/1970-01-02-file-1.csv",
        "interim/1970-01-02-file-2.csv",
    ]
    complete_frame = pd.DataFrame({
        "Datum": ["1970-01-01", "1970-01-02"],
        "NieuwOpgenomen": [100, 200],
        "OverledenCumulatief": [100, 200],
        "OverleeftCumulatief": [100, 200],
        "Opgenomen": [100, 200],
        "OpgenomenCumulatief": [100, 200],
        "IntensiveCare": [100, 200],
        "IntensiveCareCumulatief": [100, 200],
    })
    outcome_frame = pd.DataFrame({
        "Datum": ["1970-01-01", "1970-01-02"],
        "OverledenCumulatief": [200, 300],
        "OverleeftCumulatief": [200, 100],
    })
    mock_list.return_value = list(files)
    mock_read.side_effect = [complete_frame, outcome_frame]

    merger = MergeIntensiveCareDataset(self.config["collector"], Store())
    merger(name="test", input_folder="interim", output_folder="processed")

    mock_list.assert_called_once_with("interim/*.csv")
    mock_read.assert_has_calls([mock.call(path) for path in files])
    mock_write.assert_called_once_with(mock.ANY, "processed/test.csv", index=False)

    expected = pd.DataFrame({
        "Datum": ["1970-01-01", "1970-01-02"],
        "NieuwOpgenomen": [100, 200],
        "Opgenomen": [100, 200],
        "OpgenomenCumulatief": [100, 200],
        "IntensiveCare": [100, 200],
        "IntensiveCareCumulatief": [100, 200],
        "OverledenCumulatief": [200, 300],
        "OverleeftCumulatief": [200, 200],
    })
    written = mock_write.call_args.args[0]
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_run_old_format(self, mock_write, mock_read, mock_list):
    """Clean a raw file with English column names and check the renamed output.

    The single input row uses ``date``, ``newIntake`` and similar keys; the
    frame handed to the write mock is expected to hold the same values under
    the Dutch column names, in the same order.
    """
    source = "raw/1970-01-01.json"
    raw_frame = pd.DataFrame({
        "date": ["1970-01-01"],
        "newIntake": [100],
        "diedCumulative": [200],
        "survivedCumulative": [300],
        "intakeCount": [400],
        "intakeCumulative": [500],
        "icCount": [600],
        "icCumulative": [700],
    })
    mock_list.return_value = [source]
    mock_read.side_effect = [raw_frame]

    cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())
    cleaner(input_folder="raw", output_folder="interim")

    mock_list.assert_called_once_with("raw/*.json")
    mock_read.assert_called_once_with(source)
    mock_write.assert_called_once_with(
        mock.ANY, "interim/1970-01-01.csv", index=False
    )

    expected = pd.DataFrame({
        "Datum": ["1970-01-01"],
        "NieuwOpgenomen": [100],
        "OverledenCumulatief": [200],
        "OverleeftCumulatief": [300],
        "Opgenomen": [400],
        "OpgenomenCumulatief": [500],
        "IntensiveCare": [600],
        "IntensiveCareCumulatief": [700],
    })
    # Inspect the frame of the first (and only) recorded write call.
    written = mock_write.call_args_list[0].args[0]
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_run_new_format(
    self,
    mock_write,
    mock_read,
    mock_list,
    file,
    input_dataset,
    output_dataset,
):
    """Clean one raw file and compare the written frame with ``output_dataset``.

    ``file``, ``input_dataset`` and ``output_dataset`` are supplied from
    outside this block (presumably by parametrisation - decorator not visible
    here).
    """
    mock_list.return_value = [file]
    mock_read.return_value = pd.DataFrame(input_dataset)

    cleaner = CleanIntensiveCareDataset(self.config["collector"], Store())
    cleaner(input_folder="raw", output_folder="interim")

    # The slice drops the first 4 and last 5 characters of ``file`` -
    # presumably the "raw/" prefix and ".json" suffix.
    target = f"interim/{file[4:-5]}.csv"

    mock_list.assert_called_once_with("raw/*.json")
    mock_read.assert_called_once_with(file)
    mock_write.assert_called_once_with(mock.ANY, target, index=False)

    written = mock_write.call_args_list[0].args[0]
    expected = pd.DataFrame(output_dataset)
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_run(
    self,
    mock_write,
    mock_read,
    mock_list,
    input_date,
    input_dataset,
    output_dataset,
):
    """Clean one raw municipality file and compare the written frame.

    The read mock first returns a one-row municipality/province lookup frame
    and then the frame built from ``input_dataset``. The test expects reads
    of ``external/gemeenten.csv`` and the raw file, in that order, and one
    write to the matching ``interim/`` path.
    """
    source = f"raw/{input_date}.csv"
    lookup_frame = pd.DataFrame({
        "Gemeentecode": [1],
        "Gemeente": ["gemeente 1"],
        "Provinciecode": [2],
        "Provincie": ["provincie 2"],
    })
    mock_list.return_value = [source]
    mock_read.side_effect = [lookup_frame, pd.DataFrame(input_dataset)]

    cleaner = CleanMunicipalityDataset(self.config["collector"], Store())
    cleaner(input_folder="raw", output_folder="interim")

    mock_list.assert_called_once_with("raw/*.csv")
    expected_reads = [mock.call("external/gemeenten.csv"), mock.call(source)]
    mock_read.assert_has_calls(expected_reads)
    mock_write.assert_called_once_with(
        mock.ANY, f"interim/{input_date}.csv", index=False
    )

    written = mock_write.call_args.args[0]
    expected = pd.DataFrame(output_dataset)
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_run(self, mock_write, mock_get):
    """Run the national collector and check its GET calls and the written frame.

    The GET mock yields ``self.national_response`` in order. The test expects
    GETs for the configured ``cases`` and ``hospitalized`` URLs and a single
    write of a one-row frame to ``raw/1970-01-01.csv``.
    """
    mock_get.side_effect = self.national_response

    collector = GetNationalDataset(self.config["collector"], Client(), Store())
    collector(output_folder="raw")

    national_urls = self.config["collector"]["urls"]["national"]
    expected_gets = [
        mock.call(national_urls[key]) for key in ("cases", "hospitalized")
    ]
    mock_get.assert_has_calls(expected_gets)
    mock_write.assert_called_once_with(mock.ANY, "raw/1970-01-01.csv", index=False)

    expected = pd.DataFrame(
        {"PositiefGetest": [1000], "Opgenomen": [2000], "Overleden": [3000]}
    )
    written = mock_write.call_args.args[0]
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)
def test_write(self, mock_open):
    """Check that ``_write`` opens the target path in write mode exactly once."""
    payload = {"key": "value"}
    collector = GetIntensiveCareDataset(None, Client(), Store())

    collector._write(payload, "test.csv")  # pylint: disable=protected-access

    mock_open.assert_called_once_with("test.csv", "w")
def test_run_valid_input(self, mock_run):
    """Check that a valid ``output_folder`` reaches ``mock_run`` as a dict."""
    arguments = {"output_folder": "raw"}
    collector = GetIntensiveCareDataset(self.config["collector"], Client(), Store())

    collector(**arguments)

    mock_run.assert_called_once_with({"output_folder": "raw"})
def test_run(self, mock_write, mock_read, mock_list):
    """Merge three daily municipality files and check the combined frame.

    Each input frame holds one row for the same municipality. The second
    frame stores its count under ``Opgenomen`` instead of ``PositiefGetest``;
    the expected output nevertheless lists all three counts under
    ``PositiefGetest``.
    """

    def daily_frame(count_column, count, date):
        # One-row frame for a single day; key order matches the fixtures.
        return pd.DataFrame(
            {
                "Gemeentecode": [1],
                count_column: [count],
                "Gemeente": ["gemeente 1"],
                "Provinciecode": [2],
                "Provincie": ["provincie 2"],
                "Datum": [date],
            }
        )

    files = [
        "interim/1970-01-01.csv",
        "interim/1970-01-02.csv",
        "interim/1970-01-03.csv",
    ]
    mock_list.return_value = list(files)
    mock_read.side_effect = [
        daily_frame("PositiefGetest", 100, "1970-01-01"),
        daily_frame("Opgenomen", 150, "1970-01-02"),
        daily_frame("PositiefGetest", 200, "1970-01-03"),
    ]

    merger = MergeMunicipalityDataset(self.config["collector"], Store())
    merger(name="test", input_folder="interim", output_folder="processed")

    mock_list.assert_called_once_with("interim/*.csv")
    mock_read.assert_has_calls([mock.call(path) for path in files])
    mock_write.assert_called_once_with(mock.ANY, "processed/test.csv", index=False)

    expected = pd.DataFrame(
        {
            "Gemeentecode": [1] * 3,
            "PositiefGetest": [100, 150, 200],
            "Gemeente": ["gemeente 1"] * 3,
            "Provinciecode": [2] * 3,
            "Provincie": ["provincie 2"] * 3,
            "Datum": ["1970-01-01", "1970-01-02", "1970-01-03"],
        }
    )
    written = mock_write.call_args.args[0]
    pd.testing.assert_frame_equal(written, expected, check_dtype=False)