Beispiel #1
0
def testWriteToBq(mock_append_to_bq: mock.MagicMock, mock_csv: mock.MagicMock,
                  mock_download: mock.MagicMock):
    """Check that write_to_bq appends one well-formed frame per variable type.

    After calling ``CovidTrackingProject.write_to_bq`` the test expects
    ``mock_append_to_bq`` to have been called once for each of 'cases',
    'deaths', 'tests' and 'hosp' (in that order), and verifies the shape,
    column names, selected per-race row counts and column dtypes of the
    frame passed as the first positional argument of each call.

    Args:
        mock_append_to_bq: Mock standing in for the BigQuery append helper;
            its recorded calls are inspected.
        mock_csv: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
        mock_download: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
    """
    ctp = CovidTrackingProject()
    kwargs = {'filename': 'test_file.csv',
              'metadata_table_id': 'test_metadata',
              'table_name': 'output_table'}
    ctp.write_to_bq('dataset', 'gcs_bucket', **kwargs)

    var_types = ['cases', 'deaths', 'tests', 'hosp']
    # One append call is expected per variable type.
    assert mock_append_to_bq.call_count == len(var_types)

    # Expectations that do not depend on the variable type under inspection.
    expected_rows = (_RACE_CATEGORIES - 1) * _NUM_ROWS
    # Variable types missing from these maps are expected to have zero rows.
    expected_ind_rows = {'cases': 1, 'deaths': 1}
    expected_api_rows = {'cases': 4, 'deaths': 2}

    for var_type, call in zip(var_types, mock_append_to_bq.call_args_list):
        result = call.args[0]
        expected_col_names = [
            'date', 'state_postal_abbreviation', 'race',
            var_type, 'reports_race', 'race_ethnicity_separately']
        assert result.shape == (expected_rows, len(expected_col_names))
        assert set(result.columns) == set(expected_col_names)

        indigenous = result.loc[
            result['race'] == col_std.Race.INDIGENOUS.value]
        assert len(indigenous.index) == expected_ind_rows.get(var_type, 0)

        api = result.loc[result['race'] == col_std.Race.API.value]
        assert len(api.index) == expected_api_rows.get(var_type, 0)

        # The builtin `object` replaces the `np.object` alias, which was
        # removed in NumPy 1.24; it compares equal to the object dtype.
        expected_dtypes = {col: object for col in result.columns}
        expected_dtypes[var_type] = np.float64
        for col in result.columns:
            assert result[col].dtype == expected_dtypes[col]
def testWriteToBq_MetadataMissing(mock_csv: mock.MagicMock,
                                  mock_download: mock.MagicMock):
    """Expect write_to_bq to fail when no 'metadata_table_id' is supplied.

    Only 'filename' and 'table_name' are passed; the call must raise a
    RuntimeError whose message matches
    'BigQuery call to dataset returned 0 rows'.

    Args:
        mock_csv: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
        mock_download: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
    """
    source = CovidTrackingProject()
    call_kwargs = dict(filename='test_file.csv', table_name='output_table')
    expected_message = r'BigQuery call to dataset returned 0 rows'
    with pytest.raises(RuntimeError, match=expected_message):
        source.write_to_bq('dataset', 'gcs_bucket', **call_kwargs)
def testWriteToBq(mock_append_to_bq: mock.MagicMock, mock_csv: mock.MagicMock,
                  mock_download: mock.MagicMock):
    """Check that write_to_bq appends a single, well-formed long-format frame.

    After calling ``CovidTrackingProject.write_to_bq`` the test expects
    exactly one call to ``mock_append_to_bq`` and verifies the shape, column
    names, selected per-race row counts and column dtypes of the frame
    passed as its first positional argument.

    Args:
        mock_append_to_bq: Mock standing in for the BigQuery append helper;
            its single recorded call is inspected.
        mock_csv: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
        mock_download: Mock that is not referenced in the body (presumably
            injected by a patch decorator not visible here).
    """
    ctp = CovidTrackingProject()
    kwargs = {
        'filename': 'test_file.csv',
        'metadata_table_id': 'test_metadata',
        'table_name': 'output_table'
    }
    ctp.write_to_bq('dataset', 'gcs_bucket', **kwargs)
    mock_append_to_bq.assert_called_once()
    result = mock_append_to_bq.call_args.args[0]

    expected_col_names = [
        'date', 'state_postal_abbreviation', 'race', 'variable_type', 'value',
        'reports_race', 'race_ethnicity_separately'
    ]
    expected_rows = (_RACE_CATEGORIES - 1) * _VARIABLE_TYPES * _NUM_ROWS
    # Column count is derived from the expected names (7) so the two
    # expectations cannot drift apart.
    assert result.shape == (expected_rows, len(expected_col_names))
    assert set(result.columns) == set(expected_col_names)

    indigenous = result.loc[result['race'] == col_std.Race.INDIGENOUS.value]
    assert len(indigenous.index) == 2
    api = result.loc[result['race'] == col_std.Race.API.value]
    assert len(api.index) == 6

    # The builtin `object` replaces the `np.object` alias, which was removed
    # in NumPy 1.24; it compares equal to the object dtype.
    expected_dtypes = {col: object for col in result.columns}
    expected_dtypes['date'] = np.dtype('datetime64[ns]')
    expected_dtypes['value'] = np.float64
    for col in result.columns:
        assert result[col].dtype == expected_dtypes[col]
def testWriteToBq_MissingAttr():
    """Expect write_to_bq to name the first required attribute that is absent.

    The call is repeated with progressively more keyword arguments; each
    attempt must raise a RuntimeError whose message matches the paired
    pattern.
    """
    source = CovidTrackingProject()
    # (keyword arguments supplied, pattern the RuntimeError must match)
    scenarios = (
        ({}, r'filename not found'),
        ({'filename': 'test_file.csv'}, r'metadata_table_id not found'),
        (
            {
                'filename': 'test_file.csv',
                'metadata_table_id': 'test_metadata'
            },
            r'table_name not found',
        ),
    )
    for call_kwargs, pattern in scenarios:
        with pytest.raises(RuntimeError, match=pattern):
            source.write_to_bq('dataset', 'gcs_bucket', **call_kwargs)
Beispiel #5
0
def testWriteToBq_MissingAttr():
    """Check that write_to_bq rejects a call made without keyword arguments.

    Expects a RuntimeError whose message matches 'filename not found'.
    """
    ctp = CovidTrackingProject()
    # Deliberately empty: no 'filename' (or any other attribute) is passed.
    kwargs = {}
    with pytest.raises(RuntimeError, match=r'filename not found'):
        ctp.write_to_bq('dataset', 'gcs_bucket', **kwargs)