def test_EntityKey_no_name():
    """A two-part key ('type.measure') parses with an empty name component."""
    entity_type = 'population'
    entity_measure = 'structure'
    key = hdf.EntityKey('.'.join([entity_type, entity_measure]))

    assert key.type == entity_type
    assert key.name == ''
    assert key.measure == entity_measure
    # With no name, the key nests directly under the root group.
    assert key.group_prefix == '/'
    assert key.group_name == entity_type
    assert key.group == '/' + entity_type
    assert key.path == '/' + entity_type + '/' + entity_measure
    assert key.with_measure('age_groups') == hdf.EntityKey('population.age_groups')
def test_EntityKey_with_name():
    """A three-part key ('type.name.measure') exposes all three components."""
    entity_type = 'cause'
    entity_name = 'diarrheal_diseases'
    entity_measure = 'incidence'
    key = hdf.EntityKey('.'.join([entity_type, entity_name, entity_measure]))

    assert key.type == entity_type
    assert key.name == entity_name
    assert key.measure == entity_measure
    # With a name present, the group nests one level deeper than the type.
    assert key.group_prefix == '/' + entity_type
    assert key.group_name == entity_name
    assert key.group == '/' + entity_type + '/' + entity_name
    assert key.path == '/' + entity_type + '/' + entity_name + '/' + entity_measure
    assert key.with_measure('prevalence') == hdf.EntityKey(
        entity_type + '.' + entity_name + '.prevalence')
def test_get_valid_filter_terms_all_valid(hdf_key, hdf_file):
    """Filter terms built from a table's own columns all pass validation."""
    node = hdf_file.get_node(hdf.EntityKey(hdf_key).path)
    if isinstance(node, tables.earray.EArray):
        # EArray nodes carry no queryable columns, so there is nothing to check.
        return

    columns = node.table.colnames
    candidate_terms = _construct_all_valid_filters(columns)
    accepted = hdf._get_valid_filter_terms(candidate_terms, columns)
    assert set(accepted) == set(candidate_terms)
def test_write_empty_data_frame(hdf_file_path):
    """Writing a DataFrame with columns but no rows raises ValueError."""
    key = hdf.EntityKey('cause.test.prevalence')
    empty = pd.DataFrame(columns=('age', 'year', 'sex', 'draw', 'location', 'value'))

    with pytest.raises(ValueError):
        hdf._write_pandas_data(hdf_file_path, key, empty)
def test_load(hdf_file_path, hdf_key):
    """load() returns the container type appropriate to the key's content."""
    key = hdf.EntityKey(hdf_key)
    loaded = hdf.load(hdf_file_path, key, filter_terms=None, column_filters=None)

    # Key naming conventions determine what was stored: restriction/version
    # keys hold dicts, metadata keys hold lists, everything else is tabular.
    if 'restrictions' in key or 'versions' in key:
        expected_type = dict
    elif 'metadata' in key:
        expected_type = list
    else:
        expected_type = pd.DataFrame
    assert isinstance(loaded, expected_type)
def test_touch_existing_file(tmpdir):
    """touch() on an existing file wipes its contents and recreates it."""
    path = str(tmpdir) + '/test.hdf'

    hdf.touch(path)
    hdf.write(path, hdf.EntityKey('test.key'), 'data')
    assert hdf.get_keys(path) == ['test.key']

    # Touching again should blow away the previous contents.
    hdf.touch(path)
    assert hdf.get_keys(path) == []
def test_write_empty_data_frame_index(hdf_file_path):
    """A frame whose columns are all in the index round-trips through the writer."""
    key = hdf.EntityKey('cause.test.prevalence')
    frame = pd.DataFrame({'age': range(10), 'year': range(10), 'draw': range(10)})
    frame = frame.set_index(list(frame.columns))

    hdf._write_pandas_data(hdf_file_path, key, frame)

    round_tripped = pd.read_hdf(hdf_file_path, key.path)
    # write resets index. only calling load undoes it
    round_tripped = round_tripped.set_index(list(round_tripped))
    assert round_tripped.equals(frame)
def test_load_with_valid_filters(hdf_file_path, hdf_key):
    """load() honors a valid filter term where the filtered column exists."""
    key = hdf.EntityKey(hdf_key)
    loaded = hdf.load(hdf_file_path, key,
                      filter_terms=["year == 2006"],
                      column_filters=None)

    # Same key-convention → type mapping as the unfiltered load test.
    if 'restrictions' in key or 'versions' in key:
        expected_type = dict
    elif 'metadata' in key:
        expected_type = list
    else:
        expected_type = pd.DataFrame
    assert isinstance(loaded, expected_type)

    if expected_type is pd.DataFrame and 'year' in loaded.columns:
        assert set(loaded.year) == {2006}
def test_write_load_empty_data_frame_index(hdf_file_path):
    """load() restores the index of an all-index frame written by the writer."""
    key = hdf.EntityKey('cause.test.prevalence')
    frame = pd.DataFrame({'age': range(10), 'year': range(10), 'draw': range(10)})
    frame = frame.set_index(list(frame.columns))

    hdf._write_pandas_data(hdf_file_path, key, frame)
    reloaded = hdf.load(hdf_file_path, key, filter_terms=None, column_filters=None)
    assert reloaded.equals(frame)
def test_write_data_frame(hdf_file_path):
    """A written frame reads back equal, and HDF 'where' filters work on it."""
    key = hdf.EntityKey('cause.test.prevalence')
    frame = build_table(
        [lambda *args, **kwargs: random.choice([0, 1]), "Kenya", 1],
        2005, 2010,
        columns=('age', 'year', 'sex', 'draw', 'location', 'value'))
    index_columns = frame.columns.difference({'value'})
    frame = frame.set_index(list(index_columns))

    hdf._write_pandas_data(hdf_file_path, key, frame)

    # Unfiltered read-back must match exactly.
    round_tripped = pd.read_hdf(hdf_file_path, key.path)
    assert round_tripped.equals(frame)

    # A 'where' clause applied at read time must match an in-memory slice.
    filtered = pd.read_hdf(hdf_file_path, key.path, where=['draw == 0'])
    assert filtered.equals(frame.xs(0, level='draw', drop_level=False))
def mock_key(request):
    """Build an EntityKey from the parametrized fixture value."""
    key_string = request.param
    return hdf.EntityKey(key_string)
def test_EntityKey_init_failure():
    """Malformed key strings are rejected at construction time."""
    for invalid in ('hello', 'a.b.c.d', '', '.', '.coconut', 'a.', 'a..c'):
        with pytest.raises(ValueError):
            hdf.EntityKey(invalid)
def test_get_node_name(hdf_file, hdf_key):
    """The node name of a key's path is the key's measure component."""
    key = hdf.EntityKey(hdf_key)
    node = hdf_file.get_node(key.path)
    assert hdf._get_node_name(node) == key.measure
def test_remove(hdf_file_path, hdf_key):
    """remove() deletes the key's node from the underlying HDF file."""
    key = hdf.EntityKey(hdf_key)
    hdf.remove(hdf_file_path, key)

    with tables.open_file(str(hdf_file_path)) as hdf_handle:
        assert key.path not in hdf_handle