def test_ref_missing_required():
    """A ``ref`` field given neither ``ref_name`` nor ``data`` should fail.

    The only ref defined is named ``values``; the field under test is named
    ``points_at_nothing`` and supplies no pointer to it, so pulling the first
    record is expected to raise ``datacraft.SpecException``.
    """
    factory = datacraft.spec_builder()
    factory.add_ref('values', datacraft.builder.values([1, 2, 3]))
    factory.ref('points_at_nothing')

    spec = factory.build()
    records = spec.generator(1)
    # Only the first pull from the generator is expected to raise.
    with pytest.raises(datacraft.SpecException):
        next(records)
def test_keywords_are_now_valid(keyword):
    """Check that ``keyword`` works as a calculate alias and inside the formula.

    A constant field of 84 is aliased to ``keyword`` and halved by the
    formula, so the calculated field must equal 42.0.

    NOTE(review): ``keyword`` is presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    factory = datacraft.spec_builder()
    factory.values('field', 84)
    factory.calculate(
        'meaning_of_life',
        fields={'field': keyword},
        formula='{{%s}}/2' % keyword,
    )

    spec = factory.build()
    record = next(spec.generator(1))
    assert record['meaning_of_life'] == 42.0
def _build_csv_spec(field_name, **config):
    """Build a spec holding a single ``weighted_csv`` field.

    Args:
        field_name: name of the field to add to the spec.
        **config: keyword arguments for the weighted_csv field; they are
            layered over the default ``datafile`` of ``weighted.csv`` and
            override it when the same key is given.

    Returns:
        The result of ``build()`` on the spec builder.
    """
    factory = datacraft.spec_builder()
    # Defaults first, caller-supplied values last so they take precedence.
    field_config = {"datafile": "weighted.csv", **config}
    field_spec = datacraft.builder.weighted_csv(**field_config)
    return factory.add_field(field_name, field_spec).build()
def test_calculate_missing_required(fields, refs, formula):
    """A calculate field built from the given arguments should be rejected.

    The field is registered first; building the spec and pulling the first
    record is expected to raise ``datacraft.SpecException``.

    NOTE(review): the argument combinations presumably come from a
    parametrize decorator that is not visible here -- confirm.
    """
    factory = datacraft.spec_builder()
    factory.calculate(
        'something_interesting',
        fields=fields,
        refs=refs,
        formula=formula,
    )
    with pytest.raises(datacraft.SpecException):
        spec = factory.build()
        next(spec.generator(1))
def test_calculate_valid_from_builder():
    """A calculate field that doubles a constant 21 should yield 42.0.

    The source field is aliased to ``a`` in the formula and the generator
    is created with schema enforcement enabled.
    """
    factory = datacraft.spec_builder()
    factory.values('field', 21)
    factory.calculate(
        'meaning_of_life',
        fields={'field': 'a'},
        formula='{{a}} * 2',
    )

    spec = factory.build()
    records = spec.generator(1, enforce_schema=True)
    first = next(records)
    assert first['meaning_of_life'] == 42.0
def test_weighted_csv_from_builder():
    """The builder's ``weighted_csv`` field should yield a numeric ``status``.

    The field reads the ``status`` and ``weight`` columns of ``weighted.csv``,
    which is resolved against the module-level ``test_dir``.
    """
    factory = datacraft.spec_builder()
    spec = factory.weighted_csv(
        'status',
        datafile='weighted.csv',
        column='status',
        weight_column='weight',
        headers=True,
    ).to_spec()

    records = spec.generator(1, enforce_schema=True, data_dir=test_dir)
    record = next(records)
    assert 'status' in record
    assert record['status'].isnumeric()
def test_row_level_sampling():
    """Two csv fields sharing a ``sample_rows`` config must stay paired.

    Columns 1 and 2 of ``test.csv`` are exposed as ``status`` and
    ``status_description``. For each of ten generated records the
    description must equal the second value found in the file for that
    status.
    """
    factory = datacraft.spec_builder()
    factory.config_ref(
        'csvconfig',
        datafile="test.csv",
        headers=True,
        sample_rows="on",
    )
    factory.csv('status', config_ref='csvconfig', column=1)
    factory.csv('status_description', config_ref='csvconfig', column=2)
    spec = factory.build()

    # Map the first value on each line of the file to the second.
    description_by_status = {}
    csv_path = os.sep.join([test_dir, 'test.csv'])
    with open(csv_path) as handle:
        for line in handle:
            columns = line.strip().split(',')
            description_by_status[columns[0]] = columns[1]

    records = spec.generator(10, data_dir=test_dir)
    for _ in range(10):
        record = next(records)
        status = record['status']
        assert status in description_by_status
        assert record['status_description'] == description_by_status[status]
def _build_csv_spec(field_name, **config):
    """Build a spec holding a single ``csv`` field.

    Args:
        field_name: name of the field to add to the spec.
        **config: keyword arguments for the csv field; they are layered
            over the defaults (``datafile`` of ``test.csv``, ``headers``
            on, ``column`` 1) and override them when the same key is given.

    Returns:
        The result of ``build()`` on the spec builder.
    """
    factory = datacraft.spec_builder()
    # Defaults first, caller-supplied values last so they take precedence.
    field_config = {
        "datafile": "test.csv",
        "headers": True,
        "column": 1,
        **config,
    }
    field_spec = datacraft.builder.csv(**field_config)
    return factory.add_field(field_name, field_spec).build()
def test_random_range_invalid_data_type(field_type, data):
    """Invalid ``data`` for ``field_type`` should raise ``SpecException``.

    Spec construction and generation both run inside the ``raises`` block,
    so the exception may come from either step.

    NOTE(review): ``field_type`` and ``data`` presumably come from a
    parametrize decorator that is not visible here -- confirm.
    """
    with pytest.raises(datacraft.SpecException):
        factory = datacraft.spec_builder()
        field_spec = {'type': field_type, 'data': data}
        spec = factory.add_field('test', field_spec).build()
        record = next(spec.generator(1))
        record['test']
def test_random_range_parameterized(field_type, data, lower, upper):
    """The generated value, read as a float, must lie in ``[lower, upper]``.

    NOTE(review): the arguments presumably come from a parametrize
    decorator that is not visible here -- confirm.
    """
    field_spec = {'type': field_type, 'data': data}
    spec = datacraft.spec_builder().add_field('test', field_spec).build()

    records = spec.generator(1, enforce_schema=True)
    generated = next(records)['test']
    assert lower <= float(generated) <= upper
def test_ref_with_ref_name():
    """A ``ref`` field using ``ref_name`` should yield the referenced value.

    The ref ``values`` is defined over ``[1, 2, 3]``, and the first record
    is expected to carry ``1``.
    """
    factory = datacraft.spec_builder()
    factory.add_ref('values', datacraft.builder.values([1, 2, 3]))
    factory.ref('points_at_values', ref_name='values')

    spec = factory.build()
    first = next(spec.generator(1))
    assert first == {'points_at_values': 1}
def test_config_ref_in_refs():
    """A ``config_ref`` added through ``refs()`` must appear under ``refs``.

    The built spec is expected to hold the entry under the name ``test``,
    with type ``config_ref`` and the given keyword arguments as its config.
    """
    factory = datacraft.spec_builder()
    factory.refs().config_ref('test', key1='value1', key2='value2')
    spec = factory.build()

    expected_entry = {
        'type': 'config_ref',
        'config': {'key1': 'value1', 'key2': 'value2'},
    }
    assert 'refs' in spec
    assert spec['refs'].get('test') == expected_entry
def test_ref_with_data_as_name():
    """A ``ref`` field may name its target through ``data``.

    The field also sets a ``prefix`` of ``@``, so the first record is
    expected to carry the string ``@1``.
    """
    factory = datacraft.spec_builder()
    factory.add_ref('values', datacraft.builder.values([1, 2, 3]))
    factory.ref('points_at_values_with_prefix', data='values', prefix='@')

    spec = factory.build()
    first = next(spec.generator(1))
    assert first == {'points_at_values_with_prefix': '@1'}
def builder():
    """Return a new datacraft spec builder.

    NOTE(review): this is likely used as a pytest fixture, with the
    decorator outside the visible source -- confirm.
    """
    fresh_builder = datacraft.spec_builder()
    return fresh_builder