def test_unicode_no_data_element():
    """A unicode_range field whose 'data' key has been removed raises SpecException on load."""
    incomplete_spec = builder.single_field("field", builder.unicode_range(data=None)).build()
    field_definition = incomplete_spec['field']
    # Drop the data element entirely so the field spec is missing it when resolved.
    field_definition.pop('data')

    with pytest.raises(SpecException):
        loader = field_loader(incomplete_spec)
        loader.get("field")
def test_load_spec_undefined_refs():
    """Getting a combine field that names refs ('ONE', 'TWO') never added to the spec raises."""
    combine_field = builder.combine(['ONE', 'TWO'])
    # No add_ref calls are made, so both names are left undefined in the built spec.
    spec_undefined_refs = builder.spec_builder().add_field('foo', combine_field).build()

    loader = datacraft.field_loader(spec_undefined_refs)

    with pytest.raises(datacraft.SpecException):
        loader.get('foo')
def test_shortcut_notation_config_in_key():
    """A spec keyed 'foo?prefix=TEST' is checked against the values prefixed with 'TEST'."""
    expected_values = ['TEST1', 'TEST2', 'TEST3', 'TEST4', 'TEST5']
    # The config (prefix=TEST) travels inside the field key itself.
    config_in_key_spec = {'foo?prefix=TEST': [1, 2, 3, 4, 5]}

    supplier = datacraft.field_loader(config_in_key_spec).get('foo')

    _verify_expected_values(supplier, 5, expected_values)
def test_unicode_range_single_range_as_hex():
    """Every character generated from the single range 0x3040-0x309f lies inside that range."""
    range_start, range_end = 0x3040, 0x309f
    text_field = builder.unicode_range(data=[range_start, range_end], count=5)
    spec = builder.single_field("text", text_field).build()

    generated = field_loader(spec).get('text').next(0)

    for character in generated:
        code_point = ord(character)
        assert range_start <= code_point <= range_end
def test_range_wrap_around():
    """The range [1, 3] yields 1, 2, 3 and then returns to 1 on the fourth iteration."""
    spec = builder.single_field("field:range", [1, 3]).build()
    supplier = field_loader(spec).get('field')

    # Request one more value than the range holds to observe the wrap.
    produced = []
    for iteration in range(4):
        produced.append(supplier.next(iteration))

    assert produced == [1, 2, 3, 1]
def test_values_list_order():
    """A plain list of values is supplied back in its original order."""
    source_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    spec = builder.single_field('field', source_values).build()
    supplier = field_loader(spec).get('field')

    produced = []
    for iteration in range(10):
        produced.append(supplier.next(iteration))

    assert produced == source_values
def test_load_spec_valid():
    """The 'foo' field of the shared ``spec`` supplies 'dog', 'cat', 'pig' for iterations 0-2."""
    # ``spec`` is not defined in this function; it comes from the enclosing scope.
    supplier = datacraft.field_loader(spec).get('foo')

    for iteration, expected in enumerate(['dog', 'cat', 'pig']):
        assert supplier.next(iteration) == expected
def test_geo_spec_pair_lat_first():
    """With lat_first="yes" the first comma-separated part is checked as the latitude."""
    pair_spec = _geo_pair_spec(precision=2, lat_first="yes")
    pair_supplier = field_loader(pair_spec).get('pair')

    components = pair_supplier.next(0).split(',')

    # Order is flipped relative to the default: index 1 is longitude, index 0 is latitude.
    _verify_long(components[1], -2)
    _verify_lat(components[0], -2)
def test_geo_spec_pair_default_order():
    """Without lat_first the first comma-separated part is checked as the longitude."""
    pair_spec = _geo_pair_spec(precision=1)
    pair_supplier = field_loader(pair_spec).get('pair')

    components = pair_supplier.next(0).split(',')

    # Default ordering: index 0 is longitude, index 1 is latitude.
    _verify_long(components[0], -1)
    _verify_lat(components[1], -1)
def test_config_ref_for_values():
    """A values field pointing at a config_ref picks up that ref's config (here: quoting)."""
    names = ["bob", "joe", "ann", "sue"]
    quoting_config = builder.config_ref(quote="\"")

    # The field key points at the 'quoteit' ref through the config_ref parameter.
    spec = builder.single_field("name?config_ref=quoteit", names) \
        .add_ref("quoteit", quoting_config) \
        .build()

    first_name = field_loader(spec).get('name').next(0)

    assert first_name == '"bob"'
def test_load_spec_missing_type_defaults_to_values():
    """A field spec with only a 'data' list and no 'type' supplies the list items in order."""
    expected_items = ['one', 'two', 'tre']
    spec_missing_type = {'foo': {'data': ['one', 'two', 'tre']}}

    supplier = datacraft.field_loader(spec_missing_type).get('foo')

    for iteration, expected in enumerate(expected_items):
        assert supplier.next(iteration) == expected
def test_nested_range_lists_mixed_types_and_step_cast():
    """Nested ranges with cast=str and precision=2 produce '0.5' then '20.01'."""
    stepped_range = [0.5, 2.5, 0.5]
    unstepped_range = [20.01234, 30.56789]
    spec = builder.single_field(
        "field:range?cast=str&precision=2",
        [stepped_range, unstepped_range],
    ).build()

    supplier = field_loader(spec).get('field')
    first_value = supplier.next(0)
    second_value = supplier.next(1)

    assert first_value == '0.5'
    assert second_value == '20.01'
def test_char_class_abbreviations():
    """Each 'cc-<key>' abbreviation built from _CLASS_MAPPING loads and passes _verify_values."""
    # Snapshot the abbreviations up front, one per key of the class mapping.
    abbreviations = ['cc-' + class_key for class_key in _CLASS_MAPPING]

    for abbrev in abbreviations:
        abbrev_spec = _cc_abbrev_spec(abbrev=abbrev, count=7)
        name_supplier = field_loader(abbrev_spec).get('name')
        _verify_values(name_supplier, 7, 7)
def test_uuid_valid_schema(key, spec):
    """With schema enforcement on, the 'foo' field built from (key, spec) yields a UUID string.

    ``key`` and ``spec`` are supplied by the caller (presumably a pytest
    parametrization that is not visible in this block - confirm).
    """
    # for coverage
    full_spec = builder.single_field(key, spec).build()
    schema_checked_loader = field_loader(full_spec, enforce_schema=True)

    generated = schema_checked_loader.get('foo').next(0)

    assert UUID_REGEX.match(generated)
def test_csv_valid_sample_mode_with_count_as_list():
    """A csv field in sample mode with count [4, 3, 2] returns results of those lengths in turn."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true"
    spec = {field_key: {"config": {"count": [4, 3, 2]}}}

    supplier = datacraft.field_loader(spec, data_dir=test_dir).get('status_desc')

    # One expected length per iteration, matching the configured count list.
    for iteration, expected_length in enumerate([4, 3, 2]):
        assert len(supplier.next(iteration)) == expected_length
def test_values_count_as_list():
    """A values field with count=[2, 3] returns a 2-item list and then a 3-item list."""
    source_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    values_field = builder.values(source_values, count=[2, 3])
    spec = builder.single_field('field', values_field).build()
    supplier = field_loader(spec).get('field')

    first_batch = supplier.next(0)
    assert isinstance(first_batch, list)
    assert len(first_batch) == 2

    second_batch = supplier.next(1)
    assert isinstance(second_batch, list)
    assert len(second_batch) == 3
def test_load_ref_by_name():
    """Refs added to a spec with no fields can be fetched from the loader by their ref name."""
    refs_only_spec = (
        builder.spec_builder()
        .add_ref('ONE', 'uno')
        .add_ref('TWO', 'dos')
        .build()
    )
    loader = datacraft.field_loader(refs_only_spec)

    first_ref_value = loader.get('ONE').next(0)
    assert first_ref_value == 'uno'

    second_ref_value = loader.get('TWO').next(0)
    assert second_ref_value == 'dos'
def test_nested_range_lists_simple():
    """With nested ranges, iteration 0 falls in [0, 10] and iteration 1 in [20, 30]."""
    nested_ranges = [[0, 10], [20, 30]]
    spec = builder.single_field("field:range", nested_ranges).build()
    supplier = field_loader(spec).get('field')

    from_first_range = supplier.next(0)
    assert 0 <= from_first_range <= 10

    from_second_range = supplier.next(1)
    assert 20 <= from_second_range <= 30
def test_csv_single_column():
    """A csv field with no column given reads the single-column file; iteration 4 is checked."""
    expected = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.2) Gecko/20090803 Firefox/3.5.2 Slackware'
    # Neither a column number nor a column name is given, so the default is to
    # expect a single column of values.
    spec = {"user_agent:csv?datafile=single_column.csv&headers=false": {}}

    supplier = datacraft.field_loader(spec, data_dir=test_dir).get('user_agent')

    assert supplier.next(4) == expected
def test_csv_valid_with_header_indexed_column():
    """The 'status' csv column yields '100' first, '505' at iteration 39, then wraps to '100'."""
    status_spec = _build_csv_spec('status')
    supplier = datacraft.field_loader(status_spec, data_dir=test_dir).get('status')

    first_entry = supplier.next(0)
    assert first_entry == '100'

    # last entry
    last_entry = supplier.next(39)
    assert last_entry == '505'

    # verify wrap around
    wrapped_entry = supplier.next(40)
    assert wrapped_entry == '100'
def test_char_class_multiple_classes():
    """With 'CUSTOM' excluded, the generated string holds only lowercase letters or digits."""
    excluded_chars = "CUSTOM"
    spec = _char_class_spec(data=["lower", "digits", "CUSTOM"], exclude=excluded_chars)

    generated = field_loader(spec).get('name').next(0)

    assert isinstance(generated, str)
    for character in generated:
        is_lowercase = character in string.ascii_lowercase
        assert is_lowercase or character in string.digits
def test_rand_range():
    """A rand_range over [100.9, 109.9] cast to int yields a numeric value within 100..110."""
    int_cast_range = builder.rand_range([100.9, 109.9], cast="int")
    spec = builder.spec_builder().add_field("field", int_cast_range).build()

    generated = field_loader(spec).get('field').next(0)

    assert str(generated).isnumeric()
    # occasionally gets rounded down to 100
    assert 100 <= generated <= 110
def test_unicode_multiple_ranges():
    """Text drawn from two hex-string ranges has length 3..7 and only characters from them."""
    hex_string_ranges = [['0x0590', '0x05ff'], ['0x3040', '0x309f']]
    text_field = builder.unicode_range(data=hex_string_ranges, min=3, max=7)
    spec = builder.single_field("text", text_field).build()

    generated = field_loader(spec).get('text').next(0)

    assert 3 <= len(generated) <= 7
    for character in generated:
        code_point = ord(character)
        in_first_range = 0x0590 <= code_point <= 0x05ff
        assert in_first_range or 0x3040 <= code_point <= 0x309f
def test_nested_range_lists_mixed_types_and_step():
    """Nested ranges may mix an int range with a step and a float range without one."""
    stepped_int_range = [0, 10, 2]
    float_range = [20.0, 30.0]
    spec = builder.single_field("field:range", [stepped_int_range, float_range]).build()
    supplier = field_loader(spec).get('field')

    from_int_range = supplier.next(0)
    # A step of 2 starting at 0 means the value must be even.
    assert from_int_range % 2 == 0
    assert 0 <= from_int_range <= 10

    from_float_range = supplier.next(1)
    assert 20.0 <= from_float_range <= 30.0
def test_uuid_spec():
    """A 'foo:uuid' field yields UUID_REGEX-matching values that differ between iterations."""
    uuid_spec = builder.single_field("foo:uuid", {}).build()
    supplier = field_loader(uuid_spec).get('foo')

    first_uuid = supplier.next(0)
    assert UUID_REGEX.match(first_uuid)

    second_uuid = supplier.next(1)
    assert UUID_REGEX.match(second_uuid)

    assert first_uuid != second_uuid
def test_unicode_range_single_range_as_hex_strings():
    """A single unicode range given as hex *strings* yields text within the expected bounds.

    The range endpoints are passed as the strings '0x3040' and '0x309f', not as
    integer literals, so this covers the string form of a single (non-nested)
    range. The generated text must be 2..7 characters long (min/max config) and
    every character must lie between code points 0x3040 and 0x309f.
    """
    # Strings, not ints: integer endpoints are already covered by
    # test_unicode_range_single_range_as_hex.
    field_spec = builder.unicode_range(data=['0x3040', '0x309f'], mean=5, stddev=2, min=2, max=7)
    spec = builder.single_field("text", field_spec).build()

    supplier = field_loader(spec).get('text')
    first = supplier.next(0)

    assert 2 <= len(first) <= 7
    for c in first:
        assert 0x3040 <= ord(c) <= 0x309f
def test_geo_spec_pair_reduced_ranges_bbox():
    """A geo pair limited by a bbox yields "long,lat" parts checked against the bbox ranges."""
    min_lat, max_lat = -90, -45.0
    min_long, max_long = 90.0, 180.0
    # bbox is passed in the order: start long, start lat, end long, end lat.
    bounding_box = [min_long, min_lat, max_long, max_lat]
    pair_spec = _geo_pair_spec(bbox=bounding_box)

    components = field_loader(pair_spec).get('pair').next(0).split(',')

    _verify_in_range_and_has_precision(components[0], min_long, max_long, -4)
    _verify_in_range_and_has_precision(components[1], min_lat, max_lat, -4)
def test_geo_pair_as_list():
    """With as_list=True the geo pair is a list whose two items are checked against the bbox."""
    min_lat, max_lat = -45.0, 45.0
    min_long, max_long = 50.0, 60.0
    # bbox is passed in the order: start long, start lat, end long, end lat.
    bounding_box = [min_long, min_lat, max_long, max_lat]
    pair_spec = _geo_pair_spec(bbox=bounding_box, as_list=True)

    pair = field_loader(pair_spec).get('pair').next(0)

    assert isinstance(pair, list)
    _verify_in_range_and_has_precision(pair[0], min_long, max_long, -4)
    _verify_in_range_and_has_precision(pair[1], min_lat, max_lat, -4)
def test_geo_spec_pair_reduced_ranges():
    """A geo pair limited by explicit start/end lat and long is checked against those ranges."""
    min_lat, max_lat = 0.0, 75.0
    min_long, max_long = -180.0, -90.0
    pair_spec = _geo_pair_spec(
        start_lat=min_lat,
        end_lat=max_lat,
        start_long=min_long,
        end_long=max_long,
    )

    components = field_loader(pair_spec).get('pair').next(0).split(',')

    _verify_in_range_and_has_precision(components[0], min_long, max_long, -4)
    _verify_in_range_and_has_precision(components[1], min_lat, max_lat, -4)
def test_single_nested():
    """A nested 'geo' field yields a dict with keys 'place_id' and 'coordinates', in that order."""
    # Layout under test:
    #   Geo
    #     - Place
    #     - Coord
    inner_builder = builder.spec_builder() \
        .add_field("place_id:uuid", {}) \
        .add_field("coordinates", builder.geo_pair(as_list=True))

    outer_builder = builder.spec_builder().add_field("id:uuid", {})
    nested_geo_field = builder.nested(fields=inner_builder.build())
    spec = outer_builder.add_field("geo", nested_geo_field).build()

    geo_record = field_loader(spec).get('geo').next(0)

    assert isinstance(geo_record, dict)
    record_keys = list(geo_record.keys())
    assert record_keys == ['place_id', 'coordinates']