Exemple #1
0
def test_nested_count_edge_cases(count, as_list, expected):
    """Check the nested 'outer' field output for one count/as_list pairing.

    The inner spec is a single values field named 'inner'; the first record
    produced by the outer spec must carry ``expected`` under 'outer'.
    """
    inner_builder = builder.spec_builder()
    inner_spec = inner_builder.values("inner", ["a", "b", "c"]).to_spec()

    outer_builder = builder.spec_builder()
    nested_options = {"count": count, "as_list": as_list}
    spec = outer_builder.nested("outer", inner_spec, **nested_options).to_spec()

    records = spec.generator(1)
    first_record = next(records)
    assert first_record['outer'] == expected
Exemple #2
0
def test_single_nested():
    """A nested field yields a dict keyed by the fields of its inner spec."""
    # Layout under test:
    #   geo
    #     place_id
    #     coordinates
    geo_level = (
        builder.spec_builder()
        .add_field("place_id:uuid", {})
        .add_field("coordinates", builder.geo_pair(as_list=True))
    )
    top_level = builder.spec_builder().add_field("id:uuid", {})
    geo_field = builder.nested(fields=geo_level.build())
    spec = top_level.add_field("geo", geo_field).build()

    geo_supplier = Loader(spec).get('geo')
    generated = geo_supplier.next(0)

    assert isinstance(generated, dict)
    assert list(generated.keys()) == ['place_id', 'coordinates']
Exemple #3
0
def test_mac_address_dashes():
    """A mac field built with dashes='true' is 17 characters and contains '-'."""
    mac_builder = builder.spec_builder()
    mac_builder.mac('mac', dashes='true')

    records = mac_builder.build().generator(1, enforce_schema=True)
    mac_value = next(records)['mac']

    assert len(mac_value) == 17
    assert '-' in mac_value, f'No dashes in: {mac_value}'
Exemple #4
0
def test_api_change():
    """Build an email field from chained combine refs and check the first value."""
    # builder for the final spec; its refs builder fills the refs section
    spec_builder = builder.spec_builder()
    refs = spec_builder.refs_builder

    # one values ref per raw data source
    animals = refs.values(
        'ANIMALS', data=['zebra', 'hedgehog', 'llama', 'flamingo'])
    actions = refs.values(
        'ACTIONS', data=['fling', 'jump', 'launch', 'dispatch'])
    domains = refs.values(
        'DOMAINS',
        data={"gmail.com": 0.6, "yahoo.com": 0.3, "hotmail.com": 0.1})

    # HANDLE joins ANIMALS and ACTIONS; email joins HANDLE and DOMAINS
    handles = refs.combine('HANDLE', refs=[animals, actions], join_with='_')
    spec_builder.combine('email', refs=[handles, domains], join_with='@')

    records = spec_builder.build().generator(1)
    first_record = next(records)
    assert first_record['email'].startswith('zebra_fling@')
Exemple #5
0
def test_load_spec_undefined_refs():
    """Loading a combine field whose refs are never defined raises SpecException."""
    pending = builder.spec_builder()
    spec_without_refs = pending.add_field(
        'foo', builder.combine(['ONE', 'TWO'])).build()

    loader = datacraft.Loader(spec_without_refs)
    with pytest.raises(datacraft.SpecException):
        loader.get('foo')
Exemple #6
0
def test_add_refs():
    """Refs passed to add_refs by keyword appear under the built spec's 'refs'."""
    refs_holder = builder.spec_builder()
    ref_specs = {
        'foo': builder.values(['bob', 'joe', 'bobby joe']),
        'bar': builder.values([1, 2, 3]),
    }
    refs_holder.add_refs(**ref_specs)

    spec = refs_holder.build()
    for ref_name in ('foo', 'bar'):
        assert ref_name in spec.get('refs', [])
Exemple #7
0
def test_spec_builder():
    """values() then to_spec() gives a DataSpec whose first record holds 'a'."""
    spec = builder.spec_builder().values('name', ['a', 'b', 'c']).to_spec()
    assert isinstance(spec, DataSpec)

    records = spec.generator(1)
    assert next(records) == {'name': 'a'}
Exemple #8
0
def test_add_fields():
    """Fields passed to add_fields by keyword appear as keys of the built spec."""
    fields_holder = builder.spec_builder()
    field_specs = {
        'foo': builder.templated('{{first}}: {{last}}'),
        'bar': builder.values([1, 2, 3]),
    }
    fields_holder.add_fields(**field_specs)

    spec = fields_holder.build()
    for field_name in ('foo', 'bar'):
        assert field_name in spec
Exemple #9
0
def test_shorthand_key_support(key_as_spec, field_name, first_value_contains):
    """A shorthand key with an empty spec body produces the named field.

    The first generated record must contain ``field_name`` and the string
    form of its value must contain ``first_value_contains``.
    """
    shorthand_builder = builder.spec_builder()
    shorthand_builder.add_field(key_as_spec, {})

    records = shorthand_builder.build().generator(1, enforce_schema=True)
    record = next(records)

    assert field_name in record
    assert first_value_contains in str(record[field_name])
Exemple #10
0
def test_load_ref_by_name():
    """Refs in a refs-only spec can be fetched from the Loader by name."""
    refs_only_spec = (
        builder.spec_builder()
        .add_ref('ONE', 'uno')
        .add_ref('TWO', 'dos')
        .build()
    )
    loader = datacraft.Loader(refs_only_spec)

    expected_by_ref = {'ONE': 'uno', 'TWO': 'dos'}
    for ref_name, expected in expected_by_ref.items():
        assert loader.get(ref_name).next(0) == expected
Exemple #11
0
def test_distribution_as_count():
    """A distribution object is accepted as the count of a values field."""
    spec_builder = builder.spec_builder()
    count_distribution = distributions.uniform(1, 3)
    values_builder = spec_builder.values(
        'name', ['a', 'b', 'c'], count=count_distribution)
    spec = values_builder.to_spec()
    assert isinstance(spec, DataSpec)

    record = next(spec.generator(1))
    size = len(record['name'])
    assert 1 <= size < 3
Exemple #12
0
def test_rand_range():
    """rand_range with cast='int' supplies a numeric value between 100 and 110."""
    field_builder = builder.spec_builder()
    range_field = builder.rand_range([100.9, 109.9], cast="int")
    spec = field_builder.add_field("field", range_field).build()

    value = Loader(spec).get('field').next(0)

    assert str(value).isnumeric()
    # the lower bound is 100 because the value is occasionally rounded down
    assert 100 <= value <= 110
Exemple #13
0
def test_select_list_ref_contains_data():
    """select_list_subset pointing at a ref by name returns a list of two items."""
    pets_builder = builder.spec_builder()
    subset_options = {'data': None, 'ref_name': 'pets_list', 'count': 2}
    pets_builder.select_list_subset('pets', **subset_options)
    # the data lives in the ref that the field names via ref_name
    pets_builder.refs().values(
        key='pets_list',
        data=['goat', 'sheep', 'bear', 'cow', 'dragon'])

    pets_supplier = Loader(pets_builder.build()).get('pets')
    selected = pets_supplier.next(0)

    assert isinstance(selected, list)
    assert len(selected) == 2
Exemple #14
0
def test_to_pandas():
    """to_pandas(30) gives 30 rows with id/lat/lon columns and bounded lat/lon."""
    frame_builder = builder.spec_builder()
    frame_builder.rand_range('id', [1, 100], cast='int')
    frame_builder.geo_lat('lat')
    frame_builder.geo_long('lon')

    frame = frame_builder.build().to_pandas(30)

    assert len(frame) == 30
    assert sorted(frame.columns) == sorted(['id', 'lat', 'lon'])

    latitudes = frame['lat']
    longitudes = frame['lon']
    assert latitudes.min() >= -90
    assert longitudes.min() >= -180
    assert latitudes.max() <= 90
    assert longitudes.max() <= 180
Exemple #15
0
def test_multi_nested():
    """Two levels of nesting yield dicts keyed by the inner spec at each level."""
    # Layout under test:
    #   user
    #     user_id
    #     geo
    #       place_id
    #       coordinates
    geo_level = (
        builder.spec_builder()
        .add_field("place_id:uuid", {})
        .add_field("coordinates", builder.geo_pair(as_list=True))
    )
    user_level = builder.spec_builder().add_field("user_id:uuid", {})
    user_level = user_level.add_field(
        "geo", builder.nested(fields=geo_level.build()))
    top_level = builder.spec_builder().add_field("id:uuid", {})
    top_level = top_level.add_field(
        "user", builder.nested(fields=user_level.build()))
    spec = top_level.build()

    user_value = Loader(spec).get('user').next(0)
    assert isinstance(user_value, dict)
    assert list(user_value.keys()) == ['user_id', 'geo']

    geo_value = user_value['geo']
    assert isinstance(geo_value, dict)
    assert list(geo_value.keys()) == ['place_id', 'coordinates']
Exemple #16
0
def test_api_builder():
    """Check that the direct build methods and the add_* style give equal specs.

    The same DOMAINS, ANIMALS, ACTIONS and HANDLE refs plus an 'email' field
    are declared twice: once through the spec builder and its refs builder,
    and once by creating field specs with the module-level ``builder``
    functions and attaching them with ``add_ref`` / ``add_field``. The two
    built specs must compare equal.
    """
    # raw data for both specs
    animal_names = ['zebra', 'hedgehog', 'llama', 'flamingo']
    action_list = ['fling', 'jump', 'launch', 'dispatch']
    domain_weights = {"gmail.com": 0.6, "yahoo.com": 0.3, "hotmail.com": 0.1}

    # first build uses direct build methods
    builder1 = builder.spec_builder()
    refs1 = builder1.refs_builder
    domains = refs1.values('DOMAINS', domain_weights)
    animals = refs1.values('ANIMALS', animal_names)
    actions = refs1.values('ACTIONS', action_list, sample=True)
    # HANDLE must reference ANIMALS and ACTIONS, mirroring handle_spec below;
    # without refs the two specs describe different HANDLE entries
    handles = refs1.combine('HANDLE', refs=[animals, actions], join_with='_')
    builder1.combine('email', refs=[handles, domains])

    spec1 = builder1.build()

    # second builder uses the fluent api style
    builder2 = builder.spec_builder()

    animals_spec = builder.values(data=animal_names)
    actions_spec = builder.values(data=action_list, sample=True)
    domains_spec = builder.values(data=domain_weights)
    # combines ANIMALS and ACTIONS
    handle_spec = builder.combine(refs=['ANIMALS', 'ACTIONS'], join_with='_')

    builder2.add_ref('DOMAINS', domains_spec) \
        .add_ref('ANIMALS', animals_spec) \
        .add_ref('ACTIONS', actions_spec) \
        .add_ref('HANDLE', handle_spec)

    builder2.add_field('email', builder.combine(refs=['HANDLE', 'DOMAINS']))

    spec2 = builder2.build()

    assert spec1 == spec2
Exemple #17
0
def test_load_spec_weighted_ref():
    """The two heaviest weighted refs dominate 100 draws from the field."""
    ref_weights = {"POSITIVE": 0.5, "NEGATIVE": 0.4, "NEUTRAL": 0.1}
    weighted_spec = (
        builder.spec_builder()
        .add_field('foo', builder.weighted_ref(ref_weights))
        .add_ref('POSITIVE', ['yes'])
        .add_ref('NEGATIVE', ['no'])
        .add_ref('NEUTRAL', ['meh'])
        .build()
    )
    supplier = datacraft.Loader(weighted_spec).get('foo')

    # tally 100 draws; 'yes' and 'no' are expected to be the top two values
    tally = Counter(supplier.next(index) for index in range(100))
    top_two = [value for value, _ in tally.most_common(2)]

    assert 'yes' in top_two
    assert 'no' in top_two
Exemple #18
0
def _geo_pair_spec(**config):
    """Build a spec whose only field, 'pair', comes from builder.geo_pair(**config)."""
    spec_builder = builder.spec_builder()
    pair_field = builder.geo_pair(**config)
    return spec_builder.add_field('pair', pair_field).build()
Exemple #19
0
def _ip_precise_spec(**config):
    """Build a spec whose only field, 'network', comes from builder.ip_precise(**config)."""
    spec_builder = builder.spec_builder()
    network_field = builder.ip_precise(**config)
    return spec_builder.add_field('network', network_field).build()
Exemple #20
0
def _ipv4_spec(**config):
    """Build a spec whose only field, 'network', comes from builder.ipv4(**config)."""
    spec_builder = builder.spec_builder()
    network_field = builder.ipv4(**config)
    return spec_builder.add_field('network', network_field).build()
Exemple #21
0
def _date_iso_us_spec(**config):
    """Build a spec whose only field, 'foo', comes from builder.date_iso_us(**config)."""
    spec_builder = builder.spec_builder()
    date_field = builder.date_iso_us(**config)
    return spec_builder.add_field('foo', date_field).build()
Exemple #22
0
def _char_class_spec(data, **config):
    """Build a spec whose only field, 'name', is a char_class over ``data``.

    Extra keyword arguments are forwarded unchanged to builder.char_class.
    """
    spec_builder = builder.spec_builder()
    name_field = builder.char_class(data=data, **config)
    return spec_builder.add_field("name", name_field).build()
Exemple #23
0
def _geo_long_spec(**config):
    """Build a spec whose only field, 'long', comes from builder.geo_long(**config)."""
    spec_builder = builder.spec_builder()
    long_field = builder.geo_long(**config)
    return spec_builder.add_field('long', long_field).build()
Exemple #24
0
def test_add_refs_edge_cases():
    """Adding a ref named like an existing field leaves the field in the spec."""
    template = '{{first}}: {{last}}'
    shared_name_builder = builder.spec_builder()
    shared_name_builder.templated('foo', data=template)
    shared_name_builder.add_ref('foo', builder.templated(template))

    built = shared_name_builder.build()
    assert 'foo' in built
Exemple #25
0
                "type": "values",
                "data": 1
            }
        }
    }),
]


@pytest.mark.parametrize("generated_spec,expected_spec",
                         field_spec_build_tests)
def test_spec_builder(generated_spec, expected_spec):
    """Check each generated spec in field_spec_build_tests equals its expected value."""
    assert generated_spec == expected_spec


full_spec_build_tests = [
    (builder.spec_builder().values('name', [1, 2, 3], prefix="foo"), {
        "name": {
            "type": "values",
            "data": [1, 2, 3],
            "config": {
                "prefix": "foo"
            }
        }
    }),
    (builder.spec_builder().combine('name', refs=["ONE", "TWO"],
                                    join_with='@'), {
                                        "name": {
                                            "type": "combine",
                                            "refs": ["ONE", "TWO"],
                                            "config": {
                                                "join_with": "@"
Exemple #26
0
def one_two_builder():
    """Return a spec builder holding values fields named 'one' and 'two'."""
    populated = builder.spec_builder()
    field_data = (('one', ["uno", "ichi"]), ('two', ["dos", "ni"]))
    for field_name, choices in field_data:
        populated.values(field_name, choices)
    return populated
Exemple #27
0
def test_default_delim_mac_address():
    """A mac field built with no config generates a 17-character value."""
    mac_builder = builder.spec_builder()
    spec = mac_builder.add_field('mac', builder.mac()).build()

    records = spec.generator(1)
    mac_value = next(records)['mac']
    assert len(mac_value) == 17
Exemple #28
0
def _cc_abbrev_spec(abbrev, **config):
    """Build a spec whose only field, 'name', is a char_class_abbrev for ``abbrev``.

    Extra keyword arguments are forwarded unchanged to
    builder.char_class_abbrev.
    """
    spec_builder = builder.spec_builder()
    name_field = builder.char_class_abbrev(cc_abbrev=abbrev, **config)
    return spec_builder.add_field("name", name_field).build()
Exemple #29
0
def _geo_lat_spec(**config):
    """Build a spec whose only field, 'lat', comes from builder.geo_lat(**config)."""
    spec_builder = builder.spec_builder()
    lat_field = builder.geo_lat(**config)
    return spec_builder.add_field('lat', lat_field).build()