Example #1
0
def test_load_spec_valid():
    """Check that field 'foo' of ``spec`` yields 'dog', 'cat', 'pig' in order.

    ``spec`` is defined outside this function.
    """
    foo_supplier = datacraft.Loader(spec).get('foo')

    for iteration, expected_value in enumerate(('dog', 'cat', 'pig')):
        assert foo_supplier.next(iteration) == expected_value
Example #2
0
def test_load_spec_undefined_refs():
    """Check that a combine field naming refs never added raises SpecException.

    The spec contains only field 'foo', built with ``builder.combine`` over
    'ONE' and 'TWO'; no ``add_ref`` call supplies either name.
    """
    spec_in_progress = builder.spec_builder()
    combine_field = builder.combine(['ONE', 'TWO'])
    spec_undefined_refs = spec_in_progress.add_field('foo', combine_field).build()

    loader = datacraft.Loader(spec_undefined_refs)
    # The failure is expected on lookup of the field, not on Loader construction.
    with pytest.raises(datacraft.SpecException):
        loader.get('foo')
Example #3
0
def test_csv_valid_no_header_indexed_column():
    """Check that 'status' read from test_no_headers.csv with headers off starts at '100'."""
    csv_options = {"datafile": "test_no_headers.csv", "headers": False}
    spec = _build_csv_spec('status', **csv_options)

    status_supplier = datacraft.Loader(spec, data_dir=test_dir).get('status')

    first_value = status_supplier.next(0)
    assert first_value == '100'
Example #4
0
def test_shortcut_notation_config_in_key():
    """Check that config embedded in the field key (``?prefix=TEST``) is applied.

    The supplier for 'foo' is handed to ``_verify_expected_values`` together
    with a count of 5 and the five expected 'TEST'-prefixed strings.
    """
    config_in_key_spec = {'foo?prefix=TEST': [1, 2, 3, 4, 5]}
    foo_supplier = datacraft.Loader(config_in_key_spec).get('foo')

    expected_values = [f'TEST{number}' for number in range(1, 6)]
    _verify_expected_values(foo_supplier, 5, expected_values)
Example #5
0
def test_csv_valid_sample_mode_with_count():
    """Check that a csv field with ``sample=true&count=2`` in its key yields two items."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true&count=2"
    spec = {field_key: {}}

    desc_supplier = datacraft.Loader(spec, data_dir=test_dir).get('status_desc')
    sampled = desc_supplier.next(0)

    assert len(sampled) == 2
Example #6
0
def test_csv_valid_sample_mode():
    """Check that a csv field with ``sample=true`` in its key produces a non-None value."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true"
    spec = {field_key: {}}

    desc_supplier = datacraft.Loader(spec, data_dir=test_dir).get('status_desc')
    first_value = desc_supplier.next(0)

    assert first_value is not None
Example #7
0
def test_csv_single_column():
    """Check the value at iteration 4 of single_column.csv read with headers off."""
    # No column number or name is given in the key, so the default of a
    # single column of values is what gets exercised here.
    field_key = "user_agent:csv?datafile=single_column.csv&headers=false"
    spec = {field_key: {}}
    agent_supplier = datacraft.Loader(spec, data_dir=test_dir).get('user_agent')

    actual = agent_supplier.next(4)

    assert actual == 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.2) Gecko/20090803 Firefox/3.5.2 Slackware'
Example #8
0
def test_load_ref_by_name():
    """Check that refs in a refs-only spec can be fetched from the loader by name."""
    expected_by_ref = {'ONE': 'uno', 'TWO': 'dos'}

    # Reassign on every call so this behaves exactly like a fluent chain,
    # whether add_ref returns the same builder or a new one.
    spec_in_progress = builder.spec_builder()
    for ref_name, ref_value in expected_by_ref.items():
        spec_in_progress = spec_in_progress.add_ref(ref_name, ref_value)
    refs_only_spec = spec_in_progress.build()

    loader = datacraft.Loader(refs_only_spec)
    for ref_name, ref_value in expected_by_ref.items():
        assert loader.get(ref_name).next(0) == ref_value
Example #9
0
def test_csv_valid_with_header_indexed_column():
    """Check the first, last, and wrapped-around values of the 'status' csv field."""
    status_supplier = datacraft.Loader(_build_csv_spec('status'), data_dir=test_dir).get('status')

    checkpoints = (
        (0, '100'),
        (39, '505'),   # last entry
        (40, '100'),   # verify wrap around
    )
    for iteration, expected_status in checkpoints:
        assert status_supplier.next(iteration) == expected_status
Example #10
0
def test_load_spec_missing_type_defaults_to_values():
    """Check that a field spec holding only 'data' (no 'type') supplies its data in order."""
    field_without_type = {'data': ['one', 'two', 'tre']}
    spec_missing_type = {'foo': field_without_type}

    foo_supplier = datacraft.Loader(spec_missing_type).get('foo')

    for iteration, expected_value in enumerate(('one', 'two', 'tre')):
        assert foo_supplier.next(iteration) == expected_value
Example #11
0
def test_csv_valid_sample_mode_with_count_as_list():
    """Check that a list-valued ``count`` config sets the sample size per iteration.

    With ``count`` of [4, 3, 2], iterations 0, 1 and 2 are expected to return
    4, 3 and 2 items respectively.
    """
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true"
    spec = {field_key: {"config": {"count": [4, 3, 2]}}}

    desc_supplier = datacraft.Loader(spec, data_dir=test_dir).get('status_desc')

    for iteration, expected_size in enumerate((4, 3, 2)):
        assert len(desc_supplier.next(iteration)) == expected_size
Example #12
0
def test_load_spec_weighted_ref():
    """Check that a weighted_ref field favours its two heaviest refs.

    Over 100 iterations, the two most frequent values must be 'yes'
    (POSITIVE, weight 0.5) and 'no' (NEGATIVE, weight 0.4).
    """
    weights_by_ref = {"POSITIVE": 0.5, "NEGATIVE": 0.4, "NEUTRAL": 0.1}
    weighted_ref = (
        builder.spec_builder()
        .add_field('foo', builder.weighted_ref(weights_by_ref))
        .add_ref('POSITIVE', ['yes'])
        .add_ref('NEGATIVE', ['no'])
        .add_ref('NEUTRAL', ['meh'])
        .build()
    )
    foo_supplier = datacraft.Loader(weighted_ref).get('foo')

    # Tally 100 generated values; mostly positive and negative ones are expected.
    tally = Counter(foo_supplier.next(iteration) for iteration in range(100))
    # The top two most common entries should be yes and no.
    top_two_values = {value for value, _occurrences in tally.most_common(2)}

    assert 'yes' in top_two_values
    assert 'no' in top_two_values
Example #13
0
def test_csv_valid_with_header_field_name_column_shorthand():
    """Check that the shorthand csv key with ``headers=true&column=2`` starts at 'Continue'."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2"
    spec = {field_key: {}}

    desc_supplier = datacraft.Loader(spec, data_dir=test_dir).get('status_desc')
    first_value = desc_supplier.next(0)

    assert first_value == 'Continue'
Example #14
0
def test_csv_valid_with_header_field_name_column():
    """Check that selecting the csv column by name (``column="status"``) starts at '100'."""
    named_column_spec = _build_csv_spec('status', column="status")

    status_supplier = datacraft.Loader(named_column_spec, data_dir=test_dir).get('status')
    first_value = status_supplier.next(0)

    assert first_value == '100'
Example #15
0
def test_load_spec_invalid_key():
    """Check that requesting the key 'unknown' from a loader over ``spec`` raises SpecException.

    ``spec`` is defined outside this function; presumably it has no
    'unknown' field.
    """
    spec_loader = datacraft.Loader(spec)

    with pytest.raises(datacraft.SpecException):
        spec_loader.get('unknown')