def test_load_spec_valid():
    """Field 'foo' of the externally defined ``spec`` yields dog, cat, pig in order."""
    foo_supplier = datacraft.Loader(spec).get('foo')
    for iteration, expected in enumerate(['dog', 'cat', 'pig']):
        assert foo_supplier.next(iteration) == expected
def test_load_spec_undefined_refs():
    """Requesting a combine field whose refs were never added raises SpecException."""
    dangling_refs_spec = (
        builder.spec_builder()
        .add_field('foo', builder.combine(['ONE', 'TWO']))
        .build()
    )
    # The loader is built outside the raises-block: only the lookup is expected to fail.
    spec_loader = datacraft.Loader(dangling_refs_spec)
    with pytest.raises(datacraft.SpecException):
        spec_loader.get('foo')
def test_csv_valid_no_header_indexed_column():
    """The first 'status' value read from the header-less CSV file is '100'."""
    headerless_spec = _build_csv_spec(
        'status',
        datafile="test_no_headers.csv",
        headers=False,
    )
    status_supplier = datacraft.Loader(headerless_spec, data_dir=test_dir).get('status')
    first_value = status_supplier.next(0)
    assert first_value == '100'
def test_shortcut_notation_config_in_key():
    """A ``prefix`` config given in the field key is applied to every value."""
    expected_values = ['TEST1', 'TEST2', 'TEST3', 'TEST4', 'TEST5']
    config_in_key_spec = {'foo?prefix=TEST': [1, 2, 3, 4, 5]}
    # The field is looked up by its bare name, without the ?prefix=... part.
    foo_supplier = datacraft.Loader(config_in_key_spec).get('foo')
    _verify_expected_values(foo_supplier, 5, expected_values)
def test_csv_valid_sample_mode_with_count():
    """With sample=true and count=2 in the key, one call returns two items."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true&count=2"
    csv_loader = datacraft.Loader({field_key: {}}, data_dir=test_dir)
    sampled = csv_loader.get('status_desc').next(0)
    assert len(sampled) == 2
def test_csv_valid_sample_mode():
    """With sample=true in the key, the supplier still produces a value."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true"
    csv_loader = datacraft.Loader({field_key: {}}, data_dir=test_dir)
    sampled = csv_loader.get('status_desc').next(0)
    assert sampled is not None
def test_csv_single_column():
    """A csv field with no column setting reads from a single-column file."""
    # Neither a column number nor a column name is given, so the default is
    # to expect a single column of values.
    field_key = "user_agent:csv?datafile=single_column.csv&headers=false"
    agent_supplier = datacraft.Loader({field_key: {}}, data_dir=test_dir).get('user_agent')
    expected = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.2) Gecko/20090803 Firefox/3.5.2 Slackware'
    actual = agent_supplier.next(4)
    assert actual == expected
def test_load_ref_by_name():
    """Refs can be fetched from the loader directly by their own names."""
    refs_only_spec = (
        builder.spec_builder()
        .add_ref('ONE', 'uno')
        .add_ref('TWO', 'dos')
        .build()
    )
    ref_loader = datacraft.Loader(refs_only_spec)
    for ref_name, expected in (('ONE', 'uno'), ('TWO', 'dos')):
        assert ref_loader.get(ref_name).next(0) == expected
def test_csv_valid_with_header_indexed_column():
    """The default CSV spec yields status codes and wraps around after the last one."""
    csv_loader = datacraft.Loader(_build_csv_spec('status'), data_dir=test_dir)
    status_supplier = csv_loader.get('status')
    checks = (
        (0, '100'),
        (39, '505'),  # last entry
        (40, '100'),  # verify wrap around
    )
    for iteration, expected in checks:
        assert status_supplier.next(iteration) == expected
def test_load_spec_missing_type_defaults_to_values():
    """A field spec with only ``data`` and no ``type`` supplies its data in order."""
    expected_values = ('one', 'two', 'tre')
    spec_missing_type = {'foo': {'data': list(expected_values)}}
    foo_supplier = datacraft.Loader(spec_missing_type).get('foo')
    for iteration, expected in enumerate(expected_values):
        assert foo_supplier.next(iteration) == expected
def test_csv_valid_sample_mode_with_count_as_list():
    """A list-valued ``count`` config gives the result size for successive calls."""
    expected_sizes = (4, 3, 2)
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2&sample=true"
    field_body = {"config": {"count": list(expected_sizes)}}
    csv_loader = datacraft.Loader({field_key: field_body}, data_dir=test_dir)
    desc_supplier = csv_loader.get('status_desc')
    for iteration, expected_size in enumerate(expected_sizes):
        assert len(desc_supplier.next(iteration)) == expected_size
def test_load_spec_weighted_ref():
    """A weighted_ref field draws mostly from its two most heavily weighted refs."""
    ref_weights = {
        "POSITIVE": 0.5,
        "NEGATIVE": 0.4,
        "NEUTRAL": 0.1,
    }
    weighted_ref = (
        builder.spec_builder()
        .add_field('foo', builder.weighted_ref(ref_weights))
        .add_ref('POSITIVE', ['yes'])
        .add_ref('NEGATIVE', ['no'])
        .add_ref('NEUTRAL', ['meh'])
        .build()
    )
    foo_supplier = datacraft.Loader(weighted_ref).get('foo')

    # Expect mostly positive and negative values across 100 draws.
    tally = Counter(foo_supplier.next(iteration) for iteration in range(100))

    # The two most common entries should be 'yes' and 'no'.
    top_two = [value for value, _ in tally.most_common(2)]
    assert 'yes' in top_two
    assert 'no' in top_two
def test_csv_valid_with_header_field_name_column_shorthand():
    """The shorthand key with headers=true and column=2 yields 'Continue' first."""
    field_key = "status_desc:csv?datafile=test.csv&headers=true&column=2"
    csv_loader = datacraft.Loader({field_key: {}}, data_dir=test_dir)
    first_value = csv_loader.get('status_desc').next(0)
    assert first_value == 'Continue'
def test_csv_valid_with_header_field_name_column():
    """Building the CSV spec with column="status" yields '100' as the first value."""
    named_column_spec = _build_csv_spec('status', column="status")
    csv_loader = datacraft.Loader(named_column_spec, data_dir=test_dir)
    first_value = csv_loader.get('status').next(0)
    assert first_value == '100'
def test_load_spec_invalid_key():
    """Asking the loader for the key 'unknown' raises SpecException."""
    spec_loader = datacraft.Loader(spec)
    # Only the lookup sits inside the raises-block; constructing the loader must succeed.
    with pytest.raises(datacraft.SpecException):
        spec_loader.get('unknown')