Example #1
def test_unique_constraint_violation():
    schema = deepcopy(SCHEMA_CSV)
    schema['fields'][0]['constraints'] = {'unique': True}
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
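These tests reference fixtures defined elsewhere in the suite. Judging from the data and the inferred types in the examples here, SCHEMA_CSV and SCHEMA_MIN are presumably along these lines (a sketch, not the suite's actual definitions; the exact types and missingValues entries are assumptions):

SCHEMA_CSV = {
    'fields': [
        {'name': 'id', 'type': 'integer'},
        {'name': 'age', 'type': 'integer'},
        {'name': 'name', 'type': 'string'},
    ],
    'missingValues': [''],  # assumed default
}

SCHEMA_MIN = {
    'fields': [
        {'name': 'key'},
        {'name': 'value', 'type': 'integer'},
    ],
}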
Example #2
def test_unique_primary_key_violation():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
Example #3
def test_read_invalid_col_value_handled():
    # Test a schema-invalid column value in one row, handled
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    assert actual == expect
    assert isinstance(actual[0][1], FailedCast)
    assert len(errors) == 1
    expect_row_data = OrderedDict([('key', 'one'), ('value', 'not_an_int')])
    expect_error_data = OrderedDict([('value', 'not_an_int')])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='There are 1 cast errors',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_error_data)
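The _check_error helper used here and in later examples is defined elsewhere in the test module. Based on how it is called, it presumably unpacks the recorded tuple and asserts each component, roughly like this (a sketch under that assumption):

def _check_error(error, expect_exc_class, expect_exc_str, expect_row_number,
                 expect_row_data, expect_error_data):
    # error is the (exc, row_number, row_data, error_data) tuple
    # collected by the exc_handler in these tests
    exc, row_number, row_data, error_data = error
    assert isinstance(exc, expect_exc_class)
    assert expect_exc_str in str(exc)
    assert row_number == expect_row_number
    assert row_data == expect_row_data
    assert error_data == expect_error_data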
Example #4
def test_read_single_field_foreign_key_invalid_handled():
    relations = deepcopy(FK_RELATIONS)
    relations['people'][2]['firstname'] = 'Max'
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        ['1', {
            'firstname': 'Alex',
            'surname': 'Martin'
        }, 'Martin'],
        ['2', {
            'firstname': 'John',
            'surname': 'Dockins'
        }, 'Dockins'],
        ['3', {}, 'White'],
    ]
    actual = table.read(relations=relations, exc_handler=handler)
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert row_number == 4
    expect_keyed_row_data = OrderedDict(zip(FK_SOURCE[0], FK_SOURCE[3]))
    assert row_data == expect_keyed_row_data
    assert error_data == OrderedDict([('name', 'Walter')])
    assert isinstance(exc, exceptions.UnresolvedFKError)
    assert 'Foreign key' in str(exc)
Example #5
def test_read_with_headers_field_names_mismatch_handled():
    source = [
        ['id', 'bad', 'name'],
        [1, 39, 'Paul'],
        [2, 42, 'Peter'],
    ]
    table = Table(source, schema=SCHEMA_CSV)

    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = []
    actual = table.read(exc_handler=handler)
    assert actual == expect
    assert len(errors) == 2
    for i, error in enumerate(errors):
        expect_keyed_row_data = OrderedDict(zip(source[0], source[i + 1]))
        exc, row_number, row_data, error_data = error
        assert isinstance(exc, exceptions.CastError)
        assert row_number == i + 2  # actual row number including header line
        assert row_data == expect_keyed_row_data
        assert error_data == expect_keyed_row_data
        assert 'match schema field names' in str(exc)
Example #6
def test_read_unique_primary_key_violation_handled():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)

    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    actual = table.read(exc_handler=handler)
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert isinstance(exc, exceptions.UniqueKeyError)
    assert row_number == 3  # actual row number including header line
    assert row_data == OrderedDict([('id', 1), ('age', 36), ('name', 'Jane')])
    assert error_data == OrderedDict([('id', 1)])
    assert 'duplicates' in str(exc)
Example #7
def test_read_missing_cols_handled():
    source = [
        ['key', 'value'],
        ['one'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', None],
        ['two', 2],
    ]
    assert actual == expect
    expect_row_data = OrderedDict([('key', 'one'), ('value', None)])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
Example #8
def test_read_invalid_extra_cols_handled():
    # Test a schema-invalid extra column in one row
    source = [
        ['key', 'value'],
        ['one', 1, 'unexpected'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', 1],
        ['two', 2],
    ]
    assert actual == expect
    assert len(errors) == 1
    expect_row_data = OrderedDict([('key', 'one'), ('value', 1),
                                   ('tableschema-cast-error-extra-col-3',
                                    'unexpected')])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
Example #9
def test_schema_infer_missing_values():
    table = Table('data/data_infer_missing_values.csv')
    table.infer(missing_values=['-'])
    schema = deepcopy(SCHEMA_CSV)
    schema['missingValues'] = ['-']
    assert table.schema.descriptor == schema
    assert table.read() == [[1, 39, 'Paul'], [None, 25, 'Test'],
                            [2, 23, 'Jimmy'], [None, 25, 'Test'],
                            [3, 36, 'Jane'], [None, 25, 'Test'],
                            [4, 28, 'Judy']]
Example #10
def test_multiple_foreign_keys_same_field():
    schema = deepcopy(FK_SCHEMA)
    relations = deepcopy(FK_RELATIONS)
    relations['gender'] = [{
        'firstname': 'Alex',
        'gender': 'male/female'
    }, {
        'firstname': 'John',
        'gender': 'male'
    }, {
        'firstname': 'Walter',
        'gender': 'male'
    }, {
        'firstname': 'Alice',
        'gender': 'female'
    }]
    # the main resource now has two foreignKeys using the same 'name' field
    schema['foreignKeys'].append({
        'fields': 'name',
        'reference': {
            'resource': 'gender',
            'fields': 'firstname'
        },
    })
    table = Table(FK_SOURCE, schema=schema)
    keyed_rows = table.read(keyed=True, relations=relations)
    assert keyed_rows == [
        {
            'id': '1',
            'name': {
                'firstname': 'Alex',
                'surname': 'Martin',
                'gender': 'male/female'
            },
            'surname': 'Martin'
        },
        {
            'id': '2',
            'name': {
                'firstname': 'John',
                'surname': 'Dockins',
                'gender': 'male'
            },
            'surname': 'Dockins'
        },
        {
            'id': '3',
            'name': {
                'firstname': 'Walter',
                'surname': 'White',
                'gender': 'male'
            },
            'surname': 'White'
        },
    ]
Example #11
def test_read_storage(import_module):
    # Mocks
    import_module.return_value = Mock(Storage=Mock(return_value=Mock(
        describe=Mock(return_value=SCHEMA_MIN),
        iter=Mock(return_value=DATA_MIN[1:]),
    )))
    # Tests
    table = Table('table', backend='storage')
    expect = [['one', 1], ['two', 2]]
    actual = table.read()
    assert actual == expect
Example #12
def test_composite_primary_key_fails_unique_issue_194():
    source = [
        ['id1', 'id2'],
        ['a', '1'],
        ['a', '1'],
    ]
    schema = {
        'fields': [
            {
                'name': 'id1'
            },
            {
                'name': 'id2'
            },
        ],
        'primaryKey': ['id1', 'id2']
    }
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.CastError) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
Example #13
def task5(inputcsv, outputjson):
    # Requires: pip install tableschema
    from tableschema import Table

    table = Table(inputcsv)
    table.infer()

    # Mark 'N/A' as a missing value; the spec expects a list of strings
    table.schema.descriptor['missingValues'] = ['N/A']
    table.schema.commit()
    assert table.schema.valid, table.schema.errors

    table.read(keyed=True)
    table.schema.save(outputjson)
Example #14
def test_read_storage_passed_as_instance():
    # Mocks
    storage = Mock(
        describe=Mock(return_value=SCHEMA_MIN),
        iter=Mock(return_value=DATA_MIN[1:]),
        spec=Storage,
    )
    # Tests
    table = Table('table', storage=storage)
    table.infer()
    expect = [['one', 1], ['two', 2]]
    actual = table.read()
    assert actual == expect
Example #15
def test_table_sql(name, resource):

    # Storage
    engine = create_engine('sqlite:///')
    storage = Storage.connect('sql', engine=engine)

    # Save
    table = Table(resource['data'], schema=resource['schema'])
    table.save('table', storage=storage)

    # Load
    table = Table('table', schema=resource['schema'], storage=storage)
    assert table.read() == cast(resource)['data']
Example #16
def test_processors():
    # Processor
    def skip_under_30(erows):
        for row_number, headers, row in erows:
            krow = dict(zip(headers, row))
            if krow['age'] >= 30:
                yield (row_number, headers, row)

    # Create table
    table = Table('data/data_infer.csv', post_cast=[skip_under_30])
    table.infer()
    expect = [[1, 39, 'Paul'], [3, 36, 'Jane']]
    actual = table.read()
    assert actual == expect
Example #17
def test_read_invalid_col_value():
    # Test a schema-invalid column value in one row
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        table.read()
    assert 'There are 1 cast errors' in str(excinfo.value)
    error = excinfo.value.errors[0]
    assert isinstance(error, exceptions.CastError)
    assert ('Field "value" can\'t cast value "not_an_int" for type "integer"'
            in str(error))
Example #18
def test_read_invalid_col_value_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    actual = table.read(cast=False)
    # no actual casting, no cast errors
    assert actual == expect
Example #19
def test_single_field_foreign_key():
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    rows = table.read(relations=FK_RELATIONS)
    assert rows == [
        ['1', {
            'firstname': 'Alex',
            'surname': 'Martin'
        }, 'Martin'],
        ['2', {
            'firstname': 'John',
            'surname': 'Dockins'
        }, 'Dockins'],
        ['3', {
            'firstname': 'Walter',
            'surname': 'White'
        }, 'White'],
    ]
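The foreign-key fixtures used throughout these tests are defined elsewhere. Reconstructing them from the rows and expected lookups above, they are presumably close to the following (a sketch; field types and ordering are assumptions):

FK_SOURCE = [
    ['id', 'name', 'surname'],
    ['1', 'Alex', 'Martin'],
    ['2', 'John', 'Dockins'],
    ['3', 'Walter', 'White'],
]

FK_SCHEMA = {
    'fields': [
        {'name': 'id'},
        {'name': 'name'},
        {'name': 'surname'},
    ],
    # single-field foreign key on 'name', looked up in the 'people' relation
    'foreignKeys': [{
        'fields': 'name',
        'reference': {'resource': 'people', 'fields': 'firstname'},
    }],
}

FK_RELATIONS = {
    'people': [
        {'firstname': 'Alex', 'surname': 'Martin'},
        {'firstname': 'John', 'surname': 'Dockins'},
        {'firstname': 'Walter', 'surname': 'White'},
    ],
}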
Example #20
def test_composite_primary_key_issue_194():
    source = [
        ['id1', 'id2'],
        ['a', '1'],
        ['a', '2'],
    ]
    schema = {
        'fields': [
            {
                'name': 'id1'
            },
            {
                'name': 'id2'
            },
        ],
        'primaryKey': ['id1', 'id2']
    }
    table = Table(source, schema=schema)
    assert table.read() == source[1:]
Example #21
def test_processors():
    # Processor
    def skip_under_30(erows):
        for number, headers, row in erows:
            krow = dict(zip(headers, row))
            if krow['age'] >= 30:
                yield (number, headers, row)

    # Create table
    table = Table('data/data_infer.csv', post_cast=[skip_under_30])
    # Test stream
    table.stream.open()
    expect = [['1', '39', 'Paul'], ['2', '23', 'Jimmy'], ['3', '36', 'Jane'],
              ['4', '28', 'Judy']]
    actual = table.stream.read()
    assert actual == expect
    # Test table
    expect = [[1, 39, 'Paul'], [3, 36, 'Jane']]
    actual = table.read()
    assert actual == expect
Example #22
def test_multi_field_foreign_key():
    schema = deepcopy(FK_SCHEMA)
    schema['foreignKeys'][0]['fields'] = ['name', 'surname']
    schema['foreignKeys'][0]['reference']['fields'] = ['firstname', 'surname']
    table = Table(FK_SOURCE, schema=schema)
    keyed_rows = table.read(keyed=True, relations=FK_RELATIONS)
    assert keyed_rows == [
        {
            'id': '1',
            'name': {
                'firstname': 'Alex',
                'surname': 'Martin'
            },
            'surname': {
                'firstname': 'Alex',
                'surname': 'Martin'
            },
        },
        {
            'id': '2',
            'name': {
                'firstname': 'John',
                'surname': 'Dockins'
            },
            'surname': {
                'firstname': 'John',
                'surname': 'Dockins'
            },
        },
        {
            'id': '3',
            'name': {
                'firstname': 'Walter',
                'surname': 'White'
            },
            'surname': {
                'firstname': 'Walter',
                'surname': 'White'
            },
        },
    ]
Example #23
def test_read_invalid_col_value_handled_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(cast=False, exc_handler=handler)
    # no actual casting, no cast errors
    assert len(errors) == 0
    assert actual == expect
Example #24
def test_table_bigquery(name, resource):

    # Storage
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
    credentials = GoogleCredentials.get_application_default()
    service = build('bigquery', 'v2', credentials=credentials)
    project = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']
    dataset = 'resource'
    prefix = '%s_' % uuid.uuid4().hex
    storage = Storage.connect('bigquery',
        service=service, project=project, dataset=dataset, prefix=prefix)

    # Save
    table = Table(resource['data'], schema=resource['schema'])
    table.save('table', storage=storage)

    # Load
    table = Table('table', schema=resource['schema'], storage=storage)
    assert table.read() == cast(resource)['data']

    # Clean
    storage.delete()
Example #25
def test_multi_fk_single_field_foreign_keys():
    table = Table(MULTI_FK_SOURCE, schema=MULTI_FK_SCHEMA)
    actual = table.read(relations=MULTI_FK_RELATIONS)
    expect = [
        [
            '1',
            {
                'firstname': 'Alex',
                'middlename': 'F.'
            },
            {
                'surname': 'Martin',
                'title': 'Mrs'
            },
        ],
        [
            '2',
            {
                'firstname': 'John',
                'middlename': 'G.'
            },
            {
                'surname': 'Dockins',
                'title': 'Mr'
            },
        ],
        [
            '3',
            {
                'firstname': 'Walter',
                'middlename': 'H.'
            },
            {
                'surname': 'White',
                'title': 'Mr'
            },
        ],
    ]
    assert actual == expect
Example #26
            output_row['mean_top_70m_mm'] = input_row['mean_top_70m_mm']
            output_row['mean_alignment_70m_mm'] = input_row[
                'mean_alignment_70m_mm']
            output_row['twist_5m_mm'] = input_row['twist_5m_mm']
            output_row['gradient_deg'] = input_row['gradient_deg']

            # unvalidated extension point for non-standard geometry items
            output_row['extended_items_geometry'] = json.dumps(
                {'curvature_mm': input_row['x_curvature_mm']})

            output_row['aws_signal_strength_V'] = input_row[
                'aws_signal_strength_V']

            output_row['creating_adapter_version'] = ADAPTER_VERSION
            output_row['data_row_uid'] = uuid.uuid4()

            wr.writerow(output_row)

if args.schema is not None:
    # validate the output file against the schema
    # print(args.schema.name)
    tbl = Table(out_file.name, schema=args.schema.name)
    # print('checking...')
    try:
        tbl.read(limit=2000)
        print('OK')

    except exceptions.TableSchemaException as exception:
        for error in exception.errors:
            print(error)
Example #27
OUT_FILE = 'test_minimal_xircm_ugms.csv'

ADAPTER_VERSION = 'Vivacity-UGMS-Sample-v0.0.1'
UGMS_UNIT_ID = 'ugms-00001'
UGMS_UNIT_UID = 'e4c27259-ed1c-4e6e-be7c-2b06966b0689'

# create a minimal csv file output
with open(OUT_FILE, 'w', newline='') as f:
    wr = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    # header must list every field the data row carries, including ugms_unit_uid
    wr.writerow([
        'file_timestamp_utc', 'file_name', 'timestamp_recorded_utc',
        'ugms_unit_id', 'ugms_unit_uid', 'creating_adapter_version'
    ])

    data_row = [
        '2019-02-18T07:45:23Z', OUT_FILE,
        datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ'), UGMS_UNIT_ID,
        UGMS_UNIT_UID, ADAPTER_VERSION
    ]
    wr.writerow(data_row)

# validate it against the schema
tbl = Table(OUT_FILE, schema=SCHEMA)
try:
    tbl.read()
    print('OK')
except exceptions.CastError as exception:
    for error in exception.errors:
        print(error)
Example #28
def data(self):
    source = self.storage.read(self.index_name)
    t = Table(source, ignore_blank_headers=True)
    return (t.infer(), t.headers, t.read(keyed=False, limit=1000))
Example #29
# Data from WEB, schema from MEMORY
SOURCE = 'https://raw.githubusercontent.com/frictionlessdata/tableschema-py/master/data/data_infer.csv'
SCHEMA = {
    'fields': [{
        'name': 'id',
        'type': 'integer'
    }, {
        'name': 'age',
        'type': 'integer'
    }, {
        'name': 'name',
        'type': 'string'
    }]
}

# Open from WEB save to SQL database
table = Table(SOURCE, schema=SCHEMA)
table.save('articles', backend='sql', engine=db)

# Open from SQL save to DRIVE
table = Table('articles', backend='sql', engine=db)
table.schema.save('tmp/articles.json')
table.save('tmp/articles.csv')

# Open from DRIVE print to CONSOLE
table = Table('tmp/articles.csv', schema='tmp/articles.json')
print(table.read(keyed=True))
# Will print
# [{'id': 1, 'age': 39, 'name': 'Paul'}, {'id': 2, 'age': 23, 'name': 'Jimmy'}, {'id': 3, 'age': 36, 'name': 'Jane'}, {'id': 4, 'age': 28, 'name': 'Judy'}]
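The db object passed as engine above is not shown; it is presumably a SQLAlchemy engine created beforehand, e.g. (a sketch using an in-memory SQLite database, as in Example #15):

from sqlalchemy import create_engine

# any SQLAlchemy engine works; in-memory SQLite is the simplest to try
db = create_engine('sqlite:///')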
Example #30
def directoryUpload():
	# Define a DataSet Schema
	dsr = DataSetRequest()
	dsr.name = eachCSV  # PUT YOUR DATASET NAME LOGIC HERE
	dsr.description = 'Uploaded CSV'  # DATASET DESCRIPTION
	dsr.schema = Schema(jeff)

	# Create a DataSet with the given Schema
	dataset = datasets.create(dsr)
	domo.logger.info("Created DataSet " + dataset['id'])

	# Get a DataSet's metadata
	retrieved_dataset = datasets.get(dataset['id'])
	domo.logger.info("Retrieved DataSet " + retrieved_dataset['id'])

	# List DataSets
	dataset_list = list(datasets.list(sort=Sorting.NAME))
	domo.logger.info("Retrieved a list containing {} DataSet(s)".format(
		len(dataset_list)))
	csv_file_path = allFiles
	datasets.data_import_from_file(dataset['id'], csv_file_path)
	domo.logger.info("Uploaded data from a file to DataSet {}".format(
		dataset['id']))


for eachCSV in os.listdir(inputdir):
	allFiles = os.path.abspath(os.path.join(inputdir, eachCSV))  # resolve against inputdir, not the cwd
	table = Table(allFiles)
	table.infer()
	jeff = table.schema.descriptor
	table.read(keyed=True)
	directoryUpload()
Example #31
def test_read_integrity_hash_error():
    table = Table('data/data.csv')
    with pytest.raises(exceptions.IntegrityError) as excinfo:
        table.read(integrity={'hash': HASH + 'a'})
    assert HASH in str(excinfo.value)
Example #32
def test_read_integrity_hash():
    table = Table('data/data.csv')
    table.read(integrity={'hash': HASH})
    assert True
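HASH is a fixture holding the precomputed hash of data/data.csv; read() compares it against the hash of the bytes actually streamed. The fixture could be produced along these lines (a sketch; the default algorithm the underlying tabulator stream uses has varied between md5 and sha256 across versions, so match it to your installed version):

import hashlib

# assumption: sha256; older tabulator versions defaulted to md5
with open('data/data.csv', 'rb') as f:
    HASH = hashlib.sha256(f.read()).hexdigest()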