def test_unique_constraint_violation():
    schema = deepcopy(SCHEMA_CSV)
    schema['fields'][0]['constraints'] = {'unique': True}
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
def test_unique_primary_key_violation():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
def test_read_invalid_col_value_handled():
    # Test a schema-invalid column value in one row, handled
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    assert actual == expect
    assert isinstance(actual[0][1], FailedCast)
    assert len(errors) == 1
    expect_row_data = OrderedDict([('key', 'one'), ('value', 'not_an_int')])
    expect_error_data = OrderedDict([('value', 'not_an_int')])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='There are 1 cast errors',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_error_data)
def test_read_single_field_foreign_key_invalid_handled():
    relations = deepcopy(FK_RELATIONS)
    relations['people'][2]['firstname'] = 'Max'
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        ['1', {'firstname': 'Alex', 'surname': 'Martin'}, 'Martin'],
        ['2', {'firstname': 'John', 'surname': 'Dockins'}, 'Dockins'],
        ['3', {}, 'White'],
    ]
    actual = table.read(relations=relations, exc_handler=handler)
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert row_number == 4
    expect_keyed_row_data = OrderedDict(zip(FK_SOURCE[0], FK_SOURCE[3]))
    assert row_data == expect_keyed_row_data
    assert error_data == OrderedDict([('name', 'Walter')])
    assert isinstance(exc, exceptions.UnresolvedFKError)
    assert 'Foreign key' in str(exc)
def test_read_with_headers_field_names_mismatch_handled():
    source = [
        ['id', 'bad', 'name'],
        [1, 39, 'Paul'],
        [2, 42, 'Peter'],
    ]
    table = Table(source, schema=SCHEMA_CSV)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = []
    actual = table.read(exc_handler=handler)
    assert actual == expect
    assert len(errors) == 2
    for i, error in enumerate(errors):
        expect_keyed_row_data = OrderedDict(zip(source[0], source[i + 1]))
        exc, row_number, row_data, error_data = error
        assert isinstance(exc, exceptions.CastError)
        assert row_number == i + 2  # actual row number including header line
        assert row_data == expect_keyed_row_data
        assert error_data == expect_keyed_row_data
        assert 'match schema field names' in str(exc)
def test_read_unique_primary_key_violation_handled():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    actual = table.read(exc_handler=handler)
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert isinstance(exc, exceptions.UniqueKeyError)
    assert row_number == 3  # actual row number including header line
    assert row_data == OrderedDict([('id', 1), ('age', 36), ('name', 'Jane')])
    assert error_data == OrderedDict([('id', 1)])
    assert 'duplicates' in str(exc)
def test_read_missing_cols_handled():
    source = [
        ['key', 'value'],
        ['one'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', None],
        ['two', 2],
    ]
    assert actual == expect
    expect_row_data = OrderedDict([('key', 'one'), ('value', None)])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
def test_read_invalid_extra_cols_handled():
    # Test a schema-invalid extra column in one row
    source = [
        ['key', 'value'],
        ['one', 1, 'unexpected'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = table.read(exc_handler=handler)
    expect = [
        ['one', 1],
        ['two', 2],
    ]
    assert actual == expect
    assert len(errors) == 1
    expect_row_data = OrderedDict([
        ('key', 'one'),
        ('value', 1),
        ('tableschema-cast-error-extra-col-3', 'unexpected'),
    ])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
def test_schema_infer_missing_values():
    table = Table('data/data_infer_missing_values.csv')
    table.infer(missing_values=['-'])
    schema = deepcopy(SCHEMA_CSV)
    schema['missingValues'] = ['-']
    assert table.schema.descriptor == schema
    assert table.read() == [
        [1, 39, 'Paul'],
        [None, 25, 'Test'],
        [2, 23, 'Jimmy'],
        [None, 25, 'Test'],
        [3, 36, 'Jane'],
        [None, 25, 'Test'],
        [4, 28, 'Judy'],
    ]
def test_multiple_foreign_keys_same_field():
    schema = deepcopy(FK_SCHEMA)
    relations = deepcopy(FK_RELATIONS)
    relations['gender'] = [
        {'firstname': 'Alex', 'gender': 'male/female'},
        {'firstname': 'John', 'gender': 'male'},
        {'firstname': 'Walter', 'gender': 'male'},
        {'firstname': 'Alice', 'gender': 'female'},
    ]
    # the main resource now has two foreignKeys using the same 'name' field
    schema['foreignKeys'].append({
        'fields': 'name',
        'reference': {'resource': 'gender', 'fields': 'firstname'},
    })
    table = Table(FK_SOURCE, schema=schema)
    keyed_rows = table.read(keyed=True, relations=relations)
    assert keyed_rows == [
        {
            'id': '1',
            'name': {'firstname': 'Alex', 'surname': 'Martin', 'gender': 'male/female'},
            'surname': 'Martin',
        },
        {
            'id': '2',
            'name': {'firstname': 'John', 'surname': 'Dockins', 'gender': 'male'},
            'surname': 'Dockins',
        },
        {
            'id': '3',
            'name': {'firstname': 'Walter', 'surname': 'White', 'gender': 'male'},
            'surname': 'White',
        },
    ]
def test_read_storage(import_module):
    # Mocks
    import_module.return_value = Mock(Storage=Mock(return_value=Mock(
        describe=Mock(return_value=SCHEMA_MIN),
        iter=Mock(return_value=DATA_MIN[1:]),
    )))
    # Tests
    table = Table('table', backend='storage')
    expect = [['one', 1], ['two', 2]]
    actual = table.read()
    assert actual == expect
def test_composite_primary_key_fails_unique_issue_194():
    source = [
        ['id1', 'id2'],
        ['a', '1'],
        ['a', '1'],
    ]
    schema = {
        'fields': [
            {'name': 'id1'},
            {'name': 'id2'},
        ],
        'primaryKey': ['id1', 'id2'],
    }
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.CastError) as excinfo:
        table.read()
    assert 'duplicates' in str(excinfo.value)
def task5(inputcsv, outputjson):
    # Requires the tableschema package (install it first, e.g. `pip install tableschema`)
    from tableschema import Table

    table = Table(inputcsv)
    table.infer()
    # `missingValues` must be a list of strings per the Table Schema spec
    table.schema.descriptor['missingValues'] = ['N/A']
    table.schema.commit()
    assert table.schema.valid, table.schema.errors
    table.read(keyed=True)
    table.schema.save(outputjson)
def test_read_storage_passed_as_instance():
    # Mocks
    storage = Mock(
        describe=Mock(return_value=SCHEMA_MIN),
        iter=Mock(return_value=DATA_MIN[1:]),
        spec=Storage,
    )
    # Tests
    table = Table('table', storage=storage)
    table.infer()
    expect = [['one', 1], ['two', 2]]
    actual = table.read()
    assert actual == expect
def test_table_sql(name, resource):
    # Storage
    engine = create_engine('sqlite:///')
    storage = Storage.connect('sql', engine=engine)
    # Save
    table = Table(resource['data'], schema=resource['schema'])
    table.save('table', storage=storage)
    # Load
    table = Table('table', schema=resource['schema'], storage=storage)
    assert table.read() == cast(resource)['data']
def test_processors():
    # Processor
    def skip_under_30(erows):
        for row_number, headers, row in erows:
            krow = dict(zip(headers, row))
            if krow['age'] >= 30:
                yield (row_number, headers, row)

    # Create table
    table = Table('data/data_infer.csv', post_cast=[skip_under_30])
    table.infer()
    expect = [[1, 39, 'Paul'], [3, 36, 'Jane']]
    actual = table.read()
    assert actual == expect
def test_read_invalid_col_value():
    # Test a schema-invalid column value in one row
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        table.read()
    assert 'There are 1 cast errors' in str(excinfo.value)
    error = excinfo.value.errors[0]
    assert isinstance(error, exceptions.CastError)
    assert ('Field "value" can\'t cast value "not_an_int" for type "integer"'
            in str(error))
def test_read_invalid_col_value_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    # no actual casting, no cast errors
    actual = table.read(cast=False)
    assert actual == expect
def test_single_field_foreign_key():
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    rows = table.read(relations=FK_RELATIONS)
    assert rows == [
        ['1', {'firstname': 'Alex', 'surname': 'Martin'}, 'Martin'],
        ['2', {'firstname': 'John', 'surname': 'Dockins'}, 'Dockins'],
        ['3', {'firstname': 'Walter', 'surname': 'White'}, 'White'],
    ]
def test_composite_primary_key_issue_194():
    source = [
        ['id1', 'id2'],
        ['a', '1'],
        ['a', '2'],
    ]
    schema = {
        'fields': [
            {'name': 'id1'},
            {'name': 'id2'},
        ],
        'primaryKey': ['id1', 'id2'],
    }
    table = Table(source, schema=schema)
    assert table.read() == source[1:]
def test_processors():
    # Processor
    def skip_under_30(erows):
        for number, headers, row in erows:
            krow = dict(zip(headers, row))
            if krow['age'] >= 30:
                yield (number, headers, row)

    # Create table
    table = Table('data/data_infer.csv', post_cast=[skip_under_30])

    # Test stream
    table.stream.open()
    expect = [
        ['1', '39', 'Paul'],
        ['2', '23', 'Jimmy'],
        ['3', '36', 'Jane'],
        ['4', '28', 'Judy'],
    ]
    actual = table.stream.read()
    assert actual == expect

    # Test table
    expect = [[1, 39, 'Paul'], [3, 36, 'Jane']]
    actual = table.read()
    assert actual == expect
def test_multi_field_foreign_key():
    schema = deepcopy(FK_SCHEMA)
    schema['foreignKeys'][0]['fields'] = ['name', 'surname']
    schema['foreignKeys'][0]['reference']['fields'] = ['firstname', 'surname']
    table = Table(FK_SOURCE, schema=schema)
    keyed_rows = table.read(keyed=True, relations=FK_RELATIONS)
    assert keyed_rows == [
        {
            'id': '1',
            'name': {'firstname': 'Alex', 'surname': 'Martin'},
            'surname': {'firstname': 'Alex', 'surname': 'Martin'},
        },
        {
            'id': '2',
            'name': {'firstname': 'John', 'surname': 'Dockins'},
            'surname': {'firstname': 'John', 'surname': 'Dockins'},
        },
        {
            'id': '3',
            'name': {'firstname': 'Walter', 'surname': 'White'},
            'surname': {'firstname': 'Walter', 'surname': 'White'},
        },
    ]
def test_read_invalid_col_value_handled_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    # no actual casting, no cast errors
    actual = table.read(cast=False, exc_handler=handler)
    assert len(errors) == 0
    assert actual == expect
def test_table_bigquery(name, resource):
    # Storage
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
    credentials = GoogleCredentials.get_application_default()
    service = build('bigquery', 'v2', credentials=credentials)
    project = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']
    dataset = 'resource'
    prefix = '%s_' % uuid.uuid4().hex
    storage = Storage.connect('bigquery', service=service, project=project,
                              dataset=dataset, prefix=prefix)
    # Save
    table = Table(resource['data'], schema=resource['schema'])
    table.save('table', storage=storage)
    # Load
    table = Table('table', schema=resource['schema'], storage=storage)
    assert table.read() == cast(resource)['data']
    # Clean
    storage.delete()
def test_multi_fk_single_field_foreign_keys():
    table = Table(MULTI_FK_SOURCE, schema=MULTI_FK_SCHEMA)
    actual = table.read(relations=MULTI_FK_RELATIONS)
    expect = [
        ['1',
         {'firstname': 'Alex', 'middlename': 'F.'},
         {'surname': 'Martin', 'title': 'Mrs'}],
        ['2',
         {'firstname': 'John', 'middlename': 'G.'},
         {'surname': 'Dockins', 'title': 'Mr'}],
        ['3',
         {'firstname': 'Walter', 'middlename': 'H.'},
         {'surname': 'White', 'title': 'Mr'}],
    ]
    assert actual == expect
        output_row['mean_top_70m_mm'] = input_row['mean_top_70m_mm']
        output_row['mean_alignment_70m_mm'] = input_row['mean_alignment_70m_mm']
        output_row['twist_5m_mm'] = input_row['twist_5m_mm']
        output_row['gradient_deg'] = input_row['gradient_deg']
        # unvalidated extension point for non-standard geometry items
        output_row['extended_items_geometry'] = json.dumps(
            {'curvature_mm': input_row['x_curvature_mm']})
        output_row['aws_signal_strength_V'] = input_row['aws_signal_strength_V']
        output_row['creating_adapter_version'] = ADAPTER_VERSION
        output_row['data_row_uid'] = uuid.uuid4()
        wr.writerow(output_row)

if args.schema is not None:
    # validate the output file against the schema
    tbl = Table(out_file.name, schema=args.schema.name)
    try:
        tbl.read(limit=2000)
        print('OK')
    except exceptions.TableSchemaException as exception:
        for error in exception.errors:
            print(error)
OUT_FILE = 'test_minimal_xircm_ugms.csv'
ADAPTER_VERSION = 'Vivacity-UGMS-Sample-v0.0.1'
UGMS_UNIT_ID = 'ugms-00001'
UGMS_UNIT_UID = 'e4c27259-ed1c-4e6e-be7c-2b06966b0689'

# create a minimal csv file output
with open(OUT_FILE, 'w') as f:
    wr = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    f.write('file_timestamp_utc,file_name,timestamp_recorded_utc,'
            'ugms_unit_id,creating_adapter_version\n')
    data_row = [
        '2019-02-18T07:45:23Z',
        OUT_FILE,
        datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ'),
        UGMS_UNIT_ID,
        UGMS_UNIT_UID,
        ADAPTER_VERSION,
    ]
    wr.writerow(data_row)

# validate it against the schema
tbl = Table(OUT_FILE, schema=SCHEMA)
try:
    tbl.read()
    print('OK')
except exceptions.CastError as exception:
    for error in exception.errors:
        print(error)
except Exception:
    pass
def data(self):
    source = self.storage.read(self.index_name)
    t = Table(source, ignore_blank_headers=True)
    return (t.infer(), t.headers, t.read(keyed=False, limit=1000))
# Data from WEB, schema from MEMORY
SOURCE = 'https://raw.githubusercontent.com/frictionlessdata/tableschema-py/master/data/data_infer.csv'
SCHEMA = {
    'fields': [
        {'name': 'id', 'type': 'integer'},
        {'name': 'age', 'type': 'integer'},
        {'name': 'name', 'type': 'string'},
    ]
}

# Open from WEB, save to SQL database
table = Table(SOURCE, schema=SCHEMA)
table.save('articles', backend='sql', engine=db)

# Open from SQL, save to DRIVE
table = Table('articles', backend='sql', engine=db)
table.schema.save('tmp/articles.json')
table.save('tmp/articles.csv')

# Open from DRIVE, print to CONSOLE
table = Table('tmp/articles.csv', schema='tmp/articles.json')
print(table.read(keyed=True))
# Will print
# [{'id': 1, 'age': 39, 'name': 'Paul'},
#  {'id': 2, 'age': 23, 'name': 'Jimmy'},
#  {'id': 3, 'age': 36, 'name': 'Jane'},
#  {'id': 4, 'age': 28, 'name': 'Judy'}]
def directoryUpload():
    # Define a DataSet Schema
    dsr = DataSetRequest()
    dsr.name = 'dataset-name'  # placeholder: PUT YOUR DATASET NAME LOGIC HERE
    dsr.description = 'dataset description'  # placeholder: DATASET DESCRIPTION
    dsr.schema = Schema(jeff)

    # Create a DataSet with the given Schema
    dataset = datasets.create(dsr)
    domo.logger.info("Created DataSet " + dataset['id'])

    # Get a DataSet's metadata
    retrieved_dataset = datasets.get(dataset['id'])
    domo.logger.info("Retrieved DataSet " + retrieved_dataset['id'])

    # List DataSets
    dataset_list = list(datasets.list(sort=Sorting.NAME))
    domo.logger.info("Retrieved a list containing {} DataSet(s)".format(
        len(dataset_list)))

    # Upload the CSV data into the new DataSet
    csv_file_path = allFiles
    datasets.data_import_from_file(dataset['id'], csv_file_path)
    domo.logger.info("Uploaded data from a file to DataSet {}".format(
        dataset['id']))


# Infer a schema for each CSV in the input directory, then upload it
for eachCSV in os.listdir(inputdir):
    allFiles = str(os.path.abspath(eachCSV))
    table = Table(allFiles)
    table.infer()
    jeff = table.schema.descriptor
    table.read(keyed=True)
    directoryUpload()
def test_read_integrity_hash_error():
    table = Table('data/data.csv')
    with pytest.raises(exceptions.IntegrityError) as excinfo:
        table.read(integrity={'hash': HASH + 'a'})
    assert HASH in str(excinfo.value)
def test_read_integrity_hash():
    table = Table('data/data.csv')
    table.read(integrity={'hash': HASH})
    assert True