Example #1
def test_iter_invalid_extra_cols_handled():
    # Test a schema-invalid extra column in one row
    source = [
        ['key', 'value'],
        ['one', 1, 'unexpected'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = list(table.iter(exc_handler=handler))
    expect = [
        ['one', 1],
        ['two', 2],
    ]
    assert actual == expect
    assert len(errors) == 1
    expect_row_data = OrderedDict([('key', 'one'), ('value', 1),
                                   ('tableschema-cast-error-extra-col-3',
                                    'unexpected')])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
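The errors list plus handler pair recurs in every handled test below; a minimal sketch of a factory that packages that boilerplate (a hypothetical helper, not part of tableschema):

# Hypothetical helper: returns the shared (errors, handler) pair used by
# the exc_handler tests, so each test does not redefine it inline.
def make_error_collector():
    errors = []

    def handler(exc, row_number, row_data, error_data):
        # Collect instead of raising, switching off fail-fast behaviour.
        errors.append((exc, row_number, row_data, error_data))

    return errors, handler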
Example #2
def test_iter_with_headers_field_names_mismatch_stream_closed():
    table = Table('data/data_headers_field_names_mismatch.csv',
                  schema=SCHEMA_CSV)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    assert table._Table__stream.closed
Example #3
def test_iter_unique_primary_key_violation_handled():
    # Test exception handler option to switch off fail-fast data validation
    # behaviour
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)

    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    actual = list(table.iter(exc_handler=handler))
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert isinstance(exc, exceptions.UniqueKeyError)
    assert row_number == 3  # actual row number including header line
    assert row_data == OrderedDict([('id', 1), ('age', 36), ('name', 'Jane')])
    assert error_data == OrderedDict([('id', 1)])
    assert 'duplicates' in str(exc)
Example #4
def test_iter_missing_cols_handled():
    source = [
        ['key', 'value'],
        [
            'one',
        ],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = list(table.iter(exc_handler=handler))
    expect = [
        ['one', None],
        ['two', 2],
    ]
    assert actual == expect
    expect_row_data = OrderedDict([('key', 'one'), ('value', None)])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='Row length',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_row_data)
Example #5
def test_iter_with_headers_field_names_mismatch_handled():
    source = [
        ['id', 'bad', 'name'],
        [1, 39, 'Paul'],
        [2, 42, 'Peter'],
    ]
    table = Table(source, schema=SCHEMA_CSV)

    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = []
    actual = list(table.iter(exc_handler=handler))
    assert actual == expect
    assert len(errors) == 2
    for i, error in enumerate(errors):
        expect_keyed_row_data = OrderedDict(zip(source[0], source[i + 1]))
        exc, row_number, row_data, error_data = error
        assert isinstance(exc, exceptions.CastError)
        assert row_number == i + 2  # actual row number including header line
        assert row_data == expect_keyed_row_data
        assert error_data == expect_keyed_row_data
        assert 'match schema field names' in str(exc)
Example #6
def test_iter_single_field_foreign_key_invalid_handled():
    relations = deepcopy(FK_RELATIONS)
    relations['people'][2]['firstname'] = 'Max'
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    expect = [
        ['1', {
            'firstname': 'Alex',
            'surname': 'Martin'
        }, 'Martin'],
        ['2', {
            'firstname': 'John',
            'surname': 'Dockins'
        }, 'Dockins'],
        ['3', {}, 'White'],
    ]
    actual = list(table.iter(relations=relations, exc_handler=handler))
    assert actual == expect
    assert len(errors) == 1
    exc, row_number, row_data, error_data = errors[0]
    assert row_number == 4
    expect_keyed_row_data = OrderedDict(zip(FK_SOURCE[0], FK_SOURCE[3]))
    assert row_data == expect_keyed_row_data
    assert error_data == OrderedDict([('name', 'Walter')])
    assert isinstance(exc, exceptions.UnresolvedFKError)
    assert 'Foreign key' in str(exc)
Example #7
def test_iter_invalid_col_value_handled():
    # Test a schema-invalid column value in one row, handled
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = list(table.iter(exc_handler=handler))
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    assert actual == expect
    assert isinstance(actual[0][1], FailedCast)
    assert len(errors) == 1
    expect_row_data = OrderedDict([('key', 'one'), ('value', 'not_an_int')])
    expect_error_data = OrderedDict([('value', 'not_an_int')])
    _check_error(errors[0],
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='There are 1 cast errors',
                 expect_row_number=2,
                 expect_row_data=expect_row_data,
                 expect_error_data=expect_error_data)
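The isinstance check above relies on tableschema's FailedCast wrapper, which iter() substitutes for values it could not cast when a non-raising exc_handler is installed. A sketch of scanning rows for those markers (the top-level import path is an assumption; FailedCast is defined in tableschema.table):

# Sketch: find FailedCast markers left in cast rows by a non-raising
# exc_handler; assumes `table` and `handler` as defined in the test above.
from tableschema import FailedCast  # import path may vary by version

for row in table.iter(exc_handler=handler):
    bad = [value for value in row if isinstance(value, FailedCast)]
    if bad:
        print('uncastable values:', bad)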
Example #8
def test_iter_invalid_extra_cols_stream_closed():
    table = Table('data/data_invalid_extra_cols.csv', schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    # Circumvent name mangling to get at (overly private ;-))
    # __stream attribute
    assert table._Table__stream.closed
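As the comment notes, the double-underscore attribute is reachable only through Python's name-mangled form; a standalone illustration, independent of tableschema:

# Name mangling in a nutshell: inside a class body, __name is rewritten
# to _ClassName__name, which is why table._Table__stream works above.
class Demo:
    def __init__(self):
        self.__stream = 'private-ish'

d = Demo()
print(d._Demo__stream)  # prints 'private-ish'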
Example #9
def test_iter_unique_primary_key_violation_stream_closed():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    table = Table('data/data_unique_primary_key_violation.csv', schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        for _ in table.iter():
            pass
    assert table._Table__stream.closed
Example #10
def test_iter_single_field_foreign_key_invalid():
    relations = deepcopy(FK_RELATIONS)
    relations['people'][2]['firstname'] = 'Max'
    table = Table(FK_SOURCE, schema=FK_SCHEMA)
    with pytest.raises(exceptions.RelationError) as excinfo:
        for _ in table.iter(relations=relations):
            pass
    assert isinstance(excinfo.value, exceptions.UnresolvedFKError)
    assert 'Foreign key' in str(excinfo.value)
Example #11
def test_iter_with_headers_field_names_mismatch():
    source = [
        ['id', 'bad', 'name'],
        [1, 39, 'Paul'],
    ]
    table = Table(source, schema=SCHEMA_CSV)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    assert 'match schema field names' in str(excinfo.value)
Example #12
def test_iter_invalid_extra_cols():
    source = [
        ['key', 'value'],
        ['one', 1, 'unexpected'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    assert 'Row length' in str(excinfo.value)
Example #13
def reindex(self):
    file_path, params = self.get_file_info()
    t = Table(file_path, ignore_blank_headers=True, **params)
    schema = t.infer()
    data = t.iter(keyed=True)
    self.storage.create(self.index_name,
                        schema,
                        reindex=True,
                        always_recreate=True)
    # storage.write returns a generator; consume it once so the rows are
    # actually written (a second write() call would see an exhausted
    # data iterator).
    for res in self.storage.write(self.index_name, data):
        pass
Example #14
def test_iter_unique_primary_key_violation():
    schema = deepcopy(SCHEMA_CSV)
    schema['primaryKey'] = 'id'
    source = [
        ['id', 'age', 'name'],
        [1, 39, 'Paul'],
        [1, 36, 'Jane'],
    ]
    table = Table(source, schema=schema)
    with pytest.raises(exceptions.TableSchemaException) as excinfo:
        for _ in table.iter():
            pass
    assert isinstance(excinfo.value, exceptions.UniqueKeyError)
    assert 'duplicates' in str(excinfo.value)
Example #15
def test_iter_invalid_col_value_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    actual = list(table.iter(cast=False))
    # no actual casting, no cast errors
    assert actual == expect
Example #16
def test_iter_invalid_col_value():
    # Test a schema-invalid column value in one row
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    assert 'There are 1 cast errors' in str(excinfo.value)
    error = excinfo.value.errors[0]
    assert isinstance(error, exceptions.CastError)
    assert ('Field "value" can\'t cast value "not_an_int" for type "integer"'
            in str(error))
Example #17
def load_data_from_local_csv(csv_file=ASSET_DATA_FILE):
    table = Table(csv_file, schema=SCHEMA_FILE)

    try:
        valid = validate(table.schema.descriptor)
        if valid:
            for keyed_row in table.iter(keyed=True):
                yield keyed_row
    except exceptions.ValidationError as exception:
        for error in exception.errors:
            print(error)
    except exceptions.CastError as exception:
        if not exception.errors:
            print(exception)

        for error in exception.errors:
            write_skipped_assets(error, [])
Example #18
def test_iter_invalid_col_value_handled_no_cast():
    # Test a schema-invalid column value in one row, without value-casting
    source = [
        ['key', 'value'],
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    expect = [
        ['one', 'not_an_int'],
        ['two', 2],
    ]
    table = Table(source, schema=SCHEMA_MIN)
    errors = []

    def handler(exc, row_number, row_data, error_data):
        errors.append((exc, row_number, row_data, error_data))

    actual = list(table.iter(cast=False, exc_handler=handler))
    # no actual casting, no cast errors
    assert len(errors) == 0
    assert actual == expect
Example #19
class CsvReader(DataReader):
    '''Wrapper class around the tableschema library.'''

    def __init__(self, filePath):
        self._filePath = filePath
        self._table = Table(filePath)

    def close(self):
        pass

    def getColumns(self):
        if not self._table.headers:
            self._table.infer(
                settings.SCHEMA_INFER_LIMIT)
        # clean the headers
        result = []
        for header in self._table.headers:
            tmpheader = header.lower()
            tmpheader = tmpheader.replace(' ', '_').replace('-', '_')
            r = re.search(r'\w+', tmpheader)
            if r:
                result.append(r.group())
            else:
                raise InvalidCsvHeaderException(
                    '%s is not a valid header' % header)
        return result

    def requery(self):
        self._table = Table(self._filePath)

    def getRow(self):
        i = self._table.iter(cast=True)
        return next(i)

    def getRowsList(self):
        self._table.infer()
        self._table.schema.descriptor[
            'missingValues'] = settings.SCHEMA_CSV_MISSING_VALUES
        self._table.schema.commit()
        i = self._table.iter(cast=True)

        return list(map(tuple, i))

    def getSchema(self):
        '''
        Get the data schema inferred from records;
            the number of records is defined by SCHEMA_INFER_LIMIT,
            the confidence threshold is defined by SCHEMA_INFER_CONFIDENCE.
        '''
        t = Table(self._filePath)
        t.infer()
        t.schema.descriptor[
            'missingValues'] = settings.SCHEMA_CSV_MISSING_VALUES
        t.schema.commit()
        return t.infer(
            settings.SCHEMA_INFER_LIMIT,
            confidence=settings.SCHEMA_INFER_CONFIDENCE)

    def __repr__(self):
        return (settings.SOURCE_TYPE_CSV_PREFIX +
                self._filePath)
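A quick check of the header normalization performed by getColumns() above (the sample header is made up for illustration):

import re

header = 'Sampling Depth-Max'  # hypothetical header for illustration
tmpheader = header.lower().replace(' ', '_').replace('-', '_')
match = re.search(r'\w+', tmpheader)
print(match.group())  # sampling_depth_max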
Example #20
from tableschema import Table

# Data from WEB, schema from MEMORY
SOURCE = 'https://raw.githubusercontent.com/frictionlessdata/tableschema-py/master/data/data_infer.csv'
SCHEMA = {
    'fields': [{
        'name': 'id',
        'type': 'integer'
    }, {
        'name': 'age',
        'type': 'integer'
    }, {
        'name': 'name',
        'type': 'string'
    }]
}

# If schema is not passed it will be inferred
table = Table(SOURCE, schema=SCHEMA)
rows = table.iter()
while True:
    try:
        print(next(rows))
    except StopIteration:
        break
    except Exception as exception:
        print(exception)
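As the comment in this example says, the schema argument is optional; a sketch of relying on inference instead (same SOURCE as above):

# Sketch: omit schema and let Table infer it from sampled rows.
table = Table(SOURCE)
table.infer()                   # populates table.schema
print(table.schema.descriptor)  # inferred field names and types
for row in table.iter():
    print(row)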
Example #21
from tableschema import Table

# Raw strings keep the Windows backslashes from being read as escapes
fileCSV = r'D:\dct\enem-microdados\DADOS_ENEM_2009.csv'
fileJSON = r'D:\dct\enem-microdados\DADOS_ENEM_2009.json'


# Create table
table = Table(fileCSV, schema=fileJSON)

# Print schema descriptor
print(table.schema.descriptor)
print("\n")
# Print cast rows in a dict form
for keyed_row in table.iter(keyed=True):
    print(keyed_row)
    print("\n")
Example #22
from tableschema import Table

# Data from WEB, schema from MEMORY
SOURCE = 'https://raw.githubusercontent.com/frictionlessdata/tableschema-py/master/data/data_infer.csv'
SCHEMA = {'fields': [{'name': 'id', 'type': 'integer'}, {'name': 'age', 'type': 'integer'}, {'name': 'name', 'type': 'string'}] }

# If schema is not passed it will be inferred
table = Table(SOURCE, schema=SCHEMA)
rows = table.iter()
while True:
    try:
        print(next(rows))
    except StopIteration:
        break
    except Exception as exception:
        print(exception)
Example #23
    def loadPeptides(self, datapackage, datasetId, row_start=0, row_stop=None):
        """Load Peptide Data"""

        # Get the Ontology Version
        ontology_version = oceanproteinportal.datapackage.getDatapackageOntologyVersion(datapackage)

        peptideResource = oceanproteinportal.datapackage.findResource(datapackage=datapackage, resource_type='peptide')
        if peptideResource is None:
            return

        datasetCruises = datapackageCruises(datapackage)
        table = Table( peptideResource.descriptor['path'], schema=peptideResource.descriptor['schema'] )

        if (0 < row_start):
            logging.info("Skipping rows until # %s" % (row_start))

        row_count = 0
        data = None
        PEPTIDE_FIELDS = getOntologyMappingFields(type='peptide', ontology_version=ontology_version)
        for keyed_row in table.iter(keyed=True):
            row_count += 1
            if row_count < row_start:
                logging.debug("Skipping Row # %s" % (row_count))
                continue
            if row_stop is not None and row_count > row_stop:
                logging.info("Stopping at Row# %s" % (row_count))
                break
            logging.debug("Reading Row# %s" % (row_count))
            data = readKeyedTableRow(keyed_row=keyed_row, elastic_mappings=PEPTIDE_FIELDS)
            primaryKey = datasetId + data.get('sampleName') + data.get('proteinId') + data.get('peptideSequence')
            data['guid'] = generateGuid( datapackage.descriptor['name'] + '_peptide_' + primaryKey )

            filterSize = {}
            minimumFilterSize = data.get('filterSize:minimum', None)
            maximumFilterSize = data.get('filterSize:maximum', None)
            filterSizeLabel = ''
            if minimumFilterSize is not None:
                del data['filterSize:minimum']
                filterSize['minimum'] = minimumFilterSize
                filterSizeLabel += str(minimumFilterSize)
            if maximumFilterSize is not None:
                del data['filterSize:maximum']
                filterSize['maximum'] = maximumFilterSize
                if filterSizeLabel != '':
                    filterSizeLabel += ' - ' + str(maximumFilterSize)
                else:
                    filterSizeLabel += str(maximumFilterSize)
            filterSize['label'] = filterSizeLabel
            data['filterSize'] = filterSize

            if ('coordinate:lat' in data and 'coordinate:lon' in data):
                data['coordinate'] = {
                  'lat': data['coordinate:lat'],
                  'lon': data['coordinate:lon']
                }
                del data['coordinate:lat']
                del data['coordinate:lon']

            # load in ES
            res = self.load(data=data, type='peptide', id=data['guid'])
            logging.info(res['result'])
Example #24
    def loadProteins(self, datapackage, datasetId, row_start=0, row_stop=None):
        """Load Protein Data

        Tabular data, so proteins may be repeated for different samples, stations, depths, etc.
        1) Build proteinId first, then lookup if it exists in the store
        2) If not exists, build a new document. Else, update the spectral counts of existing doc
        """
        es = self.getStore()
        index = self.getIndex()

        # Get the Ontology Version
        ontology_version = oceanproteinportal.datapackage.getDatapackageOntologyVersion(datapackage)

        proteinResource = oceanproteinportal.datapackage.findResource(datapackage=datapackage, resource_type='protein')
        if proteinResource is None:
            return

        datasetCruises = oceanproteinportal.datapackage.datapackageCruises(datapackage)
        table = Table(proteinResource.descriptor['path'], schema=proteinResource.descriptor['schema'])

        if (0 < row_start):
            logging.info("Skipping rows until # %s" % (row_start))

        row_count = 0
        proteinId = None
        data = None
        PROTEIN_FIELDS = getOntologyMappingFields(type='protein', ontology_version=ontology_version)
        try:
            for keyed_row in table.iter(keyed=True):
                row_count += 1
                if row_count < row_start:
                    logging.debug("Skipping Row # %s" % (row_count))
                    continue
                if row_stop is not None and row_count > row_stop:
                    logging.info("Stopping at Row# %s" % (row_count))
                    break
                logging.debug("Reading Row# %s" % (row_count))
                row = readKeyedTableRow(keyed_row=keyed_row, elastic_mappings=PROTEIN_FIELDS)

                # Get the unique identifier for this protein
                proteinId = row['proteinId']
                protein_guid = generateGuid( datapackage.descriptor['name'] + '_protein_' + datasetId + ':' + proteinId )

                try:
                    res = es.get(index=index, doc_type='protein', id=protein_guid)
                    # Reuse existing protein document
                    data = res['_source']
                except elasticsearch.exceptions.NotFoundError as exc:
                    # Build a new ES Protein document
                    data = {
                      '_dataset': datasetId,
                      'guid': protein_guid,
                      'proteinId': proteinId,
                      'spectralCount': []
                    }

                    if row['productName'] is not None:
                        data['productName'] = row['productName']
                    if row['molecularWeight'] is not None:
                        data['molecularWeight'] = row['molecularWeight']
                    if row['enzymeCommId'] is not None:
                        data['enzymeCommId'] = row['enzymeCommId']
                    if row['uniprotId'] is not None:
                        data['uniprotId'] = row['uniprotId']
                    if row['otherIdentifiedProteins'] is not None:
                        data['otherIdentifiedProteins'] = row['otherIdentifiedProteins']

                    # NCBI
                    ncbiTaxon = None
                    if 'ncbi:id' in row:
                        ncbiTaxon = {
                          'id': row['ncbi:id'],
                          'name': None
                        }
                    if 'ncbi:name' in row:
                        ncbiTaxon['name'] = row['ncbi:name']
                    if ncbiTaxon is not None:
                        data['ncbiTaxon'] = ncbiTaxon

                    # Kegg
                    kegg_pathway = None
                    pathway = row.get('kegg:path', None)
                    if pathway is not None:
                        kegg_pathway = []
                        for idx,path in enumerate(pathway):
                            kegg_pathway.append({'value': path, 'index': idx})
                        data['kegg'] = {
                          'id': row.get('kegg:id', None),
                          'description': row.get('kegg:desc', None),
                          'pathway': kegg_pathway
                        }

                    # PFams
                    if 'pfams:id' in row:
                        data['pfams'] = {
                          'id': row.get('pfams:id', None),
                          'name': row.get('pfams:name', None)
                        }
                # END of initial protein data setup

                # Handle all the unique row data for a certain protein
                # FilterSize
                filterSize = {}
                minimumFilterSize = row.get('filterSize:minimum', None)
                maximumFilterSize = row.get('filterSize:maximum', None)
                filterSizeLabel = ''
                if minimumFilterSize is not None:
                    filterSize['minimum'] = minimumFilterSize
                    filterSizeLabel += str(minimumFilterSize)
                if maximumFilterSize is not None:
                    filterSize['maximum'] = maximumFilterSize
                    if filterSizeLabel != '':
                        filterSizeLabel += ' - ' + str(maximumFilterSize)
                    else:
                        filterSizeLabel += str(maximumFilterSize)
                if filterSizeLabel != '':
                    filterSize['label'] = filterSizeLabel
                    data['filterSize'] = filterSize

                # Cruise
                cruise = {
                  'value': row.get('spectralCount:cruise', None),
                }
                if cruise['value'] in datasetCruises:
                    cruise['uri'] = datasetCruises[cruise['value']]['uri']
                # To-do
                # 1. Lookup the cruise URI in the datapackage

                # Spectral Counts
                # fix ISO DateTime
                observationDateTime = None
                if 'spectralCount:dateTime' in row and row['spectralCount:dateTime'] is not None:
                    observationDateTime = dateutil.parser.parse(row['spectralCount:dateTime'])
                    observationDateTime = observationDateTime.strftime(SPECTRAL_COUNT_DATE_TIME_FORMAT)
                elif 'spectralCount:date' in row and row['spectralCount:date'] is not None:
                    time = row.get('spectralCount:time', None)
                    if (time is None):
                        time = '00:00:00'
                    observationDateTime = dateutil.parser.parse(row['spectralCount:date'] + 'T' + time)
                    observationDateTime = observationDateTime.strftime(SPECTRAL_COUNT_DATE_TIME_FORMAT)

                spectralCount = {
                    'sampleId': row.get('spectralCount:sampleId', None),
                    'count': row.get('spectralCount:count', None),
                    'cruise': cruise,
                    'station': row.get('spectralCount:station', None),
                    'depth': row.get('spectralCount:depth', None),
                    'dateTime': observationDateTime,
                }
                if (spectralCount['depth'] is not None):
                    if 'min' not in dataset_depth_stats:
                        dataset_depth_stats['min'] = spectralCount['depth']
                        dataset_depth_stats['max'] = spectralCount['depth']
                    else:
                        if spectralCount['depth'] < dataset_depth_stats['min']:
                            dataset_depth_stats['min'] = spectralCount['depth']
                        if spectralCount['depth'] > dataset_depth_stats['max']:
                            dataset_depth_stats['max'] = spectralCount['depth']

                if (row['spectralCount:coordinate:lat'] is not None and row['spectralCount:coordinate:lon'] is not None):
                    spectralCount['coordinate'] = {
                      'lat': row['spectralCount:coordinate:lat'],
                      'lon': row['spectralCount:coordinate:lon']
                    }
                data['spectralCount'].append(spectralCount)
                res = self.load(data=data, type='protein', id=data['guid'])
                logging.info(res['result'])
            # end of for loop of protein rows
        except Exception as e:
            logging.exception("Error with row[%s]: %s" % (row_count, keyed_row))
            raise e
Example #25
def test_iter():
    table = Table(DATA_MIN, schema=SCHEMA_MIN)
    expect = [['one', 1], ['two', 2]]
    actual = list(table.iter())
    assert actual == expect
Example #26
def test_iter_web_csv():
    table = Table(BASE_URL % 'data/data_infer.csv', schema=SCHEMA_CSV)
    expect = [[1, 39, 'Paul'], [2, 23, 'Jimmy'], [3, 36, 'Jane'],
              [4, 28, 'Judy']]
    actual = list(table.iter())
    assert actual == expect
Example #27
def test_iter_missing_cols_stream_closed():
    table = Table('data/data_missing_cols.csv', schema=SCHEMA_MIN)
    with pytest.raises(exceptions.CastError) as excinfo:
        for _ in table.iter():
            pass
    assert table._Table__stream.closed
Example #28
def test_iter_keyed():
    table = Table(DATA_MIN, schema=SCHEMA_MIN)
    expect = [{'key': 'one', 'value': 1}, {'key': 'two', 'value': 2}]
    actual = list(table.iter(keyed=True))
    assert actual == expect
Example #29
from tableschema import Table

table = Table("template.csv", schema="schema.json")
table.schema.valid
# True

for row in table.iter():
    print(row)
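When the schema does not validate, a sketch of the follow-up check (schema.valid and schema.errors are part of the tableschema Schema API):

# Sketch: inspect validation problems instead of assuming schema.valid.
if not table.schema.valid:
    for error in table.schema.errors:
        print(error)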