def schema_validator(resource):
    """Yield the rows of ``resource`` after checking them against its schema.

    A ``SchemaModel`` is built from ``resource.spec['schema']`` and every
    key/value pair of each row is passed through its ``cast`` method.
    Rows are yielded unchanged (the cast result is discarded).

    Raises whatever ``InvalidCastError`` the schema raised, after logging
    the offending value and field name.
    """
    cast = SchemaModel(resource.spec['schema']).cast
    for record in resource:
        for name, value in record.items():
            try:
                cast(name, value)
            except InvalidCastError:
                # Log which cell was rejected, then let the error propagate.
                logging.error('Bad value %r for field %s', value, name)
                raise
        yield record
def write(self, table, data):
    """Write data to table.

    Each row is turned into a ``{field name: value}`` mapping by reading
    its items positionally, in the order of the table schema's fields,
    and casting every value with the schema model.

    Parameters
    ----------
    table: str
        Table name.
    data: list
        List of data tuples.

    """
    # Build one dict per row, keyed by field name
    model = SchemaModel(self.describe(table))
    records = []
    for row in data:
        record = {}
        for position, field in enumerate(model.fields):
            cell = row[position]
            name = field['name']
            try:
                cell = model.cast(name, cell)
            except InvalidObjectType:
                # The model rejected the raw value as an object; decode it
                # as JSON text instead (presumably a serialized object --
                # confirm against callers).
                cell = json.loads(cell)
            record[name] = cell
        records.append(record)

    # Insert all rows with a single execute call
    self.__get_dbtable(table).insert().execute(records)
class ResourceIterator(object):
    """Iterator over rows read as JSON documents, one per line, from a stream.

    Each call to ``__next__`` reads one line from ``infile``, strips it and
    decodes it with ``json.loads``.  An empty line (which is also what
    ``readline`` returns at end of input) ends the iteration permanently.

    Parameters
    ----------
    infile:
        Object with a ``readline()`` method returning text.
    spec:
        Stored as ``self.spec``; not otherwise used by this class.
    orig_spec:
        Mapping whose ``'schema'`` entry is used to build the
        ``SchemaModel`` that rows are validated against.
    validate: bool
        When true, every key/value of each decoded row is cast against
        the schema and a ``ValueError`` is raised on failure.
    debug: bool
        When true, progress messages are emitted through ``logging.error``.
    """

    def __init__(self, infile, spec, orig_spec, validate=False, debug=False):
        self.spec = spec
        self.table_schema = SchemaModel(orig_spec['schema'])
        self.validate = validate
        self.infile = infile
        self.debug = debug
        self.stopped = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next decoded row.

        Raises
        ------
        StopIteration
            Once an empty line has been read, and on every later call.
        ValueError
            When validation is enabled and a value cannot be cast, or its
            key is not a field of the schema.
        """
        if self.stopped:
            raise StopIteration()
        if self.debug:
            logging.error('WAITING')
        line = self.infile.readline().strip()
        if self.debug:
            logging.error('INGESTING: %r', line)
        if line == '':
            # Remember that the stream ended so later calls do not read
            # past the terminating blank line.
            self.stopped = True
            raise StopIteration()
        line = json.loads(line)
        if self.validate:
            self._validate_row(line)
        return line

    def _validate_row(self, row):
        """Cast every value of ``row`` against the schema, raising ValueError."""
        for k, v in row.items():
            try:
                self.table_schema.cast(k, v)
            except (InvalidCastError, TypeError):
                field = self.table_schema.get_field(k)
                # The messages are interpolated here: exceptions, unlike
                # logging calls, do not apply %-formatting to extra args.
                if field is None:
                    raise ValueError(
                        'Validation failed: No such field %s' % (k,))
                raise ValueError(
                    'Validation failed: Bad value %r '
                    'for field %s with type %s'
                    % (v, k, field.get('type')))

    def next(self):
        """Python 2 spelling of ``__next__``."""
        return self.__next__()
def assert_conforms_to_schema(schema, doc):
    """Check ``doc`` against ``schema`` and return its cast values.

    The check runs in two passes: first the values of ``doc`` are laid out
    in the order of ``schema["fields"]`` and cast as one row with
    ``Schema.cast_row``; then every key/value of ``doc`` is cast
    individually with ``SchemaModel.cast``.

    Returns a dict mapping each key of ``doc`` to its cast value.

    Raises ``AssertionError`` if ``doc`` is not a dict, ``KeyError`` if
    ``doc`` lacks a field named in the schema, and a plain ``Exception``
    (after logging the underlying error) when either cast pass fails.
    """
    assert isinstance(doc, dict), "invalid doc: {}".format(doc)

    # Pass 1: the document as a positional row, in schema field order.
    row = []
    for field in schema["fields"]:
        row.append(doc[field["name"]])
    try:
        Schema(schema).cast_row(row)
    except Exception as error:
        logging.exception(error)
        row_template = "row does not conform to schema\nrow='{}'\nschema='{}'"
        raise Exception(
            row_template.format(json.dumps(row), json.dumps(schema)))

    # Pass 2: each attribute on its own, collecting the cast values.
    model = SchemaModel(schema)
    cast_values = {}
    for name, value in doc.items():
        try:
            cast_values[name] = model.cast(name, value)
        except Exception as error:
            logging.exception(error)
            attr_template = ("doc attribute '{}' with value '{}' "
                             "does not conform to schema '{}'")
            raise Exception(attr_template.format(
                json.dumps(name), json.dumps(value), json.dumps(schema)))
    return cast_values