Example #1
    def check_resource_schema(self, default_resource, resource):
        """Check that user resource schema contains all the mandatory fields"""

        def get_uncustomizable_fields(schema):
            uncustomizable = ['constraints', 'format', 'name', 'type']
            field_filter = lambda field: {key: val for key, val in field.items()
                                          if key in uncustomizable}
            fields = [field_filter(field) for field in schema.fields]
            fields = sorted(fields, key=lambda k: k['name'])
            return fields

        resource_schema = SchemaModel(resource.descriptor['schema'])
        default_schema_dict = default_resource.descriptor['schema']
        if default_resource.descriptor['name'] == 'source_file':
            for field in default_schema_dict['fields']:
                if field['name'] == 'data':
                    field['name'] = self.data_key
        default_schema = SchemaModel(default_schema_dict)

        if default_resource.descriptor['name'] in self.inflexible_resources:
            if get_uncustomizable_fields(default_schema) != \
               get_uncustomizable_fields(resource_schema):
                msg = ('The fields for "{0}" are not subject to '
                       'change').format(resource.local_data_path)
                raise ValueError(msg, resource.local_data_path)
        else:
            required_headers = set(default_schema.required_headers)
            resource_headers = set(resource_schema.headers)
            if not required_headers.issubset(resource_headers):
                missing_headers = required_headers.difference(resource_headers)
                msg = ('Fields [{0}] are needed for internal processing '
                       'but are missing from {1}.'
                       ).format(','.join(missing_headers), resource.local_data_path)
                raise ValueError(msg, resource.local_data_path)
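A minimal sketch of the subset check above, assuming SchemaModel comes from the jsontableschema package (the import path is not shown in these snippets) and accepts an in-memory descriptor dict, as the examples here do:

    from jsontableschema.model import SchemaModel  # assumed import path

    # Default schema: 'id' is required, 'amount' is optional.
    default_schema = SchemaModel({'fields': [
        {'name': 'id', 'type': 'string', 'constraints': {'required': True}},
        {'name': 'amount', 'type': 'number'},
    ]})
    # User schema that is missing the required 'id' field.
    user_schema = SchemaModel({'fields': [{'name': 'amount', 'type': 'number'}]})

    missing = set(default_schema.required_headers) - set(user_schema.headers)
    if missing:
        # Mirrors the ValueError raised by check_resource_schema above.
        raise ValueError('Fields [{0}] are needed for internal processing '
                         'but are missing.'.format(','.join(missing)))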
Example #2
 def __init__(self, infile, spec, orig_spec, validate=False, debug=False):
     self.spec = spec
     self.table_schema = SchemaModel(orig_spec['schema'])
     self.validate = validate
     self.infile = infile
     self.debug = debug
     self.stopped = False
Example #3
def export_package(storage, descriptor, datapackage_name):
    """Export Data Package from storage.

    Parameters
    ----------
    storage: object
        Storage object.
    descriptor: str
        Path where to store descriptor.
    datapackage_name: str
        Name of the exported datapackage.

    """

    # Iterate over tables
    resources = []
    mapping = {}
    for table in storage.tables:

        # Prepare
        schema = storage.describe(table)
        base = os.path.dirname(descriptor)
        path, name = _restore_path(table)
        fullpath = os.path.join(base, path)
        if name is not None:
            mapping[table] = name

        # Write data
        _ensure_dir(fullpath)
        with io.open(fullpath,
                     mode=_write_mode,
                     newline=_write_newline,
                     encoding=_write_encoding) as file:
            model = SchemaModel(deepcopy(schema))
            data = storage.read(table)
            writer = csv.writer(file)
            writer.writerow(model.headers)
            for row in data:
                writer.writerow(row)

        # Add resource
        resource = {'schema': schema, 'path': path}
        if name is not None:
            resource['name'] = name
        resources.append(resource)

    # Write descriptor
    resources = _restore_resources(mapping, resources)
    _ensure_dir(descriptor)
    with io.open(descriptor, mode=_write_mode,
                 encoding=_write_encoding) as file:
        package = {
            'name': datapackage_name,
            'resources': resources,
        }
        json.dump(package, file, indent=4)
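A hedged invocation sketch; Example #10 below builds storages from plugin modules, so a call might look like this (the sql plugin name follows that pattern, and the engine option is an assumption based on the SQL backend's constructor):

    from importlib import import_module

    from sqlalchemy import create_engine

    # Assumed: the 'sql' plugin exposes Storage and accepts a SQLAlchemy engine.
    plugin = import_module('jsontableschema.plugins.sql')
    storage = plugin.Storage(engine=create_engine('sqlite:///data.db'))
    export_package(storage, 'build/datapackage.json', 'my-datapackage')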
Example #4
 def __init__(self, schema):
     self.data = schema
     self.schema_model = SchemaModel(schema)
     self.fields = [SchemaField(f) for f in self.schema_model.fields]
     self.species_fields = self.find_species_fields(self)
Example #5
 def schema_validator(resource):
     schema = SchemaModel(resource.spec['schema'])
     for row in resource:
         for k, v in row.items():
             try:
                 schema.cast(k, v)
             except InvalidCastError:
                 logging.error('Bad value %r for field %s', v, k)
                 raise
         yield row
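A short usage sketch, assuming a resource object that is iterable over row dicts and exposes its descriptor under resource.spec['schema'], which is all the generator above needs (FakeResource is illustrative):

    class FakeResource(object):
        spec = {'schema': {'fields': [{'name': 'id', 'type': 'integer'}]}}

        def __iter__(self):
            return iter([{'id': '1'}, {'id': 'not-a-number'}])

    for row in schema_validator(FakeResource()):
        print(row)  # the second row logs an error and raises InvalidCastError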
Example #6
    def update_sources_period(self, new_sources):
        """Overwrite source_file with the identified period_id"""

        source_resource = utilities.get_datapackage_resource(self.source_file,
                                                             self.datapackage)
        source_idx = self.datapackage.resources.index(source_resource)
        source_schema_dict = self.datapackage.resources[source_idx].descriptor['schema']
        updates = {'fields': [{'name': 'period_id', 'type': 'string',
                   'title': 'The period source data is relevant for.'}]}
        utilities.deep_update_dict(source_schema_dict, updates)
        source_schema = SchemaModel(source_schema_dict)

        with compat.UnicodeWriter(self.source_file) as source_file:
            source_file.writerow(source_schema.headers)
            for row in utilities.dicts_to_schema_rows(new_sources,
                                                      source_schema):
                source_file.writerow(row)
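The utilities helpers here are project-specific. A minimal sketch of what dicts_to_schema_rows plausibly does, ordering each dict's values by the schema's headers (an assumption, not the project's actual code):

    def dicts_to_schema_rows(dicts, schema):
        # Emit one row per dict, with cells ordered by schema.headers.
        for record in dicts:
            yield [record.get(header, '') for header in schema.headers]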
Example #7
def assert_conforms_to_schema(schema, doc):
    assert isinstance(doc, dict), "invalid doc: {}".format(doc)
    row = [doc[field["name"]] for field in schema["fields"]]
    try:
        Schema(schema).cast_row(row)
    except Exception as e:
        logging.exception(e)
        raise Exception(
            "row does not conform to schema\nrow='{}'\nschema='{}'".format(
                json.dumps(row), json.dumps(schema)))
    schema_model = SchemaModel(schema)
    res = {}
    for k, v in doc.items():
        try:
            res[k] = schema_model.cast(k, v)
        except Exception as e:
            logging.exception(e)
            raise Exception("doc attribute '{}' with value '{}' "
                            "does not conform to schema '{}'".format(
                                *map(json.dumps, [k, v, schema])))
    return res
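A usage sketch: cast() converts each raw value to its field's declared type, so a conforming doc comes back with typed values.

    schema = {'fields': [{'name': 'id', 'type': 'integer'},
                         {'name': 'name', 'type': 'string'}]}
    doc = {'id': '42', 'name': 'alice'}

    res = assert_conforms_to_schema(schema, doc)
    assert res['id'] == 42  # the string '42' was cast to an integer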
Example #8
def verify_csvimport(args):
    if not os.path.exists(args.csvfile):
        LOG.error('input CSV file %s does not exist' % args.csvfile)
        exit(-1)

    if os.path.exists(args.schema):
        schemafile = args.schema + '.json'
    else:
        schemafile = os.path.join(schemas_dir, args.schema + '.json')
    if not os.path.exists(schemafile):
        LOG.error("This schema file %s doesn't exist in current "
                  "directory or csv_schemas directory" %
                  (args.schema + '.json'))
        exit(-1)

    try:
        schema = SchemaModel(schemafile, case_insensitive_headers=True)

        if 'account' not in schema.headers and args.accountname is None:
            LOG.error('schema headers: %s' % schema.headers)
            LOG.error(
                'This schema does not have an account column and no account name was provided'
            )
            exit(-1)

        with open(schemafile, 'r') as sf:
            schemacontent = json.load(sf)
            try:
                setattr(schema, 'nheaders', schemacontent['nheaders'])
            except KeyError:
                setattr(schema, 'nheaders', 1)

        return schema

    except InvalidSchemaError as e:
        LOG.error('Invalid CSV schema %s' % e)
        exit(-1)
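A hypothetical invocation, using an argparse.Namespace that carries the three attributes the function reads (csvfile, schema, accountname):

    import argparse

    args = argparse.Namespace(csvfile='transactions.csv',
                              schema='bank',  # resolved against csv_schemas/
                              accountname='Checking')
    schema = verify_csvimport(args)
    print(schema.headers, schema.nheaders)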
Example #9
def do_csvimport(args, client=None):
    if client is None:
        client = clientfromargs(args)
    logger = get_logger(args)

    logger.debug('selected schema %s' % (args.schema,))
    if os.path.exists(args.schema):
        schemafile = args.schema
    else:
        schemafile = os.path.join(schemas_dir, args.schema + '.json')
        if not os.path.exists(schemafile):
            logger.error("This schema doesn't exist in csv_schemas")
            exit(-1)
    try:
        schema = SchemaModel(schemafile, case_insensitive_headers=True)
        with open(schemafile, 'r') as sf:
            schemacontent = json.load(sf)
            try:
                nheaders = schemacontent['nheaders']
            except KeyError:
                nheaders = 1
    except InvalidSchemaError:
        logger.error('Invalid CSV schema')
        raise
    logger.debug('schema headers %s' % schema.headers)

    if 'account' not in schema.headers and args.accountname is None:
        logger.error('This schema does not have an account column and no account name was provided')
        exit(-1)

    accounts = {x.account_name: x for x in client.budget.be_accounts}
    payees = {p.name: p for p in client.budget.be_payees}
    mastercategories_perid = {m.id: m for m in client.budget.be_master_categories}
    subcategories = {}
    for s in client.budget.be_subcategories:
        m = mastercategories_perid[s.entities_master_category_id]
        subcategories[m.name + ':' + s.name] = s

    def getaccount(accountname):
        try:
            logger.debug('searching for account %s' % accountname)
            return accounts[accountname]
        except KeyError:
            logger.error("Couldn't find this account: %s" % accountname)
            exit(-1)

    def getpayee(payeename):
        try:
            logger.debug('searching for payee %s' % payeename)
            return payees[payeename]
        except KeyError:
            logger.debug("Couldn't find this payee: %s" % payeename)
            payee = Payee(name=payeename)
            client.budget.be_payees.append(payee)
            return payee

    def getsubcategory(categoryname):
        try:
            logger.debug('searching for subcategory %s' % categoryname)
            return subcategories[categoryname]
        except KeyError:
            logger.debug("Couldn't find this category: %s" % categoryname)
            exit(-1)

    entities_account_id = None
    if 'account' not in schema.headers:
        entities_account_id = getaccount(args.accountname).id

    amount = None
    if 'inflow' in schema.headers and 'outflow' in schema.headers:
        pass
    elif 'amount' in schema.headers:
        pass
    else:
        logger.error("This schema doesn't provide an amount column or (inflow,outflow) columns")
        exit(-1)

    csvrow = namedtuple('CSVrow', field_names=schema.headers)
    transactions = []

    imported_date = datetime.now().date()

    logger.debug('OK starting the import from %s ' % os.path.abspath(args.csvfile))
    with open(args.csvfile, 'r') as inputfile:
        header = []
        for i in range(0, nheaders):
            header.append(inputfile.readline())
        for row in csv.reader(inputfile):
            if sys.version[0] == '2':
                row = [cell.decode('utf-8') for cell in row]
            if all(map(lambda x: x.strip() == '', row)):
                continue
            logger.debug('read line %s' % row)
            result = csvrow(*list(schema.convert_row(*row, fail_fast=True)))
            if 'account' in schema.headers:
                entities_account_id = getaccount(result.account).id
            if entities_account_id is None:
                logger.error(
                    'No account id; the account %s in the account column was not recognized' % result.account)
                exit(-1)
            if 'inflow' in schema.headers and 'outflow' in schema.headers:
                amount = result.inflow - result.outflow
            elif 'amount' in schema.headers:
                amount = result.amount

            if 'category' in schema.headers and result.category:
                entities_subcategory_id = getsubcategory(result.category).id
            else:
                entities_subcategory_id = None
            if 'payee' in schema.headers:
                imported_payee = result.payee
            else:
                imported_payee = ''
            entities_payee_id = getpayee(imported_payee).id
            if 'memo' in schema.headers:
                memo = result.memo
            else:
                memo = ''

            transaction = Transaction(
                entities_account_id=entities_account_id,
                amount=amount,
                date=result.date,
                entities_payee_id=entities_payee_id,
                entities_subcategory_id=entities_subcategory_id,
                imported_date=imported_date,
                imported_payee=imported_payee,
                memo=memo,
                source="Imported"
            )
            if args.import_duplicates or (transaction not in client.budget.be_transactions):
                logger.debug('Appending transaction %s ' % transaction.get_dict())
                transactions.append(transaction)
            else:
                logger.debug('Duplicate transaction found %s ' % transaction.get_dict())

    client.add_transactions(transactions)
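The core of the row handling above is convert_row, which casts each cell to its field's declared type and, with fail_fast=True, raises on the first bad cell. A minimal sketch with an assumed three-column schema:

    from collections import namedtuple

    from jsontableschema.model import SchemaModel  # assumed import path

    schema = SchemaModel({'fields': [{'name': 'date', 'type': 'date'},
                                     {'name': 'payee', 'type': 'string'},
                                     {'name': 'amount', 'type': 'number'}]})
    CSVRow = namedtuple('CSVRow', field_names=schema.headers)
    result = CSVRow(*list(schema.convert_row('2016-01-31', 'ACME', '12.50',
                                             fail_fast=True)))
    # result.amount is a typed value rather than the raw string '12.50'.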
Example #10
def pull_datapackage(descriptor, name, backend, **backend_options):
    """Pull Data Package from storage.

    All parameters should be used as keyword arguments.

    Args:
        descriptor (str): path where to store descriptor
        name (str): name of the pulled datapackage
        backend (str): backend name like `sql` or `bigquery`
        backend_options (dict): backend options mentioned in backend docs

    """

    # Save datapackage name
    datapackage_name = name

    # Get storage
    plugin = import_module('jsontableschema.plugins.%s' % backend)
    storage = plugin.Storage(**backend_options)

    # Iterate over tables
    resources = []
    for table in storage.tables:

        # Prepare
        schema = storage.describe(table)
        base = os.path.dirname(descriptor)
        path, name = mappers.restore_path(table)
        fullpath = os.path.join(base, path)

        # Write data
        helpers.ensure_dir(fullpath)
        with io.open(fullpath, 'wb') as file:
            model = SchemaModel(deepcopy(schema))
            data = storage.read(table)
            writer = csv.writer(file, encoding='utf-8')
            writer.writerow(model.headers)
            for row in data:
                writer.writerow(row)

        # Add resource
        resource = {'schema': schema, 'path': path}
        if name is not None:
            resource['name'] = name
        resources.append(resource)

    # Write descriptor
    mode = 'w'
    encoding = 'utf-8'
    if six.PY2:
        mode = 'wb'
        encoding = None
    resources = mappers.restore_resources(resources)
    helpers.ensure_dir(descriptor)
    with io.open(descriptor,
                 mode=mode,
                 encoding=encoding) as file:
        package = {
            'name': datapackage_name,
            'resources': resources,
        }
        json.dump(package, file, indent=4)
    return storage
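A hedged call sketch; backend_options are forwarded to the backend's Storage, and the SQLAlchemy engine keyword is an assumption for the sql backend:

    from sqlalchemy import create_engine

    storage = pull_datapackage(descriptor='build/datapackage.json',
                               name='my-datapackage',
                               backend='sql',
                               engine=create_engine('sqlite:///data.db'))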
Example #11
 def __init__(self, schema):
     self.data = schema
     self.schema_model = SchemaModel(schema)
     self.fields = [SchemaField(f) for f in self.schema_model.fields]
     self.species_fields = self.find_species_fields(self)
Example #12
 def __init__(self, schema):
     self.schema_model = SchemaModel(schema)
     self.fields = [SchemaField(f) for f in self.schema_model.fields]
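SchemaField in Examples #4, #11, and #12 is project-specific. Since Example #1 treats each entry of schema.fields as a dict, a minimal wrapper might look like this (illustrative only):

    class SchemaField(object):
        """Illustrative wrapper over a field descriptor dict (an assumption)."""

        def __init__(self, data):
            self.data = data
            self.name = data.get('name')
            self.type = data.get('type', 'string')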