Ejemplo n.º 1
0
def validate(resource=None):
    """Validate the GTEx v8 datapackage and attempt to read resource data.

    When *resource* is the string ``'all'``, every resource in the package
    is read; otherwise only the resource named by *resource* is read.
    Cast errors and any other read failures are printed rather than
    propagated.
    """
    datapackage.validate('datapackage/gtex-v8-datapackage.json')
    gtex_package = datapackage.DataPackage(
        'datapackage/gtex-v8-datapackage.json', strict=True)
    if resource == 'all':
        for res in gtex_package.resources:
            print(res.name)
            try:
                res.read()
            except CastError as cast_exc:
                print('Hit cast error')
                print(cast_exc.errors)
                print(cast_exc)
            except Exception as exc:
                print('Hit generic exception')
                print(type(exc))
                print(exc.args)
                print(exc)
    else:
        res = gtex_package.get_resource(resource)
        print(res.name)
        try:
            res.read()
        except CastError as cast_exc:
            print('Hit cast error')
            # Single-resource mode reports each cast error on its own line.
            for problem in cast_exc.errors:
                print(problem)
            print(cast_exc)
        except Exception as exc:
            print('Hit generic exception')
            print(type(exc))
            print(exc.args)
            print(exc)
def validate(descriptor):
    """Validate a data package *descriptor* and report the result via click.

    Prints a success message when the descriptor is valid; otherwise prints
    each individual validation error and exits with status 1.
    """
    try:
        datapackage.validate(descriptor)
    except datapackage.exceptions.ValidationError as exc:
        click.echo('Data package descriptor is invalid')
        for problem in exc.errors:
            click.echo(problem)
        exit(1)
    else:
        # Only reached when validate() did not raise.
        click.echo('Data package descriptor is valid')
Ejemplo n.º 3
0
def process_input(infile, validate=False, debug=False):
    """Read a datapackage descriptor and its resource streams from *infile*.

    The first line of *infile* must be the JSON-encoded datapackage
    descriptor; the process exits with status -3 when that line is empty.
    The descriptor is validated (raising ValidationError on failure, after
    logging each individual error), then the separator line is consumed.

    Returns a tuple ``(dp, resource_iterator)`` where ``dp`` is the parsed
    descriptor dict and the iterator yields one ResourceIterator per
    streaming resource.
    """
    dp_json = infile.readline().strip()
    if dp_json == '':
        sys.exit(-3)
    dp = json.loads(dp_json)
    resources = dp.get('resources', [])
    # Processing code may mutate the resource descriptors in place, so keep
    # pristine copies for validating incoming data.
    original_resources = copy.deepcopy(resources)

    if not resources:
        # Currently datapackages with no resources are disallowed in the schema.
        # Since this might happen in the early stages of a pipeline,
        # we're adding this hack to avoid validation errors
        dp_to_validate = copy.deepcopy(dp)
        dp_to_validate['resources'] = [{
            'name': '__placeholder__',
            'path': PATH_PLACEHOLDER
        }]
    else:
        dp_to_validate = dp
    try:
        datapackage.validate(dp_to_validate)
    except ValidationError as e:
        logging.info('ABOUT TO VALIDATE %r', dp_to_validate)
        # FIX: the original reused `e` as the loop variable, clobbering the
        # caught exception; use a distinct name for each nested error.
        for error in e.errors:
            try:
                logging.error(
                    "Data Package validation error: %s at dp%s", error.message,
                    "[%s]" % "][".join(repr(index) for index in error.path))
            except Exception:
                # FIX: narrowed from a bare `except:`.  Fall back when the
                # error object lacks .message / .path attributes.
                logging.error("Data Package validation error: %s", error)
        raise

    # Consume (and discard) the separator line after the descriptor.
    infile.readline().strip()

    def resources_iterator(_resources, _original_resources):
        # we pass a resource instance that may be changed by the processing
        # code, so we must keep a copy of the original resource (used to
        # validate incoming data)
        ret = []
        for resource, orig_resource in zip(_resources, _original_resources):
            if not streaming(resource):
                continue

            res_iter = ResourceIterator(infile, resource, orig_resource,
                                        validate, debug)
            ret.append(res_iter)
        return iter(ret)

    return dp, resources_iterator(resources, original_resources)
Ejemplo n.º 4
0
def test_validate_invalid():
    """An empty descriptor must fail with exactly one 'resources' error."""
    with pytest.raises(exceptions.ValidationError) as excinfo:
        validate({})
    errors = excinfo.value.errors
    assert len(errors) == 1
    assert 'resources' in str(errors[0])
Ejemplo n.º 5
0
def test_validate_valid():
    """A known-good descriptor file validates and returns a truthy result."""
    assert validate('data/datapackage/datapackage.json')
Ejemplo n.º 6
0
#!/usr/bin/env python3

###################################################################################
# Please install tableschema before running: https://pypi.org/project/tableschema/
###################################################################################

import tableschema

from datapackage import Package, Resource, validate, exceptions

# Path to the C2M2 datapackage descriptor to check.
c2m2_schema = '004_HMP__C2M2_preload__preBag_output_files/datapackage.json'

# FIX: normalized the original 3-space indentation to the standard 4 spaces
# (PEP 8).
try:
    valid = validate(c2m2_schema)
except exceptions.ValidationError as exception:
    # Report each individual schema violation rather than the aggregate.
    for error in exception.errors:
        print(error)

Ejemplo n.º 7
0
def ingest(config_file):
    """Ingest a datapackage into the store configured by *config_file*.

    The config drives which steps run: dataset metadata, protein data,
    sample stats, FASTA, peptide data, and peptide-to-protein linking.
    Raises when the data package is invalid or no store is configured.
    """
    # Read the config file telling you what to do.
    # FIX: the original assigned to `config` while every later access read
    # `cfg`, which raised NameError at runtime.
    cfg = initialize(config_file)

    # Inspect the datapackage
    dp = datapackage.DataPackage(cfg['ingest'].get('datapackage', None))
    if dp.errors:
        for error in dp.errors:
            logging.error(error)
        raise Exception('Invalid data package')
    # Validate the Datapackage descriptor against the spec.
    try:
        datapackage.validate(dp.descriptor)
    except exceptions.ValidationError as exception:
        # FIX: iterate the caught exception's errors; the original read the
        # nonexistent `datapackage.exception.errors` attribute.
        for error in exception.errors:
            logging.error(error)
        raise Exception('Invalid data package')

    # Generate datasetId.
    # FIX: pass the loaded package `dp`, not the `datapackage` module.
    datasetId = generateDatasetId(dp)
    logging.info('Dataset ID: %s', datasetId)

    # execute
    store_type = cfg.get('store', None)
    if store_type is None:
        raise Exception('The configuration does not define an ingest store')

    # Resolve the configured store class by name and instantiate it.
    module = __import__('oceanproteinportal.store')
    store_ = getattr(module, store_type)
    store = store_()

    if cfg['ingest'].get('load-dataset-metadata', False):
        logging.info('***** LOADING DATASET METADATA *****')
        store.loadDatasetMetadata(datapackage=dp, datasetId=datasetId)

    if cfg['ingest'].get('load-protein-data', False):
        protein_row_start = cfg['ingest'].get('protein-load-row-start', 0)
        protein_row_stop = cfg['ingest'].get('protein-load-row-stop', None)
        # Lazy %-style args avoid formatting when the level is disabled.
        logging.info('***** LOADING PROTEINS (row=%s, %s) *****',
                     protein_row_start, protein_row_stop)
        store.loadProteins(datapackage=dp,
                           datasetId=datasetId,
                           row_start=protein_row_start,
                           row_stop=protein_row_stop)

    if cfg['ingest'].get('calculate-dataset-metadata-stats', False):
        logging.info('***** UPDATING DATASET Sample STATS *****')
        store.updateDatasetSampleStats(datasetId=datasetId)

    if cfg['ingest'].get('load-fasta', False):
        logging.info('***** LOAD PROTEIN FASTA *****')
        store.loadProteinsFASTA(datapackage=dp, datasetId=datasetId)

    if cfg['ingest'].get('load-peptide-data', False):
        peptide_row_start = cfg['ingest'].get('peptide-load-row-start', 0)
        peptide_row_stop = cfg['ingest'].get('peptide-load-row-stop', None)
        logging.info('***** LOADING PEPTIDES (row=%s, %s) *****',
                     peptide_row_start, peptide_row_stop)
        store.loadPeptide(datapackage=dp,
                          datasetId=datasetId,
                          row_start=peptide_row_start,
                          row_stop=peptide_row_stop)

    if cfg['ingest'].get('add-peptides-to-proteins', False):
        # FIX: the original called `storeupdateProteinsWithPeptide` (missing
        # dot); the method lives on the store object.
        store.updateProteinsWithPeptide(datapackage=dp, datasetId=datasetId)
Ejemplo n.º 8
0
from datapackage import Package
from datapackage import exceptions
import os
from datapackage import validate, exceptions

ROOT = '/home/pgi/dev/toflit18_data/scripts/'
SKIP_RESOURCES = []

# Load the package and report any structural problems found at load time.
p = Package(os.path.join(ROOT, 'datapackage.json'), ROOT)
if not p.valid:
    for load_error in p.errors:
        print(load_error)

# Run full descriptor validation; it raises on failure with per-item errors.
try:
    print("valid?: %s" % validate(p.descriptor))
except exceptions.ValidationError as validation_exc:
    for validation_error in validation_exc.errors:
        # handle individual error
        print(validation_error)

for resource in p.resources:
    print(resource.name)
    if not resource.valid:
        for error in resource.errors:
            print(error)
    try:
        print("%s relations" % resource.name)
        errors = resource.read()
        resource.check_relations()
        # relations are kept in the resource object => memory leak
Ejemplo n.º 9
0
def test_validate_invalid():
    """Validating an empty dict raises with a single 'resources' error."""
    with pytest.raises(exceptions.ValidationError) as excinfo:
        validate({})
    caught = excinfo.value
    assert len(caught.errors) == 1
    assert 'resources' in str(caught.errors[0])
Ejemplo n.º 10
0
def test_validate_valid():
    """The reference descriptor on disk passes validation."""
    result = validate('data/datapackage/datapackage.json')
    assert result