def get_tsv_errors(tsv_path, schema_name, optional_fields=None, offline=None, encoding=None):
    '''
    Validate the TSV at tsv_path against the named schema.

    Returns a human-readable string (or a {strerror: filename} dict when
    the schema itself can not be loaded) describing the first problem found,
    or the result of get_table_errors() for row-level validation.
    '''
    # BUGFIX: the default was a mutable list shared across calls;
    # use None + per-call list instead.
    if optional_fields is None:
        optional_fields = []
    logging.info(f'Validating {schema_name} TSV...')
    if not Path(tsv_path).exists():
        return 'File does not exist'

    try:
        rows = dict_reader_wrapper(tsv_path, encoding=encoding)
    except IsADirectoryError:
        return 'Expected a TSV, but found a directory'
    except UnicodeDecodeError as e:
        return get_context_of_decode_error(e)

    if not rows:
        return 'File has no data rows.'

    # Legacy TSVs predate the 'version' column; treat them as version '0'.
    version = rows[0].get('version', '0')
    try:
        # Schemas under table-schemas/others load via get_other_schema;
        # everything else is an assay table schema.
        others = [
            p.stem.split('-v')[0] for p in
            (Path(__file__).parent / 'table-schemas/others').iterdir()
        ]
        if schema_name in others:
            schema = get_other_schema(schema_name, version, offline=offline)
        else:
            schema = get_table_schema(schema_name, version, offline=offline,
                                      optional_fields=optional_fields)
    except OSError as e:
        # e.g. unknown schema/version: report the failing filename.
        return {e.strerror: Path(e.filename).name}
    return get_table_errors(tsv_path, schema)
def main():
    """Write a TSV report of every schema field (and each enum value) to stdout."""
    default_fields = ['schema', 'version', 'field', 'description', 'enum']
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--fields',
        default=default_fields,
        nargs='+',
        metavar='FIELD',
        help=f'Fields to include in report. Default: {default_fields}')
    args = parser.parse_args()

    # extrasaction='ignore' lets us always build full row dicts and let
    # the writer drop any columns the user did not ask for.
    writer = DictWriter(sys.stdout,
                        fieldnames=args.fields,
                        extrasaction='ignore')
    writer.writeheader()
    for sv in list_schema_versions():
        schema = get_table_schema(sv.schema_name, sv.version)
        for field in schema['fields']:
            # Fields without an enum constraint still get one row,
            # with an empty enum cell.
            enums = field.get('constraints', {}).get('enum', [''])
            for enum in enums:
                writer.writerow({
                    'schema': sv.schema_name,
                    'version': sv.version,
                    'field': field['name'],
                    'description': field['description'],
                    'enum': enum
                })
    return 0
# Example #3 ("예제" = Korean for "example"; separator artifact from scraping)
def main():
    """Generate github-pages docs for one type: README pointer, index.md,
    per-version YAML, and TSV/XLSX data-entry templates."""
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument('target',
                        type=dir_path,
                        help='Directory to write output to')
    args = parser.parse_args()

    schema_versions = dict_schema_versions()
    versions = sorted(schema_versions[args.type])
    assert versions, f'No versions for {args.type}'
    max_version = max(versions)

    # Assays and "other" types load through different schema APIs,
    # and only assays have a directory schema.
    is_assay = get_is_assay(args.type)
    if is_assay:
        table_schemas = {v: get_table_schema(args.type, v) for v in versions}
        directory_schema = get_directory_schema(args.type)
    else:
        table_schemas = {v: get_other_schema(args.type, v) for v in versions}
        directory_schema = {}

    # README.md: real docs live on github pages now; leave a pointer behind.
    with open(Path(args.target) / 'README.md', 'w') as f:
        url = f'https://hubmapconsortium.github.io/ingest-validation-tools/{args.type}/'
        f.write(f'Moved to [github pages]({url}).')

    # index.md:
    with open(Path(args.target) / 'index.md', 'w') as f:
        f.write(
            generate_readme_md(table_schemas,
                               directory_schema,
                               args.type,
                               is_assay=is_assay))

    # YAML: one generated file per version.
    for v in versions:
        schema = table_schemas[v]
        first_field = schema['fields'][0]
        # If the schema pins a 'version' field, its enum must agree with the
        # filename's version.  (The original also had a redundant
        # `assert schema['fields'][0]` here: indexing [0] above would
        # already have raised on an empty field list, so it is dropped.)
        if first_field['name'] == 'version':
            assert first_field['constraints']['enum'] == [v], \
                f'Wrong version constraint in {args.type}-v{v}.yaml'
        with open(Path(args.target) / f'v{v}.yaml', 'w') as f:
            f.write('# Generated YAML: PRs should not start here!\n' +
                    dump_yaml(schema))

    # Data entry templates (TSV + XLSX) are generated for the latest version.
    max_schema = table_schemas[max_version]
    with open(
            Path(args.target) / get_tsv_name(args.type, is_assay=is_assay),
            'w') as f:
        f.write(generate_template_tsv(max_schema))
    create_xlsx(max_schema,
                Path(args.target) /
                get_xlsx_name(args.type, is_assay=is_assay),
                idempotent=True,
                sheet_name='Export as TSV')
def main():
    """Print a YAML mapping of field name -> description, covering every
    assay type plus the sample schema.  Returns a shell exit code."""
    mapping = {}
    for assay_type in list_types():
        try:
            schema = get_table_schema(assay_type)
        except Exception as e:
            # Report which type broke, then fail.
            print(f'Processing: {assay_type}\n{e}', file=sys.stderr)
            return 1
        _add_field_descriptions_to_mapping(schema['fields'], mapping)
    _add_field_descriptions_to_mapping(get_sample_schema()['fields'], mapping)
    print(dump_yaml(mapping))
    return 0
# Example #5 ("예제" = Korean for "example"; separator artifact from scraping)
def main():
    """Generate docs for one type: README.md, per-version YAML, and
    TSV/XLSX data-entry templates."""
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument('target',
                        type=dir_path,
                        help='Directory to write output to')
    args = parser.parse_args()

    schema_versions = dict_schema_versions()
    versions = sorted(schema_versions[args.type])
    # BUGFIX: guard against an unknown/empty type before max([]) raises
    # an unhelpful ValueError.
    assert versions, f'No versions for {args.type}'
    max_version = max(versions)

    # Assays and "other" types load through different schema APIs,
    # and only assays have a directory schema.
    is_assay = get_is_assay(args.type)
    if is_assay:
        table_schemas = {v: get_table_schema(args.type, v) for v in versions}
        directory_schema = get_directory_schema(args.type)
    else:
        table_schemas = {v: get_other_schema(args.type, v) for v in versions}
        directory_schema = {}

    # README:
    with open(Path(args.target) / 'README.md', 'w') as f:
        f.write(
            generate_readme_md(table_schemas,
                               directory_schema,
                               args.type,
                               is_assay=is_assay))

    # YAML: one generated file per version.
    for v in versions:
        with open(Path(args.target) / f'v{v}.yaml', 'w') as f:
            f.write('# Generated YAML: PRs should not start here!\n' +
                    dump_yaml(table_schemas[v]))

    # Data entry templates (TSV + XLSX) are generated for the latest version.
    max_schema = table_schemas[max_version]
    with open(
            Path(args.target) / get_tsv_name(args.type, is_assay=is_assay),
            'w') as f:
        f.write(generate_template_tsv(max_schema))
    create_xlsx(max_schema,
                Path(args.target) /
                get_xlsx_name(args.type, is_assay=is_assay),
                idempotent=True,
                sheet_name='Export as TSV')
# Example #6 ("예제" = Korean for "example"; separator artifact from scraping)
def get_metadata_tsv_errors(metadata_path, type, optional_fields=None):
    '''
    Validate the metadata.tsv.

    Returns a list of error/warning messages (empty when the table is valid),
    or a {strerror: filename} dict when the schema can not be loaded.
    NOTE: the parameter name `type` shadows the builtin, but it is kept
    so keyword callers keep working.
    '''
    # BUGFIX: the default was a mutable list shared across calls;
    # use None + per-call list instead.
    if optional_fields is None:
        optional_fields = []
    logging.info(f'Validating {type} metadata.tsv...')
    try:
        schema = get_table_schema(type, optional_fields=optional_fields)
    except OSError as e:
        return {e.strerror: Path(e.filename).name}
    report = validate_table(metadata_path,
                            schema=schema,
                            format='csv',
                            delimiter='\t',
                            skip_checks=['blank-row'])
    error_messages = report['warnings']
    if 'tables' in report:
        for table in report['tables']:
            # Translate 1-based column numbers into spreadsheet letters
            # so messages match what users see in Excel.
            error_messages += [
                column_number_to_letters(e['message']) for e in table['errors']
            ]
    return error_messages
def main():
    """Generate the TSV template, README.md, and unified YAML for one type."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'type',
        choices=list_types(),
        help='What type to generate')
    parser.add_argument(
        'target',
        type=dir_path,
        help='Directory to write output to')
    args = parser.parse_args()

    table_schema = get_table_schema(args.type)
    directory_schemas = get_directory_schemas(args.type)

    target = Path(args.target)
    (target / get_tsv_name(args.type)).write_text(
        generate_template_tsv(table_schema))
    (target / 'README.md').write_text(
        generate_readme_md(table_schema, directory_schemas, args.type))
    (target / 'unified.yaml').write_text(
        f'# NOTE: Do not edit this; It is generated by {__file__}.\n\n'
        + dump_yaml(table_schema))