def get_tsv_errors(tsv_path, schema_name, optional_fields=None,
                   offline=None, encoding=None):
    '''
    Validate the TSV at tsv_path against the schema called schema_name.

    The schema version is read from the "version" column of the first data
    row; when that column is absent, version "0" is used. If schema_name
    matches one of the files under "table-schemas/others" (next to this
    module), the schema is loaded with get_other_schema; otherwise it is
    loaded with get_table_schema, which also receives optional_fields.

    Parameters:
        tsv_path: Path of the TSV file to validate.
        schema_name: Name of the schema to validate against.
        optional_fields: Passed through to get_table_schema. None (the
            default) is treated as an empty list.
        offline: Passed through to the schema loaders.
        encoding: Passed through to dict_reader_wrapper.

    Returns:
        - A string describing the problem if the path does not exist, is a
          directory, cannot be decoded, or has no data rows.
        - A dict of {strerror: file name} if loading the schema raised an
          OSError that names a file, or a string describing the error if
          it does not name a file.
        - Otherwise, whatever get_table_errors returns for the TSV.
    '''
    # A list literal as the default would be one object shared by every
    # call; build a fresh list per call instead.
    if optional_fields is None:
        optional_fields = []

    logging.info(f'Validating {schema_name} TSV...')
    if not Path(tsv_path).exists():
        return 'File does not exist'

    try:
        rows = dict_reader_wrapper(tsv_path, encoding=encoding)
    except IsADirectoryError:
        return 'Expected a TSV, but found a directory'
    except UnicodeDecodeError as e:
        return get_context_of_decode_error(e)

    if not rows:
        return 'File has no data rows.'

    version = rows[0].get('version', '0')
    try:
        # File stems look like "<name>-v<version>"; keep only the name part.
        others = [
            p.stem.split('-v')[0] for p in
            (Path(__file__).parent / 'table-schemas/others').iterdir()
        ]
        if schema_name in others:
            schema = get_other_schema(schema_name, version, offline=offline)
        else:
            schema = get_table_schema(schema_name, version, offline=offline,
                                      optional_fields=optional_fields)
    except OSError as e:
        if e.filename is None:
            # Not every OSError carries a path, and Path(None) would raise
            # TypeError here, hiding the real problem from the caller.
            return str(e) or repr(e)
        return {e.strerror: Path(e.filename).name}
    return get_table_errors(tsv_path, schema)
예제 #2
0
def main():
    '''
    Command-line entry point: generate the files for one schema type.

    Reads "type" and "target" from the command line, loads the schema for
    every known version of that type, and writes into the target directory:
      - README.md, a one-line pointer to the github pages URL;
      - index.md, the output of generate_readme_md;
      - v<version>.yaml, one dump per schema version;
      - a TSV and an XLSX data entry template built from the schema with
        the highest version.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument(
        'target', type=dir_path, help='Directory to write output to')
    args = parser.parse_args()

    known_versions = dict_schema_versions()
    versions = sorted(known_versions[args.type])
    assert versions, f'No versions for {args.type}'
    max_version = max(versions)

    # Assay types and "other" types use different schema loaders, and only
    # assay types have a directory schema.
    is_assay = get_is_assay(args.type)
    load_schema = get_table_schema if is_assay else get_other_schema
    table_schemas = {}
    for v in versions:
        table_schemas[v] = load_schema(args.type, v)
    directory_schema = get_directory_schema(args.type) if is_assay else {}

    out_dir = Path(args.target)

    # README.md:
    with (out_dir / 'README.md').open('w') as readme:
        url = f'https://hubmapconsortium.github.io/ingest-validation-tools/{args.type}/'
        readme.write(f'Moved to [github pages]({url}).')

    # index.md:
    with (out_dir / 'index.md').open('w') as index:
        index_md = generate_readme_md(
            table_schemas, directory_schema, args.type, is_assay=is_assay)
        index.write(index_md)

    # YAML:
    yaml_banner = '# Generated YAML: PRs should not start here!\n'
    for v in versions:
        schema = table_schemas[v]
        leading_field = schema['fields'][0]
        # When a schema leads with a "version" field, its enum must allow
        # exactly the version this schema was loaded as.
        if leading_field['name'] == 'version':
            assert leading_field['constraints']['enum'] == [v], \
                f'Wrong version constraint in {args.type}-v{v}.yaml'
        assert schema['fields'][0]
        with (out_dir / f'v{v}.yaml').open('w') as yaml_file:
            yaml_file.write(yaml_banner + dump_yaml(schema))

    # Data entry templates:
    latest_schema = table_schemas[max_version]
    tsv_path = out_dir / get_tsv_name(args.type, is_assay=is_assay)
    with tsv_path.open('w') as tsv_file:
        tsv_file.write(generate_template_tsv(latest_schema))
    xlsx_path = out_dir / get_xlsx_name(args.type, is_assay=is_assay)
    create_xlsx(latest_schema, xlsx_path,
                idempotent=True, sheet_name='Export as TSV')
예제 #3
0
def main():
    '''
    Command-line entry point: generate the files for one schema type.

    Reads "type" and "target" from the command line, loads the schema for
    every known version of that type, and writes into the target directory:
      - README.md, the output of generate_readme_md;
      - v<version>.yaml, one dump per schema version;
      - a TSV and an XLSX data entry template built from the schema with
        the highest version.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('type', help='What type to generate')
    parser.add_argument(
        'target', type=dir_path, help='Directory to write output to')
    args = parser.parse_args()

    known_versions = dict_schema_versions()
    versions = sorted(known_versions[args.type])
    max_version = max(versions)

    # Only assay types have a directory schema; everything else is loaded
    # through get_other_schema and gets an empty one.
    is_assay = get_is_assay(args.type)
    if not is_assay:
        table_schemas = dict(
            (v, get_other_schema(args.type, v)) for v in versions)
        directory_schema = {}
    else:
        table_schemas = dict(
            (v, get_table_schema(args.type, v)) for v in versions)
        directory_schema = get_directory_schema(args.type)

    out_dir = Path(args.target)

    # README:
    with (out_dir / 'README.md').open('w') as readme:
        readme_md = generate_readme_md(
            table_schemas, directory_schema, args.type, is_assay=is_assay)
        readme.write(readme_md)

    # YAML:
    yaml_banner = '# Generated YAML: PRs should not start here!\n'
    for v in versions:
        with (out_dir / f'v{v}.yaml').open('w') as yaml_file:
            yaml_file.write(yaml_banner + dump_yaml(table_schemas[v]))

    # Data entry templates:
    latest_schema = table_schemas[max_version]
    tsv_path = out_dir / get_tsv_name(args.type, is_assay=is_assay)
    with tsv_path.open('w') as tsv_file:
        tsv_file.write(generate_template_tsv(latest_schema))
    xlsx_path = out_dir / get_xlsx_name(args.type, is_assay=is_assay)
    create_xlsx(latest_schema, xlsx_path,
                idempotent=True, sheet_name='Export as TSV')