Code example #1
File: test_dataset.py  Project: glottobank/pycldf
def test_Dataset_from_scratch(tmpdir, data):
    # An unknown file name cannot be used with Dataset.from_data:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'xyz.csv'))
    with pytest.raises(ValueError):
        Dataset.from_data(str(tmpdir / 'xyz.csv'))

    # Known file name, but non-standard column name:
    write_text(str(tmpdir / 'values.csv'), "IDX,Language_ID,Parameter_ID,Value\n1,1,1,1")
    with pytest.raises(ValueError, match='missing columns'):
        ds = Dataset.from_data(str(tmpdir / 'values.csv'))

    # A known file name will determine the CLDF module of the dataset:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'values.csv'))
    ds = Dataset.from_data(str(tmpdir / 'values.csv'))
    assert ds.module == 'StructureDataset'

    assert len(list(ds['ValueTable'])) == 2
    ds.validate()
    ds['ValueTable'].write(2 * list(ds['ValueTable']))
    with pytest.raises(ValueError):
        ds.validate()
    md = ds.write_metadata()
    Dataset.from_metadata(md)
    repr(ds)
    del ds.tablegroup.common_props['dc:conformsTo']
    Dataset.from_metadata(ds.write_metadata())
    assert len(ds.stats()) == 1

    ds.add_table('extra.csv', 'ID')
    ds.write(**{'ValueTable': [], 'extra.csv': []})
    counts = {r[0]: r[2] for r in ds.stats()}
    assert counts['extra.csv'] == 0
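
These snippets are excerpted from their projects' test and helper modules and omit their imports. A plausible preamble for running them locally (assuming pycldf is installed, plus an older clldutils release that still ships clldutils.path.write_text) would be:

import warnings
from shutil import copy
from pathlib import Path

import pytest
from clldutils.path import write_text  # older clldutils helper; Path(...).write_text(...) is the modern equivalent
from clldutils.clilib import ParserError  # used by the _get_dataset examples below
from pycldf import Dataset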
Code example #2
def test_Dataset_from_scratch(tmpdir, data):
    # An unknown file name cannot be used with Dataset.from_data:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'xyz.csv'))
    with pytest.raises(ValueError):
        Dataset.from_data(str(tmpdir / 'xyz.csv'))

    # Known file name, but non-standard column name:
    Path(str(tmpdir / 'values.csv')).write_text(
        "IDX,Language_ID,Parameter_ID,Value\n1,1,1,1", encoding='utf-8')
    with pytest.raises(ValueError, match='missing columns'):
        ds = Dataset.from_data(str(tmpdir / 'values.csv'))

    # A known file name will determine the CLDF module of the dataset:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'values.csv'))
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        ds = Dataset.from_data(str(tmpdir / 'values.csv'))
        assert ds.module == 'StructureDataset'

        assert len(list(ds['ValueTable'])) == 2
        ds.validate()
        ds['ValueTable'].write(2 * list(ds['ValueTable']))
        with pytest.raises(ValueError):
            ds.validate()
        md = ds.write_metadata()
        Dataset.from_metadata(md)
        repr(ds)
        del ds.tablegroup.common_props['dc:conformsTo']
        Dataset.from_metadata(ds.write_metadata())
        assert len(ds.stats()) == 1

    ds.add_table('extra.csv', 'ID')
    ds.write(**{'ValueTable': [], 'extra.csv': []})
    counts = {r[0]: r[2] for r in ds.stats()}
    assert counts['extra.csv'] == 0
Code example #3
File: test_dataset.py  Project: afcarl/pycldf
def test_Dataset_from_scratch(tmpdir, data):
    # An unknown file name cannot be used with Dataset.from_data:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'xyz.csv'))
    with pytest.raises(ValueError):
        Dataset.from_data(str(tmpdir / 'xyz.csv'))

    # Known file name, but non-standard column name:
    write_text(str(tmpdir / 'values.csv'),
               "IDX,Language_ID,Parameter_ID,Value\n1,1,1,1")
    with pytest.raises(ValueError, match='missing columns'):
        ds = Dataset.from_data(str(tmpdir / 'values.csv'))

    # A known file name will determine the CLDF module of the dataset:
    copy(str(data / 'ds1.csv'), str(tmpdir / 'values.csv'))
    ds = Dataset.from_data(str(tmpdir / 'values.csv'))
    assert ds.module == 'StructureDataset'

    assert len(list(ds['ValueTable'])) == 2
    ds.validate()
    ds['ValueTable'].write(2 * list(ds['ValueTable']))
    with pytest.raises(ValueError):
        ds.validate()
    md = ds.write_metadata()
    Dataset.from_metadata(md)
    repr(ds)
    del ds.tablegroup.common_props['dc:conformsTo']
    Dataset.from_metadata(ds.write_metadata())
    assert len(ds.stats()) == 1
Code example #4
File: lexstat.py  Project: Anaphory/lingpycldf
def get_dataset(fname):
    """Load a CLDF dataset.

    Load the file either as a JSON CLDF metadata description file or as a
    metadata-free dataset contained in a single CSV file.

    The distinction is made based on the file extension: `.json` files are
    loaded as metadata descriptions; all other files are matched against the
    CLDF module specifications. Directories are checked for the presence of
    any CLDF dataset, in an undefined order of the dataset types.

    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    Dataset
    """
    fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError(
            '{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
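
A minimal usage sketch for get_dataset; the file names below are hypothetical, not taken from the project:

# A .json path is treated as a CLDF metadata description,
# anything else as a metadata-free single-table dataset.
ds = get_dataset('Wordlist-metadata.json')  # -> Dataset.from_metadata
ds = get_dataset('values.csv')              # -> Dataset.from_data
print(ds.module)                            # e.g. 'StructureDataset'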
Code example #5
File: __main__.py  Project: Anaphory/pycldf
def _get_dataset(args):
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Code example #6
File: __main__.py  Project: glottobank/pycldf
def _get_dataset(args):
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
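
In both variants, args is the parsed-arguments object supplied by the command-line framework (presumably clldutils.clilib in pycldf's case), with the positional arguments collected in args.args. For illustration only, any object with that attribute works; a hedged sketch with a hypothetical file name:

from types import SimpleNamespace

ds = _get_dataset(SimpleNamespace(args=['values.csv']))
print(ds.module)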
Code example #7
def test_validators(tmpdir, mocker, data):
    copy(str(data / 'invalid.csv'), str(tmpdir / 'values.csv'))
    ds = Dataset.from_data(str(tmpdir / 'values.csv'))

    with pytest.raises(ValueError):
        ds.validate()

    log = mocker.Mock()
    ds.validate(log=log)
    assert log.warn.call_count == 2

    for col in ds.tablegroup.tables[0].tableSchema.columns:
        if col.name == 'Language_ID':
            col.propertyUrl.uri = 'http://cldf.clld.org/v1.0/terms.rdf#glottocode'

    log = mocker.Mock()
    ds.validate(log=log)
    assert log.warn.call_count == 4
Code example #8
File: test_dataset.py  Project: glottobank/pycldf
def test_validators(tmpdir, mocker, data):
    copy(str(data / 'invalid.csv'), str(tmpdir / 'values.csv'))
    ds = Dataset.from_data(str(tmpdir / 'values.csv'))

    with pytest.raises(ValueError):
        ds.validate()

    log = mocker.Mock()
    ds.validate(log=log)
    assert log.warn.call_count == 2

    for col in ds.tablegroup.tables[0].tableSchema.columns:
        if col.name == 'Language_ID':
            col.propertyUrl.uri = 'http://cldf.clld.org/v1.0/terms.rdf#glottocode'

    log = mocker.Mock()
    ds.validate(log=log)
    assert log.warn.call_count == 4
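
In these tests a mocker.Mock() stands in for the log so that the number of warn() calls can be asserted. Outside a test, any object with a warn() method can be passed; a sketch using the standard library logging module (not taken from the source):

import logging

log = logging.getLogger('pycldf')
ds.validate(log=log)  # violations are reported via log.warn(...) instead of raising ValueError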
Code example #9
def test_Dataset_from_data_empty_file(tmpdir):
    Path(str(tmpdir / 'values.csv')).write_text('', encoding='utf-8')
    with pytest.raises(ValueError, match='empty data file'):
        Dataset.from_data(str(tmpdir / 'values.csv'))
Code example #10
File: test_dataset.py  Project: afcarl/pycldf
def test_Dataset_from_data_empty_file(tmpdir):
    write_text(str(tmpdir / 'values.csv'), '')
    with pytest.raises(ValueError, match='empty data file'):
        Dataset.from_data(str(tmpdir / 'values.csv'))
Code example #11
File: test_dataset.py  Project: glottobank/pycldf
def test_Dataset_from_data_empty_file(tmpdir):
    write_text(str(tmpdir / 'values.csv'), '')
    with pytest.raises(ValueError, match='empty data file'):
        Dataset.from_data(str(tmpdir / 'values.csv'))