Beispiel #1
0
def get_dataset(p):
    """Load the CLDF dataset located at `p`.

    A path whose suffix is `.json` is passed to `Dataset.from_metadata`;
    any other path is passed to `Dataset.from_data`.

    Parameters
    ----------
    p : path-like object exposing a `.suffix` attribute (e.g. pathlib.Path)
        Location of the dataset.

    Returns
    -------
    The object returned by the selected `Dataset` constructor.

    Raises
    ------
    argparse.ArgumentTypeError
        If a `ValueError` is raised while loading. The original error is
        attached as `__cause__` so the root cause stays visible in
        tracebacks.
    """
    try:
        if p.suffix == '.json':
            return Dataset.from_metadata(p)
        return Dataset.from_data(p)
    except ValueError as err:
        # Chain explicitly: the ValueError is the direct cause of the
        # argument error, not an unrelated failure during handling.
        raise argparse.ArgumentTypeError(
            'Invalid CLDF dataset spec: {0}!'.format(p)) from err
Beispiel #2
0
def get_dataset(fname=None):
    """Open a CLDF dataset from a path.

    A path ending in `.json` is read as a CLDF metadata description via
    `Dataset.from_metadata`; every other existing path (a metadata-free
    data file or a directory) is handed to `Dataset.from_data`.

    Parameters
    ----------
    fname : str or Path, optional
        Path to a CLDF dataset. When omitted, the module-level
        `repository` value is used as-is.

    Returns
    -------
    pycldf.Dataset

    Raises
    ------
    FileNotFoundError
        If the resolved path does not exist.
    """
    path = repository if fname is None else Path(fname)

    # Fail early with a clear message instead of a loader-specific error.
    if not path.exists():
        raise FileNotFoundError('{:} does not exist'.format(path))

    # The file extension alone decides which constructor is used.
    load = (Dataset.from_metadata if path.suffix == '.json'
            else Dataset.from_data)
    return load(path)
Beispiel #3
0
def metadatafree_dataset(tmp_path):
    """Create a two-row `values.csv` in `tmp_path` and load it.

    The file is written as UTF-8 and then passed to `Dataset.from_data`,
    whose result is returned.
    """
    csv_content = """\
ID,Language_ID,Parameter_ID,Value
1,abcd1235,param1,val1
2,abcd1234,param1,val2"""
    csv_path = tmp_path / 'values.csv'
    csv_path.write_text(csv_content, encoding='utf8')
    return Dataset.from_data(csv_path)
Beispiel #4
0
def test_MultiParameter(metadatafree_dataset, StructureDataset, glottolog,
                        tmp_path):
    """Exercise `MultiParameter` on structure and metadata-free datasets."""

    def languoids_by_id():
        # Build a fresh id -> languoid mapping for each consumer.
        return {lg.id: lg for lg in glottolog.languoids()}

    # Construction alone must succeed for a metadata-free dataset.
    MultiParameter(metadatafree_dataset, ['param1'],
                   glottolog=languoids_by_id())

    two_params = MultiParameter(StructureDataset, ['B', 'C'])
    for lang, values in two_params.iter_languages():
        # Only the first yielded language is inspected.
        assert lang.name == 'Bengali'
        first_c = values['C'][0]
        assert first_c.v == 'C-1'
        assert first_c.code == '1'
        break

    with_lang_prop = MultiParameter(StructureDataset, ['B'],
                                    language_properties=['Family_name'])
    assert 'Family_name' in with_lang_prop.parameters

    no_params = MultiParameter(StructureDataset, [])
    assert '__language__' in no_params.parameters

    # Second metadata-free dataset: ten numeric values over two languages.
    csv_content = """\
ID,Language_ID,Parameter_ID,Value
1,abcd1235,param1,1
2,abcd1235,param1,2
3,abcd1235,param1,3
4,abcd1235,param1,4
5,abcd1235,param1,5
6,abcd1235,param1,6
7,abcd1235,param1,7
8,abcd1235,param1,8
9,abcd1235,param1,9
10,abcd1234,param1,10"""
    csv_path = tmp_path / 'values.csv'
    csv_path.write_text(csv_content, encoding='utf8')
    numeric_ds = Dataset.from_data(csv_path)

    numeric_mp = MultiParameter(numeric_ds, ['param1'],
                                glottolog=languoids_by_id())
    parameter_list = list(numeric_mp.parameters.values())
    assert parameter_list[0].type == CONTINUOUS

    languages_only = MultiParameter(numeric_ds, [],
                                    glottolog=languoids_by_id())
    assert len(languages_only.languages) == 2
Beispiel #5
0
def get_dataset(args):
    """Load the dataset referenced by `args.dataset`.

    A `.json` suffix selects `Dataset.from_metadata`; any other suffix
    selects `Dataset.from_data`. The chosen constructor is called with
    `args.dataset` and its result is returned.
    """
    path = args.dataset
    loader = (Dataset.from_metadata if path.suffix == '.json'
              else Dataset.from_data)
    return loader(path)