def get_dataset(p):
    """Load the CLDF dataset located at ``p``.

    A path whose suffix is ``.json`` is handed to ``Dataset.from_metadata``;
    any other path is handed to ``Dataset.from_data``.

    :param p: Path-like object exposing a ``suffix`` attribute.
    :return: Whatever the selected ``Dataset`` constructor returns.
    :raises argparse.ArgumentTypeError: If loading raises ``ValueError``.
        NOTE(review): presumably this function is used as an argparse
        ``type=`` callable -- confirm against the caller.
    """
    try:
        if p.suffix == '.json':
            return Dataset.from_metadata(p)
        return Dataset.from_data(p)
    except ValueError:
        # Translate the loader's ValueError into the argparse error type.
        message = 'Invalid CLDF dataset spec: {0}!'.format(p)
        raise argparse.ArgumentTypeError(message)
def get_dataset(fname=None):
    """Load a CLDF dataset.

    The file is loaded either as a `json` CLDF metadata description file or
    as a metadata-free dataset contained in a single csv file. Which of the
    two applies is decided by the file extension: `.json` files are loaded as
    metadata descriptions, every other file is matched against the CLDF
    module specifications. Directories are checked for the presence of any
    CLDF datasets in undefined order of the dataset types.

    Parameters
    ----------
    fname : str or Path, optional
        Path to a CLDF dataset. When omitted (``None``), the module-level
        ``repository`` is used as is.

    Returns
    -------
    pycldf.Dataset

    Raises
    ------
    FileNotFoundError
        If the resolved path does not exist.

    """
    # Only a caller-supplied value is wrapped in Path; the default is used
    # unchanged.
    path = repository if fname is None else Path(fname)

    if not path.exists():
        raise FileNotFoundError('{:} does not exist'.format(path))

    loader = Dataset.from_metadata if path.suffix == '.json' else Dataset.from_data
    return loader(path)
def metadatafree_dataset(tmp_path):
    """Create a two-row ``values.csv`` in ``tmp_path`` and load it as a dataset.

    The file holds one header row plus two value rows (languages
    ``abcd1235`` and ``abcd1234``, both for parameter ``param1``) and is
    loaded with ``Dataset.from_data``, i.e. without a metadata file.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    outside this view -- confirm.
    """
    csv_path = tmp_path / 'values.csv'
    # No trailing newline after the last row.
    csv_path.write_text(
        "ID,Language_ID,Parameter_ID,Value\n"
        "1,abcd1235,param1,val1\n"
        "2,abcd1234,param1,val2",
        encoding='utf8')
    return Dataset.from_data(csv_path)
def test_MultiParameter(metadatafree_dataset, StructureDataset, glottolog, tmp_path):
    """Exercise ``MultiParameter`` with several datasets and parameter lists."""

    def languoids_by_id():
        # Built afresh on every call so each MultiParameter gets its own dict.
        return {lg.id: lg for lg in glottolog.languoids()}

    # Construction alone is the check here; the instance is not inspected.
    MultiParameter(metadatafree_dataset, ['param1'], glottolog=languoids_by_id())

    two_params = MultiParameter(StructureDataset, ['B', 'C'])
    for language, values_by_param in two_params.iter_languages():
        # Only the first yielded language is examined.
        assert language.name == 'Bengali'
        assert values_by_param['C'][0].v == 'C-1'
        assert values_by_param['C'][0].code == '1'
        break

    # Requested language properties show up among the parameters.
    with_family = MultiParameter(
        StructureDataset, ['B'], language_properties=['Family_name'])
    assert 'Family_name' in with_family.parameters

    # An empty parameter list still yields the '__language__' entry.
    languages_only = MultiParameter(StructureDataset, [])
    assert '__language__' in languages_only.parameters

    # Ten rows with the values 1..10 for a single parameter; the assertion
    # below expects that parameter to be typed as CONTINUOUS.
    csv_path = tmp_path / 'values.csv'
    csv_path.write_text(
        "ID,Language_ID,Parameter_ID,Value\n"
        "1,abcd1235,param1,1\n"
        "2,abcd1235,param1,2\n"
        "3,abcd1235,param1,3\n"
        "4,abcd1235,param1,4\n"
        "5,abcd1235,param1,5\n"
        "6,abcd1235,param1,6\n"
        "7,abcd1235,param1,7\n"
        "8,abcd1235,param1,8\n"
        "9,abcd1235,param1,9\n"
        "10,abcd1234,param1,10",
        encoding='utf8')
    numeric_ds = Dataset.from_data(csv_path)

    numeric = MultiParameter(numeric_ds, ['param1'], glottolog=languoids_by_id())
    assert list(numeric.parameters.values())[0].type == CONTINUOUS

    # The CSV references two distinct language IDs.
    no_params = MultiParameter(numeric_ds, [], glottolog=languoids_by_id())
    assert len(no_params.languages) == 2
def get_dataset(args):
    """Load the CLDF dataset referenced by ``args.dataset``.

    :param args: Object with a ``dataset`` attribute that exposes ``suffix``.
    :return: The result of ``Dataset.from_metadata`` when the suffix is
        ``.json``, otherwise the result of ``Dataset.from_data``.
    """
    source = args.dataset
    if source.suffix != '.json':
        return Dataset.from_data(source)
    return Dataset.from_metadata(source)