def test_write_read_archive(self):
    """Round-trip a dataset through zip archives via several code paths."""
    from pycldf.dataset import Dataset
    from pycldf.util import Archive

    ds = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
    out = self.tmp_path()

    # Writing into a non-existing directory must be rejected.
    with self.assertRaises(ValueError):
        ds.write(out.joinpath('non-existing'), '.tsv', archive=True)

    # Two differently named datasets can share one archive.
    with Archive(self.tmp_path('archive.zip').as_posix(), 'w') as archive:
        ds.write('.', archive=archive)
        renamed = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
        renamed.name = 'new_name'
        renamed.write('.', archive=archive)

    # Read back by dataset name from the zip ...
    ds_out = Dataset.from_zip(self.tmp_path('archive.zip'), name='ds1')
    self.assertEqual(ds.rows, ds_out.rows)
    self.assertEqual(ds.metadata, ds_out.metadata)

    # ... and via the metadata file inside an opened archive container.
    with Archive(self.tmp_path('archive.zip')) as archive:
        ds_out = Dataset.from_metadata('ds1.csv-metadata.json', container=archive)
        self.assertEqual(ds.rows, ds_out.rows)
        self.assertEqual(ds.metadata, ds_out.metadata)

    # write(..., archive=True) creates a zip named after the dataset.
    ds.write(out, '.tsv', archive=True)
    ds_out = Dataset.from_zip(out.joinpath('ds1.zip'))
    self.assertEqual(ds.rows, ds_out.rows)
    self.assertEqual(ds.metadata, ds_out.metadata)
def test_dataset_from_file(self):
    """Reading ds1.csv exposes the expected metadata, rows and URL expansion."""
    from pycldf.dataset import Dataset

    ds = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
    self.assertIn('ds1', repr(ds))
    self.assertEqual(len(ds), 2)
    self.assertEqual(ds.table.url, 'ds1.csv')
    self.assertEqual(ds.metadata['dc:creator'], 'The Author')

    row = ['3', 'abcd1234', 'fid2', 'maybe', '', 'new[4]']
    # The row cites source "new" which is not registered yet -> error.
    with self.assertRaises(ValueError):
        ds.add_row(row)
    ds.sources.add('@book{new,\nauthor={new author}}')
    res = ds.add_row(row)
    self.assertEqual(res.url, 'http://example.org/valuesets/3')
    self.assertEqual(len(res.refs), 1)
    self.assertEqual(
        res.valueUrl('Language_ID'),
        'http://glottolog.org/resource/languoid/id/abcd1234')

    # A missing value yields no expanded URL.
    res = ds.add_row(['4', None, None, None, None, None])
    self.assertEqual(res.valueUrl('Language_ID'), None)

    # Write as TSV and verify sources, metadata and re-readability.
    out = self.tmp_path()
    ds.write(out, '.tsv')
    self.assertTrue(out.joinpath('ds1.bib').exists())
    md = load(out.joinpath('ds1.tsv-metadata.json'))
    self.assertEqual('ds1.tsv', md['tables'][0]['url'])
    Dataset.from_file(out.joinpath('ds1.tsv'))
def test_invalid_dataset_from_file(self):
    """Invalid rows are skipped with warnings when requested, else raise."""
    from pycldf.dataset import Dataset

    logger = Mock(warn=Mock())
    with patch('pycldf.dataset.log', logger):
        # skip_on_error=True turns the two bad rows into two warnings.
        Dataset.from_file(FIXTURES.joinpath('invalid.csv'), skip_on_error=True)
        self.assertEqual(logger.warn.call_count, 2)

    # Without skip_on_error the first invalid row raises.
    with self.assertRaises(ValueError):
        Dataset.from_file(FIXTURES.joinpath('invalid.csv'))
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive

    :param args: parsed CLI arguments; ``args.args[0]`` is the dataset path.
    :raises ParserError: if no path was given, or the path is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # Fixed: the message previously said "directory", but the check above
        # requires an existing *file*.
        raise ParserError('%s is not an existing file' % fname)

    # Dispatch on the kind of path given: zip archive, metadata file, or
    # core data file.
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)

    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
def test_write_read(self):
    """A freshly built dataset survives a write/read round trip."""
    from pycldf.dataset import Dataset, REQUIRED_FIELDS

    values = ['1', 'abcd1234', 'fid', 'yes']
    ds = Dataset('name')
    ds.fields = tuple(spec[0] for spec in REQUIRED_FIELDS)
    ds.add_row(values)
    ds.write(self.tmp_path())
    self.assertTrue(self.tmp_path('name.csv').exists())

    reread = Dataset.from_file(self.tmp_path('name.csv'))
    # Rows are addressable both by position and by ID.
    self.assertEqual(list(reread[0].values()), values)
    self.assertEqual(list(reread['1'].values()), values)