Esempio n. 1
0
    def test_write_read_archive(self):
        """Write a dataset into zip archives and read it back unchanged.

        Three read paths are compared against the original dataset:
        ``Dataset.from_zip`` on a shared archive, ``Dataset.from_metadata`` with
        an open archive as container, and ``Dataset.from_zip`` on the archive
        produced by ``write(..., archive=True)``.
        """
        from pycldf.dataset import Dataset
        from pycldf.util import Archive

        original = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
        target_dir = self.tmp_path()

        def assert_matches_original(loaded):
            # Rows are compared first, then metadata.
            self.assertEqual(original.rows, loaded.rows)
            self.assertEqual(original.metadata, loaded.metadata)

        # Writing an archive below the 'non-existing' path is expected to fail.
        with self.assertRaises(ValueError):
            original.write(target_dir.joinpath('non-existing'), '.tsv', archive=True)

        # Store two datasets in one archive: the original and a renamed copy.
        with Archive(self.tmp_path('archive.zip').as_posix(), 'w') as zipped:
            original.write('.', archive=zipped)
            renamed = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
            renamed.name = 'new_name'
            renamed.write('.', archive=zipped)

        # Select the original by name from the shared archive.
        assert_matches_original(
            Dataset.from_zip(self.tmp_path('archive.zip'), name='ds1'))

        # Read via the metadata file, using the open archive as container.
        with Archive(self.tmp_path('archive.zip')) as zipped:
            assert_matches_original(
                Dataset.from_metadata('ds1.csv-metadata.json', container=zipped))

        # Let write() create the archive itself and read that one back.
        original.write(target_dir, '.tsv', archive=True)
        assert_matches_original(Dataset.from_zip(target_dir.joinpath('ds1.zip')))
Esempio n. 2
0
    def test_dataset_from_file(self):
        """Load the ds1 fixture, add rows to it, and write it out as TSV."""
        from pycldf.dataset import Dataset

        dataset = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
        self.assertIn('ds1', repr(dataset))
        self.assertEqual(len(dataset), 2)
        self.assertEqual(dataset.table.url, 'ds1.csv')
        self.assertEqual(dataset.metadata['dc:creator'], 'The Author')

        # Adding this row is expected to fail until a source with key "new"
        # has been added to the dataset's sources.
        new_row = ['3', 'abcd1234', 'fid2', 'maybe', '', 'new[4]']
        with self.assertRaises(ValueError):
            dataset.add_row(new_row)

        dataset.sources.add('@book{new,\nauthor={new author}}')
        added = dataset.add_row(new_row)
        self.assertEqual(added.url, 'http://example.org/valuesets/3')
        self.assertEqual(len(added.refs), 1)
        language_url = added.valueUrl('Language_ID')
        self.assertEqual(
            language_url, 'http://glottolog.org/resource/languoid/id/abcd1234')

        # A row without a language id has no value URL for that column.
        sparse = dataset.add_row(['4', None, None, None, None, None])
        self.assertEqual(sparse.valueUrl('Language_ID'), None)

        target_dir = self.tmp_path()
        dataset.write(target_dir, '.tsv')
        self.assertTrue(target_dir.joinpath('ds1.bib').exists())
        metadata = load(target_dir.joinpath('ds1.tsv-metadata.json'))
        self.assertEqual('ds1.tsv', metadata['tables'][0]['url'])
        # Reading the freshly written file back must not raise.
        Dataset.from_file(target_dir.joinpath('ds1.tsv'))
Esempio n. 3
0
    def test_invalid_dataset_from_file(self):
        """Loading invalid.csv warns with skip_on_error and raises without it."""
        from pycldf.dataset import Dataset

        warn = Mock()
        fake_log = Mock(warn=warn)
        # With skip_on_error, problems are reported through the module logger.
        with patch('pycldf.dataset.log', fake_log):
            Dataset.from_file(FIXTURES.joinpath('invalid.csv'), skip_on_error=True)
            self.assertEqual(warn.call_count, 2)

        # Without skip_on_error, loading the same file fails.
        with self.assertRaises(ValueError):
            Dataset.from_file(FIXTURES.joinpath('invalid.csv'))
Esempio n. 4
0
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive
    """
    # args.args holds the positional command line arguments; the first one
    # is the dataset path.
    if not args.args:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    # <DATASET> must be a regular file; is_file() is also False when the path
    # does not exist, so no separate exists() check is needed.
    if not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    # Choose the loader from the file name: zip archive, metadata file, or
    # (as fallback) core data file.
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
Esempio n. 5
0
    def test_write_read(self):
        """Write a one-row dataset to CSV and read the row back by index and id."""
        from pycldf.dataset import Dataset, REQUIRED_FIELDS

        expected = ['1', 'abcd1234', 'fid', 'yes']
        written = Dataset('name')
        # Use only the first item of each REQUIRED_FIELDS entry as field.
        written.fields = tuple(spec[0] for spec in REQUIRED_FIELDS)
        written.add_row(expected)
        written.write(self.tmp_path())
        self.assertTrue(self.tmp_path('name.csv').exists())

        reloaded = Dataset.from_file(self.tmp_path('name.csv'))
        # The row is reachable both by position and by its ID value.
        by_position = list(reloaded[0].values())
        self.assertEqual(by_position, expected)
        by_id = list(reloaded['1'].values())
        self.assertEqual(by_id, expected)