Example #1
def test_move(tmppath):
    from clldutils.path import move

    dst = tmppath / 'a'
    dst.mkdir()
    src = make_file(tmppath, name='test')
    move(src, dst)
    assert not src.exists()
    assert dst.joinpath(src.name).exists()
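The test above relies on a make_file helper from clldutils' test fixtures. A minimal stand-in for running it independently might look like the following sketch (the default name and text arguments are assumptions for illustration):

def make_file(directory, name='testfile', text='content'):
    # create a small text file inside ``directory`` and return its Path
    path = directory / name
    path.write_text(text, encoding='utf8')
    return path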
Example #2
    def test_move(self):
        from clldutils.path import move

        dst = self.tmp_path('a')
        dst.mkdir()
        src = self.make_file('test')
        move(src, dst)
        self.assertFalse(src.exists())
        self.assertTrue(dst.joinpath(src.name).exists())
Example #3
def add_rows(fname, *rows):
    # Append ``rows`` to the file at ``fname``: any existing rows are copied to
    # a temporary file, the new rows are added, then the temporary file replaces
    # the original via move().
    with NamedTemporaryFile(delete=False) as fp:
        tmp = Path(fp.name)

    with UnicodeWriter(tmp) as writer:
        if fname.exists():
            with UnicodeReader(fname) as reader_:
                for row in reader_:
                    writer.writerow(row)
        writer.writerows(rows)
    move(tmp, fname)
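A minimal way to exercise add_rows, assuming the names used by the snippet (Path and move from clldutils.path, UnicodeReader/UnicodeWriter from clldutils.dsv, NamedTemporaryFile from tempfile) are in scope; the file name here is made up for illustration:

target = Path('values.csv')                    # hypothetical file name
add_rows(target, ['ID', 'Value'])              # creates the file with a header row
add_rows(target, ['1', 'foo'], ['2', 'bar'])   # appends rows, keeping existing content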
Example #4
File: util.py Project: clld/clld
def safe_overwrite(fname):
    # Yield a temporary path next to ``fname``; once control returns after the
    # yield, the temporary file replaces ``fname``.
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    tmp = fname.parent
    while tmp.exists():
        tmp = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield tmp
    if fname.exists():
        remove(fname)
    move(tmp, fname)
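The yield followed by cleanup suggests safe_overwrite is meant to be driven as a context manager. A sketch of standalone usage, wrapping it explicitly with contextlib.contextmanager (the output path is invented for illustration), could be:

from contextlib import contextmanager

with contextmanager(safe_overwrite)(Path('downloads') / 'data.json') as tmp:
    tmp.write_text('{"status": "ok"}', encoding='utf8')
# on exit, the temporary file replaces downloads/data.json via move()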
Example #5
    def create(self, req, filename=None, verbose=True):
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p.as_posix())

        if self.rdf:
            # we do not create archives with a readme for rdf downloads, because each
            # RDF entity points to the dataset and the void description of the dataset
            # covers all relevant metadata.
            #
            # TODO: write test for the file name things!?
            #
            with closing(
                    GzipFile(filename=Path(tmp.stem).stem,
                             fileobj=tmp.open('wb'))) as fp:
                self.before(req, fp)
                for i, item in enumerate(
                        page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
        else:
            with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                if not filename:
                    fp = self.get_stream()
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
                    zipfile.writestr(self.name, self.read_stream(fp))
                else:  # pragma: no cover
                    zipfile.write(filename, self.name)
                zipfile.writestr(
                    'README.txt',
                    README.format(
                        req.dataset.name,
                        '=' * (len(req.dataset.name) + len(' data download')),
                        req.dataset.license,
                        TxtCitation(None).render(req.dataset,
                                                 req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)
Example #6
def rewrite(fname, visitor, **kw):
    """Utility function to rewrite rows in tsv files.

    :param fname: Path of the dsv file to operate on.
    :param visitor: A callable that takes a line-number and a row as input and returns a \
    (modified) row or None to filter out the row.
    :param kw: Keyword parameters are passed through to csv.reader/csv.writer.
    """
    if not isinstance(fname, Path):
        assert isinstance(fname, string_types)
        fname = Path(fname)

    assert fname.is_file()
    with NamedTemporaryFile(delete=False) as fp:
        tmp = Path(fp.name)

    with UnicodeReader(fname, **kw) as reader_:
        with UnicodeWriter(tmp, **kw) as writer:
            for i, row in enumerate(reader_):
                row = visitor(i, row)
                if row is not None:
                    writer.writerow(row)
    move(tmp, fname)
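The docstring spells out the visitor contract: the callable receives the line number and the row, and returns either a (possibly modified) row or None to drop it. An illustrative visitor that removes the header row and upper-cases the first column (file name and delimiter chosen only for this example):

def drop_header_and_upcase(lineno, row):
    if lineno == 0:
        return None                # None filters the row out entirely
    row[0] = row[0].upper()
    return row

rewrite('languages.tsv', drop_header_and_upcase, delimiter='\t')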
Example #7
def lff2tree(api, log=logging.getLogger(__name__)):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    builddir = api.build_path('tree')
    old_tree = {l.id: l for l in api.languoids()}
    out = api.tree

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except:  # pragma: no cover
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError('please remove %s before proceeding' %
                                 builddir)
        # move the old tree out of the way
        move(out, builddir)
    out.mkdir()

    new = {}
    languages = {}
    languoids = {}

    def checked(l, lin):
        if l.id in languoids:
            print(l.id, lin, l)
        assert l.id not in languoids
        for n, gc, _level, hid in lin:
            if gc in languoids:
                if languoids[gc] != (n, _level, hid):
                    log.error('{0}: {1} vs {2}'.format(gc, languoids[gc],
                                                       (n, _level, hid)))
                    raise ValueError('inconsistent languoid data')
            else:
                languoids[gc] = (n, _level, hid)
        languoids[l.id] = (l.name, l.level, l.iso or l.hid)
        return l

    for lang, lineage in read_lff(api, log, new, Level.language,
                                  api.build_path('lff.txt')):
        languages[lang.id] = checked(lang, lineage)
        lang2tree(api, log, lang, lineage, out, old_tree)

    for lang, lineage in read_lff(api, log, new, Level.dialect,
                                  api.build_path('dff.txt')):
        lang = checked(lang, lineage)
        if not lang.lineage or lang.lineage[0][1] not in languages:
            log.error('missing language in dff: {0[0]} [{0[1]}]'.format(
                lang.lineage[0]))
            raise ValueError('invalid language referenced')

        lin = languages[lang.lineage[0][1]].lineage + lang.lineage
        lang2tree(api, log, lang, lin, out, old_tree)

    duplicates = False
    for name, getter in [('name', itemgetter(0)), ('hid', itemgetter(2))]:
        count = Counter(getter(spec) for spec in languoids.values())
        for thing, n in count.most_common():
            if thing is None:
                continue
            if n < 2:
                break
            log.error('duplicate {0}: {1} ({2})'.format(name, thing, n))
            duplicates = True
    if duplicates:
        raise ValueError('duplicates found')
Example #8
    def create(self, req, filename=None, verbose=True):
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p)

        language_url_pattern = self.route_url_pattern(req, 'language')

        with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
            tables = []
            for param in DBSession.query(Parameter).options(joinedload(Parameter.domain)):
                fname = '%s-%s.csv' % (req.dataset.id, param.id)
                zipfile.writestr(fname, self.get_values(param, language_url_pattern))
                tables.append({
                    '@type': 'Table',
                    'url': fname,
                    'notes': [
                        {
                            '@id': req.resource_url(param),
                            'dc:identifier': param.id,
                            'dc:title': param.name,
                            'dc:description': param.description or ''}] + [
                        {
                            '@type': 'DomainElement',
                            'name': de.name,
                            'description': de.description,
                            'numeric': de.number
                        } for de in param.domain
                    ],
                })

            md = CsvmJsonAdapter.csvm_basic_doc(req, tables=tables)
            md.update({
                '@type': 'TableGroup',
                'dc:language': list(self.get_languages(req, language_url_pattern)),
                'tableSchema': {
                    "columns": [
                        {
                            "name": "ID",
                            "datatype": "string",
                            "required": True
                        },
                        {
                            "name": "Language_ID",
                            "datatype": "string",
                            "required": True
                        },
                        {
                            "name": "Parameter_ID",
                            "datatype": "string",
                            "required": True
                        },
                        {
                            "name": "Contribution_ID",
                            "datatype": "string",
                            "required": True
                        },
                        {
                            "name": "Value",
                            "datatype": "string",
                            "required": True
                        },
                        {
                            "name": "Source",
                            "datatype": "string",
                        },
                        {
                            "name": "Comment",
                            "datatype": "string",
                        },
                    ],
                    "primaryKey": "ID",
                    'aboutUrl': self.route_url_pattern(req, 'value', '{ID}'),
                },
            })
            zipfile.writestr(
                '%s.csv-metadata.json' % req.dataset.id, json.dumps(md, indent=4))
            bib = Database([
                rec.bibtex() for rec in DBSession.query(Source).order_by(Source.name)])
            zipfile.writestr('%s.bib' % req.dataset.id, ('%s' % bib).encode('utf8'))
            zipfile.writestr(
                'README.txt',
                README.format(
                    req.dataset.name,
                    '=' * (
                        len(req.dataset.name)
                        + len(' data download')),
                    req.dataset.license,
                    TxtCitation(None).render(req.dataset, req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)