def test_move(tmppath):
    from clldutils.path import move

    dst = tmppath / 'a'
    dst.mkdir()
    src = make_file(tmppath, name='test')
    move(src, dst)
    assert not src.exists()
    assert dst.joinpath(src.name).exists()


def test_move(self):
    from clldutils.path import move

    dst = self.tmp_path('a')
    dst.mkdir()
    src = self.make_file('test')
    move(src, dst)
    self.assertFalse(src.exists())
    self.assertTrue(dst.joinpath(src.name).exists())


def add_rows(fname, *rows):
    # Reserve a temporary file name; the file itself is (re)written below.
    with NamedTemporaryFile(delete=False) as fp:
        tmp = Path(fp.name)

    with UnicodeWriter(tmp) as writer:
        # Copy any existing rows first, then append the new ones.
        if fname.exists():
            with UnicodeReader(fname) as reader_:
                for row in reader_:
                    writer.writerow(row)
        writer.writerows(rows)
    # Replace the original file with the rewritten temporary file.
    move(tmp, fname)


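# Usage sketch for add_rows above (illustrative only): the file name 'items.csv'
# and the row values are made up, and fname is assumed to be a Path, as the call
# to fname.exists() suggests.
def add_rows_example():
    from pathlib import Path

    fname = Path('items.csv')                          # hypothetical target file
    add_rows(fname, ['ID', 'Name'])                    # first call creates the file
    add_rows(fname, ['1', 'first'], ['2', 'second'])   # later calls append by rewriting
    # Each call writes to a temporary file and then replaces fname via move().

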
def safe_overwrite(fname):
    # Generator presumably wrapped with contextlib.contextmanager in its defining
    # module: yields a temporary path next to fname, then moves it into place.
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    # Find a temporary name in the target directory that does not exist yet.
    tmp = fname.parent
    while tmp.exists():
        tmp = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield tmp
    if fname.exists():
        remove(fname)
    move(tmp, fname)


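# Usage sketch for safe_overwrite above. Assumption not shown in the snippet:
# the generator is used as a context manager (e.g. wrapped with
# contextlib.contextmanager where it is defined); 'report.json' is a made-up target.
def safe_overwrite_example():
    with safe_overwrite('report.json') as tmp:
        # Write the new content to the temporary path first ...
        tmp.write_text('{"status": "ok"}', encoding='utf8')
    # ... on exit, any existing report.json is removed and tmp is moved into its place.

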
def create(self, req, filename=None, verbose=True):
    p = self.abspath(req)
    if not p.parent.exists():  # pragma: no cover
        p.parent.mkdir()
    tmp = Path('%s.tmp' % p.as_posix())

    if self.rdf:
        # we do not create archives with a readme for rdf downloads, because each
        # RDF entity points to the dataset and the void description of the dataset
        # covers all relevant metadata.
        #
        # TODO: write test for the file name things!?
        #
        with closing(GzipFile(
                filename=Path(tmp.stem).stem, fileobj=tmp.open('wb'))) as fp:
            self.before(req, fp)
            for i, item in enumerate(page_query(self.query(req), verbose=verbose)):
                self.dump(req, fp, item, i)
            self.after(req, fp)
    else:
        with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
            if not filename:
                fp = self.get_stream()
                self.before(req, fp)
                for i, item in enumerate(
                        page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
                zipfile.writestr(self.name, self.read_stream(fp))
            else:  # pragma: no cover
                zipfile.write(filename, self.name)
            zipfile.writestr(
                'README.txt',
                README.format(
                    req.dataset.name,
                    '=' * (len(req.dataset.name) + len(' data download')),
                    req.dataset.license,
                    TxtCitation(None).render(req.dataset, req)).encode('utf8'))
    if p.exists():  # pragma: no cover
        remove(p)
    move(tmp, p)


def rewrite(fname, visitor, **kw):
    """Utility function to rewrite rows in dsv files.

    :param fname: Path of the dsv file to operate on.
    :param visitor: A callable that takes a line-number and a row as input and returns a \
    (modified) row or None to filter out the row.
    :param kw: Keyword parameters are passed through to csv.reader/csv.writer.
    """
    if not isinstance(fname, Path):
        assert isinstance(fname, string_types)
        fname = Path(fname)
    assert fname.is_file()
    with NamedTemporaryFile(delete=False) as fp:
        tmp = Path(fp.name)

    with UnicodeReader(fname, **kw) as reader_:
        with UnicodeWriter(tmp, **kw) as writer:
            for i, row in enumerate(reader_):
                row = visitor(i, row)
                if row is not None:
                    writer.writerow(row)
    move(tmp, fname)


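# Usage sketch for rewrite above: a visitor that drops the header row and upper-cases
# the first column. The file name 'data.csv' and the transformation are hypothetical;
# the behaviour otherwise follows the docstring (return None to filter out a row).
def rewrite_example():
    def visitor(i, row):
        if i == 0:
            return None          # filter out the header row
        row[0] = row[0].upper()  # modify the row and return it
        return row

    rewrite('data.csv', visitor)

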
def lff2tree(api, log=logging.getLogger(__name__)):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    builddir = api.build_path('tree')
    old_tree = {l.id: l for l in api.languoids()}
    out = api.tree

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except:  # pragma: no cover
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError('please remove %s before proceeding' % builddir)
        # move the old tree out of the way
        move(out, builddir)
    out.mkdir()

    new = {}
    languages = {}
    languoids = {}

    def checked(l, lin):
        if l.id in languoids:
            print(l.id, lin, l)
        assert l.id not in languoids
        for n, gc, _level, hid in lin:
            if gc in languoids:
                if languoids[gc] != (n, _level, hid):
                    log.error(
                        '{0}: {1} vs {2}'.format(gc, languoids[gc], (n, _level, hid)))
                    raise ValueError('inconsistent languoid data')
            else:
                languoids[gc] = (n, _level, hid)
        languoids[l.id] = (l.name, l.level, l.iso or l.hid)
        return l

    for lang, lineage in read_lff(
            api, log, new, Level.language, api.build_path('lff.txt')):
        languages[lang.id] = checked(lang, lineage)
        lang2tree(api, log, lang, lineage, out, old_tree)

    for lang, lineage in read_lff(
            api, log, new, Level.dialect, api.build_path('dff.txt')):
        lang = checked(lang, lineage)
        if not lang.lineage or lang.lineage[0][1] not in languages:
            log.error('missing language in dff: {0[0]} [{0[1]}]'.format(
                lang.lineage[0]))
            raise ValueError('invalid language referenced')
        lin = languages[lang.lineage[0][1]].lineage + lang.lineage
        lang2tree(api, log, lang, lin, out, old_tree)

    duplicates = False
    for name, getter in [('name', itemgetter(0)), ('hid', itemgetter(2))]:
        count = Counter(getter(spec) for spec in languoids.values())
        for thing, n in count.most_common():
            if thing is None:
                continue
            if n < 2:
                break
            log.error('duplicate {0}: {1} ({2})'.format(name, thing, n))
            duplicates = True
    if duplicates:
        raise ValueError('duplicates found')


def create(self, req, filename=None, verbose=True):
    p = self.abspath(req)
    if not p.parent.exists():  # pragma: no cover
        p.parent.mkdir()
    tmp = Path('%s.tmp' % p)
    language_url_pattern = self.route_url_pattern(req, 'language')

    with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
        # One CSV file per parameter, plus per-table metadata collected for CSVW.
        tables = []
        for param in DBSession.query(Parameter).options(joinedload(Parameter.domain)):
            fname = '%s-%s.csv' % (req.dataset.id, param.id)
            zipfile.writestr(fname, self.get_values(param, language_url_pattern))
            tables.append({
                '@type': 'Table',
                'url': fname,
                'notes': [
                    {
                        '@id': req.resource_url(param),
                        'dc:identifier': param.id,
                        'dc:title': param.name,
                        'dc:description': param.description or ''}] + [
                    {
                        '@type': 'DomainElement',
                        'name': de.name,
                        'description': de.description,
                        'numeric': de.number,
                    } for de in param.domain],
            })

        # CSVW metadata describing the table group and the shared column schema.
        md = CsvmJsonAdapter.csvm_basic_doc(req, tables=tables)
        md.update({
            '@type': 'TableGroup',
            'dc:language': list(self.get_languages(req, language_url_pattern)),
            'tableSchema': {
                "columns": [
                    {"name": "ID", "datatype": "string", "required": True},
                    {"name": "Language_ID", "datatype": "string", "required": True},
                    {"name": "Parameter_ID", "datatype": "string", "required": True},
                    {"name": "Contribution_ID", "datatype": "string", "required": True},
                    {"name": "Value", "datatype": "string", "required": True},
                    {"name": "Source", "datatype": "string"},
                    {"name": "Comment", "datatype": "string"},
                ],
                "primaryKey": "ID",
                'aboutUrl': self.route_url_pattern(req, 'value', '{ID}'),
            },
        })
        zipfile.writestr(
            '%s.csv-metadata.json' % req.dataset.id, json.dumps(md, indent=4))

        # Add the BibTeX sources and a README to the archive.
        bib = Database([
            rec.bibtex() for rec in DBSession.query(Source).order_by(Source.name)])
        zipfile.writestr('%s.bib' % req.dataset.id, ('%s' % bib).encode('utf8'))
        zipfile.writestr(
            'README.txt',
            README.format(
                req.dataset.name,
                '=' * (len(req.dataset.name) + len(' data download')),
                req.dataset.license,
                TxtCitation(None).render(req.dataset, req)).encode('utf8'))
    if p.exists():  # pragma: no cover
        remove(p)
    move(tmp, p)