예제 #1
0
def download_and_unpack_zipfiles(url, dataset, *paths):
    """Download a zip archive and unpack selected members into ``dataset.raw``."""
    with TemporaryDirectory() as tmpdir:
        # Download once into a scratch location, then extract from there.
        archive_path = tmpdir.joinpath('ds.zip')
        urlretrieve(url, archive_path.as_posix())
        with zipfile.ZipFile(archive_path.as_posix()) as archive:
            for member in paths:
                archive.extract(as_posix(member), path=tmpdir.as_posix())
                copy(tmpdir.joinpath(member), dataset.raw)
예제 #2
0
def download(dataset):
    """Download the Excel data file for ``NAME`` and convert it to CSV.

    The zip archive is fetched into a temporary directory, the single
    ``NAME + '.xlsx'`` member is extracted, and the spreadsheet is converted
    to CSV files in ``dataset.raw``.

    :param dataset: dataset object providing a ``raw`` output directory.
    """
    with TemporaryDirectory() as tmpdir:
        # Hoist the archive path; it is needed for both download and opening.
        target = tmpdir.joinpath('gbd.zip')
        urlretrieve(
            'http://www.evolution.reading.ac.uk/Files/%s.zip' % NAME,
            target.as_posix())
        # `archive` instead of `zip`: do not shadow the builtin.
        with ZipFile(target.as_posix()) as archive:
            archive.extract(NAME + '.xlsx', tmpdir.as_posix())
        xls2csv(tmpdir.joinpath(NAME + '.xlsx'), outdir=dataset.raw)
예제 #3
0
def iter_tables(zippath=None):
    """Yield a ``Table`` for every archive member matching ``TABLE_NAME_PATTERN``.

    :param zippath: optional path to an already-downloaded archive; when \
    falsy, the tables are downloaded into a temporary directory first.
    """
    with TemporaryDirectory() as tmp:
        path = zippath or download_tables(tmp)
        with ZipArchive(path) as archive:
            for member in archive.namelist():
                m = TABLE_NAME_PATTERN.search(member)
                if m:
                    yield Table(m.group('name_and_date'), archive.read_text(member))
예제 #4
0
    def download_and_unpack(self, url, *paths, **kw):
        """
        Download a zipfile and immediately unpack selected content.

        :param url: URL of the zip archive to download.
        :param paths: archive member names to extract; each extracted member \
        is copied into this directory.
        :param kw: may contain ``log``, which is forwarded to \
        ``temp_download``.
        :return: None
        """
        with self.temp_download(url, 'ds.zip', log=kw.pop('log',
                                                          None)) as zipp:
            with TemporaryDirectory() as tmpdir:
                with zipfile.ZipFile(zipp.as_posix()) as zipf:
                    for path in paths:
                        # Extract into the scratch dir first, then copy the
                        # member into place.
                        zipf.extract(as_posix(path), path=tmpdir.as_posix())
                        copy(tmpdir.joinpath(path), self)
예제 #5
0
    def download_and_unpack(self, url, *paths, **kw):
        """
        Download a zipfile and immediately unpack selected content.

        :param url: URL of the zip archive to download.
        :param paths: archive member names to extract; when empty, every \
        member is extracted.
        :param kw: may contain ``log``, forwarded to ``temp_download``.
        """
        log = kw.pop('log', None)
        with self.temp_download(url, 'ds.zip', log=log) as zipp:
            with TemporaryDirectory() as tmpdir:
                with zipfile.ZipFile(str(zipp)) as archive:
                    # Hoist membership testing into a set; empty means "all".
                    wanted = set(paths)
                    for member in archive.infolist():
                        if wanted and member.filename not in wanted:
                            continue
                        archive.extract(member, path=str(tmpdir))
                        shutil.copy(str(tmpdir.joinpath(member.filename)),
                                    str(self))
예제 #6
0
    def retrieve(self, item, cdstar_catalog, checksums, mediacatalog):
        """
        - download
        - compute checksum
        - upload to CDSTAR
        - add to cdstar.json

        :param item: staged image record; must provide ``fields()`` and \
        ``csv_row()`` — presumably a Staged_images instance (see comment below).
        :param cdstar_catalog: catalog used to upload new media files.
        :param checksums: collection of checksums already seen, used for \
        duplicate detection.
        :param mediacatalog: catalog of media objects already uploaded; \
        must support ``items`` membership tests and ``add``.
        :return: Image instance, or None when the item has no source URL or \
        the download fails.
        :raises ValueError: if the item's checksum is already in ``checksums``.
        """
        md = self.metadata(item) or {}
        source_url = md.pop('source_url', None)
        if not source_url:
            # Nothing to retrieve for this item.
            return
        # We turn the Staged_images instance into a `dict`, which we will enrich and then
        # turn into an Images instance.
        item = dict(zip(item.fields(), item.csv_row()))
        with TemporaryDirectory() as tmp:
            if isinstance(source_url, Path):
                # Local file: copy it into the temp dir instead of downloading.
                fname = tmp.joinpath(source_url.name)
                copy(source_url, fname)
            else:
                # download the thing
                fname = self._download(source_url, tmp)
                if not fname:
                    # Download failed; give up on this item.
                    return
            checksum = md5(fname)
            if checksum in checksums:
                raise ValueError('duplicate item {0} {1}'.format(item['id'], checksum))
            item.update(md)
            # The checksum doubles as the stable identifier of the image.
            item['id'] = checksum
            item['collection'] = 'Tsammalex'
            img = Images.fromdict(item)
            if checksum not in mediacatalog.items:
                # now upload to CDSTAR
                _, _, obj = list(cdstar_catalog.create(fname, item))[0]
                mediacatalog.add(obj)
            return img
예제 #7
0
def test_TemporaryDirectory():
    from clldutils.path import TemporaryDirectory

    # The directory exists while the context manager is active ...
    with TemporaryDirectory() as d:
        assert d.exists()
    # ... and has been removed once the context exits.
    assert not d.exists()
예제 #8
0
def download(dataset):
    """Fetch the source spreadsheet from ``URL`` and convert it to CSV in ``dataset.raw``."""
    with TemporaryDirectory() as tmpdir:
        workbook = tmpdir.joinpath('ds.xlsm')
        urlretrieve(URL, workbook.as_posix())
        xls2csv(workbook, outdir=dataset.raw)
예제 #9
0
    def create(self, req, filename=None, verbose=True, outfile=None):
        """
        Build a CLDF dataset for ``req.dataset`` and package it as a zip archive.

        :param req: current (pyramid-style) request; used for database queries, \
        URL construction and dataset metadata.
        :param filename: unused in this method — NOTE(review): presumably kept \
        for interface compatibility; confirm against callers.
        :param verbose: when truthy, print progress messages per exported table.
        :param outfile: target path for the archive; defaults to \
        ``self.abspath(req)``.
        """
        cldf_cfg = req.registry.getUtility(ICldfConfig)

        with TemporaryDirectory() as tmpd:
            # Instantiate the configured CLDF module in the scratch directory.
            cls = getattr(dataset, cldf_cfg.module)
            ds = cls.in_dir(tmpd)
            # Dublin Core / DCAT metadata taken from the database's dataset record.
            ds.properties['dc:bibliographicCitation'] = text_citation(
                req, req.dataset)
            ds.properties['dc:publisher'] = '%s, %s' % (
                req.dataset.publisher_name, req.dataset.publisher_place)
            ds.properties['dc:license'] = req.dataset.license
            ds.properties['dc:issued'] = req.dataset.published.isoformat()
            ds.properties['dc:title'] = req.dataset.name
            ds.properties['dc:creator'] = req.dataset.formatted_editors()
            ds.properties['dc:identifier'] = req.resource_url(req.dataset)
            ds.properties['dcat:accessURL'] = req.route_url('download')
            # Only add optional components when the database has matching rows.
            if DBSession.query(Sentence).count():
                ds.add_component('ExampleTable')
            if DBSession.query(DomainElement).count():
                ds.add_component('CodeTable', {
                    'name': 'Number',
                    'datatype': 'integer'
                })
            ds.add_component('ParameterTable')
            ds.add_component('LanguageTable')
            ds.add_table('contributions.csv', 'ID', 'Name', 'Description',
                         'Contributors')
            # Link each value row to its contribution via a foreign key plus a
            # valueUrl template pointing back at the web app.
            ds.add_columns(
                ds.primary_table,
                Column.fromvalue({
                    'name':
                    'Contribution_ID',
                    'datatype':
                    'string',
                    'valueUrl':
                    url_template(req, 'contribution', 'contribution').uri,
                }))
            ds.add_foreign_key(ds.primary_table, 'Contribution_ID',
                               'contributions.csv', 'ID')
            ds['LanguageTable'].aboutUrl = url_template(req, 'language', 'ID')
            ds['ParameterTable'].aboutUrl = url_template(
                req, 'parameter', 'ID')
            ds[ds.primary_table].aboutUrl = url_template(req, 'value', 'ID')

            # Give the app-specific configuration a chance to adapt the schema.
            cldf_cfg.custom_schema(req, ds)

            for src in cldf_cfg.query(Source):
                ds.sources.add(cldf_cfg.convert(Source, src, req))
            fname = outfile or self.abspath(req)

            transaction.abort()

            # Export each table in its own transaction —
            # NOTE(review): presumably to release database resources between
            # large exports; confirm before changing.
            tabledata = defaultdict(list)
            for table, model in [
                ('ParameterTable', Parameter),
                ('CodeTable', DomainElement),
                ('LanguageTable', Language),
                ('ExampleTable', Sentence),
                ('contributions.csv', Contribution),
                (ds.primary_table, Value),
            ]:
                if verbose:
                    print('exporting {0} ...'.format(model))
                transaction.begin()
                for item in cldf_cfg.query(model):
                    tabledata[table].append(cldf_cfg.convert(model, item, req))
                transaction.abort()
                if verbose:
                    print('... done')

            transaction.begin()
            ds.write(**cldf_cfg.custom_tabledata(req, tabledata))
            ds.validate()

            # Zip the scratch directory next to the intended output name.
            shutil.make_archive(str(fname.parent / fname.stem), 'zip',
                                str(tmpd))