Example #1
def __init__(self):
    if not self.dir:
        self.dir = pathlib.Path(inspect.getfile(self.__class__)).parent
    self.dir = DataDir(self.dir)
    md = self.dir / 'metadata.json'
    self.metadata = self.metadata_cls.from_file(
        md) if md.exists() else self.metadata_cls()
    self.metadata.id = self.id
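
This is `Dataset.__init__` from cldfbench (Example #2 shows the full class): if a subclass leaves `dir` unset, it defaults to the directory containing the module that defines the subclass, wrapped in a `DataDir` for convenient path handling, and `metadata.json` from that directory is parsed if present. A minimal sketch of a subclass relying on this default; the class name and `id` are made up for illustration:

# mydataset/__init__.py -- hypothetical module
from cldfbench import Dataset


class MyDataset(Dataset):
    id = 'mydataset'  # illustrative identifier; `dir` is left unset


ds = MyDataset()
print(ds.dir)          # .../mydataset, as a DataDir
print(ds.metadata.id)  # 'mydataset'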
Example #2
class Dataset(object):
    """
    A cldfbench dataset ties together

    - `raw` data, to be used as source for the
    - `cldf` data, which is created using config data from
    - `etc`.

    To use the cldfbench infrastructure, one should sub-class `Dataset`.

    cldfbench supports the following workflow:
    - a `download` command populates a `Dataset`'s `raw` directory.
    - a `makecldf` command (re)creates the CLDF dataset in `cldf`.
    """
    dir = None
    id = None
    metadata_cls = Metadata

    def __init__(self):
        if not self.dir:
            self.dir = pathlib.Path(inspect.getfile(self.__class__)).parent
        self.dir = DataDir(self.dir)
        md = self.dir / 'metadata.json'
        self.metadata = self.metadata_cls.from_file(
            md) if md.exists() else self.metadata_cls()
        self.metadata.id = self.id

    def __str__(self):
        return '{0.__class__.__name__} "{0.id}" at {1}'.format(
            self, self.dir.resolve())

    def cldf_specs(self):
        """
        A `Dataset` must declare all CLDF datasets that are derived from it.

        :return: A single `CLDFSpec` instance, or a `dict`, mapping names to `CLDFSpec` \
        instances, where the name will be used by `cldf_reader`/`cldf_writer` to look up \
        the spec.
        """
        return CLDFSpec(dir=self.cldf_dir)

    @property
    def cldf_specs_dict(self):
        """
        Turn cldf_specs into a `dict` for simpler lookup.

        :return: `dict` mapping lookup keys to `CLDFSpec` instances.
        """
        specs = self.cldf_specs()
        if isinstance(specs, CLDFSpec):
            return {None: specs}
        assert isinstance(specs, dict)
        return specs

    @lazyproperty
    def cldf_dir(self):
        return self.dir / 'cldf'

    @lazyproperty
    def raw_dir(self):
        return self.dir / 'raw'

    @lazyproperty
    def etc_dir(self):
        return self.dir / 'etc'

    def cldf_writer(self, args, cldf_spec=None, clean=True):
        """
        :param args: CLI arguments, an `argparse.Namespace`.
        :param cldf_spec: Key of the relevant `CLDFSpec` in `Dataset.cldf_specs`
        :param clean: `bool` flag signaling whether to clean the CLDF dir before writing. \
        Note that `False` must be passed for subsequent calls to `cldf_writer` in case the \
        spec re-uses a directory.
        :return: a `cldf_spec.writer_cls` instance, for write-access to CLDF data. \
        This method should be used in a with-statement, and will then return a `CLDFWriter` with \
        an empty working directory.
        """
        if not isinstance(cldf_spec, CLDFSpec):
            cldf_spec = self.cldf_specs_dict[cldf_spec]
        return cldf_spec.get_writer(args=args, dataset=self, clean=clean)

    def cldf_reader(self, cldf_spec=None):
        """
        :param cldf_spec: Key of the relevant `CLDFSpec` in `Dataset.cldf_specs`
        :return: a `pycldf.Dataset` instance, for read-access to the CLDF data.
        """
        if not isinstance(cldf_spec, CLDFSpec):
            cldf_spec = self.cldf_specs_dict[cldf_spec]
        return cldf_spec.get_dataset()

    @lazyproperty
    def repo(self):
        try:
            return Repository(self.dir)
        except ValueError:  # pragma: no cover
            return

    #
    # Workflow commands are implemented with two methods for each command:
    # - cmd_<command>: The implementation of the command, typically overridden by datasets.
    # - _cmd_<command>: An (optional) wrapper providing setup and teardown functionality, calling
    #   cmd_<command> in between.
    #
    # Workflow commands must accept an `argparse.Namespace` as sole positional argument.
    #
    def _cmd_download(self, args):
        self.raw_dir.mkdir(exist_ok=True)
        self.cmd_download(args)
        (self.raw_dir / 'README.md').write_text(
            'Raw data downloaded {0}'.format(datetime.utcnow().isoformat()),
            encoding='utf8')

    def cmd_download(self, args):
        args.log.warning('cmd_{0} not implemented for dataset {1}'.format(
            'download', self.id))
        return NOOP

    def _cmd_readme(self, args):
        if self.metadata:
            self.dir.joinpath('README.md').write_text(self.cmd_readme(args),
                                                      encoding='utf8')

    def cmd_readme(self, args):
        return self.metadata.markdown() if self.metadata else ''

    def _cmd_makecldf(self, args):
        specs = list(self.cldf_specs_dict.values())
        if len(specs) == 1:
            # There's only one CLDF spec! We instantiate the writer now and inject it into `args`:
            with self.cldf_writer(args, cldf_spec=specs[0]) as writer:
                args.writer = writer
                self.cmd_makecldf(args)
        else:
            self.cmd_makecldf(args)

        if self.metadata and self.metadata.known_license:
            legalcode = self.metadata.known_license.legalcode
            if legalcode:
                (self.dir / 'LICENSE').write_text(legalcode, encoding='utf8')

    def cmd_makecldf(self, args):
        """
        :param args: An `argparse.Namespace` including attributes:
        - `writer`: `CLDFWriter` instance
        """
        args.log.warning('cmd_{0} not implemented for dataset {1}'.format(
            'makecldf', self.id))
        return NOOP
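
The two `cmd_*` stubs above just log a warning; they are the hooks a concrete dataset overrides. A minimal sketch of such a subclass, assuming the documented `from cldfbench import Dataset` import; the URL, file name, and table columns are made up for illustration:

from cldfbench import Dataset


class MyDataset(Dataset):
    id = 'mydataset'  # illustrative identifier

    def cmd_download(self, args):
        # Populate raw_dir from a (hypothetical) source URL:
        self.raw_dir.download('https://example.org/data.csv', 'data.csv')

    def cmd_makecldf(self, args):
        # Since only one CLDFSpec is declared (the default), _cmd_makecldf
        # has already injected a CLDFWriter as `args.writer`:
        args.writer.cldf.add_component('LanguageTable')
        for row in self.raw_dir.read_csv('data.csv', dicts=True):
            args.writer.objects['LanguageTable'].append(
                {'ID': row['ID'], 'Name': row['Name']})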
Example #3
def run(args):

    ds = get_dataset(args)
    ds_cldf = ds.cldf_reader()
    release_dir = args.out / '{0}_{1}'.format(ds.id, MEDIA)

    if ds_cldf.get('media.csv', None) is None:  # pragma: no cover
        args.log.error('Dataset has no media.csv')
        raise ParserError
    if args.parent_doi and not Zenodo.DOI_PATTERN.match(args.parent_doi):
        args.log.error('Invalid DOI passed via --parent-doi')
        raise ParserError
    if args.update_zenodo:
        if not release_dir.exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir))
            raise ParserError
        if not (release_dir / ZENODO_FILE_NAME).exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir / ZENODO_FILE_NAME))
            raise ParserError
        if args.create_release:
            args.log.error(
                'You cannot create the release and update Zenodo at the same time.'
            )
            raise ParserError
    if args.create_release:
        if not args.parent_doi:
            args.log.error(
                'The corresponding DOI is required (via --parent-doi).')
            raise ParserError

    mime_types = None
    if args.mimetype:
        mime_types = [m.strip() for m in nfilter(args.mimetype.split(','))]

    if args.list:
        size = collections.Counter()
        number = collections.Counter()
    else:
        media_dir = args.out / MEDIA
        media_dir.mkdir(exist_ok=True)
        media = []

    if not args.update_zenodo:
        used_file_extensions = set()
        with UnicodeWriter(
                (media_dir / INDEX_CSV) if not args.list else None) as w:
            for i, row in enumerate(
                    tqdm.tqdm(list(ds_cldf['media.csv']),
                              desc='Getting {0} items'.format(MEDIA))):
                url = ds_cldf.get_row_url('media.csv', row)
                if isinstance(url, rfc3986.URIReference):
                    url = url.normalize().unsplit()
                    row['URL'] = url
                f_ext = url.split('.')[-1].lower()
                if args.debug and i > 500:
                    break
                if (mime_types is None) or f_ext in mime_types\
                        or any(row['mimetype'].startswith(x) for x in mime_types):
                    if args.list:
                        m = '{0} ({1})'.format(row['mimetype'], f_ext)
                        size[m] += int(row['size'])
                        number.update([m])
                    else:
                        used_file_extensions.add(f_ext)
                        d = media_dir / row['ID'][:2]
                        d.mkdir(exist_ok=True)
                        fn = '.'.join([row['ID'], f_ext])
                        target = d / fn
                        row['local_path'] = pathlib.Path(row['ID'][:2]) / fn
                        if i == 0:
                            w.writerow(row)
                        w.writerow(row.values())
                        media.append(target)
                        if (not target.exists()) or md5(target) != row['ID']:
                            _create_download_thread(url, target)

    if args.list:
        for k, v in size.most_common():
            print('\t'.join([k.ljust(20), str(number[k]), format_size(v)]))
        return

    # Wait for the download threads to finish
    if 'download_threads' in globals():
        for t in download_threads:
            t.join()

    if args.create_release:
        assert media_dir.exists(), 'No folder "{0}" found in {1}'.format(
            MEDIA, media_dir.resolve())

        release_dir.mkdir(exist_ok=True)

        media.append(media_dir / INDEX_CSV)

        try:
            zipf = zipfile.ZipFile(str(release_dir / '{0}.zip'.format(MEDIA)),
                                   'w', zipfile.ZIP_DEFLATED)
            fp = args.out
            for f in tqdm.tqdm(media, desc='Creating {0}.zip'.format(MEDIA)):
                zipf.write(str(f), str(os.path.relpath(str(f), str(fp))))
            zipf.close()
        except Exception as e:
            args.log.error(e)
            raise

        def _contrib(d):
            return {
                k: v
                for k, v in d.items()
                if k in {'name', 'affiliation', 'orcid', 'type'}
            }

        version_v = git_describe('.').split('-')[0]
        version = version_v.replace('v', '')
        git_url = [r for r in ds.repo.repo.remotes
                   if r.name == 'origin'][0].url.replace('.git', '')
        with jsonlib.update(release_dir / ZENODO_FILE_NAME,
                            indent=4,
                            default=collections.OrderedDict()) as md:
            contribs = ds.dir / 'CONTRIBUTORS.md'
            creators, contributors = get_creators_and_contributors(
                contribs.read_text(
                    encoding='utf8') if contribs.exists() else '',
                strict=False)
            if creators:
                md['creators'] = [_contrib(p) for p in creators]
            if contributors:
                md['contributors'] = [_contrib(p) for p in contributors]
            communities = [r["identifier"] for r in md.get("communities", [])] + \
                [c.strip() for c in nfilter(args.communities.split(','))] + \
                COMMUNITIES
            if communities and not args.debug:
                md['communities'] = [{
                    "identifier": community_id
                } for community_id in sorted(set(communities))]
            md.update({
                'title': '{0} {1} Files'.format(ds.metadata.title, MEDIA.title()),
                'access_right': 'open',
                'keywords': sorted(set(md.get('keywords', []) + ['linguistics'])),
                'upload_type': 'dataset',
                'publication_date': datetime.today().strftime('%Y-%m-%d'),
                'version': version,
                'related_identifiers': [
                    {
                        'scheme': 'url',
                        'identifier': '{0}/tree/{1}'.format(git_url, version_v),
                        'relation': 'isSupplementTo'
                    },
                ],
            })
            supplement_to = ''  # used in DESCRIPTION below
            if args.parent_doi:
                md['related_identifiers'].append({
                    'scheme': 'doi',
                    'identifier': args.parent_doi,
                    'relation': 'isPartOf'
                })
                supplement_to = (
                    " - Supplement to dataset "
                    "<a href='https://doi.org/{0}'>{1}</a> ".format(
                        args.parent_doi, ds.metadata.title))
            if ds.metadata.url:
                md['related_identifiers'].append({
                    'scheme': 'url',
                    'identifier': ds.metadata.url,
                    'relation': 'isAlternateIdentifier'
                })

            formats = ', '.join(sorted(used_file_extensions))
            descr = '<br /><br />' + ds.metadata.description if ds.metadata.description else ''
            online_url, online = '', ''
            if ds.metadata.url:
                online_url = ds.metadata.url
                online = "<br /><br />Available online at: <a href='{0}'>{0}</a>".format(
                    online_url)
            md['description'] = html.escape(
                DESCRIPTION.format(
                    url=online_url,
                    formats=' ({0})'.format(formats) if formats else '',
                    title=md['title'],
                    supplement_to=supplement_to,
                    descr=descr,
                    online=online))

            license_md = ''
            if ds.metadata.zenodo_license:
                md['license'] = {'id': ds.metadata.zenodo_license}
                license_md = LICENCE.format(ds.metadata.zenodo_license)

            DataDir(release_dir).write(
                'README.md',
                README.format(
                    title=md['title'],
                    doi='https://doi.org/{0}'.format(args.parent_doi),
                    ds_title=ds.metadata.title,
                    license=license_md,
                    formats=' ({0})'.format(formats) if formats else '',
                    media=MEDIA,
                    index=INDEX_CSV))

    if args.update_zenodo:

        md = {}
        md.update(jsonlib.load(release_dir / ZENODO_FILE_NAME))

        if args.debug:
            api_url = API_URL_SANDBOX
            access_token = os.environ.get('ZENODO_SANDBOX_ACCESS_TOKEN')
        else:
            api_url = API_URL
            access_token = ACCESS_TOKEN
        zenodo_url = api_url.replace('api/', '')

        args.log.info('Updating Deposit ID {0} on {1} with:'.format(
            args.update_zenodo, zenodo_url))
        api = Zenodo(api_url=api_url, access_token=access_token)
        try:
            rec = api.record_from_id('{0}record/{1}'.format(
                zenodo_url, args.update_zenodo))
        except Exception as e:
            args.log.error(
                'Check connection and credentials for accessing Zenodo.\n{0}'.
                format(e))
            return
        latest_version = rec.links['latest'].split('/')[-1]
        if latest_version != args.update_zenodo:
            args.log.warning(
                'Passed deposit ID does not refer to latest version {0}!'.
                format(latest_version))
        args.log.info('  DOI:     ' + rec.metadata.doi)
        args.log.info('  Title:   ' + rec.metadata.title)
        args.log.info('  Version: ' + rec.metadata.version)
        args.log.info('  Date:    ' + rec.metadata.publication_date)
        args.log.info('  Files:   ' + ', '.join([f.key for f in rec.files]))
        p = input("Proceed? [y/N]: ")
        if p.lower() == 'y':
            dep = api.update_deposit(args.update_zenodo, **md)
            if dep.state != PUBLISHED:
                api.publish_deposit(dep)
            args.log.info('Updated successfully')
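
`run` above calls `_create_download_thread(url, target)` and later joins a module-level `download_threads` list, but neither is shown in the example. A plausible sketch of such a helper, assuming a plain `urllib` download and a bounded semaphore to cap concurrency (the limit of 8 is an assumption, not taken from the source):

import threading
import urllib.request

download_threads = []
_semaphore = threading.BoundedSemaphore(8)  # assumed concurrency limit


def _create_download_thread(url, target):
    # Start one background download per file, tracked in download_threads
    # so the caller can join() them all once the loop is done.
    def _download():
        with _semaphore:
            with urllib.request.urlopen(url) as res, open(target, 'wb') as fp:
                fp.write(res.read())

    t = threading.Thread(target=_download)
    t.start()
    download_threads.append(t)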
Example #4
def run(args):
    ds = Dataset().cldf_reader()

    release_dir = args.out / '{0}_audio'.format(Dataset().id)
    zenodo_file_name = 'zenodo.json'

    if args.list:
        size = collections.Counter()
        number = collections.Counter()
    else:
        f2c = {r['ID']: r['Parameter_ID'] for r in ds['FormTable']}
        audio = args.out / 'audio'
        audio.mkdir(exist_ok=True)

    if not args.update_zenodo:
        for row in tqdm.tqdm(list(ds['media.csv'])):
            if args.list:
                size[row['mimetype']] += int(row['size'])
                number.update([row['mimetype']])
            else:
                d = audio / f2c[row['Form_ID']]
                d.mkdir(exist_ok=True)
                url = ds.get_row_url('media.csv', row)
                target = d / '{}.{}'.format(row['ID'], url.split('.')[-1])
                if (not target.exists()) or md5(target) != row['ID']:
                    if (args.mimetype is None) or target.suffix.endswith(
                            args.mimetype):
                        create_download_thread(url, target)

    if args.list:
        for k, v in size.most_common():
            print('\t'.join([k, str(number[k]), format_size(v)]))

    if args.create_release:
        assert audio.exists(), 'No folder "audio" found in {0}'.format(
            audio.resolve())

        release_dir.mkdir(exist_ok=True)

        args.log.info('creating audio ZIP archive per parameter folder ...')
        try:
            zipf = zipfile.ZipFile(str(release_dir / 'audio.zip'), 'w',
                                   zipfile.ZIP_DEFLATED)
            fp = args.out
            for root, dirs, files in tqdm.tqdm(os.walk(audio)):
                for f in files:
                    if not f.startswith('.') and not f.startswith('__')\
                            and ((args.mimetype is None) or f.endswith(args.mimetype)):
                        zipf.write(os.path.join(root, f),
                                   os.path.relpath(os.path.join(root, f), fp))
            zipf.close()
        except Exception as e:
            args.log.error(e)
            raise

        def contrib(d):
            return {
                k: v
                for k, v in d.items()
                if k in {'name', 'affiliation', 'orcid', 'type'}
            }

        with jsonlib.update(release_dir / zenodo_file_name,
                            indent=4,
                            default=collections.OrderedDict()) as md:
            contribs = Dataset().dir / 'CONTRIBUTORS.md'
            creators, contributors = get_creators_and_contributors(
                contribs.read_text(
                    encoding='utf8') if contribs.exists() else '',
                strict=False)
            if creators:
                md['creators'] = [contrib(p) for p in creators]
            if contributors:
                md['contributors'] = [contrib(p) for p in contributors]
            if COMMUNITIES:
                md['communities'] = [{
                    'id': community_id
                } for community_id in COMMUNITIES]
            md.update({
                'title': '{0} Audio Files'.format(Dataset().metadata.title),
                'access_right': 'open',
                'keywords': sorted(set(md.get('keywords', []) + ['linguistics'])),
                'upload_type': 'video',
                'version': VERSION,
                'related_identifiers': [
                    {
                        'scheme': 'doi',
                        'identifier': '10.5281/zenodo.4309141',
                        'relation': 'isPartOf'
                    },
                    {
                        'scheme': 'url',
                        'identifier': '{0}{1}/tree/v{2}'.format(
                            GITHUB_PREFIX, Dataset().id, VERSION),
                        'relation': 'isSupplementTo'
                    },
                ],
            })
            if Dataset().metadata.url:
                md['related_identifiers'].append({
                    'scheme': 'url',
                    'identifier': Dataset().metadata.url,
                    'relation': 'isAlternateIdentifier'
                })
            md['description'] = html.escape(
                DESCRIPTION.format(
                    GITHUB_PREFIX,
                    Dataset().id,
                    Dataset().metadata.url if Dataset().metadata.url else '',
                    VERSION))

            license_md = ''
            if Dataset().metadata.zenodo_license:
                md['license'] = {'id': Dataset().metadata.zenodo_license}
                license_md = LISENCE.format(Dataset().metadata.zenodo_license)

            DataDir(release_dir).write(
                'README.md',
                RELEASE_NOTE.format(md['title'], GITHUB_PREFIX,
                                    Dataset().id,
                                    Dataset().metadata.title, license_md))

    if args.update_zenodo:
        assert release_dir.exists()
        assert (release_dir / zenodo_file_name).exists()

        md = {}
        md.update(jsonlib.load(release_dir / zenodo_file_name))

        api_url = API_URL
        zenodo_url = api_url.replace('api/', '')

        args.log.info('Updating Deposit ID {0} on {1} with:'.format(
            args.update_zenodo, zenodo_url))
        api = Zenodo(api_url=api_url, access_token=ACCESS_TOKEN)
        rec = api.record_from_id('{0}record/{1}'.format(
            zenodo_url, args.update_zenodo))
        args.log.info('  DOI:   ' + rec.metadata.doi)
        args.log.info('  Title: ' + rec.metadata.title)
        args.log.info('  Date:  ' + rec.metadata.publication_date)
        args.log.info('  Files: ' + ', '.join([f.key for f in rec.files]))
        p = input("Proceed? [y/N]: ")
        if p.lower() == 'y':
            dep = api.update_deposit(args.update_zenodo, **md)
            if dep.state != zenodoclient.models.PUBLISHED:
                api.publish_deposit(dep)
            args.log.info('Updated successfully')
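
Commands like this one are typically paired with a `register(parser)` function that declares the CLI options `run` consumes. A sketch reconstructing it from the `args` attributes used above; option names follow argparse's dash-to-underscore convention, while defaults and help texts are assumptions (`args.log` is supplied by the command framework, not registered here):

import pathlib


def register(parser):
    parser.add_argument('--out', type=pathlib.Path, default=pathlib.Path('.'))
    parser.add_argument('--mimetype', default=None,
                        help='restrict handling to files with this suffix')
    parser.add_argument('--list', action='store_true',
                        help='only tabulate count and size per mimetype')
    parser.add_argument('--create-release', action='store_true',
                        help='zip the downloaded audio and write zenodo.json')
    parser.add_argument('--update-zenodo', default=None, metavar='DEPOSIT_ID',
                        help='push zenodo.json metadata to an existing deposit')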
Example #5
class Dataset(object):
    """
    A cldfbench dataset ties together

    - `raw` data, to be used as source for the
    - `cldf` data, which is created using config data from
    - `etc`.

    To use the cldfbench infrastructure, one should sub-class `Dataset`.

    cldfbench supports the following workflow:
    - a `download` command populates a `Dataset`'s `raw` directory.
    - a `makecldf` command (re)creates the CLDF dataset in `cldf`.

    The following class attributes are meant to be overridden by subclasses:

    :ivar dir: `pathlib.Path` pointing to the root directory of the dataset.
    :ivar id: A `str` identifier for the dataset. No assumption about uniqueness properties of \
    this identifier is made.
    :ivar metadata_cls: Subclass of :class:`Metadata` (or :class:`Metadata` if not overwritten)
    """
    dir = None
    id = None
    metadata_cls = Metadata

    def __init__(self):
        if not self.dir:
            self.dir = pathlib.Path(inspect.getfile(self.__class__)).parent
        self.dir = DataDir(self.dir)
        md = self.dir / 'metadata.json'
        self.metadata = self.metadata_cls.from_file(
            md) if md.exists() else self.metadata_cls()
        self.metadata.id = self.id

    def __str__(self):
        return '{0.__class__.__name__} "{0.id}" at {1}'.format(
            self, self.dir.resolve())

    @lazyproperty
    def cldf_dir(self) -> DataDir:
        """
        Directory where CLDF data generated from the Dataset will be stored (unless specified
        differently by a :class:`CLDFSpec`).
        """
        return self.dir / 'cldf'

    @lazyproperty
    def raw_dir(self) -> DataDir:
        """
        Directory where cldfbench expects the raw or source data.
        """
        return self.dir / 'raw'

    @lazyproperty
    def etc_dir(self) -> DataDir:
        """
        Directory where cldfbench expects additional configuration or metadata.
        """
        return self.dir / 'etc'

    def cldf_specs(self) -> typing.Union[CLDFSpec, typing.Dict[str, CLDFSpec]]:
        """
        A `Dataset` must declare all CLDF datasets that are derived from it.

        :return: A single :class:`CLDFSpec` instance, or a `dict`, mapping names to `CLDFSpec` \
        instances, where the name will be used by `cldf_reader`/`cldf_writer` to look up \
        the spec.
        """
        return CLDFSpec(dir=self.cldf_dir)

    @property
    def cldf_specs_dict(
            self) -> typing.Dict[typing.Union[str, None], CLDFSpec]:
        """
        Turn :meth:`cldf_specs` into a `dict` for simpler lookup.

        :return: `dict` mapping lookup keys to `CLDFSpec` instances.
        """
        specs = self.cldf_specs()
        if isinstance(specs, CLDFSpec):
            return {None: specs}
        assert isinstance(specs, dict)
        return specs

    def update_submodules(self):
        """
        Convenience method to be used in a `Dataset`'s `cmd_download` to update raw data curated
        as git submodules.
        """
        subprocess.check_call(
            'git -C {} submodule update --remote'.format(self.dir.resolve()),
            shell=True)

    def cldf_writer(self, args, cldf_spec=None, clean=True) -> CLDFWriter:
        """
        :param args: CLI arguments, an `argparse.Namespace`.
        :param cldf_spec: Key of the relevant `CLDFSpec` in `Dataset.cldf_specs`
        :param clean: `bool` flag signaling whether to clean the CLDF dir before writing. \
        Note that `False` must be passed for subsequent calls to `cldf_writer` in case the \
        spec re-uses a directory.
        :return: a `cldf_spec.writer_cls` instance, for write-access to CLDF data. \
        This method should be used in a with-statement, and will then return a `CLDFWriter` with \
        an empty working directory.
        """
        if not isinstance(cldf_spec, CLDFSpec):
            cldf_spec = self.cldf_specs_dict[cldf_spec]
        return cldf_spec.get_writer(args=args, dataset=self, clean=clean)

    def cldf_reader(
            self,
            cldf_spec: typing.Union[str, None] = None) -> pycldf.Dataset:
        """
        :param cldf_spec: Key of the relevant `CLDFSpec` in `Dataset.cldf_specs`
        :return: a `pycldf.Dataset` instance, for read-access to the CLDF data.
        """
        if not isinstance(cldf_spec, CLDFSpec):
            cldf_spec = self.cldf_specs_dict[cldf_spec]
        return cldf_spec.get_dataset()

    @lazyproperty
    def repo(self) -> typing.Union[Repository, None]:
        """
        The git repository cloned to the dataset's directory (or `None`).
        """
        try:
            return Repository(self.dir)
        except ValueError:  # pragma: no cover
            return

    def _cmd_download(self, args):
        self.raw_dir.mkdir(exist_ok=True)
        self.cmd_download(args)
        (self.raw_dir / 'README.md').write_text(
            'Raw data downloaded {0}'.format(datetime.utcnow().isoformat()),
            encoding='utf8')

    def cmd_download(self, args: argparse.Namespace):
        """
        Implementations of this methods should populate the dataset's `raw_dir` with the source
        data.
        """
        args.log.warning('cmd_{0} not implemented for dataset {1}'.format(
            'download', self.id))
        return NOOP

    def _cmd_readme(self, args):
        if self.metadata:
            badge = build_status_badge(self)
            md = self.cmd_readme(args)
            if badge:
                lines, title_found = [], False
                for line in md.split('\n'):
                    lines.append(line)
                    if line.startswith('# ') and not title_found:
                        title_found = True
                        lines.extend(['', badge])
                md = '\n'.join(lines)

            section = [
                '\n\n## CLDF Datasets\n',
                'The following CLDF datasets are available in [{0}]({0}):\n'.
                format(self.cldf_dir.resolve().relative_to(self.dir.resolve()))
            ]
            for ds in self.cldf_specs_dict.values():
                if ds.metadata_path.exists():
                    p = ds.metadata_path.resolve().relative_to(
                        self.dir.resolve())
                    section.append(
                        '- CLDF [{0}](https://github.com/cldf/cldf/tree/master/modules/{0}) '
                        'at [{1}]({1})'.format(ds.module, p))

            self.dir.joinpath('README.md').write_text(md + '\n'.join(section),
                                                      encoding='utf8')

    def cmd_readme(self, args: argparse.Namespace) -> str:
        """
        Implementations of this method should create the content for the dataset's README.md
        and return it as markdown formatted string.
        """
        return self.metadata.markdown() if self.metadata else ''

    def _cmd_makecldf(self, args):
        specs = list(self.cldf_specs_dict.values())
        if len(specs) == 1:
            # There's only one CLDF spec! We instantiate the writer now and inject it into `args`:
            with self.cldf_writer(args, cldf_spec=specs[0]) as writer:
                args.writer = writer
                self.cmd_makecldf(args)
        else:
            self.cmd_makecldf(args)

        if self.metadata and self.metadata.known_license:
            legalcode = self.metadata.known_license.legalcode
            if legalcode:
                (self.dir / 'LICENSE').write_text(legalcode, encoding='utf8')

    def cmd_makecldf(self, args: argparse.Namespace):
        """
        Implementations of this method should write the CLDF data curated by the dataset.

        :param args: An `argparse.Namespace` including attributes: \
        - `writer`: :class:`CLDFWriter` instance
        """
        args.log.warning('cmd_{0} not implemented for dataset {1}'.format(
            'makecldf', self.id))
        return NOOP
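
For datasets deriving more than one CLDF dataset, `cldf_specs` returns a `dict` and `cmd_makecldf` has to instantiate the writers itself, since `_cmd_makecldf` only injects `args.writer` in the single-spec case. A sketch with illustrative names; per the `cldf_writer` docstring, pass `clean=False` on later calls if two specs shared a directory:

from cldfbench import CLDFSpec, Dataset


class MyDataset(Dataset):
    id = 'mydataset'  # illustrative identifier

    def cldf_specs(self):
        # Two CLDF datasets derived from the same raw data:
        return {
            'wordlist': CLDFSpec(
                dir=self.cldf_dir / 'wordlist', module='Wordlist'),
            'structure': CLDFSpec(
                dir=self.cldf_dir / 'structure', module='StructureDataset'),
        }

    def cmd_makecldf(self, args):
        # No writer is injected into `args` here; look each spec up by key:
        with self.cldf_writer(args, cldf_spec='wordlist') as writer:
            ...  # add components and write forms
        with self.cldf_writer(args, cldf_spec='structure') as writer:
            ...  # write structure values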