Exemple #1
0
def is_lmf(source: AnyPath) -> bool:
    """Report whether *source* looks like a WN-LMF file.

    The path is user-expanded first. The answer is ``False`` when
    ``is_xml`` rejects the path or when ``_read_header`` raises
    :class:`LMFError` on the file's leading bytes; otherwise ``True``.
    """
    lmf_path = Path(source).expanduser()
    if is_xml(lmf_path):
        # Binary mode: the header reader receives the raw byte stream.
        with lmf_path.open(mode='rb') as stream:
            try:
                _read_header(stream)
            except LMFError:
                return False
            else:
                return True
    return False
Exemple #2
0
def is_lmf(source: AnyPath) -> bool:
    """Return True if *source* is a WN-LMF XML file.

    A file qualifies when its suffix is ``.xml`` (case-insensitive), its
    first line equals ``_XMLDECL`` and its second line is one of
    ``_DOCTYPES`` (trailing whitespace on both lines is ignored).

    Args:
        source: path to the candidate file; a leading ``~`` is expanded.

    Returns:
        ``True`` if the two header lines match, ``False`` otherwise,
        including when the file's leading bytes are not valid UTF-8.

    Raises:
        OSError: if a ``.xml`` path cannot be opened (e.g., it does not
            exist); files with any other suffix are never opened.
    """
    source = Path(source).expanduser()
    if source.suffix.lower() != '.xml':
        return False
    try:
        # Decode explicitly as UTF-8 so that the answer does not depend
        # on the platform's default locale encoding.
        with source.open(encoding='utf-8') as fh:
            xmldecl = fh.readline().rstrip()
            doctype = fh.readline().rstrip()
    except UnicodeDecodeError:
        # Undecodable content cannot be a matching header; a predicate
        # should answer False rather than raise here.
        return False
    return xmldecl == _XMLDECL and doctype in _DOCTYPES
Exemple #3
0
def iterpackages(path: AnyPath) -> Iterator[Package]:
    """Generate the wordnet Packages located at *path*.

    Accepted forms of *path* (a leading ``~`` is expanded):
      - a lexical resource file
      - a wordnet package directory
      - a wordnet collection directory
      - a tar archive containing one of the above
      - a compressed (gzip or lzma) resource file or tar archive

    Raises:
        wn.Error: if a directory is neither a package nor a collection,
            if an archive does not contain exactly one top-level entry,
            or if a plain file is not a valid lexical resource.
    """
    path = Path(path).expanduser()

    # Case 1: directories are either a single package or a collection.
    if path.is_dir():
        if is_package_directory(path):
            yield Package(path)
            return
        if is_collection_directory(path):
            yield from Collection(path).packages()
            return
        raise wn.Error(
            f'does not appear to be a valid package or collection: {path!s}'
        )

    # Case 2: tar archives are unpacked to a temporary directory and the
    # single extracted entry is processed recursively.  The temporary
    # directory stays alive while the nested generator is being consumed.
    if tarfile.is_tarfile(path):
        with tarfile.open(path) as archive:
            _check_tar(archive)
            with tempfile.TemporaryDirectory() as scratch:
                archive.extractall(path=scratch)
                entries = list(Path(scratch).iterdir())
                if len(entries) != 1:
                    raise wn.Error(
                        'archive may only have one resource, package, or collection'
                    )
                (only_entry,) = entries
                yield from iterpackages(only_entry)
        return

    # Case 3: anything else is treated as a (possibly compressed) resource.
    decompressed: Path
    with _get_decompressed(path) as decompressed:
        if not lmf.is_lmf(decompressed):
            raise wn.Error(
                f'not a valid lexical resource: {path!s}'
            )
        yield _ResourceOnlyPackage(decompressed)
Exemple #4
0
def dump(lexicons: LexicalResource,
         destination: AnyPath,
         version: str = '1.0') -> None:
    """Serialize wordnets to a file in the WN-LMF format.

    The output consists of the XML declaration, a DOCTYPE line built from
    the schema registered for *version*, and a ``LexicalResource`` root
    element wrapping whatever ``_dump_lexicon`` emits for each lexicon.

    Args:
        lexicons: the lexicons to write; each item is handed to
            ``_dump_lexicon`` in iteration order
        destination: path of the output file (a leading ``~`` is
            expanded); it is opened for writing as UTF-8 text
        version: WN-LMF version; must be a key of ``_SCHEMAS``

    Raises:
        LMFError: if *version* is not a key of ``_SCHEMAS``
    """
    if version not in _SCHEMAS:
        raise LMFError(f'invalid version: {version}')

    target = Path(destination).expanduser()
    schema = _SCHEMAS[version]
    doctype = _DOCTYPE.format(schema=schema)
    dc_uri = _DC_URIS[version]

    with target.open('wt', encoding='utf-8') as stream:
        header_lines = (
            _XMLDECL.decode('utf-8'),
            doctype,
            f'<LexicalResource xmlns:dc="{dc_uri}">',
        )
        for header_line in header_lines:
            stream.write(header_line + '\n')
        for lexicon in lexicons:
            _dump_lexicon(lexicon, stream, version)
        stream.write('</LexicalResource>' + '\n')
Exemple #5
0
def load(source: AnyPath) -> LexicalResource:
    """Read the lexicons stored in a WN-LMF file.

    The file is visited twice: once so that ``_read_header`` can return
    the version value, then again as a stream of XML start/end events
    from which each ``Lexicon`` / ``LexiconExtension`` element is loaded.

    Args:
        source: path to a WN-LMF file (a leading ``~`` is expanded)

    Returns:
        The loaded lexicons, in document order.
    """
    lmf_path = Path(source).expanduser()

    with lmf_path.open('rb') as header_stream:
        version = _read_header(header_stream)

    raw_events = ET.iterparse(lmf_path, events=('start', 'end'))
    events = XMLEventIterator(raw_events)
    root = events.start('LexicalResource')

    loaded: List[Lexicon] = []
    while events.starts('Lexicon', 'LexiconExtension'):
        lexicon = _load_lexicon(events, version)
        loaded.append(lexicon)
        # Drop the children parsed so far from the root element.
        root.clear()

    events.end(root.tag)
    # Drain whatever events are left so the parse runs to completion.
    for _ in events:
        pass

    return loaded
Exemple #6
0
def is_collection_directory(path: AnyPath) -> bool:
    """Return ``True`` if *path* appears to be a wordnet collection.

    A collection is a directory with at least one immediate entry for
    which ``is_package_directory`` returns a true value.

    Args:
        path: the candidate directory; a leading ``~`` is expanded.

    Returns:
        ``False`` if *path* is not a directory or has no package
        directory among its entries, ``True`` otherwise.
    """
    path = Path(path).expanduser()
    # any() stops at the first package directory found, so the remaining
    # entries are not inspected once the answer is known.
    return path.is_dir() and any(
        is_package_directory(entry) for entry in path.iterdir()
    )
Exemple #7
0
def is_package_directory(path: AnyPath) -> bool:
    """Return ``True`` if *path* is a directory holding one lexical resource.

    Exactly one immediate entry of the directory must satisfy
    ``is_lexical_resource``; zero or several matches give ``False``.
    """
    candidate = Path(path)
    if not candidate.is_dir():
        return False
    resource_count = sum(
        1 for entry in candidate.iterdir() if is_lexical_resource(entry)
    )
    return resource_count == 1
Exemple #8
0
def is_collection_directory(path: AnyPath) -> bool:
    """Return ``True`` if *path* appears to be a wordnet collection.

    A collection is a directory in which at least one immediate entry
    satisfies ``is_package_directory``.

    Args:
        path: the candidate directory; a leading ``~`` is expanded.

    Returns:
        ``False`` if *path* is not a directory or none of its entries is
        a package directory, ``True`` otherwise.
    """
    path = Path(path).expanduser()
    if not path.is_dir():
        return False
    # Short-circuit on the first match instead of probing every entry
    # and building a list only to test that it is non-empty.
    return any(is_package_directory(entry) for entry in path.iterdir())
Exemple #9
0
def is_package_directory(path: AnyPath) -> bool:
    """Tell whether *path* looks like a Wordnet Package.

    The (user-expanded) path must be a directory in which exactly one
    immediate entry is accepted by ``lmf.is_lmf``.
    """
    directory = Path(path).expanduser()
    if not directory.is_dir():
        return False
    lmf_entries = [
        entry for entry in directory.iterdir() if lmf.is_lmf(entry)
    ]
    return len(lmf_entries) == 1