Beispiel #1
0
        "lemmata": False,
          "light": False,
        "lexicon": LEXICON,
       "language": "es",
        "default": "NC",
            "map": kwargs.get("tagset", "") != PAROLE and parole2penntreebank or None,
    })
    s = _en_parse(s, False, tags, chunks, relations, **kwargs)
    # Use pattern.es.inflect for lemmatization:
    if lemmata:
        p = [find_lemmata(sentence) for sentence in s.split()]
        p = "\n".join([" ".join(["/".join(token) for token in sentence]) for sentence in p])
        s = TaggedString(p, tags=s.tags+["lemma"], language="es")
    return s

def tag(s, tokenize=True, encoding="utf-8"):
    """ Returns a flat list of (token, tag)-tuples for the given string.
        The string is run through parse(); for every token of every sentence in the
        split() result, the first two fields are kept as a tuple.
    """
    # NOTE(review): the positional flags presumably switch tags on and
    # chunks/relations/lemmata off -- confirm against the parse() signature.
    parsed = parse(s, tokenize, True, False, False, False, encoding)
    return [(word[0], word[1]) for sentence in parsed.split() for word in sentence]

#### COMMAND LINE ##################################################################################
# From the folder that contains the "pattern" folder:
# python -m pattern.es.parser xml -s "A quien se hace de miel las moscas le comen." -OTCLI

# Runs only when this module is executed as a script (not when imported):
# hands the parse() function to the commandline() helper.
# NOTE(review): commandline() is defined outside this view; presumably it reads
# the command-line options shown in the usage example -- confirm.
if __name__ == "__main__":
    commandline(parse)
Beispiel #2
0
    s = _en_parse(s, False, tags, chunks, relations, **kwargs)
    p = [[[m[token[0]]] + token[1:] for token in sentence]
         for sentence in s.split()]
    p = "\n".join(
        [" ".join(["/".join(token) for token in sentence]) for sentence in p])
    s = TaggedString(p, tags=s.tags, language="de")
    # Use pattern.de.inflect for lemmatization:
    if lemmata:
        p = [find_lemmata(sentence) for sentence in s.split()]
        s = TaggedString(p, tags=s.tags + ["lemma"], language="de")
    return s


def tag(s, tokenize=True, encoding="utf-8"):
    """ Returns the (token, tag)-tuples found in the given string, as one flat list.
        The string is handed to parse(); the first two fields of each token in each
        sentence of the split() result are collected in order.
    """
    # NOTE(review): the positional flags presumably enable tags and disable
    # chunks/relations/lemmata -- confirm against the parse() signature.
    parsed = parse(s, tokenize, True, False, False, False, encoding)
    pairs = []
    for sentence in parsed.split():
        pairs.extend((word[0], word[1]) for word in sentence)
    return pairs


#### COMMAND LINE ##################################################################################
# From the folder that contains the "pattern" folder:
# python -m pattern.de.parser xml -s "Ein Unglück kommt selten allein." -OTCLI

# Runs only when this module is executed as a script (not when imported):
# hands the parse() function to the commandline() helper.
# NOTE(review): commandline() is defined outside this view; presumably it reads
# the command-line options shown in the usage example -- confirm.
if __name__ == "__main__":
    commandline(parse)