"lemmata": False, "light": False, "lexicon": LEXICON, "language": "es", "default": "NC", "map": kwargs.get("tagset", "") != PAROLE and parole2penntreebank or None, }) s = _en_parse(s, False, tags, chunks, relations, **kwargs) # Use pattern.es.inflect for lemmatization: if lemmata: p = [find_lemmata(sentence) for sentence in s.split()] p = "\n".join([" ".join(["/".join(token) for token in sentence]) for sentence in p]) s = TaggedString(p, tags=s.tags+["lemma"], language="es") return s def tag(s, tokenize=True, encoding="utf-8"): """ Returns a list of (token, tag)-tuples from the given string. """ tags = [] for sentence in parse(s, tokenize, True, False, False, False, encoding).split(): for token in sentence: tags.append((token[0], token[1])) return tags #### COMMAND LINE ################################################################################## # From the folder that contains the "pattern" folder: # python -m pattern.es.parser xml -s "A quien se hace de miel las moscas le comen." -OTCLI if __name__ == "__main__": commandline(parse)
s = _en_parse(s, False, tags, chunks, relations, **kwargs) p = [[[m[token[0]]] + token[1:] for token in sentence] for sentence in s.split()] p = "\n".join( [" ".join(["/".join(token) for token in sentence]) for sentence in p]) s = TaggedString(p, tags=s.tags, language="de") # Use pattern.de.inflect for lemmatization: if lemmata: p = [find_lemmata(sentence) for sentence in s.split()] s = TaggedString(p, tags=s.tags + ["lemma"], language="de") return s def tag(s, tokenize=True, encoding="utf-8"): """ Returns a list of (token, tag)-tuples from the given string. """ tags = [] for sentence in parse(s, tokenize, True, False, False, False, encoding).split(): for token in sentence: tags.append((token[0], token[1])) return tags #### COMMAND LINE ################################################################################## # From the folder that contains the "pattern" folder: # python -m pattern.de.parser xml -s "Ein Unglück kommt selten allein." -OTCLI if __name__ == "__main__": commandline(parse)