Example #1
0
def parse_spacy(passages, lang, verbose=False):
    """Annotate passages and rebuild each one from its dependency annotation.

    Every passage is passed through ``annotate_all``. Its layer-0 terminals, ordered by
    position, are wrapped in ``ConlluConverter`` nodes (preceded by one argument-less node),
    each node receives a single non-remote edge to its head, and the node list is handed to
    ``ConlluConverter.build_passage``.

    :param passages: iterable of passages to annotate and parse
    :param lang: language given to ``annotate_all``; also the fallback language for the
                 dependency relation when the passage has no "lang" attribute
    :param verbose: forwarded to ``annotate_all``

    :return generator of (annotated passage, passage built from the dependency nodes) pairs
    """
    annotated = annotate_all(zip(passages),
                             as_array=True,
                             as_tuples=True,
                             lang=lang,
                             verbose=verbose)
    # annotate_all is fed 1-tuples (zip of a single iterable), so each result is unpacked as one.
    for (passage,) in annotated:
        terminals = list(passage.layer(layer0.LAYER_ID).all)
        terminals.sort(key=operator.attrgetter("position"))

        # First entry is an argument-less node (presumably the root placeholder -- confirm).
        nodes = [ConlluConverter.Node()]
        nodes.extend(
            ConlluConverter.Node(terminal.position,
                                 terminal=terminal,
                                 token=ConlluConverter.Token(terminal.text, terminal.tag))
            for terminal in terminals)

        for node in nodes[1:]:
            terminal = node.terminal
            node.token.paragraph = terminal.paragraph
            head = Attr.HEAD(terminal.tok[Attr.HEAD.value])
            # A truthy head gets the node's own position added
            # (presumably turning a relative offset into an absolute index -- confirm).
            if head:
                head += node.position
            rel = Attr.DEP(terminal.tok[Attr.DEP.value],
                           lang=passage.attrib.get("lang", lang))
            assert head is not None and rel is not None, \
                "head=%r, rel=%r for token %d in:\n%s" % (
                    head, rel, node.position, " ".join(str(t) for t in terminals))
            edge = ConlluConverter.Edge(head, rel, remote=False)
            # The terminal reference is cleared before the edge is linked and attached.
            node.terminal = None
            edge.link_head(nodes)
            node.add_edges([edge])

        yield passage, ConlluConverter().build_passage(nodes, passage.ID)
Example #2
0
def from_conllu(lines,
                passage_id=None,
                return_original=False,
                annotate=False,
                terminals_only=False,
                dep=False,
                **kwargs):
    """Build Passage objects from text parsed in Universal Dependencies (conllu) format.

    Delegates to ``ConlluConverter.from_format``. Of the extra keyword arguments, only
    "format" is forwarded (as ``None`` when absent); any others are ignored.

    :param lines: iterable of lines in Universal Dependencies format, describing a single passage.
    :param passage_id: ID to assign to the passage
    :param return_original: return triple of (UCCA passage, Universal Dependencies string, sentence ID)
    :param annotate: whether to store dependency annotations in the "extra" dict of layer 0
    :param terminals_only: create terminals only (with any requested annotation), no non-terminals
    :param dep: return the dependency graph instead of the converted UCCA passage

    :return generator of Passage objects
    """
    from semstr.conversion.conllu import ConlluConverter
    converter = ConlluConverter()
    options = {
        "passage_id": passage_id,
        "return_original": return_original,
        "annotate": annotate,
        "terminals_only": terminals_only,
        "dep": dep,
        "format": kwargs.get("format"),
    }
    return converter.from_format(lines, **options)
Example #3
0
def to_conllu(passage, test=False, *args, **kwargs):
    """Render a Passage object as Universal Dependencies (conllu) lines.

    Delegates to ``ConlluConverter.to_format`` with ``tree=True``. Any further positional
    or keyword arguments are accepted and discarded.

    :param passage: the Passage object to convert
    :param test: whether to omit the head and deprel columns. Defaults to False

    :return list of lines representing the semantic dependencies in the passage
    """
    # Extra arguments are deliberately unused.
    del args
    del kwargs
    from semstr.conversion.conllu import ConlluConverter
    converter = ConlluConverter()
    return converter.to_format(passage, test, tree=True)
Example #4
0
def to_conllu(passage, test=False, enhanced=True, preprocess=True, **kwargs):
    """Render a Passage object as Universal Dependencies (conllu) lines.

    Delegates to ``ConlluConverter(enhanced=enhanced).to_format``. Of the extra keyword
    arguments, only "format" is forwarded (as ``None`` when absent); any others are ignored.

    :param passage: the Passage object to convert
    :param test: whether to omit the head and deprel columns. Defaults to False
    :param enhanced: whether to include enhanced edges
    :param preprocess: whether to preprocess the converted dependency graph before returning it

    :return list of lines representing the semantic dependencies in the passage
    """
    from semstr.conversion.conllu import ConlluConverter
    converter = ConlluConverter(enhanced=enhanced)
    output_format = kwargs.get("format")
    return converter.to_format(passage,
                               test=test,
                               preprocess=preprocess,
                               format=output_format)
Example #5
0
def from_conllu(lines,
                passage_id=None,
                split=True,
                return_original=False,
                annotate=False,
                *args,
                **kwargs):
    """Build Passage objects from text parsed in Universal Dependencies (conllu) format.

    Delegates to ``ConlluConverter.from_format``. Any further positional or keyword
    arguments are accepted and discarded.

    :param lines: iterable of lines in Universal Dependencies format, describing a single passage.
    :param passage_id: ID to assign to the passage
    :param split: whether each sentence becomes its own passage
    :param return_original: return triple of (UCCA passage, Universal Dependencies string, sentence ID)
    :param annotate: whether to store dependency annotations in the "extra" dict of layer 0

    :return generator of Passage objects
    """
    # Extra arguments are deliberately unused.
    del args, kwargs
    from semstr.conversion.conllu import ConlluConverter
    converter = ConlluConverter()
    # passage_id and split stay positional, exactly as the converter is called upstream.
    return converter.from_format(lines, passage_id, split,
                                 return_original=return_original,
                                 annotate=annotate)