Ejemplo n.º 1
0
def get_multilines(spans):
    """Yield a ``Multiline`` for every chunk of the given spans.

    ``spans`` is an iterable of ``(start, stop, type)`` triples. Each triple
    is wrapped in a ``Line`` and registered in an ``Intervals`` container.
    After all lines are registered, every line receives the level computed by
    ``get_free_level`` from whatever ``search`` returns for the line's own
    range. ``split_overlaps()`` is then applied and lines sharing the same
    resulting ``(start, stop)`` key are emitted together, ordered by level,
    with the keys visited in sorted order.
    """
    tree = Intervals()
    registered = []
    for begin, end, kind in spans:
        entry = Line(begin, end, kind, level=None)
        tree.addi(begin, end, entry)
        registered.append(entry)

    # Levels are assigned only once every line is in the tree, so each
    # lookup sees the complete set of lines (including the line itself).
    for entry in registered:
        neighbours = tree.search(entry.start, entry.stop)
        entry.level = get_free_level(neighbours)

    # Chunking: after this call, iteration yields the split pieces.
    tree.split_overlaps()

    # Collect the lines that ended up with identical bounds.
    chunks = defaultdict(list)
    for begin, end, entry in tree:
        chunks[begin, end].append(entry)

    for bounds in sorted(chunks):
        begin, end = bounds
        by_level = sorted(chunks[bounds], key=lambda item: item.level)
        yield Multiline(begin, end, by_level)
Ejemplo n.º 2
0
def section_markup(markup, mode=HTML):
    """Yield a ``DepMarkupSection`` for every word of ``markup``.

    Every ``(source, target, type)`` entry of ``markup.deps`` whose type is
    not ``ROOT`` becomes an ``Arc`` running from the smaller to the larger of
    the two indices; the arc's direction is ``RIGHT`` when ``source`` is the
    smaller one and ``LEFT`` otherwise. Arcs are ordered with
    ``Arc.layout_order``, given a level via ``get_free_level``, and then
    expanded into one ``ArcSection`` per index they cover (both ends
    included). Each word is paired with the sections recorded at its index,
    sorted with ``Arc.level_order``.

    ``mode`` only affects level assignment: when it equals ``ASCII`` the
    interval registered for an arc extends one past the arc's stop.
    """
    arcs = []
    for source, target, label in markup.deps:
        if label == ROOT:
            continue

        # Normalise the endpoints; the direction keeps the original
        # orientation of the dependency.
        if source < target:
            arc = Arc(source, target, RIGHT, label, level=None)
        else:
            arc = Arc(target, source, LEFT, label, level=None)
        arcs.append(arc)

    # order
    arcs.sort(key=Arc.layout_order)

    # level
    tree = Intervals()
    for arc in arcs:
        # in ascii mode include stop
        end = arc.stop + 1 if mode == ASCII else arc.stop
        tree.addi(arc.start, end, arc)

    for arc in arcs:
        arc.level = get_free_level(tree.overlap(arc.start, arc.stop))

    # group
    by_index = defaultdict(list)
    for arc in arcs:
        first, last, direction, label, level = arc
        owner = id(arc)

        # Which part sits at which end depends only on the direction.
        if direction == RIGHT:
            part_at_first, part_at_last = BEGIN, END
        else:
            part_at_first, part_at_last = END, BEGIN

        for position in range(first, last + 1):
            if position == first:
                part = part_at_first
            elif position == last:
                part = part_at_last
            else:
                part = INSIDE
            by_index[position].append(
                ArcSection(part, direction, label, level, owner)
            )

    for position, word in enumerate(markup.words):
        ordered = sorted(by_index[position], key=Arc.level_order)
        yield DepMarkupSection(word, ordered)
Ejemplo n.º 3
0
def split_overlapping_spans(spans):
    """Yield ``Span`` objects for the pieces produced by splitting overlaps.

    Every input span (an object with ``start``, ``stop`` and ``type``
    attributes) is registered in an ``Intervals`` container, which is then
    split with ``split_overlaps()``. For each resulting ``(start, stop)``
    key, visited in sorted order, a new ``Span`` is yielded whose type is
    taken from the span that appeared latest in the input among those
    covering that key.

    ``spans`` may be any iterable, including a one-shot generator: it is
    consumed in a single pass.
    """
    order = {}
    intervals = Intervals()
    # Record the input position and register the span in the same pass.
    # Iterating ``spans`` twice would leave the tree empty for generators,
    # because the first loop would already have exhausted them.
    for index, span in enumerate(spans):
        order[id(span)] = index
        intervals.addi(span.start, span.stop, span)

    intervals.split_overlaps()

    groups = defaultdict(list)
    for start, stop, span in intervals:
        groups[start, stop].append(span)

    for start, stop in sorted(groups):
        # The span that came last in the input determines the piece's type.
        latest = max(groups[start, stop], key=lambda item: order[id(item)])
        yield Span(start, stop, latest.type)
Ejemplo n.º 4
0
def get_multilines(spans):
    """Yield a ``Multiline`` for every chunk of the given spans.

    The ``(start, stop, type)`` triples in ``spans`` are processed in sorted
    order. For each one, ``get_free_level`` is applied to whatever ``search``
    returns for its range among the lines registered so far, and a ``Line``
    carrying that level is added to the ``Intervals`` container. After
    ``split_overlaps()``, lines sharing the same ``(start, stop)`` key are
    emitted together in sorted order, with the keys visited in sorted order.
    """
    tree = Intervals()

    # level: each span only sees the lines that were registered before it.
    for begin, end, kind in sorted(spans):
        level = get_free_level(tree.search(begin, end))
        tree.addi(begin, end, Line(begin, end, kind, level))

    # chunk
    tree.split_overlaps()

    # group
    chunks = defaultdict(list)
    for begin, end, entry in tree:
        chunks[begin, end].append(entry)

    for bounds in sorted(chunks):
        begin, end = bounds
        yield Multiline(begin, end, sorted(chunks[bounds]))