def _get_direction_ngrams(direction, c, attrib, n_min, n_max, lower,
                          from_phrase):
    """Yield n-grams from table phrases aligned with the spans of ``c``.

    For every span of ``c`` whose sentence reports both ``is_tabular()`` and
    ``is_visual()``, the phrases of that sentence's table are scanned and
    n-grams are yielded for those that pass the alignment predicate.

    :param direction: ``'vert'`` selects ``bbox_vert_aligned``; any other
        value selects ``bbox_horz_aligned``.
    :param c: a ``TemporarySpan`` (used as the only span), or an object
        whose ``get_contexts()`` supplies the spans.
    :param attrib: name of the phrase attribute handed to
        ``tokens_to_ngrams``; only read when ``from_phrase`` is truthy.
    :param n_min: forwarded to ``tokens_to_ngrams`` only; the
        ``from_phrase``-falsy path never reads it.
    :param n_max: forwarded to ``tokens_to_ngrams`` and to ``Ngrams``.
    :param lower: forwarded to ``tokens_to_ngrams``; on the other path the
        yielded span text is lowercased when this is truthy.
    :param from_phrase: when truthy, alignment is tested on the whole phrase
        and n-grams come from ``getattr(phrase, attrib)``; otherwise every
        candidate produced by the ``Ngrams`` instance for the phrase is
        tested individually.
    """
    # TODO: this currently looks only in current table;
    #   precompute over the whole document/page instead
    if direction == 'vert':
        is_aligned = bbox_vert_aligned
    else:
        is_aligned = bbox_horz_aligned
    span_source = Ngrams(n_max=n_max, split_tokens=[])

    if isinstance(c, TemporarySpan):
        spans = [c]
    else:
        spans = c.get_contexts()

    for span in spans:
        # Only sentences that are both tabular and visual are considered.
        if not (span.sentence.is_tabular() and span.sentence.is_visual()):
            continue

        for phrase in span.sentence.table.phrases:
            if from_phrase:
                # Whole-phrase alignment test; the span's own sentence is
                # excluded by identity.
                if not is_aligned(bbox_from_phrase(phrase),
                                  bbox_from_span(span)):
                    continue
                if phrase is span.sentence:
                    continue
                phrase_ngrams = tokens_to_ngrams(getattr(phrase, attrib),
                                                 n_min=n_min,
                                                 n_max=n_max,
                                                 lower=lower)
                for ngram in phrase_ngrams:
                    yield ngram
                continue

            for candidate in span_source.apply(phrase):
                if not is_aligned(bbox_from_span(candidate),
                                  bbox_from_span(span)):
                    continue
                # Skip candidates from the span's own sentence (equality
                # comparison here) whose get_span() value is contained in
                # the span's get_span() value.
                if (phrase == span.sentence
                        and candidate.get_span() in span.get_span()):
                    continue
                text = candidate.get_span()
                yield text.lower() if lower else text
def _preprocess_visual_features(doc):
    """Attach bounding-box coordinates to each phrase and group them per page.

    Does nothing when ``doc`` already has a ``_visual_features`` attribute;
    otherwise that attribute is set to ``True`` before any work is done.

    Every phrase in ``doc.phrases`` gets ``_aligned_lemmas`` reset to an
    empty set, plus ``bbox``, ``yc``, ``x0``, ``x1`` and ``xc`` attributes
    derived from ``bbox_from_phrase``.  Within each page (``phrase.page[0]``)
    the phrases are grouped by identical ``yc``, ``x0``, ``x1`` and ``xc``
    values, each group is sorted, and the groupings are passed to
    ``_assign_alignment_features`` with the prefixes ``'Y_'``, ``'LEFT_'``,
    ``'RIGHT_'`` and ``'CENTER_'`` respectively.

    :param doc: object exposing ``phrases``; mutated in place.
    """
    if hasattr(doc, '_visual_features'):
        return
    # cache flag
    doc._visual_features = True

    def _horizontal_center(p):
        return p.xc

    def _vertical_center(p):
        return p.yc

    pages = defaultdict(list)
    for phrase in doc.phrases:
        pages[phrase.page[0]].append(phrase)
        phrase._aligned_lemmas = set()

    for page_phrases in pages.values():
        # process per page alignments
        same_yc = defaultdict(list)
        same_x0 = defaultdict(list)
        same_xc = defaultdict(list)
        same_x1 = defaultdict(list)

        for phrase in page_phrases:
            box = phrase.bbox = bbox_from_phrase(phrase)
            phrase.yc = (box.top + box.bottom) / 2
            phrase.x0 = box.left
            phrase.x1 = box.right
            phrase.xc = (phrase.x0 + phrase.x1) / 2
            # index current phrase by different alignment keys
            same_yc[phrase.yc].append(phrase)
            same_x0[phrase.x0].append(phrase)
            same_x1[phrase.x1].append(phrase)
            same_xc[phrase.xc].append(phrase)

        # (grouping, sort key within a group, feature prefix)
        # Phrases sharing a yc are ordered by xc; phrases sharing an x
        # coordinate are ordered by yc.
        alignments = (
            (same_yc, _horizontal_center, 'Y_'),
            (same_x0, _vertical_center, 'LEFT_'),
            (same_x1, _vertical_center, 'RIGHT_'),
            (same_xc, _vertical_center, 'CENTER_'),
        )

        # Sort every group first, then hand the groupings over.
        for grouping, sort_key, _ in alignments:
            for members in grouping.values():
                members.sort(key=sort_key)
        for grouping, _, prefix in alignments:
            _assign_alignment_features(grouping, prefix)