Example #1
0
def strlib_unary_features(span):
    """
    Yield structure-based features describing a single span.

    Every item produced is a ``(feature_name, DEF_VALUE)`` pair. When the
    span's sentence reports that it is not structural, nothing is yielded.
    """
    if not span.sentence.is_structural():
        return

    # The span's own tag, then one feature per attribute.
    tag = get_tag(span)
    yield "TAG_" + tag, DEF_VALUE

    for attribute in get_attributes(span):
        yield "HTML_ATTR_" + attribute, DEF_VALUE

    parent_tag = get_parent_tag(span)
    yield "PARENT_TAG_" + parent_tag, DEF_VALUE

    # Preceding siblings: the last entry of the list is used as the
    # "previous sibling" tag, and the node position is 1-based.
    preceding = get_prev_sibling_tags(span)
    if not len(preceding):
        yield "FIRST_NODE", DEF_VALUE
    else:
        yield "PREV_SIB_TAG_" + preceding[-1], DEF_VALUE
        position = len(preceding) + 1
        yield "NODE_POS_" + str(position), DEF_VALUE

    # Following siblings: the first entry of the list is used as the
    # "next sibling" tag.
    following = get_next_sibling_tags(span)
    if not len(following):
        yield "LAST_NODE", DEF_VALUE
    else:
        yield "NEXT_SIB_TAG_" + following[0], DEF_VALUE

    # Ancestor information is collapsed into one space-separated feature each.
    ancestor_classes = " ".join(get_ancestor_class_names(span))
    yield "ANCESTOR_CLASS_[%s]" % ancestor_classes, DEF_VALUE

    ancestor_tags = " ".join(get_ancestor_tag_names(span))
    yield "ANCESTOR_TAG_[%s]" % ancestor_tags, DEF_VALUE

    ancestor_ids = " ".join(get_ancestor_id_names(span))
    yield "ANCESTOR_ID_[%s]" % ancestor_ids, DEF_VALUE
Example #2
0
def _strlib_unary_features(span: SpanMention) -> Iterator[Tuple[str, int]]:
    """Yield structure-based features describing a single span.

    Every item produced is a ``(feature_name, DEF_VALUE)`` pair. When the
    span's sentence reports that it is not structural, nothing is yielded.
    """
    if not span.sentence.is_structural():
        return

    # The span's own tag, then one feature per attribute.
    tag = get_tag(span)
    yield f"TAG_{tag}", DEF_VALUE

    for attribute in get_attributes(span):
        yield f"HTML_ATTR_{attribute}", DEF_VALUE

    parent_tag = get_parent_tag(span)
    yield f"PARENT_TAG_{parent_tag}", DEF_VALUE

    # Preceding siblings: the last entry of the list is used as the
    # "previous sibling" tag, and the node position is 1-based.
    preceding = get_prev_sibling_tags(span)
    if not len(preceding):
        yield "FIRST_NODE", DEF_VALUE
    else:
        yield f"PREV_SIB_TAG_{preceding[-1]}", DEF_VALUE
        position = len(preceding) + 1
        yield f"NODE_POS_{position}", DEF_VALUE

    # Following siblings: the first entry of the list is used as the
    # "next sibling" tag.
    following = get_next_sibling_tags(span)
    if not len(following):
        yield "LAST_NODE", DEF_VALUE
    else:
        yield f"NEXT_SIB_TAG_{following[0]}", DEF_VALUE

    # Ancestor information is collapsed into one space-separated feature each.
    ancestor_classes = " ".join(get_ancestor_class_names(span))
    yield f"ANCESTOR_CLASS_[{ancestor_classes}]", DEF_VALUE

    ancestor_tags = " ".join(get_ancestor_tag_names(span))
    yield f"ANCESTOR_TAG_[{ancestor_tags}]", DEF_VALUE

    ancestor_ids = " ".join(get_ancestor_id_names(span))
    yield f"ANCESTOR_ID_[{ancestor_ids}]", DEF_VALUE