def strlib_unary_features(span):
    """Yield structural (HTML-tree) features for a single span.

    Nothing is yielded unless ``span.sentence.is_structural()`` is true.
    Otherwise the generator emits, in order: the span's tag, one feature per
    HTML attribute, the parent tag, previous-sibling information (or
    ``FIRST_NODE``), next-sibling information (or ``LAST_NODE``), and the
    space-joined ancestor class, tag and id names.

    :param span: Object exposing ``sentence.is_structural()`` and accepted by
        the ``get_*`` structural helpers.
    :return: Generator of ``(feature_name, DEF_VALUE)`` pairs.
    """
    if not span.sentence.is_structural():
        return

    # Feature names are built with f-strings rather than ``+`` so that a
    # non-str helper result is formatted instead of raising TypeError
    # (presumably get_parent_tag can return None for a root node -- confirm).
    yield f"TAG_{get_tag(span)}", DEF_VALUE

    for attr in get_attributes(span):
        yield f"HTML_ATTR_{attr}", DEF_VALUE

    yield f"PARENT_TAG_{get_parent_tag(span)}", DEF_VALUE

    prev_tags = get_prev_sibling_tags(span)
    if prev_tags:
        # The closest preceding sibling is the last entry.
        yield f"PREV_SIB_TAG_{prev_tags[-1]}", DEF_VALUE
        # 1-based position of this node among its siblings.
        yield f"NODE_POS_{len(prev_tags) + 1}", DEF_VALUE
    else:
        yield "FIRST_NODE", DEF_VALUE

    next_tags = get_next_sibling_tags(span)
    if next_tags:
        # The closest following sibling is the first entry.
        yield f"NEXT_SIB_TAG_{next_tags[0]}", DEF_VALUE
    else:
        yield "LAST_NODE", DEF_VALUE

    yield f"ANCESTOR_CLASS_[{' '.join(get_ancestor_class_names(span))}]", DEF_VALUE
    yield f"ANCESTOR_TAG_[{' '.join(get_ancestor_tag_names(span))}]", DEF_VALUE
    yield f"ANCESTOR_ID_[{' '.join(get_ancestor_id_names(span))}]", DEF_VALUE
def _strlib_unary_features(span: SpanMention) -> Iterator[Tuple[str, int]]:
    """Generate the structural feature pairs describing one span.

    Produces nothing when the span's sentence is not structural. Every item
    is a ``(feature_name, DEF_VALUE)`` tuple; helper lookups happen lazily,
    just before the feature that needs them is produced.
    """
    if not span.sentence.is_structural():
        return

    # Own tag, then one feature per HTML attribute.
    own_tag = get_tag(span)
    yield f"TAG_{own_tag}", DEF_VALUE
    yield from ((f"HTML_ATTR_{attr}", DEF_VALUE) for attr in get_attributes(span))

    parent_tag = get_parent_tag(span)
    yield f"PARENT_TAG_{parent_tag}", DEF_VALUE

    # Preceding siblings: nearest one is last; position is 1-based.
    before = get_prev_sibling_tags(span)
    n_before = len(before)
    if n_before == 0:
        yield "FIRST_NODE", DEF_VALUE
    else:
        nearest_before = before[-1]
        yield f"PREV_SIB_TAG_{nearest_before}", DEF_VALUE
        position = n_before + 1
        yield f"NODE_POS_{position}", DEF_VALUE

    # Following siblings: nearest one is first.
    after = get_next_sibling_tags(span)
    if len(after) == 0:
        yield "LAST_NODE", DEF_VALUE
    else:
        nearest_after = after[0]
        yield f"NEXT_SIB_TAG_{nearest_after}", DEF_VALUE

    # Ancestor chains, each flattened to a single space-separated string.
    class_chain = " ".join(get_ancestor_class_names(span))
    yield f"ANCESTOR_CLASS_[{class_chain}]", DEF_VALUE

    tag_chain = " ".join(get_ancestor_tag_names(span))
    yield f"ANCESTOR_TAG_[{tag_chain}]", DEF_VALUE

    id_chain = " ".join(get_ancestor_id_names(span))
    yield f"ANCESTOR_ID_[{id_chain}]", DEF_VALUE