def volt_throttler(c):
    """Decide whether a (part, attr) candidate passes the voltage throttler.

    When both mentions are in the same table, the result is whether they are
    horizontally or vertically aligned. Otherwise the result is False exactly
    when ``LF_part_miss_match`` returns a negative value, and True in every
    other case.
    """
    part, attr = c
    pair = (part, attr)

    if same_table(pair):
        return is_horz_aligned(pair) or is_vert_aligned(pair)

    # A negative value from the mismatch labeling function rejects the pair.
    return not (LF_part_miss_match(pair) < 0)
def stg_temp_filter(c):
    """Decide whether a (part, attr) candidate passes the storage-temp filter.

    Outside a shared table the result is whatever ``is_tabular()`` reports for
    the sentence containing the part mention. Inside a shared table the result
    is whether the two mentions are horizontally or vertically aligned.
    """
    part, attr = c
    pair = (part, attr)

    if not same_table(pair):
        # Not in the same table: defer to the part sentence's tabular flag.
        return part.context.sentence.is_tabular()

    return is_horz_aligned(pair) or is_vert_aligned(pair)
def polarity_filter(c):
    """Decide whether a (part, attr) candidate passes the polarity filter.

    Collects the unigrams in the part's row that match ``polarity_pattern``.
    If there are any and the attr span (lower-cased) starts with none of them,
    the candidate is rejected. Otherwise, candidates in the same table must be
    horizontally or vertically aligned; all remaining candidates pass.
    """
    part, attr = c

    # Polarity tokens found in the row of the part mention (case preserved).
    ngrams_part = {
        token
        for token in get_row_ngrams(part, n_max=1, lower=False)
        if token and polarity_pattern.match(token)
    }

    if ngrams_part and not any(
        attr.context.get_span().lower().startswith(token.lower())
        for token in ngrams_part
    ):
        logger.debug(
            f"ngrams_part: {ngrams_part}\nattr: {attr.context.get_span().lower()}"
        )
        return False

    if not same_table(c):
        return True

    return is_horz_aligned(c) or is_vert_aligned(c)
def _vizlib_multinary_features(
    spans: Tuple[SpanMention, ...]
) -> Iterator[Tuple[str, int]]:
    """Yield visual features that hold for a tuple of spans.

    Each predicate below is applied to ``spans`` in order; for every one that
    is truthy, a ``(feature_name, DEF_VALUE)`` pair is yielded.
    """
    # (feature name, predicate) pairs, in the order they are emitted.
    checks = (
        ("SAME_PAGE", same_page),
        ("HORZ_ALIGNED", is_horz_aligned),
        ("VERT_ALIGNED", is_vert_aligned),
        ("VERT_ALIGNED_LEFT", is_vert_aligned_left),
        ("VERT_ALIGNED_RIGHT", is_vert_aligned_right),
        ("VERT_ALIGNED_CENTER", is_vert_aligned_center),
    )
    for feature_name, holds in checks:
        if holds(spans):
            yield feature_name, DEF_VALUE
def _vizlib_binary_features(
    span1: SpanMention, span2: SpanMention
) -> Iterator[Tuple[str, int]]:
    """Yield visual features that hold for the pair ``(span1, span2)``.

    Each predicate below is applied to the pair in order; for every one that
    is truthy, a ``(feature_name, DEF_VALUE)`` pair is yielded.
    """
    pair = (span1, span2)

    # (predicate, feature name) pairs, in the order they are emitted.
    checks = (
        (same_page, "SAME_PAGE"),
        (is_horz_aligned, "HORZ_ALIGNED"),
        (is_vert_aligned, "VERT_ALIGNED"),
        (is_vert_aligned_left, "VERT_ALIGNED_LEFT"),
        (is_vert_aligned_right, "VERT_ALIGNED_RIGHT"),
        (is_vert_aligned_center, "VERT_ALIGNED_CENTER"),
    )
    for predicate, feature_name in checks:
        if predicate(pair):
            yield feature_name, DEF_VALUE
def ce_v_max_filter(c):
    """Decide whether a (part, attr) candidate passes the ce_v_max filter.

    Candidates in the same table pass only if horizontally or vertically
    aligned. For the others, the unigrams vertically and horizontally aligned
    with the attr are run through ``_filter_non_parts``; if any remain and the
    part span (lower-cased) starts with none of them, the candidate is
    rejected. Everything else passes.
    """
    part, attr = c

    if same_table(c):
        return is_horz_aligned(c) or is_vert_aligned(c)

    # Check if the ce_v_max's are not matched with the part.
    # The filter is applied to the vertical ngrams first, then again to the
    # union with the horizontal ngrams, as in the original sequence.
    vert_parts = _filter_non_parts(set(get_vert_ngrams(attr, n_max=1)))
    ngrams_part = _filter_non_parts(
        vert_parts.union(set(get_horz_ngrams(attr, n_max=1)))
    )

    if ngrams_part and not any(
        part.context.get_span().lower().startswith(token.lower())
        for token in ngrams_part
    ):
        logger.debug(
            f"ngrams_part: {ngrams_part}\npart: {part.context.get_span().lower()}"
        )
        return False

    return True
def _vizlib_binary_features(span1, span2):
    """
    Yield visual features describing a pair of spans.

    For every predicate listed below that is truthy for ``(span1, span2)``,
    a ``(feature_name, DEF_VALUE)`` tuple is produced, in listing order.
    """
    both = (span1, span2)
    feature_tests = (
        ("SAME_PAGE", same_page),
        ("HORZ_ALIGNED", is_horz_aligned),
        ("VERT_ALIGNED", is_vert_aligned),
        ("VERT_ALIGNED_LEFT", is_vert_aligned_left),
        ("VERT_ALIGNED_RIGHT", is_vert_aligned_right),
        ("VERT_ALIGNED_CENTER", is_vert_aligned_center),
    )
    # The generator expression stays lazy: each predicate runs only when the
    # consumer asks for the next feature.
    yield from (
        (label, DEF_VALUE) for label, test in feature_tests if test(both)
    )
def LF_part_ce_keywords_in_col_prefix_same_table(c):
    """Label TRUE when a vertically aligned, same-table candidate has CE keywords in the attr's row.

    Returns TRUE only if all of the following hold, checked in this order:
    the mentions share a table, they are vertically aligned, the row ngrams of
    ``c[1]`` (up to 3-grams) overlap the CE keywords/abbreviations, those row
    ngrams do not overlap ``_NON_CEV_KEYWORDS``, and ``LF_current_in_row(c)``
    is falsy. Otherwise returns ABSTAIN.
    """
    if not same_table(c):
        return ABSTAIN
    if not is_vert_aligned(c):
        return ABSTAIN

    ce_terms = _CE_KEYWORDS.union(_CE_ABBREVS)
    if not overlap(ce_terms, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN

    # Row ngrams are fetched again rather than reused, as in the original.
    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN

    if LF_current_in_row(c):
        return ABSTAIN

    return TRUE
def LF_same_table_must_align(c):
    """Label FALSE for same-table candidates that are not aligned.

    Returns FALSE when the mentions share a table but are neither horizontally
    nor vertically aligned; returns ABSTAIN in every other case.
    """
    if not same_table(c):
        return ABSTAIN
    if is_horz_aligned(c) or is_vert_aligned(c):
        return ABSTAIN
    return FALSE
def LF_aligned_or_global(c):
    """Label TRUE for aligned candidates or a non-tabular part sentence.

    Returns TRUE when the candidate is in the same row, horizontally aligned,
    in the same column, or vertically aligned, or when the sentence of
    ``c.part`` is not tabular. Returns ABSTAIN otherwise.
    """
    aligned = (
        same_row(c)
        or is_horz_aligned(c)
        or same_col(c)
        or is_vert_aligned(c)
    )
    # The tabular check is only reached when no alignment test succeeded.
    if aligned or not c.part.context.sentence.is_tabular():
        return TRUE
    return ABSTAIN
def LF_polarity_part_viz_aligned(c):
    """Label TRUE when the candidate is horizontally or vertically aligned.

    Returns ABSTAIN when neither alignment test succeeds.
    """
    if is_horz_aligned(c) or is_vert_aligned(c):
        return TRUE
    return ABSTAIN
def temp_throttler(c):
    """Temperature throttler for a (part, attr) candidate.

    Candidates whose mentions are not in the same table always pass (True).
    Same-table candidates pass only when the mentions are horizontally or
    vertically aligned.
    """
    part, attr = c
    pair = (part, attr)

    if not same_table(pair):
        return True

    return is_horz_aligned(pair) or is_vert_aligned(pair)