def LF_part_ce_keywords_in_rows_cols_prefix_1(c):
    """Vote TRUE when CE keywords and a matching part prefix align with c[1].

    Collects the ngrams returned by ``get_horz_ngrams`` and
    ``get_vert_ngrams`` for ``c[1]``, keeps the part-like ones via
    ``_filter_non_parts``, and returns ``TRUE`` only when both hold:

    * the collected ngrams overlap ``_CE_KEYWORDS`` or ``_CE_ABBREVS``;
    * the lower-cased span of ``c.part`` starts with one of the part-like
      ngrams (compared case-insensitively).

    Otherwise returns ``ABSTAIN``.
    """
    ngrams = set(get_horz_ngrams(c[1])) | set(get_vert_ngrams(c[1]))
    ngrams_part = _filter_non_parts(ngrams)

    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), ngrams):
        return ABSTAIN

    part_span = c.part.context.get_span().lower()
    # The span is lower-cased, so the candidate prefix must be lower-cased
    # too; otherwise a mixed-case part ngram could never match.
    if any(part_span.startswith(prefix.lower()) for prefix in ngrams_part):
        return TRUE
    return ABSTAIN
def LF_part_miss_match(c):
    """Return -1 when part-like ngrams near c[1] do not prefix the c[0] span.

    Unigrams from ``get_vert_ngrams`` and ``get_horz_ngrams`` for ``c[1]``
    are merged and passed through ``filter_non_parts``. The result is ``0``
    when nothing survives the filter or when the lower-cased span of
    ``c[0]`` starts with any surviving ngram; otherwise ``-1``.

    NOTE(review): a later definition with the same name appears further down
    in this file and would replace this one at import time -- confirm which
    version is meant to be live.
    """
    vert_unigrams = set(get_vert_ngrams(c[1], n_max=1))
    horz_unigrams = set(get_horz_ngrams(c[1], n_max=1))
    part_like = filter_non_parts(vert_unigrams | horz_unigrams)

    # Nothing part-like is aligned with c[1]: no evidence either way.
    if len(part_like) == 0:
        return 0

    prefix_hits = [
        c[0].get_span().lower().startswith(token.lower())
        for token in part_like
    ]
    if any(prefix_hits):
        return 0
    return -1
def LF_part_miss_match(c):
    """Vote FALSE when part-like ngrams near c[1] do not prefix the part span.

    Unigrams from ``get_vert_ngrams`` and ``get_horz_ngrams`` for ``c[1]``
    are merged and passed through ``_filter_non_parts``. Returns ``ABSTAIN``
    when nothing survives the filter or when the lower-cased span of
    ``c.part`` starts with any surviving ngram; otherwise returns ``FALSE``.
    """
    vert_unigrams = set(get_vert_ngrams(c[1], n_max=1))
    horz_unigrams = set(get_horz_ngrams(c[1], n_max=1))
    part_like = _filter_non_parts(vert_unigrams | horz_unigrams)

    # Nothing part-like is aligned with c[1]: no evidence either way.
    if len(part_like) == 0:
        return ABSTAIN

    prefix_hits = [
        c.part.context.get_span().lower().startswith(token.lower())
        for token in part_like
    ]
    if any(prefix_hits):
        return ABSTAIN
    return FALSE
def pos_gain_keywords(c):
    """Vote TRUE when "typ" is vertically aligned and a kHz/MHz unit is in the row.

    Looks at the lower-cased unigrams from ``get_vert_ngrams`` and the
    lower-cased ngrams from ``get_row_ngrams`` for ``c.gain``. Returns
    ``TRUE`` only when the former overlap ``"typ"``/``"typ."`` and the
    latter overlap ``"khz"``/``"mhz"``; otherwise ``ABSTAIN``.
    """
    vertical_tokens = set(get_vert_ngrams(c.gain, n_max=1, lower=True))
    row_tokens = set(get_row_ngrams(c.gain, lower=True))

    if not overlap(["typ", "typ."], vertical_tokens):
        return ABSTAIN
    if not overlap(["khz", "mhz"], row_tokens):
        return ABSTAIN
    return TRUE
def pos_gain_header_unit(c):
    """Vote TRUE when header, unit and "typ" cues all surround ``c.gain``.

    Returns ``TRUE`` only when all three hold, otherwise ``ABSTAIN``:

    * the horizontally aligned unigrams overlap ``"gain"``/``"unity"``;
    * a unigram in a neighboring cell tagged ``"RIGHT"`` (searched with
      ``dist=5``) overlaps ``"mhz"``/``"khz"``;
    * the vertically aligned unigrams overlap ``"typ"``/``"typ."``.
    """
    horz_ngrams = set(get_horz_ngrams(c.gain, n_max=1, lower=True))
    vert_ngrams = set(get_vert_ngrams(c.gain, n_max=1, lower=True))
    # With directions=True each item's first entry is read as the ngram and
    # its last entry as the direction tag. The ngrams are requested
    # lower-cased because they are compared against lower-case unit
    # keywords; with lower=False a cell reading "MHz" or "kHz" never matched.
    right_ngrams = {
        item[0]
        for item in get_neighbor_cell_ngrams(
            c.gain, n_max=1, dist=5, directions=True, lower=True
        )
        if item[-1] == "RIGHT"
    }

    if (
        overlap(["gain", "unity"], horz_ngrams)
        and overlap(["mhz", "khz"], right_ngrams)
        and overlap(["typ", "typ."], vert_ngrams)
    ):
        return TRUE
    return ABSTAIN
def ce_v_max_filter(c):
    """Decide whether the (part, attr) candidate ``c`` should be kept.

    * If ``same_table(c)`` holds, the candidate is kept only when
      ``is_horz_aligned(c)`` or ``is_vert_aligned(c)`` holds.
    * Otherwise the part-like unigrams aligned with ``attr`` are collected;
      if there are any and none of them is a case-insensitive prefix of the
      part span, the candidate is rejected (``False``).
    * In every other case the candidate is kept (``True``).
    """
    part, attr = c

    if same_table(c):
        return is_horz_aligned(c) or is_vert_aligned(c)

    # Reject when the part numbers aligned with the attribute do not match
    # the candidate's part.
    vert_parts = _filter_non_parts(set(get_vert_ngrams(attr, n_max=1)))
    horz_unigrams = set(get_horz_ngrams(attr, n_max=1))
    ngrams_part = _filter_non_parts(vert_parts.union(horz_unigrams))

    if len(ngrams_part) > 0 and not any(
        part.context.get_span().lower().startswith(candidate.lower())
        for candidate in ngrams_part
    ):
        logger.debug(
            f"ngrams_part: {ngrams_part}\npart: {part.context.get_span().lower()}"
        )
        return False

    return True
def neg_current_keywords_in_vert(c):
    """Vote FALSE when a disqualifying keyword is vertically aligned.

    Checks the lower-cased ngrams from ``get_vert_ngrams`` for
    ``c.supply_current`` against a fixed keyword list. Returns ``FALSE`` on
    any overlap, otherwise ``ABSTAIN``.
    """
    negative_keywords = [
        "over",
        "temperature",
        "vgn",
        "f",
        "-3",
        "db",
        "dbc",
        "min",
        "max",
    ]
    vertical_ngrams = get_vert_ngrams(c.supply_current, lower=True)
    if overlap(negative_keywords, vertical_ngrams):
        return FALSE
    return ABSTAIN