Esempio n. 1
0
    def hertz_units(attr):
        """Return True when ``attr`` sits next to a kHz/MHz unit and a gain keyword.

        The decision uses lowercase unigrams gathered to the right of ``attr``
        and from its row (with ``spread=[-2, 2]``), plus the unigrams of its
        own cell.

        Returns:
            ``False`` if the cell contains both ``"f"`` and ``"="``, if the
            span text is exactly ``"0"`` after stripping, or if the row
            contains any filter keyword. Otherwise ``True`` only when both a
            unit token and a keyword appear among the related unigrams.
        """
        # Named differently from the enclosing function to avoid shadowing it.
        unit_tokens = ["mhz", "khz"]
        # "gain" used to be listed twice; one entry is sufficient.
        keywords = [
            "product",
            "gain",
            "unity",
            "bandwidth",
            "gbp",
            "gbw",
            "gbwp",
        ]
        # NOTE(review): "-3 db" contains a space while the row n-grams below are
        # requested with n_max=1 -- confirm this entry can actually match.
        filter_keywords = ["-3 db", "maximum", "minimum", "impedance"]

        related_ngrams = set(get_right_ngrams(attr, n_max=1, lower=True))
        related_ngrams.update(get_row_ngrams(attr, n_max=1, spread=[-2, 2], lower=True))
        cell_ngrams = set(get_cell_ngrams(attr, n_max=1, lower=True))

        # A cell holding both "f" and "=" is rejected outright.
        if "f" in cell_ngrams and "=" in cell_ngrams:
            return False

        # A bare zero is never accepted.
        if attr.get_span().strip() == "0":
            return False

        if overlap(filter_keywords, get_row_ngrams(attr, n_max=1, lower=True)):
            return False

        return bool(
            overlap(unit_tokens, related_ngrams) and overlap(keywords, related_ngrams)
        )
Esempio n. 2
0
def LF_ce_keywords_no_part_in_rows(c):
    """Vote TRUE when the row of ``c[1]`` has CE keywords and no part-like n-gram.

    ABSTAIN is returned as soon as any row n-gram (up to 3 tokens), once
    upper-cased, is a mix of letters and digits matching the pattern below.
    Otherwise TRUE is returned if the row n-grams overlap the union of
    ``_CE_KEYWORDS`` and ``_CE_ABBREVS``; ABSTAIN if they do not.
    """
    part_like = "^([0-9]+[a-zA-Z]+|[a-zA-Z]+[0-9]+)[0-9a-zA-Z]*$"
    row_has_part_like_ngram = any(
        re.match(part_like, ngram.upper())
        for ngram in get_row_ngrams(c[1], n_max=3, lower=False)
    )
    if row_has_part_like_ngram:
        return ABSTAIN

    if overlap(_CE_KEYWORDS.union(_CE_ABBREVS), get_row_ngrams(c[1], n_max=3)):
        return TRUE
    return ABSTAIN
Esempio n. 3
0
def LF_part_ce_keywords_in_row_prefix(c):
    """Vote TRUE when the row of ``c[1]`` has CE keywords and a prefix of the part.

    All of the following must hold, checked in this order, otherwise ABSTAIN:

    * the row n-grams of ``c[1]`` overlap ``_CE_KEYWORDS`` / ``_CE_ABBREVS``;
    * the lower-cased part span starts with at least one of the row n-grams
      kept by ``_filter_non_parts``;
    * the row n-grams do not overlap ``_NON_CEV_KEYWORDS``;
    * ``LF_current_in_row(c)`` is falsy.
    """
    part_ngrams = _filter_non_parts(get_row_ngrams(c[1], n_max=3))

    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN

    prefix_hits = [
        c.part.context.get_span().lower().startswith(ngram) for ngram in part_ngrams
    ]
    if not any(prefix_hits):
        return ABSTAIN

    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN

    if LF_current_in_row(c):
        return ABSTAIN

    return TRUE
Esempio n. 4
0
def LF_ce_keywords_not_part_in_row_col_prefix(c):
    """Vote TRUE for a cross-table candidate with CE keywords and no part n-grams.

    TRUE requires, in this order: ``same_table(c)`` is falsy; the row n-grams
    of ``c[1]`` overlap ``_CE_KEYWORDS`` / ``_CE_ABBREVS``; nothing survives
    ``_filter_non_parts`` among the column and row n-grams of ``c[1]``;
    neither the row of ``c.part`` nor the row of ``c[1]`` overlaps
    ``_NON_CEV_KEYWORDS``; and ``LF_current_in_row(c)`` is falsy. Any failed
    condition yields ABSTAIN.
    """
    col_ngrams = set(get_col_ngrams(c[1], n_max=3, lower=False))
    row_ngrams = set(get_row_ngrams(c[1], n_max=3, lower=False))
    part_ngrams = _filter_non_parts(col_ngrams.union(row_ngrams))

    if same_table(c):
        return ABSTAIN
    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN
    if len(part_ngrams) != 0:
        return ABSTAIN
    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c.part, n_max=3)):
        return ABSTAIN
    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN
    if LF_current_in_row(c):
        return ABSTAIN
    return TRUE
Esempio n. 5
0
def LF_part_ce_keywords_in_rows_cols_prefix(c):
    """Vote TRUE when row/column n-grams of ``c[1]`` hold CE keywords and a part prefix.

    The row and column n-grams (up to 3 tokens) of ``c[1]`` are pooled. TRUE
    is returned when the pool overlaps ``_CE_KEYWORDS`` / ``_CE_ABBREVS`` and
    the lower-cased part span starts with at least one pooled n-gram kept by
    ``_filter_non_parts``; otherwise ABSTAIN.
    """
    pooled = set(get_row_ngrams(c[1], n_max=3))
    pooled = pooled.union(set(get_col_ngrams(c[1], n_max=3)))
    part_ngrams = _filter_non_parts(pooled)

    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), pooled):
        return ABSTAIN

    prefix_hits = [
        c.part.context.get_span().lower().startswith(ngram) for ngram in part_ngrams
    ]
    if any(prefix_hits):
        return TRUE
    return ABSTAIN
Esempio n. 6
0
def LF_part_miss_match_polarity(c):
    """Vote FALSE when polarity n-grams in the part's row do not prefix ``c[1]``.

    The unigrams in the row of ``c.part`` are passed through
    ``_filter_non_polarity``. ABSTAIN is returned when nothing remains or when
    the lower-cased span of ``c[1]`` starts with at least one remaining
    n-gram (compared case-insensitively); otherwise FALSE.
    """
    polarity_ngrams = _filter_non_polarity(
        set(get_row_ngrams(c.part, n_max=1, lower=False))
    )
    if len(polarity_ngrams) == 0:
        return ABSTAIN

    prefix_hits = [
        c[1].context.get_span().lower().startswith(ngram.lower())
        for ngram in polarity_ngrams
    ]
    if any(prefix_hits):
        return ABSTAIN
    return FALSE
Esempio n. 7
0
def LF_complement_left_row(c):
    """Return -1 if "complement"/"complementary" is near the part, else 0.

    The search covers the row n-grams of ``c.part`` chained with its left
    n-grams within a window of 10.
    """
    nearby_ngrams = chain.from_iterable(
        [get_row_ngrams(c.part), get_left_ngrams(c.part, window=10)]
    )
    if overlap(["complement", "complementary"], nearby_ngrams):
        return -1
    return 0
Esempio n. 8
0
def pos_gain(c):
    """Vote TRUE when the word "gain" appears in the row of ``c.gain``.

    Returns:
        ``TRUE`` if the lowercase row n-grams of ``c.gain`` contain "gain",
        otherwise ``ABSTAIN``.
    """
    row_ngrams = set(get_row_ngrams(c.gain, lower=True))
    if overlap(["gain"], row_ngrams):
        return TRUE
    # The fallback used to be a bare ``ABSTAIN`` expression with no ``return``,
    # so the function silently returned None instead of the ABSTAIN label.
    return ABSTAIN
Esempio n. 9
0
def LF_bad_keywords_in_row(c):
    """Vote FALSE when the row of ``c.volt`` contains an unwanted keyword.

    Returns FALSE if any of "continuous", "cut-off", "gain" or "breakdown"
    appears among the row n-grams of ``c.volt``; ABSTAIN otherwise.
    """
    unwanted = ["continuous", "cut-off", "gain", "breakdown"]
    if overlap(unwanted, get_row_ngrams(c.volt)):
        return FALSE
    return ABSTAIN
Esempio n. 10
0
def LF_complement_left_row(c):
    """Return FALSE if "complement"/"complementary" is near the part mention.

    The row n-grams of ``c.part`` and its left n-grams (window of 10) are
    searched; ABSTAIN is returned when neither keyword is found.
    """
    nearby_ngrams = chain.from_iterable(
        [get_row_ngrams(c.part), get_left_ngrams(c.part, window=10)]
    )
    if overlap(["complement", "complementary"], nearby_ngrams):
        return FALSE
    return ABSTAIN
Esempio n. 11
0
    def ce_v_max_conditions(attr):
        """Accept ``attr`` when its row has a CE keyword and no "vcb"/"base" text.

        Returns ``False`` when the row unigrams of ``attr`` do not overlap
        ``ce_keywords`` / ``ce_abbrevs``, or when the lower-cased sentence
        text contains "vcb" or "base"; ``True`` otherwise.
        """
        row_unigrams = set(get_row_ngrams(attr, n_max=1))
        if not overlap(ce_keywords.union(ce_abbrevs), row_unigrams):
            return False

        sentence_text = attr.sentence.text.lower()
        return not ("vcb" in sentence_text or "base" in sentence_text)
Esempio n. 12
0
def pos_gain_keywords(c):
    """Vote TRUE when ``c.gain`` has "typ" vertically and kHz/MHz in its row.

    Returns TRUE if the lowercase vertical unigrams of ``c.gain`` contain
    "typ" or "typ." and its lowercase row n-grams contain "khz" or "mhz";
    ABSTAIN otherwise.
    """
    vertical = set(get_vert_ngrams(c.gain, n_max=1, lower=True))
    in_row = set(get_row_ngrams(c.gain, lower=True))

    if not overlap(["typ", "typ."], vertical):
        return ABSTAIN
    if not overlap(["khz", "mhz"], in_row):
        return ABSTAIN
    return TRUE
Esempio n. 13
0
 def is_birthplace_table_row(mention):
     """Return True if ``mention`` is tabular and its row contains "birth_place".

     Non-tabular mentions are rejected before any n-grams are requested.
     """
     if not mention.sentence.is_tabular():
         return False
     return "birth_place" in get_row_ngrams(mention, lower=True)
Esempio n. 14
0
def LF_part_miss_match_part(c):
    """Vote FALSE when part-like n-grams in the row of ``c[1]`` do not prefix the part.

    ABSTAIN is returned when ``c[1]`` is not in a tabular sentence, when no
    row unigram of ``c[1]`` survives ``_filter_non_parts``, or when the
    lower-cased part span starts with at least one surviving n-gram
    (compared case-insensitively). Otherwise FALSE.
    """
    if not c[1].context.sentence.is_tabular():
        return ABSTAIN

    part_ngrams = _filter_non_parts(set(get_row_ngrams(c[1], n_max=1, lower=False)))
    if len(part_ngrams) == 0:
        return ABSTAIN

    prefix_hits = [
        c.part.context.get_span().lower().startswith(ngram.lower())
        for ngram in part_ngrams
    ]
    if any(prefix_hits):
        return ABSTAIN
    return FALSE
Esempio n. 15
0
def is_in_birthplace_table_row(mention):
    """Return True if ``mention`` is tabular and its row has "birth" and "place".

    Both words must be present among the lowercase row n-grams; non-tabular
    mentions are rejected before any n-grams are requested.
    """
    if not mention.sentence.is_tabular():
        return False
    required_words = {"birth", "place"}
    return required_words <= set(get_row_ngrams(mention, lower=True))
Esempio n. 16
0
    def current_units(attr):
        """Return True when ``attr`` sits next to a current unit and a supply keyword.

        Lowercase unigrams to the right of ``attr`` and from its row (with
        ``spread=[-5, 5]``) form the related set. ``False`` is returned when
        the stripped span is exactly "0" or when the row contains a filter
        keyword; otherwise the result is ``True`` only if the related set has
        both a unit token and a keyword.
        """
        # NOTE: "μa" and "µa" use two distinct code points for mu; both are
        # intentional and neither duplicates the other.
        unit_tokens = ["ma", "μa", "ua", "µa", "\uf06da"]
        keywords = ["supply", "quiescent", "iq", "is", "idd", "icc"]
        filter_keywords = ["offset", "bias", "logic", "shutdown"]

        nearby = set(get_right_ngrams(attr, n_max=1, lower=True))
        nearby.update(get_row_ngrams(attr, n_max=1, spread=[-5, 5], lower=True))

        if attr.get_span().strip() == "0":
            return False
        if overlap(filter_keywords, get_row_ngrams(attr, n_max=1, lower=True)):
            return False
        return bool(overlap(unit_tokens, nearby) and overlap(keywords, nearby))
Esempio n. 17
0
 def get_row_and_column_ngrams(mention):
     """Check row/column n-gram invariants and report a "birth_place" row hit.

     For a non-tabular mention both n-gram lists are asserted to be exactly
     one ``None``; for a tabular mention neither list may contain ``None``.
     Returns True when "birth_place" is among the row n-grams.
     """
     row_ngrams = list(get_row_ngrams(mention))
     col_ngrams = list(get_col_ngrams(mention))

     if mention.sentence.is_tabular():
         assert all(x is not None for x in row_ngrams)
         assert all(x is not None for x in col_ngrams)
     else:
         assert len(row_ngrams) == 1 and row_ngrams[0] is None
         assert len(col_ngrams) == 1 and col_ngrams[0] is None

     return "birth_place" in row_ngrams
Esempio n. 18
0
def tablelib_unary_features(span):
    """
    Yield table-/structure-related features for a single span.

    Nothing is yielded when the span's sentence is not tabular. Otherwise, for
    every attribute listed in the ``unary_features`` table settings, pairs of
    ``(feature_name, DEF_VALUE)`` are yielded for the cell n-grams, the row and
    column numbers and spans of the sentence, the row/column head n-grams, and
    the row and column n-grams.
    """
    if not span.sentence.is_tabular():
        return
    sentence = span.sentence
    unary_cfg = settings["featurization"]["table"]["unary_features"]

    for attrib in unary_cfg["attrib"]:
        cell_ngrams = get_cell_ngrams(
            span, n_max=unary_cfg["get_cell_ngrams"]["max"], attrib=attrib
        )
        for ngram in cell_ngrams:
            yield f"CELL_{attrib.upper()}_[{ngram}]", DEF_VALUE

        for row_idx in range(sentence.row_start, sentence.row_end + 1):
            yield f"ROW_NUM_[{row_idx}]", DEF_VALUE
        for col_idx in range(sentence.col_start, sentence.col_end + 1):
            yield f"COL_NUM_[{col_idx}]", DEF_VALUE

        # NOTE: These two features could be accounted for by HTML_ATTR in
        # structural features
        yield f"ROW_SPAN_[{num_rows(sentence)}]", DEF_VALUE
        yield f"COL_SPAN_[{num_cols(sentence)}]", DEF_VALUE

        for axis in ("row", "col"):
            head_ngrams = get_head_ngrams(
                span, axis, n_max=unary_cfg["get_head_ngrams"]["max"], attrib=attrib
            )
            for ngram in head_ngrams:
                yield f"{axis.upper()}_HEAD_{attrib.upper()}_[{ngram}]", DEF_VALUE

        row_ngrams = get_row_ngrams(
            span, n_max=unary_cfg["get_row_ngrams"]["max"], attrib=attrib
        )
        for ngram in row_ngrams:
            yield f"ROW_{attrib.upper()}_[{ngram}]", DEF_VALUE

        col_ngrams = get_col_ngrams(
            span, n_max=unary_cfg["get_col_ngrams"]["max"], attrib=attrib
        )
        for ngram in col_ngrams:
            yield f"COL_{attrib.upper()}_[{ngram}]", DEF_VALUE
0
def polarity_filter(c):
    """Throttle a ``(part, attr)`` candidate on polarity match and table alignment.

    Returns ``False`` when the part's row has unigrams matching
    ``polarity_pattern`` and the attribute span starts with none of them
    (case-insensitively). Otherwise, candidates in the same table are kept
    only if horizontally or vertically aligned; all other candidates are kept.
    """
    part, attr = c

    # Polarity-like unigrams found in the same row as the part.
    row_polarities = {
        ngram
        for ngram in get_row_ngrams(part, n_max=1, lower=False)
        if (ngram and polarity_pattern.match(ngram))
    }
    polarity_mismatch = len(row_polarities) != 0 and not any(
        attr.context.get_span().lower().startswith(polarity.lower())
        for polarity in row_polarities
    )
    if polarity_mismatch:
        logger.debug(
            f"ngrams_part: {row_polarities}\nattr: {attr.context.get_span().lower()}"
        )
        return False

    if not same_table(c):
        return True
    return is_horz_aligned(c) or is_vert_aligned(c)
Esempio n. 20
0
def neg_gain_keywords_in_row(c):
    """Vote FALSE when the row of ``c.gain`` contains a disqualifying keyword.

    Returns:
        ``FALSE`` if any lowercase row unigram of ``c.gain`` is one of the
        keywords below, otherwise ``ABSTAIN``.
    """
    negative_keywords = [
        "small",
        "full",
        "flat",
        "current",
        "thd",
        "signal",
        "flatness",
        "input",
        "noise",
        "f=",
        "f",
        "-3",
        "power",
        "db",
        "dbm",
        "output",
        # The list only had the misspelling "impedence", so correctly spelled
        # rows were never filtered. The misspelling is kept in case source
        # documents contain it.
        "impedance",
        "impedence",
        "delay",
        "capacitance",
        "range",  # was listed twice; one entry is sufficient
        "ratio",
        "dbc",
        "temperature",
        "common",
        "voltage",
    ]
    row_ngrams = set(get_row_ngrams(c.gain, n_max=1, lower=True))
    if overlap(negative_keywords, row_ngrams):
        return FALSE
    return ABSTAIN
Esempio n. 21
0
def LF_typ_row(c):
    """Return -1 if "typ" or "typ." appears in the row of ``c.temp``, else 0."""
    row_ngrams = list(get_row_ngrams(c.temp))
    if overlap(["typ", "typ."], row_ngrams):
        return -1
    return 0
Esempio n. 22
0
def LF_tstg_row(c):
    """Return 1 if "tstg", "stg" or "ts" appears in the row of ``c.temp``, else 0."""
    row_ngrams = list(get_row_ngrams(c.temp))
    if overlap(["tstg", "stg", "ts"], row_ngrams):
        return 1
    return 0
Esempio n. 23
0
def LF_operating_row(c):
    """Return 1 if "operating" is among the row n-grams of ``c.temp``, else 0."""
    if "operating" in get_row_ngrams(c.temp):
        return 1
    return 0
Esempio n. 24
0
def LF_temperature_row(c):
    """Return 1 if "temperature" is among the row n-grams of ``c.temp``, else 0."""
    if "temperature" in get_row_ngrams(c.temp):
        return 1
    return 0
Esempio n. 25
0
def LF_storage_row(c):
    """Return 1 if "storage" is among the row n-grams of ``c.temp``, else 0."""
    if "storage" in get_row_ngrams(c.temp):
        return 1
    return 0
Esempio n. 26
0
def LF_part_mismatch_row(c):
    """Vote FALSE when part-like n-grams in the row of ``c[1]`` do not prefix the part.

    ABSTAIN is returned when no row unigram of ``c[1]`` survives
    ``_filter_non_parts`` or when the lower-cased part span starts with at
    least one surviving n-gram (compared case-insensitively); otherwise FALSE.
    """
    part_ngrams = _filter_non_parts(set(get_row_ngrams(c[1], n_max=1)))
    if len(part_ngrams) == 0:
        return ABSTAIN

    prefix_hits = [
        c.part.context.get_span().lower().startswith(ngram.lower())
        for ngram in part_ngrams
    ]
    if any(prefix_hits):
        return ABSTAIN
    return FALSE
Esempio n. 27
0
def LF_operating_row(c):
    """Return FALSE if "operating" is among the row n-grams of ``c[1]``, else ABSTAIN."""
    if "operating" in get_row_ngrams(c[1]):
        return FALSE
    return ABSTAIN
Esempio n. 28
0
def LF_tstg_row(c):
    """Return TRUE if "tstg", "stg" or "ts" is in the row of ``c[1]``, else ABSTAIN."""
    row_ngrams = list(get_row_ngrams(c[1]))
    if overlap(["tstg", "stg", "ts"], row_ngrams):
        return TRUE
    return ABSTAIN
Esempio n. 29
0
def LF_too_many_numbers_row(c):
    """Return -1 if the row of ``c.temp`` has three or more "number" NER tags, else 0."""
    ner_tags = get_row_ngrams(c.temp, attrib="ner_tags")
    number_count = sum(1 for tag in ner_tags if tag == "number")
    if number_count >= 3:
        return -1
    return 0
Esempio n. 30
0
def LF_temperature_row(c):
    """Return TRUE if "temperature" is among the row n-grams of ``c[1]``, else ABSTAIN."""
    if "temperature" in get_row_ngrams(c[1]):
        return TRUE
    return ABSTAIN