def hertz_units(attr):
    """Decide whether ``attr`` sits next to a hertz unit and a bandwidth keyword.

    Returns ``True`` only when the unigrams gathered around ``attr`` (via
    ``get_right_ngrams`` and ``get_row_ngrams`` with ``spread=[-2, 2]``)
    overlap both the unit list and the keyword list, and none of the
    rejection rules below fires. Returns ``False`` otherwise.
    """
    unit_tokens = ["mhz", "khz"]
    # NOTE(review): "gain" is listed twice in the original; kept unchanged.
    keyword_tokens = [
        "product",
        "gain",
        "gain",
        "unity",
        "bandwidth",
        "gbp",
        "gbw",
        "gbwp",
    ]
    reject_tokens = ["-3 db", "maximum", "minimum", "impedance"]

    nearby = set(get_right_ngrams(attr, n_max=1, lower=True)) | set(
        get_row_ngrams(attr, n_max=1, spread=[-2, 2], lower=True)
    )
    in_cell = set(get_cell_ngrams(attr, n_max=1, lower=True))

    # Reject cells that contain both an "f" token and an "=" token.
    if {"f", "="} <= in_cell:
        return False

    # Reject a span whose stripped text is exactly "0".
    if attr.get_span().strip() == "0":
        return False

    # Reject when any filter keyword appears in the row unigrams.
    if overlap(reject_tokens, get_row_ngrams(attr, n_max=1, lower=True)):
        return False

    if not overlap(unit_tokens, nearby):
        return False
    if not overlap(keyword_tokens, nearby):
        return False
    return True
def get_word_feats(span):
    """Yield word-level feature strings for ``span``, cached by ``span.stable_id``.

    On the first call for a given ``span.stable_id`` the features are computed
    and stored in the module-level ``unary_word_feats`` mapping; later calls
    yield the cached set. Three feature families are produced:

    * ``CONTAINS_WORDS_[ngram]`` for 1- and 2-grams of the span's own tokens,
    * ``LEFT_WORDS_[ngram]`` for ngrams returned by ``get_left_ngrams``,
    * ``RIGHT_WORDS_[ngram]`` for ngrams returned by ``get_right_ngrams``.

    :param span: object exposing ``stable_id`` and ``get_attrib_tokens``.
    :return: generator of feature strings (set iteration order, so unordered).
    """
    attrib = "words"
    if span.stable_id not in unary_word_feats:
        # NOTE(review): the sibling ``_get_word_feats`` reads the window from
        # settings["featurization"]["textual"]; this function reads
        # ["content"]. Key left unchanged -- confirm against the config.
        window = settings["featurization"]["content"]["word_feature"]["window"]
        prefix = attrib.upper()

        # Build the set locally and publish it only once complete, so an
        # exception part-way through cannot leave a partial cache entry.
        feats = set()

        for ngram in tokens_to_ngrams(
            span.get_attrib_tokens(attrib), n_min=1, n_max=2
        ):
            feats.add(f"CONTAINS_{prefix}_[{ngram}]")

        for ngram in get_left_ngrams(span, window=window, n_max=2, attrib=attrib):
            feats.add(f"LEFT_{prefix}_[{ngram}]")

        for ngram in get_right_ngrams(span, window=window, n_max=2, attrib=attrib):
            feats.add(f"RIGHT_{prefix}_[{ngram}]")

        # The original called ``unary_word_feats.add(...)`` on the mapping
        # itself, which has no ``add``; features belong in the per-span set.
        unary_word_feats[span.stable_id] = feats

    yield from unary_word_feats[span.stable_id]
def _get_word_feats(span: SpanMention) -> Iterator[str]:
    """Yield word-level feature strings for ``span``, cached by its stable id.

    The first call for a given ``span.stable_id`` fills an entry in the
    module-level ``unary_word_feats`` mapping; later calls replay that entry.
    Features produced:

    * ``CONTAINS_WORDS_[ngram]`` for 1- and 2-grams of the span's word tokens,
    * ``LEFT_WORDS_[ngram]`` / ``RIGHT_WORDS_[ngram]`` for ngrams returned by
      ``get_left_ngrams`` / ``get_right_ngrams`` within the configured window,
    * ``SPAN_TYPE_[IMPLICIT]`` or ``SPAN_TYPE_[EXPLICIT]``,
    * ``STARTS_WITH_CAPITAL`` when the first character of the span is uppercase,
    * ``LENGTH_<n>`` where ``n`` is ``span.get_num_words()``.

    :param span: the span mention to featurize.
    :return: generator of feature strings (set iteration order, so unordered).
    """
    attrib = "words"
    cache_key = span.stable_id

    if cache_key not in unary_word_feats:
        unary_word_feats[cache_key] = set()
        bucket = unary_word_feats[cache_key]
        tag = attrib.upper()

        own_tokens = span.get_attrib_tokens(attrib)
        for ngram in tokens_to_ngrams(own_tokens, n_min=1, n_max=2):
            bucket.add(f"CONTAINS_{tag}_[{ngram}]")

        window = settings["featurization"]["textual"]["word_feature"]["window"]

        for ngram in get_left_ngrams(span, window=window, n_max=2, attrib=attrib):
            bucket.add(f"LEFT_{tag}_[{ngram}]")

        for ngram in get_right_ngrams(span, window=window, n_max=2, attrib=attrib):
            bucket.add(f"RIGHT_{tag}_[{ngram}]")

        if isinstance(span, ImplicitSpanMention):
            span_kind = "IMPLICIT"
        else:
            span_kind = "EXPLICIT"
        bucket.add(f"SPAN_TYPE_[{span_kind}]")

        # Indexes the first character directly, so the span text is expected
        # to be non-empty here.
        if span.get_span()[0].isupper():
            bucket.add("STARTS_WITH_CAPITAL")

        bucket.add(f"LENGTH_{span.get_num_words()}")

    yield from unary_word_feats[cache_key]
def current_units(attr):
    """Decide whether ``attr`` sits next to a current unit and a supply keyword.

    Returns ``True`` only when the unigrams gathered around ``attr`` (via
    ``get_right_ngrams`` and ``get_row_ngrams`` with ``spread=[-5, 5]``)
    overlap both the unit list and the keyword list, and none of the
    rejection rules below fires. Returns ``False`` otherwise.
    """
    # NOTE: The two mu symbols below are distinct characters, not duplicates.
    unit_tokens = ["ma", "μa", "ua", "µa", "\uf06da"]
    keyword_tokens = ["supply", "quiescent", "iq", "is", "idd", "icc"]
    reject_tokens = ["offset", "bias", "logic", "shutdown"]

    nearby = set(get_right_ngrams(attr, n_max=1, lower=True)) | set(
        get_row_ngrams(attr, n_max=1, spread=[-5, 5], lower=True)
    )

    # Reject a span whose stripped text is exactly "0".
    if attr.get_span().strip() == "0":
        return False

    # Reject when any filter keyword appears in the row unigrams.
    if overlap(reject_tokens, get_row_ngrams(attr, n_max=1, lower=True)):
        return False

    if not overlap(unit_tokens, nearby):
        return False
    if not overlap(keyword_tokens, nearby):
        return False
    return True
def LF_positive_number_right(c):
    """Vote ``TRUE`` if any ngram right of ``c[1]`` begins with a digit.

    Looks at the ngrams returned by ``get_right_ngrams(c[1], window=4)`` and
    returns ``TRUE`` when at least one of them matches ``\\d+`` at its start;
    otherwise returns ``ABSTAIN``.
    """
    starts_with_digits = any(
        re.match(r"\d+", ngram) for ngram in get_right_ngrams(c[1], window=4)
    )
    if starts_with_digits:
        return TRUE
    return ABSTAIN
def LF_to_right(c):
    """Vote ``TRUE`` if the ngram ``"to"`` appears right of ``c[1]``.

    Searches the ngrams returned by ``get_right_ngrams(c[1], window=2)``;
    returns ``ABSTAIN`` when ``"to"`` is not among them.
    """
    ngrams_to_the_right = get_right_ngrams(c[1], window=2)
    if "to" in ngrams_to_the_right:
        return TRUE
    return ABSTAIN
def LF_polarity_right_of_part(c):
    """Vote ``TRUE`` if the candidate's polarity text also appears right of the part.

    The span text of ``c[1].context`` must be exactly ``"NPN"`` or ``"PNP"``
    and that same string must be among the case-preserved ngrams returned by
    ``get_right_ngrams(c.part, lower=False)``. Otherwise returns ``ABSTAIN``.
    """
    ngrams_after_part = set(get_right_ngrams(c.part, lower=False))
    polarity = c[1].context.get_span()

    if polarity in ("NPN", "PNP") and polarity in ngrams_after_part:
        return TRUE
    return ABSTAIN