Exemple #1
0
def LF_complement_left_row(c):
    """Label -1 when a complement keyword overlaps the part's row/left ngrams.

    The ngrams from ``get_row_ngrams(c.part)`` followed by those from
    ``get_left_ngrams(c.part, window=10)`` are passed to ``overlap`` together
    with the keywords "complement" and "complementary". Returns -1 when
    ``overlap`` gives a truthy result and 0 otherwise.
    """
    keywords = ["complement", "complementary"]
    ngram_sources = [
        get_row_ngrams(c.part),
        get_left_ngrams(c.part, window=10),
    ]
    if overlap(keywords, chain.from_iterable(ngram_sources)):
        return -1
    return 0
Exemple #2
0
def get_word_feats(span):
    """Yield the unary word features of ``span``, computing them at most once.

    Features are cached in the ``unary_word_feats`` mapping under
    ``span.stable_id``. On a cache miss the following strings are generated:

    * ``CONTAINS_WORDS_[ngram]`` for each ngram ``tokens_to_ngrams`` produces
      (``n_min=1``, ``n_max=2``) from ``span.get_attrib_tokens("words")``,
    * ``LEFT_WORDS_[ngram]`` for each ngram from ``get_left_ngrams``,
    * ``RIGHT_WORDS_[ngram]`` for each ngram from ``get_right_ngrams``.

    The window handed to both neighbour helpers is read from
    ``settings["featurization"]["content"]["word_feature"]["window"]``.

    :param span: object exposing ``stable_id`` and ``get_attrib_tokens``; it
        is also passed unchanged to the left/right ngram helpers.
    :return: generator over the cached feature strings. The cache entry is a
        set, so the iteration order is unspecified.
    """
    attrib = "words"

    if span.stable_id not in unary_word_feats:
        # Collect into a local set and publish it only once it is complete,
        # so a helper that raises part-way through cannot leave a truncated
        # entry behind in the cache.
        feats = set()

        for ngram in tokens_to_ngrams(span.get_attrib_tokens(attrib),
                                      n_min=1,
                                      n_max=2):
            feats.add(f"CONTAINS_{attrib.upper()}_[{ngram}]")

        window = settings["featurization"]["content"]["word_feature"]["window"]

        for ngram in get_left_ngrams(
                span,
                window=window,
                n_max=2,
                attrib=attrib,
        ):
            feats.add(f"LEFT_{attrib.upper()}_[{ngram}]")

        for ngram in get_right_ngrams(
                span,
                window=window,
                n_max=2,
                attrib=attrib,
        ):
            feats.add(f"RIGHT_{attrib.upper()}_[{ngram}]")

        # The features belong in the per-span set, not on the mapping itself
        # (a mapping has no ``add`` method).
        unary_word_feats[span.stable_id] = feats

    yield from unary_word_feats[span.stable_id]
Exemple #3
0
def LF_negative_number_left(c):
    """Return TRUE if a left ngram of ``c.temp`` starts with a negative number.

    Each ngram from ``get_left_ngrams(c.temp, window=4)`` is tested with
    ``re.match``, which anchors at the start of the string: a minus sign,
    optional whitespace, then at least one digit. Returns ABSTAIN when no
    ngram matches.
    """
    # A generator expression lets any() stop at the first matching ngram
    # instead of running the regex over every ngram into a throwaway list.
    if any(
        re.match(r"-\s*\d+", ngram)
        for ngram in get_left_ngrams(c.temp, window=4)
    ):
        return TRUE
    return ABSTAIN
Exemple #4
0
def LF_complement_left_row(c):
    """Return FALSE if the part mention's ngrams hit a complement keyword.

    The row ngrams of ``c.part`` and its left ngrams (``window=10``) are
    chained and checked with ``overlap`` against "complement" and
    "complementary". ABSTAIN is returned when ``overlap`` is falsy.
    """
    candidate_ngrams = chain.from_iterable(
        (get_row_ngrams(c.part), get_left_ngrams(c.part, window=10))
    )
    hit = overlap(["complement", "complementary"], candidate_ngrams)
    return FALSE if hit else ABSTAIN
Exemple #5
0
def _get_word_feats(span: SpanMention) -> Iterator[str]:
    """Yield the cached unary word features of ``span``, filling the cache once.

    On a cache miss (``span.stable_id`` not in ``unary_word_feats``) a new set
    is stored under that key and populated with:

    * ``CONTAINS_WORDS_[ngram]`` for the span's own 1- and 2-grams,
    * ``LEFT_WORDS_[ngram]`` / ``RIGHT_WORDS_[ngram]`` for ngrams returned by
      ``get_left_ngrams`` / ``get_right_ngrams`` using the window configured at
      ``settings["featurization"]["textual"]["word_feature"]["window"]``,
    * ``SPAN_TYPE_[IMPLICIT]`` or ``SPAN_TYPE_[EXPLICIT]`` depending on whether
      the span is an ``ImplicitSpanMention``,
    * ``STARTS_WITH_CAPITAL`` when the first character of ``span.get_span()``
      is uppercase,
    * ``LENGTH_<n>`` with ``n = span.get_num_words()``.

    The stored set is then iterated, so the order of features is unspecified.
    """
    attrib = "words"

    if span.stable_id not in unary_word_feats:
        # ``feats`` aliases the cache entry, so every addition below lands
        # directly in ``unary_word_feats``.
        feats = set()
        unary_word_feats[span.stable_id] = feats

        feats.update(
            f"CONTAINS_{attrib.upper()}_[{ngram}]"
            for ngram in tokens_to_ngrams(
                span.get_attrib_tokens(attrib), n_min=1, n_max=2
            )
        )

        word_cfg = settings["featurization"]["textual"]["word_feature"]

        feats.update(
            f"LEFT_{attrib.upper()}_[{ngram}]"
            for ngram in get_left_ngrams(
                span, window=word_cfg["window"], n_max=2, attrib=attrib
            )
        )

        feats.update(
            f"RIGHT_{attrib.upper()}_[{ngram}]"
            for ngram in get_right_ngrams(
                span, window=word_cfg["window"], n_max=2, attrib=attrib
            )
        )

        span_kind = (
            "IMPLICIT" if isinstance(span, ImplicitSpanMention) else "EXPLICIT"
        )
        feats.add(f"SPAN_TYPE_[{span_kind}]")

        first_char = span.get_span()[0]
        if first_char.isupper():
            feats.add("STARTS_WITH_CAPITAL")

        feats.add(f"LENGTH_{span.get_num_words()}")

    yield from unary_word_feats[span.stable_id]
Exemple #6
0
def LF_to_left(c):
    """Return 1 when "to" is among the ngrams left of ``c.temp``, else 0.

    The ngrams come from ``get_left_ngrams(c.temp, window=2)``.
    """
    left_ngrams = get_left_ngrams(c.temp, window=2)
    if "to" in left_ngrams:
        return 1
    return 0
Exemple #7
0
def LF_to_left(c):
    """Return TRUE when "to" is among the ngrams left of ``c[1]``.

    The ngrams come from ``get_left_ngrams(c[1], window=2)``; ABSTAIN is
    returned when "to" is not one of them.
    """
    found_to = "to" in get_left_ngrams(c[1], window=2)
    return TRUE if found_to else ABSTAIN
Exemple #8
0
def LF_negative_number_left(c):
    """Return TRUE if temp mention's left ngrams contain a negative number.

    An ngram from ``get_left_ngrams(c.temp, window=4)`` counts when
    ``re.match`` finds, at its start, a minus sign followed by optional
    whitespace and one or more digits. ABSTAIN is returned otherwise.
    """
    left_ngrams = get_left_ngrams(c.temp, window=4)
    # Feed any() a generator rather than a list so evaluation stops at the
    # first ngram that matches.
    has_negative = any(re.match(r"-\s*\d+", ngram) for ngram in left_ngrams)
    return TRUE if has_negative else ABSTAIN
Exemple #9
0
def LF_to_left(c):
    """Return TRUE if "to" is one of the temp mention's left ngrams.

    Looks at ``get_left_ngrams(c.temp, window=2)`` and abstains when "to" is
    absent.
    """
    if "to" not in get_left_ngrams(c.temp, window=2):
        return ABSTAIN
    return TRUE