Exemple #1
0
def same_col(c):
    """Check that every Span of a candidate sits in the same table column.

    The candidate must first pass ``same_table``; after that, the sentence
    of each Span is compared against the sentence of the first Span with
    ``is_col_aligned``.

    :param c: The candidate whose Spans are being compared
    :rtype: boolean
    """
    in_one_table = same_table(c)
    if not in_one_table:
        # Hand back same_table's own falsy result, exactly as `and` would.
        return in_one_table
    # Index-based access on purpose: only __len__ and __getitem__ are
    # required of the candidate, and c[0] is never touched when it is empty.
    for idx in range(len(c)):
        if not is_col_aligned(c[idx].sentence, c[0].sentence):
            return False
    return True
Exemple #2
0
def same_col(c: Candidate) -> bool:
    """Check that every Mention of a candidate sits in the same table column.

    The candidate must first pass ``same_table``; after that, each Mention
    is converted with ``_to_span`` and its sentence is compared against the
    sentence of the first Mention using ``is_col_aligned``.

    :param c: The candidate whose Mentions are being compared
    :rtype: boolean
    """

    def _shares_col_with_first(idx):
        # Compare the idx-th Mention's sentence with the first Mention's.
        return is_col_aligned(
            _to_span(c[idx]).sentence, _to_span(c[0]).sentence)

    # map() is lazy, so all() still stops at the first misaligned Mention.
    return same_table(c) and all(map(_shares_col_with_first, range(len(c))))
Exemple #3
0
def col_filter(c):
    """Decide whether a ``(data, col)`` candidate pair should be kept.

    The pair is rejected when the span text of the data mention is empty or
    consists solely of dots, commas, spaces and hyphens. When both mentions
    are in the same table (per ``same_table``), the result is whatever
    ``is_col_aligned`` reports for their two sentences. Pairs that are not
    in the same table are kept.

    :param c: A two-element candidate that unpacks as ``(data, col)``
    :return: False for an empty-looking data value, the column-alignment
        result for same-table pairs, True otherwise
    """
    (data, col) = c
    # Ignore only empty candidate values. The pattern is a raw string
    # because "\." in a plain literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+); the compiled regex is unchanged.
    if re.match(r"^[\., -]*$", data.context.get_span()):
        return False
    if same_table((data, col)):
        data_sentence = data.context.sentence
        col_sentence = col.context.sentence
        # Only column alignment is checked here; the additional
        # is_vert_aligned((data, col)) check is currently disabled.
        return is_col_aligned(data_sentence, col_sentence)
    return True
Exemple #4
0
def is_tabular_aligned(c: Candidate) -> bool:
    """Check that all Mentions of a candidate share a row or a column.

    The candidate must first pass ``same_table``. Each Mention is then
    converted with ``_to_span`` and its sentence is compared against the
    sentence of the first Mention: it has to be column-aligned
    (``is_col_aligned``) or, failing that, row-aligned (``is_row_aligned``).

    :param c: The candidate whose Mentions are being compared
    """
    in_one_table = same_table(c)
    if not in_one_table:
        # Hand back same_table's own falsy result, exactly as `and` would.
        return in_one_table
    for idx in range(len(c)):
        # Column alignment is tried first; the row check only runs when
        # the column check fails.
        if is_col_aligned(_to_span(c[idx]).sentence,
                          _to_span(c[0]).sentence):
            continue
        if is_row_aligned(_to_span(c[idx]).sentence,
                          _to_span(c[0]).sentence):
            continue
        return False
    return True
def mention_analysis(align_val, data_val, document_name, align_type, mentions,
                     mention_subclasses):
    """Print a diagnostic report relating a data value to an align value.

    The mentions are split with ``split_mentions`` into data, row and column
    mentions. Within the given document, data mentions whose upper-cased
    span equals ``data_val`` and align mentions whose upper-cased span
    contains ``align_val`` are collected. For every pair of hits the report
    prints both sentences and whether they are row/column aligned and
    horizontally/vertically aligned. When only one side has hits, the
    sentences of that side are printed, followed by all mentions of the
    other side for the document. Nothing is returned.

    :param align_val: Text looked for inside the align mentions' spans
    :param data_val: Text the data mentions' spans must equal
    :param document_name: Name of the document to restrict the analysis to
    :param align_type: ``'row'`` selects row mentions and row/horizontal
        checks; any other value selects column mentions and
        column/vertical checks
    :param mentions: Mentions passed on to ``split_mentions``
    :param mention_subclasses: Mention subclasses passed on to
        ``split_mentions``
    """
    data_mentions, row_mentions, col_mentions = split_mentions(
        mentions, mention_subclasses)
    by_row = align_type == 'row'
    align_mentions = row_mentions if by_row else col_mentions

    # Data hits need an exact (upper-cased) match, align hits a substring.
    data_hits = [
        m for m in data_mentions
        if data_val == m.context.get_span().upper()
        and m.document.name == document_name
    ]
    align_hits = [
        m for m in align_mentions
        if align_val in m.context.get_span().upper()
        and m.document.name == document_name
    ]

    banner = "######################################"
    print(banner)
    print(f"Mention Analysis {align_type}")
    print(
        f"{len(data_hits)} mentions for the data value {data_val} in document {document_name}."
    )
    print(
        f"{len(align_hits)} mentions for the align value {align_val} in document {document_name}."
    )

    if data_hits and align_hits:
        # Choose the structural and the visual alignment check once.
        if by_row:
            sentence_check = is_row_aligned
            visual_check = is_horz_aligned
            hv = "horizontally"
        else:
            sentence_check = is_col_aligned
            visual_check = is_vert_aligned
            hv = "vertically"
        for data_hit in data_hits:
            data_sentence = data_hit.context.sentence
            for align_hit in align_hits:
                align_sentence = align_hit.context.sentence
                print("")
                print(f"Sentence of the data value: {data_sentence}")
                print(f"Sentence of the align value: {align_sentence}")
                aligned = sentence_check(data_sentence, align_sentence)
                print(f"The values are {align_type} aligned: {aligned}")
                hv_aligned = visual_check((data_hit, align_hit))
                print(f"The values are {hv} aligned: {hv_aligned}")
    elif data_hits:
        # Only the data value was found: show it and list the align side.
        for data_hit in data_hits:
            print(f"Sentence of the data value: {data_hit.context.sentence}")
        print("")
        print(
            f"No align value found in all align mentions for the document {document_name}:"
        )
        pprint(
            [m for m in align_mentions if m.document.name == document_name])
    elif align_hits:
        # Only the align value was found: show it and list the data side.
        for align_hit in align_hits:
            print(f"Sentence of the align value: {align_hit.context.sentence}")
        print("")
        print(
            f"No data value found in all data mentions for the document {document_name}:"
        )
        pprint(
            [m for m in data_mentions if m.document.name == document_name])
    print(banner)