Ejemplo n.º 1
0
def annotation(body, label="", background="#ddd", color="#333", **style):
    """Build an HtmlElement span object with the given body and annotation label.

    The end result will look something like this:

        [body | label]

    Parameters
    ----------
    body : string
        The string to put in the "body" part of the annotation.
    label : string
        The string to put in the "label" part of the annotation.
    background : string
        The color to use for the background "chip" containing this annotation.
    color : string
        The color to use for the body and label text.
    **style : dict
        Any CSS you want to use to customize the containing "chip". A key
        that matches one of the chip's built-in properties (for example
        ``padding`` or ``display``) replaces the built-in value. If
        ``font_family`` is not given it defaults to ``"sans-serif"``.

    Returns
    -------
    The outer chip ``span`` element, containing ``body`` followed by a nested
    ``span`` that holds ``label``.

    Examples
    --------

    Produce a simple annotation with default colors:

    >>> annotation("apple", "fruit")

    Produce an annotation with custom colors:

    >>> annotation("apple", "fruit", background="#FF0", color="black")

    Produce an annotation with crazy CSS:

    >>> annotation("apple", "fruit", background="#FF0", border="1px dashed red")

    """

    style.setdefault("font_family", "sans-serif")

    # Merge the built-in chip properties with the caller's CSS in a dict so an
    # overlapping key overrides the default instead of raising
    # "TypeError: got multiple values for keyword argument".
    chip_style = {
        "background": background,
        "border_radius": rem(0.33),
        "color": color,
        "padding": (rem(0.17), rem(0.67)),
        "display": "inline-flex",
        "justify_content": "center",
        "align_items": "center",
        **style,
    }

    label_style = styles(
        color=color,
        font_size=em(0.67),
        opacity=0.5,
        padding_left=rem(0.5),
        text_transform="uppercase",
        margin_bottom=px(-2),
    )

    return span(style=styles(**chip_style))(
        body,
        span(style=label_style)(label),
    )
Ejemplo n.º 2
0
 def _highlight(self, token: Union[str, HtmlElement], background_color,
                dotted_underline_color, classes: List[str], **props):
     """Wrap ``token`` in a highlighted span.

     The span's class attribute is ``classes`` followed by "highlight"; its
     inline style sets the background color and a 4px dotted bottom border
     in ``dotted_underline_color``. Extra ``props`` are forwarded to the
     span as attributes.
     """
     class_attr = " ".join(classes + ["highlight"])
     inline_css = styles(
         background_color=background_color,
         border_bottom=f"4px dotted {dotted_underline_color}",
     )
     wrapper = span(_class=class_attr, style=inline_css, **props)
     return wrapper(token)
Ejemplo n.º 3
0
def annotation(body, background, color, **style):
    """Build an HtmlElement span object wrapping the given body.

    Parameters
    ----------
    body : string
        The content to place inside the span.
    background : string
        The CSS background for the span.
    color : string
        The CSS text color for the span.
    **style : dict
        Additional CSS properties for the span. If ``font_family`` is not
        given it defaults to ``"sans-serif"``.
    """

    if "font_family" not in style:
        style["font_family"] = "sans-serif"

    # Forward the extra CSS: previously ``style`` (including the font_family
    # default set above) was built and then never applied to the span.
    main_style = styles(background=background, color=color, **style)

    return span(style=main_style)(body)
Ejemplo n.º 4
0
def annotate(body, label="", background="#ddd", color="#333", **style):
    """Build an inline "chip" span containing ``body`` followed by ``label``.

    Parameters
    ----------
    body : string
        The content for the main part of the chip.
    label : string
        The content for the nested label span (rendered upper-case, smaller
        and half-opaque via its inline style).
    background : string
        The CSS background of the chip.
    color : string
        The CSS color for the body and label text.
    **style : dict
        Additional CSS for the chip. A key that matches one of the built-in
        chip properties (for example ``padding`` or ``display``) replaces the
        built-in value. If ``font_family`` is not given it defaults to
        ``"sans-serif"``.

    Returns
    -------
    The outer chip ``span`` element.
    """
    style.setdefault("font_family", "sans-serif")

    # Merge through a dict so caller-supplied keys override the defaults
    # rather than triggering a duplicate-keyword TypeError.
    chip_style = {
        "background": background,
        "border_radius": rem(0.33),
        "color": color,
        "padding": (rem(0.17), rem(0.67)),
        "display": "inline",
        "justify_content": "center",
        "align_items": "center",
        **style,
    }

    label_style = styles(
        color=color,
        font_size=em(0.67),
        opacity=0.5,
        padding_left=rem(0.5),
        text_transform="uppercase",
        margin_bottom=px(-2),
    )

    return span(style=styles(**chip_style))(
        body,
        span(style=label_style)(label),
    )
Ejemplo n.º 5
0
    def _get_underline_element(self, token, slot_to_spans):
        if not slot_to_spans:
            return token
        max_slot_index = max(slot_to_spans.keys())
        element = token
        for slot_index in range(max_slot_index + 1):
            spans = slot_to_spans[slot_index]
            if not spans:
                color = "rgba(0, 0, 0, 0)"  # Transparent element w/opacity=0
                props = {}
            else:
                containing_slot = spans[0]
                color = containing_slot[3]
                classes = ["underline"]
                if token != SPACE:
                    classes.append("token-underline")
                classes.extend([f"span-{span[4]}" for span in spans])  # Encode ids in class names
                props = {
                    "class": " ".join(classes),
                    "data-primary-color": color
                }
            if slot_index == 0:
                padding_bottom = 0
            else:
                padding_bottom = self.underline_spacing
            display = "inline-block"
            element = htbuilder.span(
                style=styles(
                    display=display,
                    border_bottom=f"{self.underline_thickness}px solid",
                    border_color=color,
                    padding_bottom=px(padding_bottom),
                ),
                **props
            )(element)

            # Return outermost nested span
        return element
Ejemplo n.º 6
0
    def html(self):
        """Assemble the HTML pieces of the document/summary visualization.

        One document view is built per summary (only the first is given the
        "display" class) and one summary item per summary (the first is given
        the "selected" class). Tokens are wrapped in highlight spans when
        semantic alignments are present and passed through
        ``MultiUnderline.markup`` with the lexical-alignment spans.

        Returns
        -------
        list
            In order: raw HTML strings for external stylesheet/script tags,
            the results of ``local_stylesheet`` / ``local_script`` for the
            bundled resources, and the body ``div`` element.
        """

        # Add document elements
        if self.document._.name == 'Document':
            document_name = 'Source Document'
        else:
            document_name = self.document._.name + ' summary'
        doc_header = div(id_="document-header")(document_name)
        doc_elements = []

        # Add document content, which comprises multiple elements, one for each summary. Only the element corresponding to
        # selected summary will be visible.

        mu = MultiUnderline()

        for summary_idx, summary in enumerate(self.summaries):
            # Map each summary token index to the index of its sentence.
            token_idx_to_sent_idx = {
                token.i: sent_idx
                for sent_idx, sent in enumerate(summary.sents)
                for token in sent
            }
            # By default, first summary is selected
            is_selected_summary = (summary_idx == 0)

            if self.semantic_alignments is not None:
                # Invert summary-token -> document matches so they can be
                # looked up by document token index.
                doc_token_idx_to_matches = defaultdict(list)
                semantic_alignment = self.semantic_alignments[summary_idx]
                for summary_token_idx, matches in semantic_alignment.items():
                    for doc_token_idx, sim in matches:
                        doc_token_idx_to_matches[doc_token_idx].append(
                            (summary_token_idx, sim))
            else:
                doc_token_idx_to_matches = {}

            token_elements = []
            for doc_token_idx, doc_token in enumerate(self.document):
                if doc_token.is_stop or doc_token.is_punct:
                    classes = ["stopword"]
                    if self.gray_out_stopwords:
                        classes.append("grayed-out")
                    el = span(_class=" ".join(classes))(doc_token.text)

                else:
                    matches = doc_token_idx_to_matches.get(doc_token_idx)
                    if matches:
                        # The highest-similarity match sets the primary color.
                        summary_token_idx, sim = max(matches,
                                                     key=itemgetter(1))
                        sent_idx = token_idx_to_sent_idx[summary_token_idx]
                        color_primary = get_color(sent_idx)
                        highlight_color_primary = color_with_opacity(
                            color_primary, sim)
                        props = {
                            'data-highlight-id': str(doc_token_idx),
                            'data-primary-color': highlight_color_primary
                        }
                        match_classes = []
                        # Record one class and one color attribute per
                        # matching summary token.
                        for summary_token_idx, sim in matches:
                            sent_idx = token_idx_to_sent_idx[summary_token_idx]
                            match_classes.append(
                                f"summary-highlight-{summary_idx}-{summary_token_idx}"
                            )
                            color = color_with_opacity(get_color(sent_idx),
                                                       sim)
                            props[
                                f"data-color-{summary_idx}-{summary_token_idx}"] = color
                        props["data-match-classes"] = " ".join(match_classes)
                        el = self._highlight(
                            doc_token.text, highlight_color_primary,
                            color_primary,
                            match_classes + ["annotation-hidden"], **props)
                    else:
                        el = doc_token.text
                token_elements.append(el)

            # Collect document-side spans for the lexical alignments as
            # (start, end, sentence index, color, span id) tuples.
            spans = []
            if self.lexical_alignments is not None:
                lexical_alignment = self.lexical_alignments[summary_idx]
                for summary_span, doc_spans in lexical_alignment.items():
                    summary_span_start, summary_span_end = summary_span
                    span_id = f"{summary_idx}-{summary_span_start}-{summary_span_end}"
                    sent_idx = token_idx_to_sent_idx[summary_span_start]
                    for doc_span_start, doc_span_end in doc_spans:
                        spans.append((doc_span_start, doc_span_end, sent_idx,
                                      get_color(sent_idx), span_id))
            token_elements = mu.markup(token_elements, spans)

            classes = ["main-doc", "bordered"]
            if self.scroll:
                classes.append("scroll")

            # No trailing comma here: it previously wrapped the element in an
            # accidental 1-tuple.
            main_doc = div(_class=" ".join(classes))(token_elements)

            classes = ["doc"]
            if is_selected_summary:
                classes.append("display")
            else:
                classes.append("nodisplay")
            doc_elements.append(
                div(**{
                    "class": " ".join(classes),
                    "data-index": summary_idx
                })(main_doc, div(_class="proxy-doc"),
                   div(_class="proxy-scroll")))

        summary_title = "Summary"
        summary_header = div(id_="summary-header")(
            summary_title,
            div(id="summary-header-gap"),
        )

        summary_items = []
        for summary_idx, summary in enumerate(self.summaries):
            token_idx_to_sent_idx = {
                token.i: sent_idx
                for sent_idx, sent in enumerate(summary.sents)
                for token in sent
            }

            # Summary-side spans, plus a per-token flag for n-gram coverage.
            spans = []
            matches_ngram = [False] * len(list(summary))
            if self.lexical_alignments is not None:
                lexical_alignment = self.lexical_alignments[summary_idx]
                for start, end in lexical_alignment:
                    matches_ngram[start:end] = [True] * (end - start)
                    span_id = f"{summary_idx}-{start}-{end}"
                    sent_idx = token_idx_to_sent_idx[start]
                    spans.append(
                        (start, end, sent_idx, get_color(sent_idx), span_id))

            if self.semantic_alignments is not None:
                semantic_alignment = self.semantic_alignments[summary_idx]
            else:
                semantic_alignment = {}
            token_elements = []
            for token_idx, token in enumerate(summary):
                if token.is_stop or token.is_punct:
                    classes = ["stopword"]
                    if self.gray_out_stopwords:
                        classes.append("grayed-out")
                    el = span(_class=" ".join(classes))(token.text)
                else:
                    classes = []
                    if token.ent_iob_ in ('I', 'B'):
                        classes.append("entity")
                    if matches_ngram[token_idx]:
                        classes.append("matches-ngram")
                    matches = semantic_alignment.get(token_idx)
                    if matches:
                        top_match = max(matches, key=itemgetter(1))
                        # Clamp negative similarity to 0 before it is used
                        # as an opacity.
                        top_sim = max(top_match[1], 0)
                        top_doc_token_idx = top_match[0]
                        props = {
                            "data-highlight-id": f"{summary_idx}-{token_idx}",
                            "data-top-doc-highlight-id":
                            str(top_doc_token_idx),
                            "data-top-doc-sim": f"{top_sim:.2f}",
                        }
                        classes.extend([
                            "annotation-hidden",
                            f"summary-highlight-{summary_idx}-{token_idx}"
                        ])
                        sent_idx = token_idx_to_sent_idx[token_idx]
                        el = self._highlight(
                            token.text,
                            color_with_opacity(get_color(sent_idx), top_sim),
                            color_with_opacity(get_color(sent_idx), 1),
                            classes, **props)
                    else:
                        if classes:
                            el = span(_class=" ".join(classes))(token.text)
                        else:
                            el = token.text
                token_elements.append(el)

            token_elements = mu.markup(token_elements, spans)

            classes = ["summary-item"]
            if summary_idx == 0:  # Default is for first summary to be selected
                classes.append("selected")

            summary_items.append(
                div(**{
                    "class": ' '.join(classes),
                    "data-index": summary_idx
                })(div(_class="name")(summary._.name),
                   div(_class="content")(token_elements)))
        classes = ["summary-list", "bordered"]
        if self.scroll:
            classes.append("scroll")
        if self.lexical_alignments is not None:
            classes.append("has-lexical-alignment")
        if self.semantic_alignments is not None:
            classes.append("has-semantic-alignment")
        summary_list = div(_class=" ".join(classes))(summary_items)

        annotation_key = \
            """
              <ul class="annotation-key">
                <li class="annotation-key-label">Annotations:</li>
                <li id="option-lexical" class="option selected">
                    <span class="annotation-key-ngram">N-Gram overlap</span>
                </li>
                <li id="option-semantic" class="option selected">
                    <span class="annotation-key-semantic">Semantic overlap</span>
                </li>
                <li id="option-novel" class="option selected">
                    <span class="annotation-key-novel">Novel words</span>
                </li>
                <li id="option-entity" class="option selected">
                    <span class="annotation-key-entity">Novel entities</span>
                </li>
        
            </ul>
            """

        body = div(
            annotation_key,
            div(_class=f"vis-container {self.layout}-layout")(
                div(_class="doc-container")(doc_header, *doc_elements),
                div(_class="summary-container")(summary_header, summary_list)),
        )
        # NOTE(review): both the Bootstrap 4.6.0 and the 5.0.1 JS bundles are
        # loaded below - confirm that both are really needed.
        return [
            """<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-+0n0xVW2eSR5OomGNYDnhzAbDsOXxcvSN1TPprVMTNDbiYZCxYbOOl7+AMvyTG2x" crossorigin="anonymous">""",
            local_stylesheet(
                Path(__file__).parent / "resources" / "summvis.css"),
            """<link rel="preconnect" href="https://fonts.gstatic.com">
                <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500&display=swap" rel="stylesheet">""",
            body,
            """<script
                src="https://code.jquery.com/jquery-3.5.1.min.js"
                integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0="
                crossorigin="anonymous"></script>
                <script src="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/js/bootstrap.bundle.min.js"
                 integrity="sha384-Piv4xVNRyMGpqkS2by6br4gNJ7DXjqk09RmUpJ8jgGtD7zP9yug3goQfGII0yAns"
                  crossorigin="anonymous"></script>""",
            local_script(
                Path(__file__).parent / "resources" /
                "jquery.color-2.1.2.min.js"),
            local_script(Path(__file__).parent / "resources" / "summvis.js"),
            """<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.1/dist/js/bootstrap.bundle.min.js" integrity="sha384-gtEjrD/SeCtmISkJkNUaaKMoLD0//ElJ19smozuHV6z3Iehds+3Ulb9Bn9Plx0x4" crossorigin="anonymous"></script>"""
        ]
Ejemplo n.º 7
0
                padding_bottom=px(padding_bottom),
            ),
                                     **props)(element)

            # Return outermost nested span
        return element


if __name__ == "__main__":
    from htbuilder import div

    # Manual demo: underline overlapping spans over a short sentence whose
    # second token is already an HTML element, then print the markup.
    text = "The quick brown fox jumps"
    tokens = text.split()
    tokens[1] = htbuilder.span(style=styles(color="red"))("quick")

    # Each span is (start, end, sentence index, color, span id).
    spans = [
        (0, 2, 0, "green", "green1"),
        (1, 3, 0, "orange", "orange1"),
        (3, 4, 0, "red", "red1"),
        (2, 4, 0, "blue", "blue1"),
        (1, 5, 0, "orange", "orange1"),
    ]

    mu = MultiUnderline()
    html = str(div(mu.markup(tokens, spans)))
    print(html)
Ejemplo n.º 8
0
                **props
            )(element)

            # Return outermost nested span
        return element


if __name__ == "__main__":
    from htbuilder import div

    # Manual demo of MultiUnderline: the word "quick" is pre-wrapped in a red
    # span so that element tokens are exercised alongside plain strings.
    text = "The quick brown fox jumps"
    first, second, *rest = text.split()
    tokens = [
        first,
        htbuilder.span(style=styles(color="red"))(second),
        *rest,
    ]

    # Overlapping spans, each (start, end, sentence index, color, span id).
    spans = [
        (0, 2, 0, "green", "green1"),
        (1, 3, 0, "orange", "orange1"),
        (3, 4, 0, "red", "red1"),
        (2, 4, 0, "blue", "blue1"),
        (1, 5, 0, "orange", "orange1"),
    ]

    mu = MultiUnderline()
    html = str(div(mu.markup(tokens, spans)))
    print(html)