def annotation(body, label="", background="#ddd", color="#333", **style):
    """Create an HtmlElement span "chip" holding a body followed by a label.

    The rendered result looks something like this:

        [body | label]

    Parameters
    ----------
    body : string
        Text placed in the "body" part of the annotation.
    label : string
        Text placed in the "label" part of the annotation.
    background : string
        Background color of the "chip" that contains the annotation.
    color : string
        Text color used for both the body and the label.
    **style : dict
        Extra CSS applied to the containing "chip". When ``font_family`` is
        not given it defaults to "sans-serif".

    Examples
    --------
    A simple annotation with the default colors:

    >>> annotation("apple", "fruit")

    An annotation with custom colors:

    >>> annotation("apple", "fruit", background="#FF0", color="black")

    An annotation with additional CSS on the chip:

    >>> annotation("apple", "fruit", background="#FF0", border="1px dashed red")
    """
    style.setdefault("font_family", "sans-serif")

    # Outer chip: fixed layout properties first, caller overrides/extras last.
    chip = span(style=styles(
        background=background,
        border_radius=rem(0.33),
        color=color,
        padding=(rem(0.17), rem(0.67)),
        display="inline-flex",
        justify_content="center",
        align_items="center",
        **style,
    ))

    # Inner label: smaller, half-transparent, upper-cased text.
    label_element = span(style=styles(
        color=color,
        font_size=em(0.67),
        opacity=0.5,
        padding_left=rem(0.5),
        text_transform="uppercase",
        margin_bottom=px(-2),
    ))(label)

    return chip(body, label_element)
def _highlight(self, token: Union[str, HtmlElement], background_color,
               dotted_underline_color, classes: List[str], **props):
    """Wrap ``token`` in a span with a background color and dotted underline.

    The span's class attribute is every entry of ``classes`` followed by
    "highlight". Any ``props`` are forwarded to the span as additional
    keyword attributes.
    """
    class_attr = " ".join(classes + ["highlight"])
    css = styles(
        background_color=background_color,
        border_bottom=f"4px dotted {dotted_underline_color}",
    )
    wrapper = span(_class=class_attr, style=css, **props)
    return wrapper(token)
def annotation(body, background, color, **style):
    """Build an HtmlElement span that wraps ``body`` in a colored chip.

    Parameters
    ----------
    body : string
        The content placed inside the span.
    background : string
        The CSS background of the span.
    color : string
        The CSS text color of the span.
    **style : dict
        Any additional CSS for the span. When ``font_family`` is not given
        it defaults to "sans-serif".

    Returns
    -------
    The span element containing ``body``.
    """
    style.setdefault("font_family", "sans-serif")
    # Forward the extra CSS: previously ``style`` (including the font_family
    # default set above) was computed but never passed to ``styles``, so
    # caller-supplied CSS was silently dropped.
    main_style = styles(background=background, color=color, **style)
    return span(style=main_style)(body)
def annotate(body, label="", background="#ddd", color="#333", **style):
    """Create a span "chip" containing ``body`` followed by a small label.

    Parameters
    ----------
    body : string
        Text placed in the main part of the chip.
    label : string
        Text placed in the trailing label part of the chip.
    background : string
        Background color of the chip.
    color : string
        Text color used for both the body and the label.
    **style : dict
        Extra CSS applied to the outer chip. When ``font_family`` is not
        given it defaults to "sans-serif".
    """
    style.setdefault("font_family", "sans-serif")

    # Outer chip: fixed properties first, caller-supplied CSS last.
    chip = span(style=styles(
        background=background,
        border_radius=rem(0.33),
        color=color,
        padding=(rem(0.17), rem(0.67)),
        display="inline",
        justify_content="center",
        align_items="center",
        **style,
    ))

    # Inner label: smaller, half-transparent, upper-cased text.
    label_element = span(style=styles(
        color=color,
        font_size=em(0.67),
        opacity=0.5,
        padding_left=rem(0.5),
        text_transform="uppercase",
        margin_bottom=px(-2),
    ))(label)

    return chip(body, label_element)
def _get_underline_element(self, token, slot_to_spans):
    """Nest ``token`` inside one underline span per slot.

    ``slot_to_spans`` is indexed by slot number (0 up to its largest key).
    For each slot a new span is wrapped around the result so far, so slot 0
    is innermost. A slot with no spans gets a fully transparent border; a
    slot with spans takes its color from element 3 of the first span and
    gets one ``span-<id>`` class per span (id = element 4 of the span).

    Returns ``token`` unchanged when ``slot_to_spans`` is empty, otherwise
    the outermost nested span.
    """
    if not slot_to_spans:
        return token

    wrapped = token
    top_slot = max(slot_to_spans.keys())
    for slot in range(top_slot + 1):
        slot_spans = slot_to_spans[slot]
        if slot_spans:
            color = slot_spans[0][3]
            class_names = ["underline"]
            if token != SPACE:
                class_names.append("token-underline")
            # Encode the span ids in the class names
            class_names += [f"span-{entry[4]}" for entry in slot_spans]
            attrs = {
                "class": " ".join(class_names),
                "data-primary-color": color,
            }
        else:
            # Transparent placeholder underline (alpha of 0)
            color = "rgba(0, 0, 0, 0)"
            attrs = {}

        # The innermost underline sits directly under the token; every
        # further slot is pushed down by the configured spacing.
        gap = 0 if slot == 0 else self.underline_spacing
        underline_css = styles(
            display="inline-block",
            border_bottom=f"{self.underline_thickness}px solid",
            border_color=color,
            padding_bottom=px(gap),
        )
        wrapped = htbuilder.span(style=underline_css, **attrs)(wrapped)

    # Outermost nested span
    return wrapped
def html(self):
    """Assemble the pieces of the document-vs-summaries visualization.

    Builds one document view per summary (only the first gets the "display"
    class), a list of summary items, and an annotation key, then returns
    them together with stylesheet and script includes.

    Returns a list whose items are, in order: raw HTML strings (CDN
    ``<link>``/``<script>`` tags), the values returned by
    ``local_stylesheet(...)`` / ``local_script(...)``, and the ``body``
    element built with ``div``.
    """
    # Add document elements
    if self.document._.name == 'Document':
        document_name = 'Source Document'
    else:
        document_name = self.document._.name + ' summary'
    doc_header = div(id_="document-header")(document_name)
    doc_elements = []

    # Add document content, which comprises multiple elements, one for each summary. Only the element corresponding to
    # selected summary will be visible.
    mu = MultiUnderline()
    for summary_idx, summary in enumerate(self.summaries):
        # Map each summary token index to the index of the sentence that
        # contains it; sentence indices select the colors used below.
        token_idx_to_sent_idx = {}
        for sent_idx, sent in enumerate(summary.sents):
            for token in sent:
                token_idx_to_sent_idx[token.i] = sent_idx
        is_selected_summary = (summary_idx == 0)  # By default, first summary is selected

        # Invert the semantic alignment (summary token -> document matches)
        # into document token -> [(summary token index, similarity), ...].
        if self.semantic_alignments is not None:
            doc_token_idx_to_matches = defaultdict(list)
            semantic_alignment = self.semantic_alignments[summary_idx]
            for summary_token_idx, matches in semantic_alignment.items():
                for doc_token_idx, sim in matches:
                    doc_token_idx_to_matches[doc_token_idx].append(
                        (summary_token_idx, sim))
        else:
            doc_token_idx_to_matches = {}

        token_elements = []
        for doc_token_idx, doc_token in enumerate(self.document):
            if doc_token.is_stop or doc_token.is_punct:
                # Stopwords and punctuation are never highlighted.
                classes = ["stopword"]
                if self.gray_out_stopwords:
                    classes.append("grayed-out")
                el = span(_class=" ".join(classes))(doc_token.text)
            else:
                matches = doc_token_idx_to_matches.get(doc_token_idx)
                if matches:
                    # The match with the highest similarity decides the
                    # primary color of this document token.
                    summary_token_idx, sim = max(matches, key=itemgetter(1))
                    sent_idx = token_idx_to_sent_idx[summary_token_idx]
                    color_primary = get_color(sent_idx)
                    highlight_color_primary = color_with_opacity(
                        color_primary, sim)
                    props = {
                        'data-highlight-id': str(doc_token_idx),
                        'data-primary-color': highlight_color_primary
                    }
                    match_classes = []
                    # Record one class and one data-color attribute per
                    # matching summary token. Note: this loop rebinds
                    # summary_token_idx, sim and sent_idx from above.
                    for summary_token_idx, sim in matches:
                        sent_idx = token_idx_to_sent_idx[summary_token_idx]
                        match_classes.append(
                            f"summary-highlight-{summary_idx}-{summary_token_idx}"
                        )
                        color = color_with_opacity(get_color(sent_idx), sim)
                        props[
                            f"data-color-{summary_idx}-{summary_token_idx}"] = color
                    props["data-match-classes"] = " ".join(match_classes)
                    el = self._highlight(
                        doc_token.text,
                        highlight_color_primary,
                        color_primary,
                        match_classes + ["annotation-hidden"],
                        **props)
                else:
                    el = doc_token.text
            token_elements.append(el)

        # Collect the document spans to underline, as tuples of
        # (doc start, doc end, sentence index, color, span id).
        spans = []
        if self.lexical_alignments is not None:
            lexical_alignment = self.lexical_alignments[summary_idx]
            for summary_span, doc_spans in lexical_alignment.items():
                summary_span_start, summary_span_end = summary_span
                span_id = f"{summary_idx}-{summary_span_start}-{summary_span_end}"
                sent_idx = token_idx_to_sent_idx[summary_span_start]
                for doc_span_start, doc_span_end in doc_spans:
                    spans.append((doc_span_start, doc_span_end, sent_idx,
                                  get_color(sent_idx), span_id))
        token_elements = mu.markup(token_elements, spans)

        classes = ["main-doc", "bordered"]
        if self.scroll:
            classes.append("scroll")
        # NOTE(review): the trailing comma makes main_doc a one-element
        # tuple rather than a single element. It is passed on as a child
        # below, so this presumably relies on the HTML builder accepting
        # iterables as children -- confirm before removing the comma.
        main_doc = div(_class=" ".join(classes))(token_elements),

        classes = ["doc"]
        if is_selected_summary:
            classes.append("display")
        else:
            classes.append("nodisplay")
        doc_elements.append(
            div(**{
                "class": " ".join(classes),
                "data-index": summary_idx
            })(main_doc, div(_class="proxy-doc"), div(_class="proxy-scroll")))

    summary_title = "Summary"
    summary_header = div(id_="summary-header")(
        summary_title,
        div(id="summary-header-gap"),
    )

    summary_items = []
    for summary_idx, summary in enumerate(self.summaries):
        # Same token index -> sentence index mapping as built in the
        # document loop above, rebuilt here for this summary.
        token_idx_to_sent_idx = {}
        for sent_idx, sent in enumerate(summary.sents):
            for token in sent:
                token_idx_to_sent_idx[token.i] = sent_idx

        spans = []
        # One flag per summary token: True when the token lies inside a
        # lexically aligned span.
        matches_ngram = [False] * len(list(summary))
        if self.lexical_alignments is not None:
            lexical_alignment = self.lexical_alignments[summary_idx]
            for summary_span in lexical_alignment.keys():
                start, end = summary_span
                matches_ngram[slice(start, end)] = [True] * (end - start)
                span_id = f"{summary_idx}-{start}-{end}"
                sent_idx = token_idx_to_sent_idx[start]
                spans.append(
                    (start, end, sent_idx, get_color(sent_idx), span_id))

        if self.semantic_alignments is not None:
            semantic_alignment = self.semantic_alignments[summary_idx]
        else:
            semantic_alignment = {}

        token_elements = []
        for token_idx, token in enumerate(summary):
            if token.is_stop or token.is_punct:
                classes = ["stopword"]
                if self.gray_out_stopwords:
                    classes.append("grayed-out")
                el = span(_class=" ".join(classes))(token.text)
            else:
                classes = []
                if token.ent_iob_ in ('I', 'B'):
                    classes.append("entity")
                if matches_ngram[token_idx]:
                    classes.append("matches-ngram")
                matches = semantic_alignment.get(token_idx)
                if matches:
                    # Best document match by similarity; negative
                    # similarities are clamped to 0.
                    top_match = max(matches, key=itemgetter(1))
                    top_sim = max(top_match[1], 0)
                    top_doc_token_idx = top_match[0]
                    props = {
                        "data-highlight-id": f"{summary_idx}-{token_idx}",
                        "data-top-doc-highlight-id": str(top_doc_token_idx),
                        "data-top-doc-sim": f"{top_sim:.2f}",
                    }
                    classes.extend([
                        "annotation-hidden",
                        f"summary-highlight-{summary_idx}-{token_idx}"
                    ])
                    sent_idx = token_idx_to_sent_idx[token_idx]
                    el = self._highlight(
                        token.text,
                        color_with_opacity(get_color(sent_idx), top_sim),
                        color_with_opacity(get_color(sent_idx), 1),
                        classes,
                        **props)
                else:
                    # No semantic match: wrap in a span only when there is
                    # at least one class to attach.
                    if classes:
                        el = span(_class=" ".join(classes))(token.text)
                    else:
                        el = token.text
            token_elements.append(el)

        token_elements = mu.markup(token_elements, spans)

        classes = ["summary-item"]
        if summary_idx == 0:  # Default is for first summary to be selected
            classes.append("selected")
        summary_items.append(
            div(**{
                "class": ' '.join(classes),
                "data-index": summary_idx
            })(div(_class="name")(summary._.name),
               div(_class="content")(token_elements)))

    classes = ["summary-list", "bordered"]
    if self.scroll:
        classes.append("scroll")
    if self.lexical_alignments is not None:
        classes.append("has-lexical-alignment")
    if self.semantic_alignments is not None:
        classes.append("has-semantic-alignment")
    summary_list = div(_class=" ".join(classes))(summary_items)

    # Raw HTML for the annotation key; passed as-is as the first child of
    # the body element below.
    annotation_key = \
        """ <ul class="annotation-key"> <li class="annotation-key-label">Annotations:</li> <li id="option-lexical" class="option selected"> <span class="annotation-key-ngram">N-Gram overlap</span> </li> <li id="option-semantic" class="option 
selected"> <span class="annotation-key-semantic">Semantic overlap</span> </li> <li id="option-novel" class="option selected"> <span class="annotation-key-novel">Novel words</span> </li> <li id="option-entity" class="option selected"> <span class="annotation-key-entity">Novel entities</span> </li> </ul> """

    body = div(
        annotation_key,
        div(_class=f"vis-container {self.layout}-layout")(
            div(_class="doc-container")(doc_header, *doc_elements),
            div(_class="summary-container")(summary_header, summary_list)),
    )

    # NOTE(review): the CDN URLs below contain "[email protected]" where a
    # package@version specifier would be expected; this looks like mangled
    # text -- confirm the intended Bootstrap version(s).
    # NOTE(review): two Bootstrap bundle <script> tags (with different
    # integrity hashes) are emitted, one before and one after the local
    # scripts -- confirm that both are intended.
    return [
        """<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-+0n0xVW2eSR5OomGNYDnhzAbDsOXxcvSN1TPprVMTNDbiYZCxYbOOl7+AMvyTG2x" crossorigin="anonymous">""",
        local_stylesheet(
            Path(__file__).parent / "resources" / "summvis.css"),
        """<link rel="preconnect" href="https://fonts.gstatic.com"> <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500&display=swap" rel="stylesheet">""",
        body,
        """<script src="https://code.jquery.com/jquery-3.5.1.min.js" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script> <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js" integrity="sha384-Piv4xVNRyMGpqkS2by6br4gNJ7DXjqk09RmUpJ8jgGtD7zP9yug3goQfGII0yAns" crossorigin="anonymous"></script>""",
        local_script(
            Path(__file__).parent / "resources" / "jquery.color-2.1.2.min.js"),
        local_script(Path(__file__).parent / "resources" / "summvis.js"),
        """<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js" integrity="sha384-gtEjrD/SeCtmISkJkNUaaKMoLD0//ElJ19smozuHV6z3Iehds+3Ulb9Bn9Plx0x4" crossorigin="anonymous"></script>"""
    ]
padding_bottom=px(padding_bottom), ), **props)(element) # Return outermost nested span return element if __name__ == "__main__": from htbuilder import div # Test text = "The quick brown fox jumps" tokens = text.split() tokens = [ "The", htbuilder.span(style=styles(color="red"))("quick"), "brown", "fox", "jumps" ] spans = [ (0, 2, 0, "green", "green1"), (1, 3, 0, "orange", "orange1"), (3, 4, 0, "red", "red1"), (2, 4, 0, "blue", "blue1"), (1, 5, 0, "orange", "orange1"), ] mu = MultiUnderline() html = str(div(mu.markup(tokens, spans))) print(html)
**props )(element) # Return outermost nested span return element if __name__ == "__main__": from htbuilder import div # Test text = "The quick brown fox jumps" tokens = text.split() tokens = [ "The", htbuilder.span(style=styles(color="red"))("quick"), "brown", "fox", "jumps" ] spans = [ (0, 2, 0, "green", "green1"), (1, 3, 0, "orange", "orange1"), (3, 4, 0, "red", "red1"), (2, 4, 0, "blue", "blue1"), (1, 5, 0, "orange", "orange1"), ] mu = MultiUnderline() html = str(div(mu.markup(tokens, spans))) print(html)