def add_link_markup(tag):
    """Decorate the first link found in ``tag`` and return it if relevant.

    ``tag`` is parsed as HTML and the first ``<a>`` element that carries an
    ``href`` is processed:

    * an href starting with ``/external-site/?`` gets the external-link
      icon and, when it has an ``ext_url`` query argument, its href is
      replaced by ``signed_redirect`` of that URL;
    * an href matching ``NON_CFPB_LINKS`` gets the external-link icon and,
      when it also matches ``NON_GOV_LINKS``, is routed through
      ``signed_redirect``;
    * an href matching ``DOWNLOAD_LINKS`` gets the download icon.

    If the link contains any element selected by
    ``ICONLESS_LINK_CHILD_ELEMENTS`` it is returned serialized without an
    icon (its href may still have been rewritten above). Returns ``None``
    when no suitable link is found or no icon applies.
    """
    anchor = BeautifulSoup(tag, 'html.parser').find('a', href=True)
    if anchor is None:
        return None

    # Guarantee a mutable class list so icon classes can be appended later.
    if not anchor.attrs.get('class', None):
        anchor.attrs['class'] = []

    href = anchor['href']
    icon_name = None

    if href.startswith('/external-site/?'):
        # Already a redirect link: leaving consumerfinance.gov.
        icon_name = 'external-link'
        query = parse_qs(urlparse(href).query)
        if 'ext_url' in query:
            # Rebuild the redirect notice for the target URL.
            anchor['href'] = signed_redirect(query['ext_url'][0])
    elif NON_CFPB_LINKS.match(href):
        # Leaving consumerfinance.gov.
        icon_name = 'external-link'
        if NON_GOV_LINKS.match(href):
            # Add the redirect notice as well.
            anchor['href'] = signed_redirect(href)
    elif DOWNLOAD_LINKS.search(href):
        # The link downloads a file.
        icon_name = 'download'

    # Links wrapping one of the icon-less child elements never get an icon,
    # but are still returned because they may have been rewritten above.
    if anchor.select(', '.join(ICONLESS_LINK_CHILD_ELEMENTS)):
        return str(anchor)

    if not icon_name:
        return None

    anchor.attrs['class'].append(LINK_ICON_CLASSES)

    # Wrap the existing link content in a span that provides the underline.
    text_span = BeautifulSoup('', 'html.parser').new_tag('span')
    text_span['class'] = LINK_ICON_TEXT_CLASSES
    text_span.contents = anchor.contents
    anchor.contents = [text_span, NavigableString(' ')]

    # Append the SVG icon after the wrapped text.
    anchor.contents.append(BeautifulSoup(svg_icon(icon_name), 'html.parser'))
    return str(anchor)
def add_link_markup(tags):
    """Add icon classes and redirect hrefs to each link in ``tags``, in place.

    For every tag:

    * an href starting with ``/external-site/?`` that has an ``ext_url``
      query argument is replaced by ``signed_redirect`` of that URL (no
      icon class is added in this branch);
    * an href matching ``NONCFPB_LINK_PATTERN`` gets ``EXTERNAL_A_CSS`` and,
      when it also matches ``EXTERNAL_LINK_PATTERN``, a signed redirect;
    * an href matching ``DOWNLOAD_LINK_PATTERN`` gets ``DOWNLOAD_A_CSS``.

    Links that received an icon class have their content wrapped in a span;
    all other links whose (possibly rewritten) href does not match
    ``FILES_LINK_PATTERN`` are passed to ``fix_link``.
    """
    for link in tags:
        # Guarantee a mutable class list so CSS classes can be appended.
        if not link.attrs.get('class', None):
            link.attrs['class'] = []

        href = link['href']
        needs_wrapper = False

        if href.startswith('/external-site/?'):
            query = parse_qs(urlparse(href).query)
            if 'ext_url' in query:
                link['href'] = signed_redirect(query['ext_url'][0])
        elif NONCFPB_LINK_PATTERN.match(href):
            # Icon class signalling that the user leaves consumerfinance.gov.
            link.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                link['href'] = signed_redirect(href)
            needs_wrapper = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Icon class signalling a file download.
            link.attrs['class'].append(DOWNLOAD_A_CSS)
            needs_wrapper = True

        if needs_wrapper:
            # Wrap the link content in a span that provides the underline.
            wrapper = BeautifulSoup('', 'html.parser').new_tag('span')
            wrapper['class'] = EXTERNAL_SPAN_CSS
            wrapper.contents = link.contents
            link.contents = [wrapper, NavigableString(' ')]
        elif not FILES_LINK_PATTERN.match(link['href']):
            # Re-read the href: it may have been rewritten above.
            fix_link(link)
def add_link_markup(tags):
    """Add icon classes and external-site redirects to ``tags``, in place.

    For every tag:

    * an href matching ``NONCFPB_LINK_PATTERN`` gets ``EXTERNAL_A_CSS``; when
      it also matches ``EXTERNAL_LINK_PATTERN`` the href is prefixed with
      ``/external-site/?ext_url=``;
    * otherwise an href matching ``DOWNLOAD_LINK_PATTERN`` gets
      ``DOWNLOAD_A_CSS``.

    Links that received an icon class have their content wrapped in a span
    carrying ``EXTERNAL_SPAN_CSS``; all other links whose href does not match
    ``FILES_LINK_PATTERN`` are passed to ``fix_link``.
    """
    for tag in tags:
        added_icon = False

        # Guarantee a mutable class list so CSS classes can be appended.
        if not tag.attrs.get('class', None):
            tag.attrs.update({'class': []})

        if NONCFPB_LINK_PATTERN.match(tag['href']):
            # Sets the icon to indicate you're leaving consumerfinance.gov
            tag.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(tag['href']):
                # Sets the link to an external one if you're leaving .gov
                tag['href'] = '/external-site/?ext_url=' + tag['href']
            added_icon = True
        elif DOWNLOAD_LINK_PATTERN.search(tag['href']):
            # Sets the icon to indicate you're downloading a file
            tag.attrs['class'].append(DOWNLOAD_A_CSS)
            added_icon = True

        if added_icon:
            # Wraps the link text in a span that provides the underline.
            # The parser is named explicitly: without it BeautifulSoup picks
            # whichever parser happens to be installed and emits a warning.
            contents = tag.contents
            span = BeautifulSoup('', 'html.parser').new_tag('span')
            span['class'] = EXTERNAL_SPAN_CSS
            span.contents = contents
            tag.contents = [span, NavigableString(' ')]
        elif not FILES_LINK_PATTERN.match(tag['href']):
            fix_link(tag)
def add_link_markup(tags):
    """Add icon classes and external-site redirects to ``tags``, in place.

    A link whose href matches ``NONCFPB_LINK_PATTERN`` receives
    ``EXTERNAL_A_CSS`` and, if it also matches ``EXTERNAL_LINK_PATTERN``, has
    ``/external-site/?ext_url=`` prepended to its href. Otherwise a link
    whose href matches ``DOWNLOAD_LINK_PATTERN`` receives ``DOWNLOAD_A_CSS``.

    Links that received an icon class get their content wrapped in a span
    carrying ``EXTERNAL_SPAN_CSS``; every other link whose href does not
    match ``FILES_LINK_PATTERN`` is handed to ``fix_link``.
    """
    for link in tags:
        # Guarantee a mutable class list so CSS classes can be appended.
        if not link.attrs.get('class', None):
            link.attrs['class'] = []

        href = link['href']
        needs_wrapper = False

        if NONCFPB_LINK_PATTERN.match(href):
            # Icon class signalling that the user leaves consumerfinance.gov.
            link.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                # Leaving .gov: route through the external-site notice.
                # NOTE(review): the href is concatenated unencoded, so a
                # target with its own query string may be split -- confirm.
                link['href'] = '/external-site/?ext_url=' + href
            needs_wrapper = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Icon class signalling a file download.
            link.attrs['class'].append(DOWNLOAD_A_CSS)
            needs_wrapper = True

        if needs_wrapper:
            # Wrap the link content in a span that provides the underline.
            wrapper = BeautifulSoup('', 'html.parser').new_tag('span')
            wrapper['class'] = EXTERNAL_SPAN_CSS
            wrapper.contents = link.contents
            link.contents = [wrapper, NavigableString(' ')]
        elif not FILES_LINK_PATTERN.match(link['href']):
            fix_link(link)
def plain_content(readability_content, content_digests, node_indexes):
    """Return ``readability_content`` re-serialized from its plain elements.

    The HTML is parsed with ``html.parser``, its top-level nodes are passed
    through ``plain_elements`` (together with the ``content_digests`` and
    ``node_indexes`` flags), and, when ``node_indexes`` is truthy, each
    resulting element is additionally passed through ``add_node_indexes``.
    The processed elements replace the document's contents and the document
    is returned as a string.
    """
    document = BeautifulSoup(readability_content, 'html.parser')

    plain_nodes = plain_elements(
        document.contents, content_digests, node_indexes)
    if node_indexes:
        # Add node index attributes to every top-level element.
        plain_nodes = list(map(add_node_indexes, plain_nodes))

    # Swap the parsed article contents for the plain elements.
    document.contents = plain_nodes
    return str(document)
def add_link_markup(tag):
    """Decorate ``tag`` with icon/redirect markup; return it if an icon applies.

    ``tag`` is modified in place:

    * an href starting with ``/external-site/?`` gets the external-link
      icon and, when it has an ``ext_url`` query argument, its href is
      replaced by ``signed_redirect`` of that URL;
    * an href matching ``NON_CFPB_LINKS`` gets the external-link icon and,
      when it also matches ``NON_GOV_LINKS``, is routed through
      ``signed_redirect``;
    * an href matching ``DOWNLOAD_LINKS`` gets the download icon.

    Returns the serialized tag when an icon was added, otherwise ``None``.
    """
    # Guarantee a mutable class list so icon classes can be appended later.
    if not tag.attrs.get('class', None):
        tag.attrs['class'] = []

    href = tag['href']
    icon_name = None

    if href.startswith('/external-site/?'):
        # Already a redirect link: leaving consumerfinance.gov.
        icon_name = 'external-link'
        query = parse_qs(urlparse(href).query)
        if 'ext_url' in query:
            # Rebuild the redirect notice for the target URL.
            tag['href'] = signed_redirect(query['ext_url'][0])
    elif NON_CFPB_LINKS.match(href):
        # Leaving consumerfinance.gov.
        icon_name = 'external-link'
        if NON_GOV_LINKS.match(href):
            # Add the redirect notice as well.
            tag['href'] = signed_redirect(href)
    elif DOWNLOAD_LINKS.search(href):
        # The link downloads a file.
        icon_name = 'download'

    if not icon_name:
        return None

    tag.attrs['class'].append(LINK_ICON_CLASSES)

    # Wrap the existing link content in a span that provides the underline.
    text_span = BeautifulSoup('', 'html.parser').new_tag('span')
    text_span['class'] = LINK_ICON_TEXT_CLASSES
    text_span.contents = tag.contents
    tag.contents = [text_span, NavigableString(' ')]

    # Append the SVG icon after the wrapped text.
    tag.contents.append(BeautifulSoup(svg_icon(icon_name), 'html.parser'))
    return str(tag)
def render_table_odt(elem, doc):
    """Render a table element as OpenDocument XML and register its styles.

    ``elem.content[0]`` is used as the table; ``elem.content[1].content[0]``,
    when present, is used as a footer whose items provide ``term`` and
    ``definitions`` for legend paragraphs.

    Side effect: the generated column and cell styles are serialized and
    appended to ``doc.auto_styles``.

    Returns the caption paragraph, the table and the legend paragraphs
    serialized and joined with newlines.
    """
    table = elem.content[0]
    table_number = tuple(
        str(i) for i in utils.get_elem_count(doc, pf.Table, register="table"))
    table_name = "Table{}".format("_".join(str(i) for i in table_number))

    # Container collecting the caption, the table and the legend paragraphs.
    table_root = BeautifulSoup("", "xml")

    if hasattr(table, "caption") and table.caption:
        colon = ": "
        caption = "".join(pf.stringify(c) for c in table.caption)
    else:
        colon = ""
        caption = ""

    caption_odt = utils.create_nested_tags(
        **{
            "name": "text:p",
            "attrs": {
                "text:style-name": "Table"
            },
            "contents": [
                {
                    "name": "text:span",
                    "attrs": {
                        "text:style-name": "Strong_20_Emphasis"
                    },
                    "contents": [
                        "Table ",
                        {
                            "name": "text:sequence",
                            "attrs": {
                                "text:ref-name": f"ref{table_name}",
                                "text:name": "Table",
                                "text:formula": "ooow:Table+1",
                                "style:num-format": "1",
                            },
                            "contents": [".".join(table_number)],
                        },
                        colon,
                    ],
                },
                caption,
            ],
        })
    table_root.contents.append(caption_odt)

    table_odt = utils.create_nested_tags(
        **{
            "name": "table:table",
            "attrs": {
                "table:name": table_name,
                "table:style-name": table_name,
                "table:template-name": "Default Style",
            },
        })
    table_root.contents.append(table_odt)

    # Columns without an explicit width share the remaining width equally.
    unoccupied_width = 1 - sum(table.width)
    unspecified_widths = sum(1 for w in table.width if not w)
    # When every column has an explicit width there is nothing to share;
    # guard the division so such tables do not raise ZeroDivisionError.
    remaining_for_each = (unoccupied_width / unspecified_widths
                          if unspecified_widths else 0)
    widths = [w if w else remaining_for_each for w in table.width]

    # We want the table to occupy a maximum width
    widths = (w * table.total_width for w in widths)

    column_style_names, column_styles, column_definitions = zip(
        *create_column_definitions(widths, table_name))
    pf.debug(column_style_names, column_styles, column_definitions)

    styles = BeautifulSoup("", "xml")
    styles.contents = list(column_styles)
    table_odt.contents.extend(column_definitions)

    for r, row in enumerate(table.content):
        row_odt = Tag(name="table:table-row")
        row_odt.attrs = {
            "table:style-name":
            "{table_name}.{r}".format(table_name=table_name, r=r + 1)
        }

        # One entry per cell; None marks covered cells, which have no style.
        row_cell_styles = []
        for c, cell in enumerate(row.content):
            if cell.covered:
                cell_odt = Tag(name="table:covered-table-cell")
                row_odt.contents.append(cell_odt)
                row_cell_styles.append(None)
            else:
                cell_odt = Tag(name="table:table-cell")
                cell_style_name = "{column_style}{r}".format(
                    column_style=column_style_names[c], r=r + 1)

                cell_style = Tag(name="style:style")
                cell_style.attrs = {
                    "style:name": cell_style_name,
                    "style:family": "table-cell",
                    "style:writing-mode": "page",
                }
                style_cell_properies = Tag(name="style:table-cell-properties")
                style_cell_properies.attrs = {
                    "fo:padding-left": "0.10cm",
                    "fo:padding-right": "0.10cm",
                    "fo:padding-top": "0.10cm",
                    "fo:padding-bottom": "0.10cm",
                    "style:vertical-align": "bottom",
                }
                style_background_image = Tag(name="style:background-image")
                style_cell_properies.contents.append(style_background_image)
                cell_style.contents.append(style_cell_properies)
                row_cell_styles.append(cell_style)

                cell_odt.attrs = {
                    "table:style-name": cell_style_name,
                    "office:value-type": "string",
                }
                if cell.col_span > 1:
                    cell_odt.attrs[
                        "table:number-columns-spanned"] = cell.col_span

                if cell.content:
                    cell_content = utils.panflute2output(
                        cell.content, format="opendocument").strip()
                    cell_content = BeautifulSoup(cell_content,
                                                 "lxml").html.body
                    text_p = re.compile("text:p")
                    for t in cell_content.find_all(text_p):
                        if cell.heading == 1:
                            t["text:style-name"] = "Table_20_Heading"
                        elif cell.heading == 2:
                            t["text:style-name"] = "Table_20_Subheading"
                        else:
                            t["text:style-name"] = "Table_20_Contents"
                        if cell.vertical:
                            # Wrap the paragraph content in a "Vertical" span.
                            t_contents = t.contents
                            t.contents = [
                                utils.create_nested_tags(
                                    **{
                                        "name": "text:span",
                                        "attrs": {
                                            "text:style-name": "Vertical"
                                        },
                                        "contents": t_contents,
                                    })
                            ]
                    cell_odt.contents = cell_content.contents
                else:
                    # Empty cell: emit an empty paragraph using the same
                    # style name as populated body cells (the name was
                    # previously spelled "Table_20_contents" here).
                    cell_content = Tag(name="text:p")
                    cell_content.attrs = {
                        "text:style-name": "Table_20_Contents"
                    }
                    cell_odt.contents.append(cell_content)
                row_odt.contents.append(cell_odt)

        if row.underlines:
            for underline in row.underlines:
                start = underline[0]
                stop = underline[1]
                # Underline ranges are 1-based and inclusive.
                for i in range(start - 1, stop):
                    cell_style = row_cell_styles[i]
                    if cell_style is not None:
                        cell_style.contents[0].attrs[
                            "fo:border-bottom"] = "0.5pt solid #000000"

        # Extra top padding comes from this row's top_space and from the
        # previous row's btm_space.
        add_top_space = table.content[r - 1].btm_space if r else False
        if row.top_space or add_top_space:
            for cell_style in row_cell_styles:
                if cell_style is not None:
                    padding_top = cell_style.contents[0].attrs[
                        "fo:padding-top"]
                    padding_top = (float(padding_top.strip("cm")) +
                                   0.05 * add_top_space +
                                   0.05 * row.top_space)
                    cell_style.contents[0].attrs[
                        "fo:padding-top"] = f"{padding_top}cm"

        row_cell_styles = [cs for cs in row_cell_styles if cs is not None]
        styles.contents.extend(row_cell_styles)
        table_odt.contents.append(row_odt)

    try:
        footer = elem.content[1].content[0]
    except IndexError:
        footer = None

    if footer is not None:
        for definition_item in footer.content:
            term = "".join(pf.stringify(e) for e in definition_item.term)
            definitions = [
                utils.panflute2output(d.content, format="opendocument")
                for d in definition_item.definitions
            ]
            definitions_parsed = BeautifulSoup("".join(definitions),
                                               "lxml").html.body.contents
            for t in definitions_parsed:
                if t.name == "text:p":
                    # Turn each paragraph into an inline span that starts
                    # with a space.
                    t.name = "text:span"
                    t.contents.insert(0, NavigableString(" "))
            definition = utils.create_nested_tags(
                **{
                    "name": "text:p",
                    "attrs": {
                        "text:style-name": "Table_20_Legend"
                    },
                    "contents": [{
                        "name": "text:span",
                        "attrs": {
                            "text:style-name": "Superscript"
                        },
                        "contents": [term],
                    }] + definitions_parsed,
                })
            table_root.contents.append(definition)

    styles = "\n".join(c.prettify() for c in styles.contents)
    doc.auto_styles.append(styles)

    table = "\n".join(str(c) for c in table_root.contents)
    return table