def add_link_markup(tags):
    """Decorate anchor tags in place with redirect, icon and underline markup.

    For every tag in ``tags`` that carries an ``href``:

    * a link that already starts with ``/external-site/?`` is re-pointed at
      ``signed_redirect()`` of its ``ext_url`` query argument, when that
      argument is present;
    * a link matching ``NONCFPB_LINK_PATTERN`` gains the ``EXTERNAL_A_CSS``
      class and, if it also matches ``EXTERNAL_LINK_PATTERN``, its href is
      replaced by ``signed_redirect(href)``;
    * a link whose href is found by ``DOWNLOAD_LINK_PATTERN`` gains the
      ``DOWNLOAD_A_CSS`` class.

    Links that received an icon class get their contents wrapped in a
    ``<span>`` carrying ``EXTERNAL_SPAN_CSS``, followed by a single space.
    Every other link whose (possibly rewritten) href does not match
    ``FILES_LINK_PATTERN`` is handed to ``fix_link()``.

    Tags without an ``href`` attribute are skipped untouched. Tags with an
    ``href`` but no (or an empty) ``class`` get ``class`` set to an empty
    list before classification.

    :param tags: iterable of Beautiful Soup ``Tag`` objects.
    :returns: None; the tags are modified in place.
    """
    # Built on first use and shared by every link that needs a wrapper span,
    # instead of parsing a new empty document per link.
    span_factory = None

    for tag in tags:
        if tag.attrs.get('href') is None:
            # Nothing to classify; indexing tag['href'] would raise KeyError.
            continue

        if not tag.attrs.get('class', None):
            tag.attrs.update({'class': []})

        added_icon = False
        href = tag['href']

        if href.startswith('/external-site/?'):
            arguments = parse_qs(urlparse(href).query)
            if 'ext_url' in arguments:
                tag['href'] = signed_redirect(arguments['ext_url'][0])
        elif NONCFPB_LINK_PATTERN.match(href):
            # Sets the icon to indicate you're leaving consumerfinance.gov
            tag.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                tag['href'] = signed_redirect(href)
            added_icon = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Sets the icon to indicate you're downloading a file
            tag.attrs['class'].append(DOWNLOAD_A_CSS)
            added_icon = True

        if added_icon:
            # Wraps the link text in a span that provides the underline.
            if span_factory is None:
                span_factory = BeautifulSoup('', 'html.parser')
            span = span_factory.new_tag('span')
            span['class'] = EXTERNAL_SPAN_CSS
            # Move the children with append() rather than assigning
            # .contents directly, so Beautiful Soup keeps the parent and
            # sibling links of every moved node consistent. Iterate over a
            # copy because append() detaches each child from the anchor.
            for child in list(tag.contents):
                span.append(child)
            tag.append(span)
            tag.append(NavigableString(' '))
        elif not FILES_LINK_PATTERN.match(tag['href']):
            # Re-read the href: the /external-site/ branch may have
            # rewritten it.
            fix_link(tag)
def parse_links(soup):
    """Strip styling from ``soup`` and mark up its links in place.

    Steps, in order:

    1. every ``<style>`` element is removed from the tree;
    2. the inline ``style`` attribute is removed from every remaining tag;
    3. each ``<a>`` with an ``href`` is classified:

       * if the href matches ``settings.NONCFPB_LINK_PATTERN`` the anchor's
         ``class`` is replaced (not extended) with the ``EXTERNAL_A_CSS``
         environment value, and a space plus an empty
         ``<span class=EXTERNAL_SPAN_CSS>`` are appended; if the href also
         matches ``settings.EXTERNAL_LINK_PATTERN`` it is first prefixed
         with ``/external-site/?ext_url=``;
       * otherwise, unless the href matches ``settings.FILES_LINK_PATTERN``,
         the anchor is passed to ``fix_link()``.

    :param soup: a Beautiful Soup document; it is modified in place.
    :returns: the same ``soup`` object.
    """
    extlink_pattern = re.compile(settings.EXTERNAL_LINK_PATTERN)
    noncfpb_pattern = re.compile(settings.NONCFPB_LINK_PATTERN)
    files_pattern = re.compile(settings.FILES_LINK_PATTERN)
    a_class = os.environ.get('EXTERNAL_A_CSS',
                             'icon-link icon-link__external-link')
    span_class = os.environ.get('EXTERNAL_SPAN_CSS', 'icon-link_text')

    # This removes style tags <style>
    for style_tag in soup('style'):
        style_tag.decompose()

    # This removes all inline style attr's. Only tags are visited, so text
    # nodes (which have no attributes) never need a catch-all except.
    for tag in soup.find_all(True):
        tag.attrs.pop('style', None)

    for a in soup.find_all('a', href=True):
        href = a['href']
        # Sets the icon to indicate you're leaving consumerfinance.gov
        if noncfpb_pattern.match(href):
            # Sets the link to an external one if you're leaving .gov
            if extlink_pattern.match(href):
                a['href'] = '/external-site/?ext_url=' + href
            a.attrs.update({'class': a_class})
            a.append(' ')  # We want an extra space before the icon
            # Set the class explicitly: handing new_tag() a pre-formatted
            # 'class="..."' string as ``attrs`` does not produce a class
            # attribute.
            icon_span = soup.new_tag('span')
            icon_span['class'] = span_class
            a.append(icon_span)
        elif not files_pattern.match(href):
            fix_link(a)
    return soup
def add_link_markup(tags):
    """Decorate anchor tags in place with icon classes and underline markup.

    For every tag in ``tags`` that carries an ``href``:

    * a link matching ``NONCFPB_LINK_PATTERN`` gains the ``EXTERNAL_A_CSS``
      class and, if it also matches ``EXTERNAL_LINK_PATTERN``, its href is
      prefixed with ``/external-site/?ext_url=``;
    * otherwise a link whose href is found by ``DOWNLOAD_LINK_PATTERN``
      gains the ``DOWNLOAD_A_CSS`` class.

    Links that received an icon class get their contents wrapped in a
    ``<span>`` carrying ``EXTERNAL_SPAN_CSS``, followed by a single space.
    Every other link whose href does not match ``FILES_LINK_PATTERN`` is
    handed to ``fix_link()``.

    Tags without an ``href`` attribute are skipped untouched. Tags with an
    ``href`` but no (or an empty) ``class`` get ``class`` set to an empty
    list before classification.

    :param tags: iterable of Beautiful Soup ``Tag`` objects.
    :returns: None; the tags are modified in place.
    """
    # Built on first use and shared by every link that needs a wrapper span,
    # instead of parsing a new empty document per link.
    span_factory = None

    for tag in tags:
        if tag.attrs.get('href') is None:
            # Nothing to classify; indexing tag['href'] would raise KeyError.
            continue

        if not tag.attrs.get('class', None):
            tag.attrs.update({'class': []})

        added_icon = False
        href = tag['href']

        if NONCFPB_LINK_PATTERN.match(href):
            # Sets the icon to indicate you're leaving consumerfinance.gov
            tag.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                # Sets the link to an external one if you're leaving .gov
                # NOTE(review): the target is appended unencoded; confirm the
                # /external-site/ view copes with '&' or '#' in the URL.
                tag['href'] = '/external-site/?ext_url=' + href
            added_icon = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Sets the icon to indicate you're downloading a file
            tag.attrs['class'].append(DOWNLOAD_A_CSS)
            added_icon = True

        if added_icon:
            # Wraps the link text in a span that provides the underline.
            if span_factory is None:
                # Name the parser explicitly so the result does not depend
                # on which optional parsers happen to be installed.
                span_factory = BeautifulSoup('', 'html.parser')
            span = span_factory.new_tag('span')
            span['class'] = EXTERNAL_SPAN_CSS
            # Move the children with append() rather than assigning
            # .contents directly, so Beautiful Soup keeps the parent and
            # sibling links of every moved node consistent. Iterate over a
            # copy because append() detaches each child from the anchor.
            for child in list(tag.contents):
                span.append(child)
            tag.append(span)
            tag.append(NavigableString(' '))
        elif not FILES_LINK_PATTERN.match(href):
            fix_link(tag)
def add_link_markup(tags):
    """Add icon classes and an underline span to anchor tags, in place.

    Each tag in ``tags`` that has an ``href`` is classified as follows:

    * href matches ``NONCFPB_LINK_PATTERN``: ``EXTERNAL_A_CSS`` is appended
      to the tag's classes; when the href additionally matches
      ``EXTERNAL_LINK_PATTERN`` it is rewritten to
      ``/external-site/?ext_url=`` + the original href;
    * else ``DOWNLOAD_LINK_PATTERN`` finds a match in the href:
      ``DOWNLOAD_A_CSS`` is appended to the tag's classes;
    * else, unless the href matches ``FILES_LINK_PATTERN``, the tag is
      passed to ``fix_link()``.

    In the first two cases the tag's children are moved into a new
    ``<span>`` whose class is ``EXTERNAL_SPAN_CSS``, and a single space is
    appended after that span.

    Tags lacking an ``href`` are ignored. A tag with an ``href`` and a
    missing or empty ``class`` has ``class`` initialised to ``[]``.

    :param tags: iterable of Beautiful Soup ``Tag`` objects.
    :returns: None; the tags are modified in place.
    """
    # One empty document is enough to mint every wrapper span; create it
    # only if some link actually needs one.
    span_factory = None

    for tag in tags:
        if tag.attrs.get('href') is None:
            # Without this guard tag['href'] raises KeyError.
            continue

        if not tag.attrs.get('class', None):
            tag.attrs.update({'class': []})

        added_icon = False
        href = tag['href']

        if NONCFPB_LINK_PATTERN.match(href):
            # Sets the icon to indicate you're leaving consumerfinance.gov
            tag.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                # Sets the link to an external one if you're leaving .gov
                # NOTE(review): the target is appended unencoded; confirm the
                # /external-site/ view copes with '&' or '#' in the URL.
                tag['href'] = '/external-site/?ext_url=' + href
            added_icon = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Sets the icon to indicate you're downloading a file
            tag.attrs['class'].append(DOWNLOAD_A_CSS)
            added_icon = True

        if added_icon:
            # Wraps the link text in a span that provides the underline.
            if span_factory is None:
                span_factory = BeautifulSoup('', 'html.parser')
            span = span_factory.new_tag('span')
            span['class'] = EXTERNAL_SPAN_CSS
            # append() re-parents each child, keeping the tree's parent and
            # sibling links consistent; assigning .contents directly does
            # not. Copy the list first because append() mutates it.
            for child in list(tag.contents):
                span.append(child)
            tag.append(span)
            tag.append(NavigableString(' '))
        elif not FILES_LINK_PATTERN.match(href):
            fix_link(tag)