def check_html_content_single(self, html_to_check, html_soup,
                              original_url_split, final_url_split):
    """Report whether each applicable HTML check matches the parsed page.

    ``html_to_check`` maps a key to a list of check objects. The checks
    under a key are run when the key equals ``PREFIX_ALL`` or when
    ``is_similar_url_split`` accepts the key against either the original
    or the final URL split.

    Returns a list of ``(str(check), found)`` tuples, in iteration order,
    where ``found`` is True when ``html_soup.find`` returned something for
    the check's tag (plus its attrs and content, when those are set).
    """
    results = []
    for url_key, checks in html_to_check.items():
        applies = (
            url_key == PREFIX_ALL
            or is_similar_url_split(url_key, original_url_split)
            or is_similar_url_split(url_key, final_url_split)
        )
        if not applies:
            continue

        for check in checks:
            find_options = {}
            if check.attrs:
                find_options["attrs"] = check.attrs
            if check.content:
                # Keep the legacy "text" keyword: the bundled bs4 predates
                # the newer "string" parameter, and "text" is backward
                # compatible.
                find_options["text"] = check.content

            element = html_soup.find(check.tag, **find_options)
            results.append((str(check), element is not None))
    return results
def check_html_content_single(self, html_to_check, html_soup,
                              original_url_split, final_url_split):
    """Run the HTML presence checks that apply to this page.

    Each entry of ``html_to_check`` is a key with a list of check objects.
    An entry is used when its key is ``PREFIX_ALL`` or when
    ``is_similar_url_split`` matches the key with the original URL split
    or, failing that, with the final URL split.

    Returns a list of ``(str(check), present)`` tuples; ``present`` tells
    whether ``html_soup.find`` located the check's tag with the requested
    attrs and content.
    """
    url_splits = (original_url_split, final_url_split)
    outcome = []
    for prefix, tag_checks in html_to_check.items():
        # any() stops at the first similar split, so the final split is
        # only compared when the original one did not match.
        relevant = prefix == PREFIX_ALL or any(
            is_similar_url_split(prefix, url_split)
            for url_split in url_splits
        )
        if relevant:
            for tag_check in tag_checks:
                criteria = {}
                if tag_check.attrs:
                    criteria["attrs"] = tag_check.attrs
                if tag_check.content:
                    # "text" rather than "string": the included bs4 does
                    # not know the newer parameter, and "text" is
                    # backward compatible.
                    criteria["text"] = tag_check.content

                present = html_soup.find(tag_check.tag, **criteria) is not None
                outcome.append((str(tag_check), present))
    return outcome
def check_text_content_single(self, text_content_to_check, full_text,
                              original_url_split, final_url_split):
    """Report whether each applicable text check is found in ``full_text``.

    ``text_content_to_check`` maps a key to a list of checks. The checks
    under a key are run when the key equals ``PREFIX_ALL`` or when
    ``is_similar_url_split`` accepts the key against either the original
    or the final URL split.

    A check is either:

    * an object exposing ``search()`` and ``pattern`` (such as a compiled
      regular expression) -- reported as ``(check.pattern, matched)``;
    * anything else (typically a plain string) -- tested with
      ``check in full_text`` and reported as ``(check, found)``.

    Returns the list of those tuples in iteration order.
    """
    results = []
    for url_key, checks in text_content_to_check.items():
        applies = (
            url_key == PREFIX_ALL
            or is_similar_url_split(url_key, original_url_split)
            or is_similar_url_split(url_key, final_url_split)
        )
        if not applies:
            continue

        for check in checks:
            # Decide the kind of check up front instead of catching
            # AttributeError around the whole search: an AttributeError
            # raised for an unrelated reason is no longer silently turned
            # into a substring test, and plain strings no longer pay for
            # an exception on every iteration.
            search = getattr(check, "search", None)
            if search is None:
                results.append((check, check in full_text))
            else:
                matched = search(full_text) is not None
                results.append((check.pattern, matched))
    return results
def check_text_content_single(self, text_content_to_check, full_text,
                              original_url_split, final_url_split):
    """Run the text presence checks that apply to this page.

    Each entry of ``text_content_to_check`` is a key with a list of
    checks. An entry is used when its key is ``PREFIX_ALL`` or when
    ``is_similar_url_split`` matches the key with the original or the
    final URL split.

    Checks that expose ``search()`` (such as compiled regular expressions)
    are searched in ``full_text`` and reported under their ``pattern``;
    every other check (typically a plain string) is tested with
    ``check in full_text`` and reported as itself.

    Returns a list of ``(pattern_or_text, found)`` tuples.
    """
    results = []
    for url_key, checks in text_content_to_check.items():
        applies = (
            url_key == PREFIX_ALL
            or is_similar_url_split(url_key, original_url_split)
            or is_similar_url_split(url_key, final_url_split)
        )
        if not applies:
            continue

        for check in checks:
            # Look the method up explicitly rather than wrapping the call
            # in ``except AttributeError``: that handler also swallowed
            # AttributeErrors coming from elsewhere and made every plain
            # string check raise inside the loop.
            search = getattr(check, "search", None)
            if search is None:
                results.append((check, check in full_text))
            else:
                matched = search(full_text) is not None
                results.append((check.pattern, matched))
    return results