def set_of_html_href_file_paths_from(content: str) -> set[str]:
    """
    Search content for href html local file uri links and return a set of those links.

    Every tag carrying an ``href`` attribute is considered (not only ``<a>`` tags).
    A link is kept when its URL scheme is empty or ``file`` and its path component
    is non-empty.  Kept links have unquote applied, so
    'a%20path/another%20file.pdf' becomes 'a path/another file.pdf', and are then
    passed through ``helper_functions.unescape``.

    Parameters
    ==========
    content : str
        string containing html formatted href tags

    Returns
    =======
    set[str]
        set of local href link strings

    """
    soup = BeautifulSoup(content, 'html.parser')

    url_paths = set()
    for tag in soup.find_all(href=True):
        href = tag['href']
        # Parse once and reuse the result for both the scheme and the path check.
        parsed = urlparse(href)
        if parsed.scheme not in ("", "file") or not parsed.path:
            continue

        # The full href (not just the parsed path) is what gets decoded and stored.
        url_paths.add(helper_functions.unescape(unquote(href)))

    return url_paths
def versions_match(cpe_version: str, cve_entry: CveDbEntry) -> bool:
    """
    Decide whether ``cpe_version`` is covered by the version data of ``cve_entry``.

    Each of the four range boundaries of the entry that is set (truthy) is checked
    with ``compare_version(boundary, cpe_version, relation)``; the first failing
    check yields ``False``.  Afterwards the unescaped ``cve_entry.version`` must
    equal ``cpe_version`` unless it is one of the placeholders 'ANY' or 'N/A'.

    :param cpe_version: version string of the product under inspection
    :param cve_entry: CVE database entry providing the boundaries and the version
    :return: ``True`` if every applicable check passes, ``False`` otherwise
    """
    # The boundary is passed as the first operand to compare_version, hence
    # "start" boundaries pair with le/lt and "end" boundaries with ge/gt.
    boundary_checks = (
        (cve_entry.version_start_including, operator.le),
        (cve_entry.version_start_excluding, operator.lt),
        (cve_entry.version_end_including, operator.ge),
        (cve_entry.version_end_excluding, operator.gt),
    )
    for boundary, relation in boundary_checks:
        if not boundary:
            continue  # boundary not set for this entry
        if not compare_version(boundary, cpe_version, relation):
            return False

    exact_version = unescape(cve_entry.version)
    if exact_version in ['ANY', 'N/A']:
        # Placeholder version: nothing to compare against.
        return True
    return bool(compare_version(exact_version, cpe_version, operator.eq))
def build_version_string(cve_entry: CveDbEntry) -> str:
    """
    Build a human readable description of the versions covered by ``cve_entry``.

    If none of the four range boundaries is set, the unescaped
    ``cve_entry.version`` is returned.  Otherwise the result is the word
    'version' framed by the available boundaries, e.g. '1.0 ≤ version < 2.0'.
    For each side an including boundary takes precedence over an excluding one.

    :param cve_entry: CVE database entry providing the boundaries and the version
    :return: the version or version range as display string
    """
    start_including = cve_entry.version_start_including
    start_excluding = cve_entry.version_start_excluding
    end_including = cve_entry.version_end_including
    end_excluding = cve_entry.version_end_excluding

    if not (start_including or start_excluding or end_including or end_excluding):
        return unescape(cve_entry.version)

    # Lower boundary goes in front of the placeholder ...
    if start_including:
        lower_part = f'{start_including} ≤ '
    elif start_excluding:
        lower_part = f'{start_excluding} < '
    else:
        lower_part = ''

    # ... and the upper boundary behind it.
    if end_including:
        upper_part = f' ≤ {end_including}'
    elif end_excluding:
        upper_part = f' < {end_excluding}'
    else:
        upper_part = ''

    return f'{lower_part}version{upper_part}'
def build_version_string(cve_entry: CveDbEntry) -> str:
    """
    Render the version information of ``cve_entry`` as a display string.

    Without any range boundary the unescaped ``cve_entry.version`` is returned.
    With at least one boundary the literal word 'version' is wrapped by the
    boundaries that are set, e.g. '1.0 ≤ version' or '1.0 < version ≤ 2.0'.
    On either side the including boundary wins over the excluding one.

    :param cve_entry: CVE database entry providing the boundaries and the version
    :return: the version or version range as display string
    """
    boundaries = (
        cve_entry.version_start_including,
        cve_entry.version_start_excluding,
        cve_entry.version_end_including,
        cve_entry.version_end_excluding,
    )
    if not any(boundaries):
        return unescape(cve_entry.version)

    lower_incl, lower_excl, upper_incl, upper_excl = boundaries

    text = 'version'
    # Prepend the lower boundary, if there is one.
    if lower_incl:
        text = f'{lower_incl} ≤ {text}'
    elif lower_excl:
        text = f'{lower_excl} < {text}'
    # Append the upper boundary, if there is one.
    if upper_incl:
        text = f'{text} ≤ {upper_incl}'
    elif upper_excl:
        text = f'{text} < {upper_excl}'
    return text
def set_of_markdown_file_paths_from(content: str) -> set[str]:
    """
    Search string for markdown formatted image and file links and return a set of local link strings.

    Returned links will have unquote applied, so 'a%20path/another%20file.pdf'
    becomes 'a path/another file.pdf', and are then passed through
    ``helper_functions.unescape``.  Links starting with 'https://' or 'http://'
    and empty link targets are left out.

    Link formats recognised are
        [any text](../my_other_notebook/attachments/five.pdf "test tool tip text")
        [or empty](../my_other_notebook/attachments/an_image.jpg)
        [any text](https://www.google.com "google")
        [or empty](https://www.google.com)

    Parameters
    ==========
    content : str
        string containing markdown formatted links

    Returns
    =======
    set[str]
        set of local link strings

    """
    # NOTE: with re.VERBOSE unescaped whitespace outside a character class is
    # ignored, so the final non capturing group matches ')' or nothing at all.
    # The captured target is unaffected because it already stops at ')' or ' '.
    link_pattern = re.compile(r'''
        \[[^]]*]\(      # match the '[alt text](' part of the markdown link
        (               # start capturing group
        [^) ]*          # match many characters up to ) or up to a space [ ]
        )               # close capturing group
        (?:             # start non capturing group
        \)|             # match literal ) or single space
        )               # close non capturing group
        ''', re.MULTILINE | re.VERBOSE)

    local_links = set()
    for target in link_pattern.findall(content):
        # Skip empty targets and web links; everything else counts as local.
        if not target or target.startswith(("https://", "http://")):
            continue
        local_links.add(helper_functions.unescape(unquote(target)))

    return local_links
def _product_matches_cve(product: Product, cve_entry: CveDbEntry) -> bool:
    """
    Check whether ``product`` is affected by ``cve_entry``.

    Vendor name, product name and version are checked in this order and the
    evaluation stops at the first check that fails.

    :param product: product whose vendor name, product name and version number are compared
    :param cve_entry: CVE database entry to compare against
    :return: truthy if vendor, product name and version all match
    """
    vendor_matches = terms_match(product.vendor_name, cve_entry.vendor)
    if not vendor_matches:
        return vendor_matches

    name_matches = terms_match(product.product_name, cve_entry.product_name)
    if not name_matches:
        return name_matches

    # The product's version number is unescaped before the version comparison.
    return versions_match(unescape(product.version_number), cve_entry)
def test_unescape(content, expected):
    """
    Check that ``helper_functions.unescape(content)`` equals ``expected``.

    NOTE(review): ``content`` and ``expected`` are presumably supplied by a
    parametrize decorator or fixtures that are not visible here.
    """
    assert helper_functions.unescape(content) == expected