def get_urls(self):
    """Extract all URLs embedded in this file through a simple regex.

    Returns:
        A list of unique URL strings found in the file, excluding any
        whose domain is whitelisted.
    """
    # An empty file cannot contain any URLs; also avoids mmap-ing a
    # zero-length file, which would fail.
    if not os.path.getsize(self.file_path):
        return []

    # http://stackoverflow.com/a/454589
    urls = set()
    # Use a context manager so the file handle is always closed —
    # the original leaked it.
    with open(self.file_path, "rb") as f:
        # self.mmap presumably maps the file into memory for regex
        # scanning — TODO confirm against the class definition.
        for url in re.findall(URL_REGEX, self.mmap(f.fileno())):
            # url is a regex group tuple; url[1] is the domain part.
            if not is_whitelisted_domain(url[1]):
                urls.add("".join(url))
    return list(urls)
def get_urls(self):
    """Extract all URLs embedded in this file through a simple regex.

    Returns:
        A list of unique URL strings found in the file, excluding any
        whose domain is whitelisted.
    """
    # Empty files hold no URLs, and mmap-ing a zero-length file fails.
    if not os.path.getsize(self.file_path):
        return []

    # http://stackoverflow.com/a/454589
    urls = set()
    # `with` guarantees the descriptor is closed; the original version
    # opened the file and never closed it (resource leak).
    with open(self.file_path, "rb") as f:
        # self.mmap presumably wraps mmap.mmap — TODO confirm.
        for url in re.findall(URL_REGEX, self.mmap(f.fileno())):
            # url[1] holds the domain component of the match tuple.
            if not is_whitelisted_domain(url[1]):
                urls.add("".join(url))
    return list(urls)
def _is_whitelisted(self, conn, hostname): """Checks if whitelisting conditions are met""" # Is whitelistng enabled? if not self.whitelist_enabled: return False # Is DNS recording coming from allowed NS server. if not self.known_dns: pass elif (conn.get("src") in self.known_dns or conn.get("dst") in self.known_dns): pass else: return False # Is hostname whitelisted. if not is_whitelisted_domain(hostname): return False return True
def _is_whitelisted(self, conn, hostname): """Checks if whitelisting conditions are met""" # Is whitelistng enabled? if not self.whitelist_enabled: return False # Is DNS recording coming from allowed NS server. if not self.known_dns: pass elif (conn.get("src") in self.known_dns or conn.get("dst") in self.known_dns): pass else: return False # Is hostname whitelisted. if not is_whitelisted_domain(hostname): return False return True
def test_is_whitelisted_domain():
    """Whitelisted domains match exactly; near-misses do not."""
    cases = [
        ("java.com", True),
        ("java2.com", False),
        ("crl.microsoft.com", True),
    ]
    for hostname, expected in cases:
        assert is_whitelisted_domain(hostname) is expected
def test_is_whitelisted_domain():
    """Exact whitelisted hostnames pass; similar non-whitelisted ones fail."""
    assert is_whitelisted_domain("crl.microsoft.com") is True
    assert is_whitelisted_domain("java.com") is True
    # A lookalike domain must not be treated as whitelisted.
    assert is_whitelisted_domain("java2.com") is False