def extract_abp(content):
    """Pull blocked and unblocked domains out of ABP-style filter rules.

    Args:
        content: Collection of rule strings. It is traversed twice (once
            for block rules, once for unblock rules), so it must be
            re-iterable for both passes to see the rules.

    Returns:
        A 4-tuple ``(blocked_domains, unblocked_domains, unblock_rules,
        regex_rules)``. ``unblock_rules`` holds the raw matching rule
        strings; ``regex_rules`` is always an empty list.
    """
    # Any rule starting with non-space text that contains "/" or "=" is
    # rejected, for block and unblock rules alike.
    unsupported = re.compile(r"\S+(?>\/|\=)\S+", re.V1)

    # "||<something>^" either ending right there or followed by text that
    # contains one of the accepted option keywords.
    # NOTE: "popup" is not among the accepted keywords here, although the
    # scrub pattern below does list it.
    supported_block = re.compile(
        r"^\|\|.+\^(?>$|.+(?:"
        r"\bfirst-party\b|"
        r"\b1p\b|"
        r"\bthird-party\b|"
        r"\b3p\b|"
        r"\bdocument\b|"
        r"\ball\b"
        r"))",
        re.V1,
    )

    # Fragments removed from a block rule to leave the bare domain: the
    # leading "||" and everything from the "^" separator onwards.
    scrub_fragments = (
        r"^\|\|",
        r"\^($|.+(?>"
        r"\bfirst-party\b|"
        r"\b1p\b|"
        r"\bthird-party\b|"
        r"\b3p\b|\bdocument\b|"
        r"\ball\b|"
        r"\bpopup\b|"
        r"\S+))",
    )
    scrubber = re.compile(
        "|".join("(?:" + fragment + ")" for fragment in scrub_fragments),
        re.V1,
    )

    supported_unblock = re.compile(r"@@\|\|.+\^$")

    def _select(supported):
        """Return the rules in ``content`` matching ``supported`` but not ``unsupported``."""
        selected = []
        for rule in content:
            if not supported.match(rule, concurrent=True):
                continue
            if unsupported.match(rule, concurrent=True):
                continue
            selected.append(rule)
        return selected

    # First pass: block rules -> scrubbed candidates -> validated domains.
    block_rules = _select(supported_block)
    scrubbed = [scrubber.sub("", rule, concurrent=True) for rule in block_rules]
    blocked_domains = list(filter(valid_domain, scrubbed))

    # Second pass: unblock rules, stripped of their ABP decoration. These
    # are not passed through valid_domain.
    unblock_rules = _select(supported_unblock)
    unblocked_domains = []
    for rule in unblock_rules:
        domain = rule
        for token in ("@@||", "^", "$important"):
            domain = domain.replace(token, "")
        unblocked_domains.append(domain)

    regex_rules = []
    return blocked_domains, unblocked_domains, unblock_rules, regex_rules
def validateDomain(url):
    """Check that *url* is a valid URL whose network location is a valid domain.

    Args:
        url: The URL string to validate.

    Returns:
        ``url`` unchanged when both ``valid_url(url)`` and
        ``valid_domain(getNetloc(url))`` are truthy.

    Raises:
        SystemExit: On any failure. The ``ArgumentTypeError`` raised for an
            invalid URL/domain (or any other exception from the helpers) is
            caught here, printed, and the process exits.
    """
    try:
        if not valid_url(url):
            raise ArgumentTypeError('\n{}[x] Invalid url.{}\n'.format(BR, S))
        if not valid_domain(getNetloc(url)):
            raise ArgumentTypeError('\n{}[x] Invalid domain.{}\n'.format(
                BR, S))
        return url
    except Exception as e:
        # ``except ... as`` and ``print(...)`` are valid on both Python 2.6+
        # and Python 3; the previous ``except Exception, e`` / ``print e``
        # forms were a SyntaxError on Python 3.
        print(e)
        # NOTE(review): exits with status 0 even though validation failed;
        # kept as-is for compatibility -- confirm whether callers rely on it.
        sys.exit(0)
def extract_hosts(content, list_type):
    """Pull domains out of hosts-file / plain-domain-list style lines.

    Each line is scrubbed of comments, IP prefixes and ``www.``/``m.``
    prefixes, and only results accepted by ``valid_domain`` are kept.

    Args:
        content: Iterable of line strings.
        list_type: ``"block"`` or ``"unblock"``; selects which element of
            the returned pair receives the domains.

    Returns:
        A pair ``(blocked_domains, unblocked_domains)``. Exactly one of them
        holds the extracted domains; both are empty lists when
        ``list_type`` is neither ``"block"`` nor ``"unblock"``.
    """
    scrub_fragments = (
        # "#" or "!" (optionally preceded by whitespace) and the rest of the line
        r"(?>\#|\!|\s+\#|\s+\!).*",
        # one leading whitespace character
        r"^\s",
        # any line mentioning "localhost" as a word
        r".*\blocalhost\b.*",
        # leading dotted-quad prefix plus the whitespace / "www." / "m." after it
        r"^\d*\.\d*\.\d*\.\d*\s*(?>\s|www\.|m\.)",
        # leading "www." or "m."
        r"^(?>www\.|m\.)",
    )
    scrubber = re.compile(
        "|".join("(?:" + fragment + ")" for fragment in scrub_fragments),
        re.V1,
    )

    stripped = [scrubber.sub("", line, concurrent=True) for line in content]
    domains = list(filter(valid_domain, stripped))

    if list_type == "unblock":
        return [], domains
    if list_type == "block":
        return domains, []
    return [], []
def parse_domains(self, domains):
    """Resolve each valid domain and append its addresses to ``self.ips``.

    Domains rejected by ``valid_domain`` and domains that fail to resolve
    are logged via ``log.error`` and skipped; the remaining domains'
    addresses are collected and added to ``self.ips`` in one step after the
    loop finishes.

    Args:
        domains: Iterable of domain-name strings. A falsy value (``None``,
            empty list, ...) makes the call a no-op.

    Returns:
        None.
    """
    if not domains:
        return

    resolved = []
    for d in domains:
        if not valid_domain(d):
            log.error('There was a problem validating domain input '
                      'using %s. Skipping...' % d)
            continue
        # note this does NOT support ipv6 translation
        try:
            # Only the address list is needed; the canonical hostname and
            # alias list are discarded.
            _, _, ipaddrlist = socket.gethostbyname_ex(d)
        except (socket.gaierror, socket.herror):
            # gethostbyname_ex() may signal a failed lookup with either
            # exception type; catching only gaierror let an herror abort
            # the whole batch instead of skipping the one domain.
            log.error('Could not resolve domain %s. Skipping...' % d)
            continue
        resolved.extend(ipaddrlist)
    self.ips.extend(resolved)