def init_domain(domain, environment, options):
    """Decide whether *domain* should be scanned, and resolve its scan URL.

    Uses cached pshtt data (via ``utils``) to skip domains that were not
    live, or that were seen as pure external redirectors, during inspection.

    Returns False to skip the domain, or an environment dict with a 'url'
    key for the scan step.
    """
    cache_dir = options.get("_", {}).get("cache_dir", "./cache")

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain, cache_dir=cache_dir):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return False

    # requests needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value), hitting the
        # cache lookup twice for no benefit.
        canonical = utils.domain_canonical(domain, cache_dir=cache_dir)
        # Otherwise, well, ssl should work.
        url = canonical if canonical else ('https://' + domain)

    return {'url': url}
def init_domain(domain, environment, options):
    """Decide whether *domain* should be scanned, and resolve its scan URL.

    Uses cached pshtt data (via ``utils``) to skip domains that were not
    live, or that were seen as pure external redirectors, during inspection.

    Returns False to skip the domain, or an environment dict with a 'url'
    key for the scan step (phantomas needs a full URL, not a bare domain).
    """
    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return False

    # phantomas needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value).
        canonical = utils.domain_canonical(domain)
        # Otherwise, well, whatever.
        url = canonical if canonical else ('http://' + domain)

    return {'url': url}
def init_domain(domain, environment, options):
    """Decide whether *domain* should be scanned, and resolve its scan URL.

    Uses cached pshtt data (via ``utils``) to skip domains that were not
    live, or that were seen as pure external redirectors, during inspection.

    Returns False to skip the domain, or an environment dict with a 'url'
    key for the scan step.
    """
    cache_dir = options.get("_", {}).get("cache_dir", "./cache")

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain seen as just an external redirector during inspection.")
        return False

    # requests needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value), doing the cache
        # lookup twice for no benefit.
        canonical = utils.domain_canonical(domain, cache_dir=cache_dir)
        # Otherwise, well, ssl should work.
        url = canonical if canonical else ('https://' + domain)

    return {'url': url}
def scan(domain, options):
    """Run (or replay from cache) a phantomas page-load scan of *domain*.

    Skips domains that pshtt data marks as dead or as redirectors. On
    success, yields a single row of the values in ``interesting_metrics``
    pulled from phantomas's JSON output; returns None (ending the
    generator) when the domain is skipped or the scan produced no output.
    """
    logging.debug("[%s][pageload]" % domain)

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value).
        canonical = utils.domain_canonical(domain)
        # Otherwise, well, whatever.
        url = canonical if canonical else ('http://' + domain)

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        # Fix: close the cache file deterministically (the original used
        # bare open(cache).read() and leaked the handle).
        with open(cache) as f:
            raw = f.read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" % (command, url))
        raw = utils.scan(
            [command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
def scan(domain, options):
    """Run (or replay from cache) a phantomas page-load scan of *domain*.

    Skips domains that pshtt data marks as dead or as redirectors. On
    success, yields a single row of the values in ``interesting_metrics``
    pulled from phantomas's JSON output; returns None (ending the
    generator) when the domain is skipped or the scan produced no output.
    """
    logging.debug("[%s][pageload]" % domain)

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug("\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value).
        canonical = utils.domain_canonical(domain)
        # Otherwise, well, whatever.
        url = canonical if canonical else ('http://' + domain)

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        # Fix: close the cache file deterministically (the original used
        # bare open(cache).read() and leaked the handle).
        with open(cache) as f:
            raw = f.read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" % (command, url))
        raw = utils.scan([command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
def init_domain(domain, environment, options):
    """Prefilter for the a11y scan.

    Consults pshtt data to skip redirectors and dead domains, then asks
    get_url_to_scan() for an adjusted URL (redirect/blacklist aware).
    Returns False to skip the domain, or {'url': ...} for the scan step.
    """
    # pshtt already told us this domain is a redirect or unreachable.
    skip = utils.domain_is_redirect(domain) or utils.domain_not_live(domain)
    if skip:
        logging.debug("\tSkipping a11y scan based on pshtt data.")
        return False

    # Redirect/blacklist data may adjust the URL — or veto the scan entirely.
    url = get_url_to_scan(domain)
    if url:
        return {'url': url}

    logging.debug("\tSkipping a11y scan based on redirect/blacklist data.")
    return False
def scan(domain, options):
    """Yield one CSV-shaped row per accessibility error found for *domain*.

    Skips domains flagged by pshtt as redirectors or not live, or for
    which get_domain_to_scan() yields nothing. Rows contain the domain
    plus the pa11y-style error fields.
    """
    logging.debug("[%s][a11y]" % domain)

    domain_to_scan = get_domain_to_scan(domain)

    # Bail out early: redirector, dead domain, or nothing resolvable to scan.
    skip = (
        utils.domain_is_redirect(domain)
        or utils.domain_not_live(domain)
        or not domain_to_scan
    )
    if skip:
        logging.debug("Skipping a11y scan for %s" % domain)
        return None

    logging.debug("Running scan for %s" % domain)
    for error in get_errors_from_scan_or_cache(domain, options):
        logging.debug("Writing data for %s" % domain)
        row = [
            domain,
            error['typeCode'],
            error['code'],
            error['message'],
            error['context'],
            error['selector'],
        ]
        yield row
def scan(domain, options):
    """Run (or replay from cache) a phantomas third-party-domains scan.

    Skips domains that pshtt data marks as dead or as redirectors.
    On success, yields a single row: external/internal service counts and
    request totals, serialized service lists, and one Yes/No column per
    known service name. Returns None (ending the generator) when the
    domain is skipped or the scan produced no output.
    """
    logging.debug("[%s][third_parties]" % domain)

    # Default timeout is 15s, too little.
    timeout = int(options.get("timeout", 60))

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return None

    # phantomas needs a URL, not just a domain.
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        # If we have data from pshtt, use the canonical endpoint.
        # Fetched once and reused — the original called domain_canonical()
        # twice (once for the check, once for the value).
        canonical = utils.domain_canonical(domain)
        # Otherwise, well, whatever.
        url = canonical if canonical else ('http://' + domain)

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "third_parties")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        # Fix: close the cache file deterministically (the original used
        # bare open(cache).read() and leaked the handle).
        with open(cache) as f:
            raw = f.read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug(
            "\t %s %s --modules=domains --reporter=json --timeout=%i --ignore-ssl-errors"
            % (command, url, timeout))
        raw = utils.scan([
            command, url, "--modules=domains", "--reporter=json",
            "--timeout=%i" % timeout, "--ignore-ssl-errors"
        ], allowed_return_codes=[252])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    services = services_for(data, domain, options)

    # Convert to CSV row: one Yes/No column per known service, in sorted order.
    known_names = sorted(known_services.keys())
    known_matches = [
        'Yes' if host in services['known'] else 'No' for host in known_names
    ]

    yield [
        len(services['external']),
        len(services['internal']),
        services['external_requests'],
        services['internal_requests'],
        serialize(services['external']),
        serialize(services['internal']),
    ] + known_matches