Example #1
0
def init_domain(domain, environment, options):
    cache_dir = options.get("_", {}).get("cache_dir", "./cache")
    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain, cache_dir=cache_dir):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return False

    # requests needs a URL, not just a domain.
    url = None
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain, cache_dir=cache_dir):
            url = utils.domain_canonical(domain, cache_dir=cache_dir)

        # Otherwise, well, ssl should work.
        else:
            url = 'https://' + domain
    else:
        url = domain

    return {'url': url}
Example #2
0
def init_domain(domain, environment, options):
    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return False

    # phantomas needs a URL, not just a domain.
    url = None
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain):
            url = utils.domain_canonical(domain)

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    return {'url': url}
Example #3
0
def init_domain(domain, environment, options):
    cache_dir = options.get("_", {}).get("cache_dir", "./cache")
    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return False

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain, cache_dir=cache_dir):
        logging.debug("\tSkipping, domain seen as just an external redirector during inspection.")
        return False

    # requests needs a URL, not just a domain.
    url = None
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain, cache_dir=cache_dir):
            url = utils.domain_canonical(domain, cache_dir=cache_dir)

        # Otherwise, well, ssl should work.
        else:
            url = 'https://' + domain
    else:
        url = domain

    return {'url': url}
Example #4
0
def scan(domain, options):
    logging.debug("[%s][pageload]" % domain)

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain):
            url = utils.domain_canonical(domain)

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" %
                      (command, url))
        raw = utils.scan(
            [command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
Example #5
0
def scan(domain, options):
    logging.debug("[%s][pageload]" % domain)

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug("\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain):
            url = utils.domain_canonical(domain)

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" % (command, url))
        raw = utils.scan([command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
Example #6
0
def init_domain(domain, environment, options):
    # If we've got pshtt data, use it to cut down work.
    if (utils.domain_is_redirect(domain) or utils.domain_not_live(domain)):
        logging.debug("\tSkipping a11y scan based on pshtt data.")
        return False

    # Use redirect/blacklist data to adjust (or stop) scan URL.
    url = get_url_to_scan(domain)

    if not url:
        logging.debug("\tSkipping a11y scan based on redirect/blacklist data.")
        return False

    # Send adjusted URL to scan function.
    return {'url': url}
Example #7
0
def scan(domain, options):
    logging.debug("[%s][a11y]" % domain)

    domain_to_scan = get_domain_to_scan(domain)
    if (utils.domain_is_redirect(domain) or
            utils.domain_not_live(domain) or
            not domain_to_scan):
        logging.debug("Skipping a11y scan for %s" % domain)
        return None
    logging.debug("Running scan for %s" % domain)
    errors = get_errors_from_scan_or_cache(domain, options)

    for data in errors:
        logging.debug("Writing data for %s" % domain)
        yield [
            domain,
            data['typeCode'],
            data['code'],
            data['message'],
            data['context'],
            data['selector']
        ]
def scan(domain, options):
    logging.debug("[%s][third_parties]" % domain)

    # Default timeout is 15s, too little.
    timeout = int(options.get("timeout", 60))

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain):
            url = utils.domain_canonical(domain)

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # calculated_domain = re.sub("https?:\/\/", "", url)

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "third_parties")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug(
            "\t %s %s --modules=domains --reporter=json --timeout=%i --ignore-ssl-errors"
            % (command, url, timeout))
        raw = utils.scan([
            command, url, "--modules=domains", "--reporter=json",
            "--timeout=%i" % timeout, "--ignore-ssl-errors"
        ],
                         allowed_return_codes=[252])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    services = services_for(data, domain, options)

    # Convert to CSV row
    known_names = list(known_services.keys())
    known_names.sort()
    known_matches = [
        'Yes' if host in services['known'] else 'No' for host in known_names
    ]

    yield [
        len(services['external']),
        len(services['internal']),
        services['external_requests'],
        services['internal_requests'],
        serialize(services['external']),
        serialize(services['internal']),
        # services['affiliated'],
        # services['unknown']
    ] + known_matches