Example #1
def get_chrome_preload_list(options):

    preload_cache = utils.cache_single("preload-list.json")
    preload_json = None

    if (not options.get("force", False)) and os.path.exists(preload_cache):
        logging.debug("Using cached Chrome preload list.")
        preload_json = json.loads(open(preload_cache).read())
    else:
        logging.debug("Fetching Chrome preload list from source...")

        preload_list_url = 'https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json'
        preload_list_url_as_text = preload_list_url + '?format=text'
        with urllib.request.urlopen(preload_list_url_as_text) as response:
            raw = response.read()

        # To avoid parsing the contents of the file out of the source tree viewer's
        # HTML, we download it as a raw file. googlesource.com Base64-encodes the
        # file to avoid potential content injection issues, so we need to decode it
        # before using it. https://code.google.com/p/gitiles/issues/detail?id=7
        raw = base64.b64decode(raw).decode('utf-8')

        # The .json file contains '//' comments, which are not actually valid JSON,
        # and confuse Python's JSON decoder. Begone, foul comments!
        raw = ''.join([re.sub(r'^\s*//.*$', '', line)
                       for line in raw.splitlines()])

        preload_json = json.loads(raw)
        utils.write(utils.json_for(preload_json), preload_cache)

    return {entry['name'] for entry in preload_json['entries']}
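For reference, a minimal self-contained sketch (using made-up data rather than the real preload list) of the comment-stripping approach used above: Python's json module rejects the '//' comments found in the Chromium source file, so comment-only lines are blanked out before parsing.

import json
import re

# Hypothetical miniature of the Chromium-style file: JSON plus '//' comment lines.
raw = '''{
  // entries are preloaded hosts
  "entries": [
    {"name": "example.gov"},
    {"name": "example.com"}
  ]
}'''

# Blank out comment-only lines, then parse and collect the names, as above.
cleaned = ''.join(re.sub(r'^\s*//.*$', '', line) for line in raw.splitlines())
names = {entry['name'] for entry in json.loads(cleaned)['entries']}
print(names)  # e.g. {'example.gov', 'example.com'}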
Example #2
def get_chrome_preload_list(options):

    preload_cache = utils.cache_single("preload-list.json")
    preload_json = None

    if (not options.get("force", False)) and os.path.exists(preload_cache):
        logging.debug("Using cached Chrome preload list.")
        preload_json = json.loads(open(preload_cache).read())
    else:
        logging.debug("Fetching Chrome preload list from source...")

        preload_list_url = 'https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json'
        preload_list_url_as_text = preload_list_url + '?format=text'
        with urllib.request.urlopen(preload_list_url_as_text) as response:
            raw = response.read()

        # To avoid parsing the contents of the file out of the source tree viewer's
        # HTML, we download it as a raw file. googlesource.com Base64-encodes the
        # file to avoid potential content injection issues, so we need to decode it
        # before using it. https://code.google.com/p/gitiles/issues/detail?id=7
        raw = base64.b64decode(raw).decode('utf-8')

        # The .json file contains '//' comments, which are not actually valid JSON,
        # and confuse Python's JSON decoder. Begone, foul comments!
        raw = ''.join(
            [re.sub(r'^\s*//.*$', '', line) for line in raw.splitlines()])

        preload_json = json.loads(raw)
        utils.write(utils.json_for(preload_json), preload_cache)

    return {entry['name'] for entry in preload_json['entries']}
Example #3
def init(options):
    global analytics_domains

    analytics_file = options.get("analytics")
    if (not analytics_file) or (not analytics_file.endswith(".csv")):
        no_csv = "--analytics should point to the file path or URL to a CSV of participating domains."
        logging.error(no_csv)
        return False

    # It's a URL, download it first.
    if analytics_file.startswith("http:") or analytics_file.startswith("https:"):

        analytics_path = os.path.join(utils.cache_dir(), "analytics.csv")

        try:
            response = requests.get(analytics_file)
            utils.write(response.text, analytics_path)
        except:
            no_csv = "--analytics URL not downloaded successfully."
            logging.error(no_csv)
            return False

    # Otherwise, read it off the disk
    else:
        analytics_path = analytics_file

        if (not os.path.exists(analytics_path)):
            no_csv = "--analytics file not found."
            logging.error(no_csv)
            return False

    analytics_domains = utils.load_domains(analytics_path)

    return True
Example #4
def gather(suffix, options):
    url = options.get("url")
    if url is None:
        logging.warn("A --url is required. (Can be a local path.)")
        exit(1)

    # remote URL
    if url.startswith("http:") or url.startswith("https:"):
        # Though it's saved in cache/, it will be downloaded every time.
        remote_path = os.path.join(utils.cache_dir(), "url.csv")

        try:
            response = requests.get(url)
            utils.write(response.text, remote_path)
        except:
            logging.error("Remote URL not downloaded successfully.")
            print(utils.format_last_exception())
            exit(1)

    # local path
    else:
        remote_path = url

    for domain in utils.load_domains(remote_path):
        yield domain
Example #5
def init(environment, options):
    global redirects
    global config

    redirects_file = options.get("a11y_redirects")
    config_file = options.get("a11y_config")

    # Parse redirects
    if redirects_file:
        if not redirects_file.endswith(".yml"):
            logging.error("--a11y_redirects should be a YML file")
            return False

        # if remote, try to download
        if redirects_file.startswith("http:") or redirects_file.startswith(
                "https:"):
            redirects_path = os.path.join(utils.cache_dir(),
                                          "a11y_redirects.yml")

            try:
                response = requests.get(redirects_file)
                utils.write(response.text, redirects_path)
            except:
                logging.error(
                    "--a11y_redirects URL not downloaded successfully.")
                return False

        # Otherwise, read it off the disk
        else:
            redirects_path = redirects_file

            if (not os.path.exists(redirects_path)):
                logging.error("--a11y_redirects file not found.")
                return False

        with open(redirects_path, 'r') as f:
            redirects = yaml.load(f)

    # Get config
    if config_file:
        if not config_file.endswith(".json"):
            logging.error("--a11y_config should be a json file")
            return False

        # if remote, try to download
        if config_file.startswith("http:") or config_file.startswith("https:"):
            config_path = os.path.join(utils.cache_dir(), "a11y_config.json")

            try:
                response = requests.get(config_file)
                utils.write(response.text, config_path)
            except:
                logging.error("--a11y_config URL not downloaded successfully.")
                return False

        # Otherwise, read it off the disk
        else:
            config_path = config_file

            if (not os.path.exists(config_path)):
                logging.error("--a11y_config file not found.")
                return False

        config = config_path
    return True
Example #6
def scan(domain, options):
    logging.debug("[%s][pshtt]" % domain)

    # cache output from pshtt
    cache_pshtt = utils.cache_path(domain, "pshtt", ext="json")

    force = options.get("force", False)
    data = None

    if (force is False) and (os.path.exists(cache_pshtt)):
        logging.debug("\tCached.")
        raw = open(cache_pshtt).read()
        data = json.loads(raw)
        if (data.__class__ is dict) and data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s" % (command, domain))

        flags = "--json --user-agent \"%s\" --timeout %i --preload-cache %s" % (
            user_agent, timeout, preload_cache)

        # Only useful when debugging interaction between projects.
        # flags = "%s --debug" % flags

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version
        # Really un-ideal, but calling out to Python2 from Python 3 is a nightmare.
        # I don't think this tool's threat model includes untrusted CSV, either.
        raw = utils.unsafe_execute("%s && %s %s %s" %
                                   (pyenv_init, command, domain, flags))

        if not raw:
            utils.write(utils.invalid({}), cache_pshtt)
            logging.warn("\tBad news scanning, sorry!")
            return None

        data = json.loads(raw)
        utils.write(utils.json_for(data), utils.cache_path(domain, "pshtt"))

    # pshtt scanner uses JSON arrays, even for single items
    data = data[0]

    row = []
    for field in headers:
        value = data[field]

        # TODO: Fix this upstream
        if (field != "HSTS Header") and (field != "HSTS Max Age") and (
                field != "Redirect To"):
            if value is None:
                value = False

        row.append(value)

    yield row
Example #7
def scan(domain, options):
    logging.debug("[%s][analytics]" % domain)
    logging.debug("\tChecking file.")

    data = {'participating': (domain in analytics_domains)}

    cache = utils.cache_path(domain, "analytics")
    utils.write(utils.json_for(data), cache)

    yield [data['participating']]
Example #8
def scan(domain, options):
    logging.debug("[%s][pageload]" % domain)

    inspection = utils.data_for(domain, "inspect")

    # If we have data from inspect, skip if it's not a live domain.
    if inspection and (not inspection.get("up")):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from inspect, skip if it's just a redirector.
    if inspection and (inspection.get("redirect") is True):
        logging.debug(
            "\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from inspect, use the canonical endpoint.
        if inspection and inspection.get("canonical"):
            url = inspection.get("canonical")

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" %
                      (command, url))
        raw = utils.scan(
            [command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
Example #9
def scan(domain, options):
    logging.debug("[%s][pshtt]" % domain)

    # cache output from pshtt
    cache_pshtt = utils.cache_path(domain, "pshtt", ext="json")

    force = options.get("force", False)
    data = None

    if (force is False) and (os.path.exists(cache_pshtt)):
        logging.debug("\tCached.")
        raw = open(cache_pshtt).read()
        data = json.loads(raw)
        if (data.__class__ is dict) and data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s" % (command, domain))

        flags = "--json --user-agent \"%s\" --timeout %i --preload-cache %s" % (user_agent, timeout, preload_cache)

        # Only useful when debugging interaction between projects.
        # flags = "%s --debug" % flags

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version
        # Really un-ideal, but calling out to Python2 from Python 3 is a nightmare.
        # I don't think this tool's threat model includes untrusted CSV, either.
        raw = utils.unsafe_execute("%s && %s %s %s" % (pyenv_init, command, domain, flags))

        if not raw:
            utils.write(utils.invalid({}), cache_pshtt)
            logging.warn("\tBad news scanning, sorry!")
            return None

        data = json.loads(raw)
        utils.write(utils.json_for(data), utils.cache_path(domain, "pshtt"))

    # pshtt scanner uses JSON arrays, even for single items
    data = data[0]

    row = []
    for field in headers:
        value = data[field]

        # TODO: Fix this upstream
        if (field != "HSTS Header") and (field != "HSTS Max Age") and (field != "Redirect To"):
            if value is None:
                value = False

        row.append(value)

    yield row
Example #10
def scan(domain, options):
    logging.debug("[%s][analytics]" % domain)
    logging.debug("\tChecking file.")

    data = {
        'participating': (domain in analytics_domains)
    }

    cache = utils.cache_path(domain, "analytics")
    utils.write(utils.json_for(data), cache)

    yield [data['participating']]
Example #11
def scan(domain, options):
    logging.debug("[%s][pageload]" % domain)

    inspection = utils.data_for(domain, "inspect")

    # If we have data from inspect, skip if it's not a live domain.
    if inspection and (not inspection.get("up")):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from inspect, skip if it's just a redirector.
    if inspection and (inspection.get("redirect") is True):
        logging.debug("\tSkipping, domain seen as just a redirector during inspection.")
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from inspect, use the canonical endpoint.
        if inspection and inspection.get("canonical"):
            url = inspection.get("canonical")

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "pageload")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug("\t %s %s --reporter=json --ignore-ssl-errors" % (command, url))
        raw = utils.scan([command, url, "--reporter=json", "--ignore-ssl-errors"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    yield [data['metrics'][metric] for metric in interesting_metrics]
Example #12
def scan(domain, options):
    logging.debug("[%s][pshtt]" % domain)

    # cache output from pshtt
    cache_pshtt = utils.cache_path(domain, "pshtt", ext="json")

    force = options.get("force", False)
    data = None

    if (force is False) and (os.path.exists(cache_pshtt)):
        logging.debug("\tCached.")
        raw = open(cache_pshtt).read()
        data = json.loads(raw)
        if (data.__class__ is dict) and data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s" % (command, domain))

        raw = utils.scan([
            command,
            domain,
            '--json',
            '--user-agent', '\"%s\"' % user_agent,
            '--timeout', str(timeout),
            '--preload-cache', preload_cache
        ])

        if not raw:
            utils.write(utils.invalid({}), cache_pshtt)
            logging.warn("\tBad news scanning, sorry!")
            return None

        data = json.loads(raw)
        utils.write(utils.json_for(data), utils.cache_path(domain, "pshtt"))

    # pshtt scanner uses JSON arrays, even for single items
    data = data[0]

    row = []
    for field in headers:
        value = data[field]

        # TODO: Fix this upstream
        if (field != "HSTS Header") and (field != "HSTS Max Age") and (field != "Redirect To"):
            if value is None:
                value = False

        row.append(value)

    yield row
Example #13
def scan(domain, options):
    logging.debug("[%s][trustymail]" % domain)

    # cache output from trustymail
    cache_trustymail = utils.cache_path(domain, "trustymail", ext="json")

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache_trustymail)):
        logging.debug("\tCached.")
        raw = open(cache_trustymail).read()
        data = json.loads(raw)
        if (data.__class__ is dict) and data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s" % (command, domain))

        raw = utils.scan([
            command,
            domain,
            '--json',
            '--timeout',
            str(timeout),
        ])

        if not raw:
            utils.write(utils.invalid({}), cache_trustymail)
            logging.warn("\tBad news scanning, sorry!")
            return None

        data = json.loads(raw)
        utils.write(utils.json_for(data),
                    utils.cache_path(domain, "trustymail"))

    # trustymail scanner follows pshtt in using JSON arrays, even for single items
    data = data[0]

    row = []
    for field in headers:
        value = data[field]

        row.append(value)

    yield row
Example #14
def scan(domain, options):
    logging.debug("[%s][inspect]" % domain)

    # cache JSON as it comes back from site-inspector
    cache = utils.cache_path(domain, "inspect")
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s --http" % (command, domain))
        raw = utils.scan([command, domain, "--http"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None
        utils.write(raw, cache)
        data = json.loads(raw)

    # TODO: get this from a site-inspector field directly
    canonical_https = data['endpoints']['https'][data['canonical_endpoint']]
    # TODO: guarantee these as present in site-inspector
    https_valid = canonical_https.get('https_valid', False)
    https_bad_chain = canonical_https.get('https_bad_chain', False)
    https_bad_name = canonical_https.get('https_bad_name', False)
    # TODO: site-inspector should float this up
    hsts_details = canonical_https.get('hsts_details', {})
    max_age = hsts_details.get('max_age', None)

    yield [
        data['canonical'], data['up'],
        data['redirect'], data['redirect_to'],
        https_valid, data['default_https'], data['downgrade_https'],
        data['enforce_https'],
        https_bad_chain, https_bad_name,
        data['hsts'], data['hsts_header'],
        max_age,
        data['hsts_entire_domain'],
        data['hsts_entire_domain_preload'],
        domain in chrome_preload_list,
        data['broken_root'], data['broken_www']
    ]
Example #15
def check_wildcard(subdomain, options):

    wildcard = wildcard_for(subdomain)

    cache = utils.cache_path(subdomain, "subdomains")
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tDNS info cached.")
        raw = open(cache).read()
        data = json.loads(raw)

    else:
        logging.debug("\t dig +short '%s'" % wildcard)
        raw_wild = utils.unsafe_execute("dig +short '%s'" % wildcard)

        if raw_wild == "":
            raw_wild = None
            raw_self = None
        else:
            logging.debug("\t dig +short '%s'" % subdomain)
            raw_self = utils.unsafe_execute("dig +short '%s'" % subdomain)

        if raw_wild:
            parsed_wild = raw_wild.split("\n")
            parsed_wild.sort()
        else:
            parsed_wild = None

        if raw_self:
            parsed_self = raw_self.split("\n")
            parsed_self.sort()
        else:
            parsed_self = None

        data = {'response': {'wild': parsed_wild, 'itself': parsed_self}}
        utils.write(
            utils.json_for(data),
            cache
        )

    return data['response']
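A standalone sketch, assuming only that dig is on the PATH (this helper is not part of the example above), of the same '+short' lookup done through subprocess's list form rather than an interpolated shell string:

import subprocess

def dig_short(name):
    # Run `dig +short` without going through a shell, so the hostname is never
    # interpolated into a command string.
    result = subprocess.run(
        ["dig", "+short", name],
        capture_output=True, text=True, check=False
    )
    # Mirror the handling above: a sorted list of answers, or None if there were none.
    answers = sorted(line for line in result.stdout.splitlines() if line)
    return answers or None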
Example #16
def scan(domain, options):
    logging.debug("[%s][inspect]" % domain)

    # cache JSON as it comes back from site-inspector
    cache = utils.cache_path(domain, "inspect")
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    else:
        logging.debug("\t %s %s --http" % (command, domain))
        raw = utils.scan([command, domain, "--http"])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None
        utils.write(raw, cache)
        data = json.loads(raw)

    # TODO: get this from a site-inspector field directly
    canonical_https = data['endpoints']['https'][data['canonical_endpoint']]
    # TODO: guarantee these as present in site-inspector
    https_valid = canonical_https.get('https_valid', False)
    https_bad_chain = canonical_https.get('https_bad_chain', False)
    https_bad_name = canonical_https.get('https_bad_name', False)
    # TODO: site-inspector should float this up
    hsts_details = canonical_https.get('hsts_details', {})
    max_age = hsts_details.get('max_age', None)

    yield [
        data['canonical'], data['up'], data['redirect'], data['redirect_to'],
        https_valid, data['default_https'], data['downgrade_https'],
        data['enforce_https'], https_bad_chain, https_bad_name, data['hsts'],
        data['hsts_header'], max_age, data['hsts_entire_domain'],
        data['hsts_entire_domain_preload'], domain in chrome_preload_list,
        data['broken_root'], data['broken_www']
    ]
Example #17
def check_wildcard(subdomain, options):

    wildcard = wildcard_for(subdomain)

    cache = utils.cache_path(subdomain, "subdomains")
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tDNS info cached.")
        raw = open(cache).read()
        data = json.loads(raw)

    else:
        logging.debug("\t dig +short '%s'" % wildcard)
        raw_wild = utils.unsafe_execute("dig +short '%s'" % wildcard)

        if raw_wild == "":
            raw_wild = None
            raw_self = None
        else:
            logging.debug("\t dig +short '%s'" % subdomain)
            raw_self = utils.unsafe_execute("dig +short '%s'" % subdomain)

        if raw_wild:
            parsed_wild = raw_wild.split("\n")
            parsed_wild.sort()
        else:
            parsed_wild = None

        if raw_self:
            parsed_self = raw_self.split("\n")
            parsed_self.sort()
        else:
            parsed_self = None

        data = {'response': {'wild': parsed_wild, 'itself': parsed_self}}
        utils.write(utils.json_for(data), cache)

    return data['response']
Example #18
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if pshtt data says canonical endpoint uses www and this domain
    # doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")
    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache_xml)):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()

    else:
        logging.debug("\t %s %s" % (command, scan_domain))
        # use scan_domain (possibly www-prefixed) to do actual scan

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version
        # Really un-ideal, but calling out to Python2 from Python 3 is a nightmare.
        # I don't think this tool's threat model includes untrusted CSV, either.
        raw = utils.unsafe_execute("%s && %s --regular --quiet %s --xml_out=%s" % (pyenv_init, command, scan_domain, cache_xml))

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'], data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'], data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'), data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'), data['config'].get('all_rc4'),

        data['config'].get('ocsp_stapling'),

        data['certs'].get('key_type'), data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'), data['certs'].get('any_sha1'),
        data['certs'].get('not_before'), data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),

        data.get('errors')
    ]
Example #19
def scan(domain, options):
    logging.debug("[%s][tls]" % domain)

    # If pshtt data exists, check to see if we can skip.
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported.")
        return None

    # cache reformatted JSON from ssllabs
    cache = utils.cache_path(domain, "tls")

    # Optional: if pshtt data says canonical endpoint uses www and this domain
    # doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)

        if data.get('invalid'):
            return None
    else:
        logging.debug("\t %s %s" % (command, scan_domain))

        usecache = str(not force).lower()

        if options.get("debug"):
            cmd = [
                command,
                "--usecache=%s" % usecache, "--verbosity=debug", scan_domain
            ]
        else:
            cmd = [command, "--usecache=%s" % usecache, "--quiet", scan_domain]

        raw = utils.scan(cmd)
        if raw:
            data = json.loads(raw)

            # if SSL Labs gave us back an error response, cache this
            # as an invalid entry.
            if len(data) < 1:
                utils.write(utils.invalid({'response': data}), cache)
                return None

            # we only give ssllabs-scan one at a time,
            # so we can de-pluralize this
            data = data[0]

            # if SSL Labs had an error hitting the site, cache this
            # as an invalid entry.
            if data["status"] == "ERROR":
                utils.write(utils.invalid(data), cache)
                return None

            utils.write(utils.json_for(data), cache)
        else:
            return None
            # raise Exception("Invalid data from ssllabs-scan: %s" % raw)

    # can return multiple rows, one for each 'endpoint'
    for endpoint in data['endpoints']:

        # this meant it couldn't connect to the endpoint
        if not endpoint.get("grade"):
            continue

        sslv3 = False
        tlsv12 = False
        for protocol in endpoint['details']['protocols']:
            if ((protocol['name'] == "SSL")
                    and (protocol['version'] == '3.0')):
                sslv3 = True
            if ((protocol['name'] == "TLS")
                    and (protocol['version'] == '1.2')):
                tlsv12 = True

        spdy = False
        h2 = False
        npn = endpoint['details'].get('npnProtocols', None)
        if npn:
            spdy = ("spdy" in npn)
            h2 = ("h2" in npn)

        yield [
            endpoint['grade'], endpoint['details']['cert']['sigAlg'],
            endpoint['details']['key']['alg'],
            endpoint['details']['key']['size'],
            endpoint['details']['forwardSecrecy'],
            endpoint['details']['ocspStapling'],
            endpoint['details'].get('fallbackScsv', "N/A"),
            endpoint['details']['supportsRc4'],
            sslv3, tlsv12, spdy, endpoint['details']['sniRequired'], h2
        ]
Example #20
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in pshtt scan.
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported.")
        return None

    # Optional: if pshtt data says canonical endpoint uses www and this domain
    # doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache JSON from sslyze
    cache_json = utils.cache_path(domain, "sslyze")
    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_json))

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache_json)):
        logging.debug("\tCached.")
        raw_json = open(cache_json).read()
        try:
            data = json.loads(raw_json)
            if (data.__class__ is dict) and data.get('invalid'):
                return None
        except json.decoder.JSONDecodeError as err:
            logging.warn("Error decoding JSON.  Cache probably corrupted.")
            return None

    else:
        # use scan_domain (possibly www-prefixed) to do actual scan
        logging.debug("\t %s %s" % (command, scan_domain))

        # This is --regular minus --heartbleed
        # See: https://github.com/nabla-c0d3/sslyze/issues/217
        raw_response = utils.scan([
            command,
            "--sslv2", "--sslv3", "--tlsv1", "--tlsv1_1", "--tlsv1_2",
            "--reneg", "--resum", "--certinfo",
            "--http_get", "--hide_rejected_ciphers",
            "--compression", "--openssl_ccs",
            "--fallback", "--quiet",
            scan_domain, "--json_out=%s" % cache_json
        ])

        if raw_response is None:
            # TODO: save standard invalid JSON data...?
            utils.write(utils.invalid({}), cache_json)
            logging.warn("\tBad news scanning, sorry!")
            return None

        raw_json = utils.scan(["cat", cache_json])
        if not raw_json:
            logging.warn("\tBad news reading JSON, sorry!")
            return None

        utils.write(raw_json, cache_json)

    data = parse_sslyze(raw_json)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    yield [
        scan_domain,
        data['protocols']['sslv2'], data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'], data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'), data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'), data['config'].get('all_rc4'),

        data['certs'].get('key_type'), data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1_served'),
        data['certs'].get('any_sha1_constructed'),
        data['certs'].get('not_before'), data['certs'].get('not_after'),
        data['certs'].get('served_issuer'), data['certs'].get('constructed_issuer'),

        data.get('errors')
    ]
Example #21
def scan(domain, options):
	logging.debug("[%s][sslyze]" % domain)

	# Optional: skip domains which don't support HTTPS in prior inspection
	inspection = utils.data_for(domain, "inspect")
	if inspection and (not inspection.get("support_https")):
		logging.debug("\tSkipping, HTTPS not supported in inspection.")
		return None

	# Optional: if inspect data says canonical endpoint uses www and this domain
	# doesn't have it, add it.
	if inspection and (inspection.get("canonical_endpoint") == "www") and (not domain.startswith("www.")):
		scan_domain = "www.%s" % domain
	else:
		scan_domain = domain

	# cache XML from sslyze
	cache_xml = utils.cache_path(domain, "sslyze", ext="xml")
	# because sslyze manages its own output (can't yet print to stdout),
	# we have to mkdir_p the path ourselves
	utils.mkdir_p(os.path.dirname(cache_xml))

	force = options.get("force", False)

	if (force is False) and (os.path.exists(cache_xml)):
		logging.debug("\tCached.")
		xml = open(cache_xml).read()

	else:
		logging.debug("\t %s %s" % (command, domain))
		# use scan_domain (possibly www-prefixed) to do actual scan
		raw = utils.scan([command, "--regular", "--quiet", scan_domain, "--xml_out=%s" % cache_xml], env=command_env)
		
		if raw is None:
			# TODO: save standard invalid XML data...?
			logging.warn("\tBad news scanning, sorry!")
			return None

		xml = utils.scan(["cat", cache_xml])
		if not xml:
			logging.warn("\tBad news reading XML, sorry!")
			return None

		utils.write(xml, cache_xml)

	data = parse_sslyze(xml)

	if data is None:
		logging.warn("\tNo valid target for scanning, couldn't connect.")
		return None

	utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

	yield [
		data['protocols']['sslv2'], data['protocols']['sslv3'], 
		data['protocols']['tlsv1.0'], data['protocols']['tlsv1.1'], 
		data['protocols']['tlsv1.2'], 

		data['config'].get('any_dhe'), data['config'].get('all_dhe'),
		data['config'].get('weakest_dh'),
		data['config'].get('any_rc4'), data['config'].get('all_rc4'),

		data['config'].get('ocsp_stapling'),
		
		data['certs'].get('key_type'), data['certs'].get('key_length'),
		data['certs'].get('leaf_signature'), data['certs'].get('any_sha1'),
		data['certs'].get('not_before'), data['certs'].get('not_after'),
		data['certs'].get('served_issuer'), 

		data.get('errors')
	]
Example #22
def scan(domain, options):
    logging.debug("[%s][tls]" % domain)

    # If inspection data exists, check to see if we can skip.
    inspection = utils.data_for(domain, "inspect")
    if inspection and (not inspection.get("support_https")):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        yield None

    else:
        # cache reformatted JSON from ssllabs
        cache = utils.cache_path(domain, "tls")

        force = options.get("force", False)

        if (force is False) and (os.path.exists(cache)):
            logging.debug("\tCached.")
            raw = open(cache).read()
            data = json.loads(raw)

            if data.get('invalid'):
                return None
        else:
            logging.debug("\t %s %s" % (command, domain))

            usecache = str(not force).lower()

            if options.get("debug"):
                cmd = [command, "--usecache=%s" % usecache,
                       "--verbosity=debug", domain]
            else:
                cmd = [command, "--usecache=%s" % usecache,
                       "--quiet", domain]
            raw = utils.scan(cmd)
            if raw:
                data = json.loads(raw)

                # we only give ssllabs-scan one at a time,
                # so we can de-pluralize this
                data = data[0]

                # if SSL Labs had an error hitting the site, cache this
                # as an invalid entry.
                if data["status"] == "ERROR":
                    utils.write(utils.invalid(data), cache)
                    return None

                utils.write(utils.json_for(data), cache)
            else:
                return None
                # raise Exception("Invalid data from ssllabs-scan: %s" % raw)

        # can return multiple rows, one for each 'endpoint'
        for endpoint in data['endpoints']:

            # this meant it couldn't connect to the endpoint
            if not endpoint.get("grade"):
                continue

            sslv3 = False
            tlsv12 = False
            for protocol in endpoint['details']['protocols']:
                if ((protocol['name'] == "SSL") and
                        (protocol['version'] == '3.0')):
                    sslv3 = True
                if ((protocol['name'] == "TLS") and
                        (protocol['version'] == '1.2')):
                    tlsv12 = True

            spdy = False
            h2 = False
            npn = endpoint['details'].get('npnProtocols', None)
            if npn:
                spdy = ("spdy" in npn)
                h2 = ("h2-" in npn)

            def ccs_map(n):
                return {
                    -1: "N/A (Error)",
                    0: "N/A (Unknown)",
                    1: "No (not vulnerable)",
                    2: "No (not exploitable)",
                    3: "Yes"
                }[n]

            def fs_map(n):
                return {
                    0: "0 - No",
                    1: "1 - Some",
                    2: "2 - Modern",
                    4: "3 - Robust"
                }[n]

            yield [
                endpoint['grade'],
                endpoint['details']['cert']['sigAlg'],
                endpoint['details']['key']['alg'],
                endpoint['details']['key']['size'],
                fs_map(endpoint['details']['forwardSecrecy']),
                endpoint['details']['ocspStapling'],
                endpoint['details'].get('fallbackScsv', "N/A"),
                endpoint['details'].get('freak'),
                ccs_map(endpoint['details']['openSslCcs']),
                sslv3,
                tlsv12,
                spdy,
                endpoint['details']['sniRequired'],
                h2
            ]
Example #23
def get_errors_from_pa11y_lambda_scan(domain, cache):
    client = boto3.client(
        'lambda',
        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
        region_name=os.environ.get('AWS_REGION_NAME'))

    lambda_options = {
        'url': domain,
        'pa11yOptions': {
            'standard': 'WCAG2AA',
            'wait': 500,
            'ignore': [
                'notice', 'warning',
                'WCAG2AA.Principle1.Guideline1_4.1_4_3.G18.BgImage',
                'WCAG2AA.Principle1.Guideline1_4.1_4_3.G18.Abs',
                'WCAG2AA.Principle1.Guideline1_4.1_4_3.G145.Abs',
                'WCAG2AA.Principle3.Guideline3_1.3_1_1.H57.2',
                'WCAG2AA.Principle3.Guideline3_1.3_1_1.H57.3',
                'WCAG2AA.Principle3.Guideline3_1.3_1_2.H58.1',
                'WCAG2AA.Principle4.Guideline4_1.4_1_1.F77',
                'WCAG2AA.Principle4.Guideline4_1.4_1_2.H91',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.G141',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H39',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H42',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H43',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H44',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H48',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H49',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H63',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H65',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H71',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H73',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H85',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H93',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.F68',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H39.3.LayoutTable',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H42.2',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H43.HeadersRequired',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H71.NoLegend',
                'WCAG2AA.Principle1.Guideline1_3.1_3_1.H73.3.LayoutTable',
                'WCAG2AA.Principle2.Guideline2_2.2_2_1.F41.2',
                'WCAG2AA.Principle2.Guideline2_4.2_4_1.G1,G123,G124.NoSuchID',
                'WCAG2AA.Principle2.Guideline2_4.2_4_1.H64.1',
                'WCAG2AA.Principle2.Guideline2_4.2_4_2.H25.1.EmptyTitle',
                'WCAG2AA.Principle3.Guideline3_1.3_1_1.H57.3.Lang',
                'WCAG2AA.Principle3.Guideline3_1.3_1_1.H57.3.XmlLang',
                'WCAG2AA.Principle3.Guideline3_1.3_1_2.H58.1.Lang',
                'WCAG2AA.Principle3.Guideline3_2.3_2_2.H32.2'
            ]
        }
    }

    payload = json.dumps(lambda_options).encode()

    response = client.invoke(
        FunctionName=os.environ['AWS_LAMBDA_PA11Y_FUNCTION_NAME'],
        Payload=payload,
    )

    response_payload_bytes = response['Payload'].read()
    response_payload_string = response_payload_bytes.decode('UTF-8')
    response_payload_json = json.loads(response_payload_string)

    logging.debug("Invoking a11y_lambda function: %s" % lambda_options)

    results = response_payload_json
    errors = get_errors_from_results(results)
    cachable = json.dumps({'results': errors})
    logging.debug("Writing to cache: %s" % domain)
    content = cachable
    destination = cache
    utils.write(content, destination)
    return errors
Example #24
def network_check(subdomain, endpoint, options):
    cache = utils.cache_path(subdomain, "subdomains")

    wildcard = wildcard_for(subdomain)

    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tDNS and content cached.")
        raw = open(cache).read()
        data = json.loads(raw)

    # Hit DNS and HTTP.
    else:
        # HTTP content: just use curl.
        #
        # Turn on --insecure because we want to see the content even at sites
        # where the certificate isn't right or proper.
        logging.debug("\t curl --silent --insecure %s" % endpoint)
        content = utils.scan(["curl", "--silent", "--insecure", endpoint])

        # DNS content: just use dig.
        #
        # Not awesome - uses an unsafe shell execution of `dig` to look up DNS,
        # as I couldn't figure out a way to get "+short" to play nice with
        # the more secure execution methods available to me. Since this system
        # isn't expected to process untrusted input, this should be okay.
        logging.debug("\t dig +short '%s'" % wildcard)
        raw_wild = utils.unsafe_execute("dig +short '%s'" % wildcard)

        if raw_wild == "":
            raw_wild = None
            raw_self = None
        else:
            logging.debug("\t dig +short '%s'" % subdomain)
            raw_self = utils.unsafe_execute("dig +short '%s'" % subdomain)

        if raw_wild:
            parsed_wild = raw_wild.split("\n")
            parsed_wild.sort()
        else:
            parsed_wild = None

        if raw_self:
            parsed_self = raw_self.split("\n")
            parsed_self.sort()
        else:
            parsed_self = None

        # Cache HTTP and DNS data to disk.
        data = {
            'response': {
                'content': content,
                'wildcard_dns': parsed_wild,
                'self_dns': parsed_self
            }
        }

        if (parsed_wild) and (parsed_wild == parsed_self):
            data['response']['matched_wild'] = True
        else:
            data['response']['matched_wild'] = False

        utils.write(utils.json_for(data), cache)

    return data['response']
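A tiny standalone illustration, with made-up addresses, of the matched_wild comparison used above: answer lists are sorted first so ordering differences don't affect the result.

wild = sorted(["192.0.2.11", "192.0.2.10"])
itself = sorted(["192.0.2.10", "192.0.2.11"])
matched_wild = bool(wild) and (wild == itself)
print(matched_wild)  # True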
Example #25
def paginated_mode(suffix, options, uid, api_key):
    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    certificate_api = certificates.CensysCertificates(uid, api_key)

    if 'query' in options and options['query']:
        query = options['query']
    else:
        query = "parsed.subject.common_name:\"%s\" or parsed.extensions.subject_alt_name.dns_names:\"%s\"" % (
            suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    # time to sleep between requests (defaults to 5s)
    delay = int(options.get("delay", 5))

    # Censys page size, fixed
    page_size = 100

    # Start page defaults to 1.
    start_page = int(options.get("start", 1))

    # End page defaults to whatever the API says is the last one.
    end_page = options.get("end", None)
    if end_page is None:
        end_page = get_end_page(query, certificate_api)
        if end_page is None:
            logging.warn("Error looking up number of pages.")
            exit(1)
    else:
        end_page = int(end_page)

    max_records = ((end_page - start_page) + 1) * page_size

    fields = [
        "parsed.subject.common_name",
        "parsed.extensions.subject_alt_name.dns_names"
    ]

    current_page = start_page

    logging.warn("Fetching up to %i records, starting at page %i." %
                 (max_records, start_page))
    last_cached = False
    force = options.get("force", False)

    while current_page <= end_page:
        if (not last_cached) and (current_page > start_page):
            logging.debug("(Waiting %is before fetching page %i.)" %
                          (delay, current_page))
            last_cached = False
            time.sleep(delay)

        logging.debug("Fetching page %i." % current_page)

        cache_page = utils.cache_path(str(current_page), "censys")
        if (force is False) and (os.path.exists(cache_page)):
            logging.warn("\t[%i] Cached page." % current_page)
            last_cached = True

            certs_raw = open(cache_page).read()
            certs = json.loads(certs_raw)
            if (certs.__class__ is dict) and certs.get('invalid'):
                current_page += 1
                continue
        else:
            try:
                certs = list(
                    certificate_api.search(query,
                                           fields=fields,
                                           page=current_page,
                                           max_records=page_size))
                utils.write(utils.json_for(certs), cache_page)
            except censys.base.CensysException:
                logging.warn(utils.format_last_exception())
                logging.warn("Censys error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                current_page += 1
                continue
            except:
                logging.warn(utils.format_last_exception())
                logging.warn("Unexpected error, skipping page %i." %
                             current_page)
                utils.write(utils.invalid({}), cache_page)
                exit(1)

        for cert in certs:
            # Common name + SANs
            names = cert.get('parsed.subject.common_name', []) + cert.get(
                'parsed.extensions.subject_alt_name.dns_names', [])
            logging.debug(names)

            for name in names:
                hostnames_map[sanitize_name(name)] = None

        current_page += 1

    logging.debug("Done fetching from API.")

    return hostnames_map
Example #26
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if pshtt data says canonical endpoint uses www and this domain
    # doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")
    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache_xml)):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()

    else:
        logging.debug("\t %s %s" % (command, scan_domain))
        # use scan_domain (possibly www-prefixed) to do actual scan

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version
        # Really un-ideal, but calling out to Python2 from Python 3 is a nightmare.
        # I don't think this tool's threat model includes untrusted CSV, either.
        raw = utils.unsafe_execute(
            "%s && %s --regular --quiet %s --xml_out=%s" %
            (pyenv_init, command, scan_domain, cache_xml))

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'], data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'], data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'], data['config'].get('any_dhe'),
        data['config'].get('all_dhe'), data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'), data['config'].get('all_rc4'),
        data['config'].get('ocsp_stapling'), data['certs'].get('key_type'),
        data['certs'].get('key_length'), data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1'), data['certs'].get('not_before'),
        data['certs'].get('not_after'), data['certs'].get('served_issuer'),
        data.get('errors')
    ]
Example #27
def gather(suffix, options):
    # Register a (free) Censys.io account to get a UID and API key.
    uid = options.get("censys_id", None)
    api_key = options.get("censys_key", None)

    if (uid is None) or (api_key is None):
        uid = os.environ.get("CENSYS_UID", None)
        api_key = os.environ.get("CENSYS_API_KEY", None)

    if (uid is None) or (api_key is None):
        logging.warn(
            "No Censys credentials set. API key required to use the Censys API."
        )
        exit(1)

    certificate_api = certificates.CensysCertificates(uid, api_key)

    query = "parsed.subject.common_name:\"%s\" or parsed.extensions.subject_alt_name.dns_names:\"%s\"" % (
        suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    # Hostnames beginning with a wildcard prefix will have the prefix stripped.
    wildcard_pattern = re.compile(r"^\*\.")
    redacted_pattern = re.compile(r"^(\?\.)+")

    # time to sleep between requests (defaults to 5s)
    delay = int(options.get("delay", 5))

    # Censys page size, fixed
    page_size = 100

    # Start page defaults to 1.
    start_page = int(options.get("start", 1))

    # End page defaults to whatever the API says is the last one.
    end_page = options.get("end", None)
    if end_page is None:
        end_page = get_end_page(query, certificate_api)
        if end_page is None:
            logging.warn("Error looking up number of pages.")
            exit(1)
    else:
        end_page = int(end_page)

    max_records = ((end_page - start_page) + 1) * page_size

    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    fields = [
        "parsed.subject.common_name",
        "parsed.extensions.subject_alt_name.dns_names"
    ]

    current_page = start_page

    logging.warn("Fetching up to %i records, starting at page %i." %
                 (max_records, start_page))
    last_cached = False
    force = options.get("force", False)

    while current_page <= end_page:
        if (not last_cached) and (current_page > start_page):
            logging.debug("(Waiting %is before fetching page %i.)" %
                          (delay, current_page))
            last_cached = False
            time.sleep(delay)

        logging.debug("Fetching page %i." % current_page)

        cache_page = utils.cache_path(str(current_page), "censys")
        if (force is False) and (os.path.exists(cache_page)):
            logging.warn("\t[%i] Cached page." % current_page)
            last_cached = True

            certs_raw = open(cache_page).read()
            certs = json.loads(certs_raw)
            if (certs.__class__ is dict) and certs.get('invalid'):
                current_page += 1
                continue
        else:
            try:
                certs = list(
                    certificate_api.search(query,
                                           fields=fields,
                                           page=current_page,
                                           max_records=page_size))
                utils.write(utils.json_for(certs), cache_page)
            except censys.base.CensysException:
                logging.warn(utils.format_last_exception())
                logging.warn("Censys error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                current_page += 1
                continue
            except:
                logging.warn(utils.format_last_exception())
                logging.warn("Unexpected error, skipping page %i." %
                             current_page)
                utils.write(utils.invalid({}), cache_page)
                exit(1)

        for cert in certs:
            # Common name + SANs
            names = cert.get('parsed.subject.common_name', []) + cert.get(
                'parsed.extensions.subject_alt_name.dns_names', [])
            logging.debug(names)

            for name in names:
                # Strip off any wildcard prefix.
                name = re.sub(wildcard_pattern, '', name).lower().strip()
                # Strip off any redacted ? prefixes. (Ugh.)
                name = re.sub(redacted_pattern, '', name).lower().strip()
                hostnames_map[name] = None

        current_page += 1

    logging.debug("Done fetching from API.")

    # Iterator doesn't buy much efficiency, since we paginated already.
    # Necessary evil to de-dupe before returning hostnames, though.
    for hostname in hostnames_map.keys():
        yield hostname
Example #28
def scan(domain, options):
    logging.debug("[%s][third_parties]" % domain)

    # Default timeout is 15s, too little.
    timeout = int(options.get("timeout", 60))

    # If we have data from pshtt, skip if it's not a live domain.
    if utils.domain_not_live(domain):
        logging.debug("\tSkipping, domain not reachable during inspection.")
        return None

    # If we have data from pshtt, skip if it's just a redirector.
    if utils.domain_is_redirect(domain):
        logging.debug(
            "\tSkipping, domain seen as just an external redirector during inspection."
        )
        return None

    # phantomas needs a URL, not just a domain.
    if not (domain.startswith('http://') or domain.startswith('https://')):

        # If we have data from pshtt, use the canonical endpoint.
        if utils.domain_canonical(domain):
            url = utils.domain_canonical(domain)

        # Otherwise, well, whatever.
        else:
            url = 'http://' + domain
    else:
        url = domain

    # calculated_domain = re.sub("https?:\/\/", "", url)

    # We'll cache prettified JSON from the output.
    cache = utils.cache_path(domain, "third_parties")

    # If we've got it cached, use that.
    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)
        if data.get('invalid'):
            return None

    # If no cache, or we should run anyway, do the scan.
    else:
        logging.debug(
            "\t %s %s --modules=domains --reporter=json --timeout=%i --ignore-ssl-errors"
            % (command, url, timeout))
        raw = utils.scan([
            command, url, "--modules=domains", "--reporter=json",
            "--timeout=%i" % timeout, "--ignore-ssl-errors"
        ],
                         allowed_return_codes=[252])
        if not raw:
            utils.write(utils.invalid({}), cache)
            return None

        # It had better be JSON, which we can cache in prettified form.
        data = json.loads(raw)
        utils.write(utils.json_for(data), cache)

    services = services_for(data, domain, options)

    # Convert to CSV row
    known_names = list(known_services.keys())
    known_names.sort()
    known_matches = [
        'Yes' if host in services['known'] else 'No' for host in known_names
    ]

    yield [
        len(services['external']),
        len(services['internal']),
        services['external_requests'],
        services['internal_requests'],
        serialize(services['external']),
        serialize(services['internal']),
        # services['affiliated'],
        # services['unknown']
    ] + known_matches
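The row yielded above is meant to line up with a header built from the same sorted known_services keys; known_services itself is not shown in this excerpt. A hypothetical sketch of the matching header function (the column names are assumptions, not the original scanner's):

def headers():
    # Hypothetical: column names matching the row yielded by scan() above.
    known_names = sorted(known_services.keys())
    return [
        "Number of External Domains", "Number of Internal Domains",
        "External Requests", "Internal Requests",
        "External Domains", "Internal Domains",
    ] + known_names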
Example No. 29
0
def cache_errors(errors, domain, cache):
    # Wrap the collected errors in a JSON envelope and write them to the
    # domain's cache path.
    cachable = json.dumps({'results': errors})
    logging.debug("Writing to cache: %s" % domain)
    utils.write(cachable, cache)
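A minimal usage sketch, assuming errors is a list of error messages collected during a scan and that the caller follows the utils.cache_path convention used elsewhere in this document (the "scanner" cache label is hypothetical):

errors = ["connection timed out", "certificate name mismatch"]
cache_errors(errors, "example.gov", utils.cache_path("example.gov", "scanner"))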
Example No. 30
0
def scan(domain, options):
    logging.debug("[%s][tls]" % domain)

    # If inspection data exists, check to see if we can skip.
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # cache reformatted JSON from ssllabs
    cache = utils.cache_path(domain, "tls")

    # Optional: if pshtt data says canonical endpoint uses www and this domain
    # doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    force = options.get("force", False)

    if (force is False) and (os.path.exists(cache)):
        logging.debug("\tCached.")
        raw = open(cache).read()
        data = json.loads(raw)

        if data.get('invalid'):
            return None
    else:
        logging.debug("\t %s %s" % (command, scan_domain))

        usecache = str(not force).lower()

        if options.get("debug"):
            cmd = [command, "--usecache=%s" % usecache,
                   "--verbosity=debug", scan_domain]
        else:
            cmd = [command, "--usecache=%s" % usecache,
                   "--quiet", scan_domain]

        raw = utils.scan(cmd)
        if raw:
            data = json.loads(raw)

            # if SSL Labs gave us back an error response, cache this
            # as an invalid entry.
            if len(data) < 1:
                utils.write(utils.invalid({'response': data}), cache)
                return None

            # we only give ssllabs-scan one at a time,
            # so we can de-pluralize this
            data = data[0]

            # if SSL Labs had an error hitting the site, cache this
            # as an invalid entry.
            if data["status"] == "ERROR":
                utils.write(utils.invalid(data), cache)
                return None

            utils.write(utils.json_for(data), cache)
        else:
            return None
            # raise Exception("Invalid data from ssllabs-scan: %s" % raw)

    # can return multiple rows, one for each 'endpoint'
    for endpoint in data['endpoints']:

        # this meant it couldn't connect to the endpoint
        if not endpoint.get("grade"):
            continue

        sslv3 = False
        tlsv12 = False
        for protocol in endpoint['details']['protocols']:
            if ((protocol['name'] == "SSL") and
                    (protocol['version'] == '3.0')):
                sslv3 = True
            if ((protocol['name'] == "TLS") and
                    (protocol['version'] == '1.2')):
                tlsv12 = True

        spdy = False
        h2 = False
        npn = endpoint['details'].get('npnProtocols', None)
        if npn:
            spdy = ("spdy" in npn)
            h2 = ("h2" in npn)

        yield [
            endpoint['grade'],
            endpoint['details']['cert']['sigAlg'],
            endpoint['details']['key']['alg'],
            endpoint['details']['key']['size'],
            endpoint['details']['forwardSecrecy'],
            endpoint['details']['ocspStapling'],
            endpoint['details'].get('fallbackScsv', "N/A"),
            endpoint['details']['supportsRc4'],
            sslv3,
            tlsv12,
            spdy,
            endpoint['details']['sniRequired'],
            h2
        ]
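Each yielded row above contains 13 endpoint-level fields in a fixed order. A hypothetical header list matching that order (the column names are assumptions, not taken from the original scanner):

def headers():
    # Hypothetical header matching the per-endpoint row yielded above.
    return [
        "Grade", "Signature Algorithm", "Key Type", "Key Size",
        "Forward Secrecy", "OCSP Stapling", "Fallback SCSV",
        "RC4", "SSLv3", "TLSv1.2", "SPDY", "Requires SNI", "HTTP/2",
    ]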
Example No. 31
0
def scan(domain, options):
    logging.debug("[%s][tls]" % domain)

    # If inspection data exists, check to see if we can skip.
    inspection = utils.data_for(domain, "inspect")
    if inspection and (not inspection.get("support_https")):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    else:
        # cache reformatted JSON from ssllabs
        cache = utils.cache_path(domain, "tls")

        force = options.get("force", False)

        if (force is False) and (os.path.exists(cache)):
            logging.debug("\tCached.")
            raw = open(cache).read()
            data = json.loads(raw)

            if data.get("invalid"):
                return None
        else:
            logging.debug("\t %s %s" % (command, domain))

            usecache = str(not force).lower()

            if options.get("debug"):
                cmd = [command, "--usecache=%s" % usecache, "--verbosity=debug", domain]
            else:
                cmd = [command, "--usecache=%s" % usecache, "--quiet", domain]
            raw = utils.scan(cmd)
            if raw:
                data = json.loads(raw)

                # we only give ssllabs-scan one at a time,
                # so we can de-pluralize this
                data = data[0]

                # if SSL Labs had an error hitting the site, cache this
                # as an invalid entry.
                if data["status"] == "ERROR":
                    utils.write(utils.invalid(data), cache)
                    return None

                utils.write(utils.json_for(data), cache)
            else:
                return None
                # raise Exception("Invalid data from ssllabs-scan: %s" % raw)

        # can return multiple rows, one for each 'endpoint'
        for endpoint in data["endpoints"]:

            # this meant it couldn't connect to the endpoint
            if not endpoint.get("grade"):
                continue

            sslv3 = False
            tlsv12 = False
            for protocol in endpoint["details"]["protocols"]:
                if (protocol["name"] == "SSL") and (protocol["version"] == "3.0"):
                    sslv3 = True
                if (protocol["name"] == "TLS") and (protocol["version"] == "1.2"):
                    tlsv12 = True

            spdy = False
            h2 = False
            npn = endpoint["details"].get("npnProtocols", None)
            if npn:
                spdy = "spdy" in npn
                h2 = "h2-" in npn

            yield [
                endpoint["grade"],
                endpoint["details"]["cert"]["sigAlg"],
                endpoint["details"]["key"]["alg"],
                endpoint["details"]["key"]["size"],
                endpoint["details"]["forwardSecrecy"],
                endpoint["details"]["ocspStapling"],
                endpoint["details"].get("fallbackScsv", "N/A"),
                endpoint["details"]["supportsRc4"],
                sslv3,
                tlsv12,
                spdy,
                endpoint["details"]["sniRequired"],
                h2,
            ]
Example No. 32
0
def network_check(subdomain, endpoint, options):
    cache = utils.cache_path(subdomain, "subdomains")

    wildcard = wildcard_for(subdomain)

    if (options.get("force", False) is False) and (os.path.exists(cache)):
        logging.debug("\tDNS and content cached.")
        raw = open(cache).read()
        data = json.loads(raw)

    # Hit DNS and HTTP.
    else:
        # HTTP content: just use curl.
        #
        # Turn on --insecure because we want to see the content even at sites
        # where the certificate isn't right or proper.
        logging.debug("\t curl --silent --insecure %s" % endpoint)
        content = utils.scan(["curl", "--silent", "--insecure", endpoint])

        # DNS content: just use dig.
        #
        # Not ideal - this uses an unsafe shell execution of `dig` to look up
        # DNS, as I couldn't figure out a way to get "+short" to play nice
        # with the more secure execution methods available to me. Since this
        # system isn't expected to process untrusted input, this should be
        # okay. (A shell-free sketch appears after this function.)
        logging.debug("\t dig +short '%s'" % wildcard)
        raw_wild = utils.unsafe_execute("dig +short '%s'" % wildcard)

        if raw_wild == "":
            raw_wild = None
            raw_self = None
        else:
            logging.debug("\t dig +short '%s'" % subdomain)
            raw_self = utils.unsafe_execute("dig +short '%s'" % subdomain)

        if raw_wild:
            parsed_wild = raw_wild.split("\n")
            parsed_wild.sort()
        else:
            parsed_wild = None

        if raw_self:
            parsed_self = raw_self.split("\n")
            parsed_self.sort()
        else:
            parsed_self = None

        # Cache HTTP and DNS data to disk.
        data = {'response': {
            'content': content,
            'wildcard_dns': parsed_wild,
            'self_dns': parsed_self
        }}

        if (parsed_wild) and (parsed_wild == parsed_self):
            data['response']['matched_wild'] = True
        else:
            data['response']['matched_wild'] = False

        utils.write(utils.json_for(data), cache)

    return data['response']
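Two helpers used above are worth sketching. wildcard_for is not defined in this excerpt; presumably it swaps the left-most label for "*" so wildcard DNS answers can be compared against the subdomain's own records. And while the comment above notes that `dig +short` is run through an unsafe shell, the same lookup can be done without a shell via subprocess. Both definitions below are assumptions, not the original source:

import subprocess

def wildcard_for(subdomain):
    # Hypothetical: "sub.example.gov" -> "*.example.gov".
    return "*." + subdomain.split(".", 1)[-1]

def dig_short(name):
    # Shell-free equivalent of running `dig +short '<name>'`.
    result = subprocess.run(["dig", "+short", name],
                            capture_output=True, text=True)
    return result.stdout.strip()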