def run(self):
    data = [self.clean_row(d) for d in self.a11y_raw]

    parsed_datasets = [
        ('a11y', self.make_a11y_data(data)),
        ('agencies', self.make_agency_data(data)),
        ('domains', self.make_domain_data(data)),
    ]

    mkdir_p(results_dir())

    for name, dataset in parsed_datasets:
        path = '{}/{}.json'.format(results_dir(), name)
        with open(path, 'w+') as f:
            json.dump(dataset, f, indent=2)
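# run() above leans on two helpers that aren't shown in this snippet. A
# minimal sketch of what they might look like, assuming mkdir_p is the usual
# os.makedirs wrapper and results_dir points at a local results/ directory;
# both are assumptions, not the project's actual implementations.

import os

def mkdir_p(path):
    # Like `mkdir -p`: create the directory and any missing parents,
    # without erroring if it already exists.
    os.makedirs(path, exist_ok=True)

def results_dir():
    # Assumed location, relative to this file.
    return os.path.join(os.path.dirname(__file__), "results")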
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    inspection = utils.data_for(domain, "inspect")
    if inspection and (not inspection.get("support_https")):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if inspect data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if inspection and (inspection.get("canonical_endpoint") == "www") and (not domain.startswith("www.")):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_xml):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))
        raw = utils.scan([
            command,
            "--regular", "--quiet",
            scan_domain,
            "--xml_out=%s" % cache_xml
        ], env=command_env)

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),
        data['config'].get('ocsp_stapling'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),

        data.get('errors')
    ]
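# scan() above treats utils.scan as a thin subprocess wrapper that returns
# the command's stdout on success, or None on failure. A minimal sketch
# under that assumption; the project's real helper may differ.

import subprocess

def scan(command, env=None):
    try:
        # Capture stdout as text; a non-zero exit raises CalledProcessError.
        return subprocess.check_output(
            command, env=env, stderr=subprocess.STDOUT,
            universal_newlines=True
        )
    except (subprocess.CalledProcessError, OSError):
        return None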
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in pshtt scan.
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported.")
        return None

    # Optional: if pshtt data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache JSON from sslyze
    cache_json = utils.cache_path(domain, "sslyze")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_json))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_json):
        logging.debug("\tCached.")
        raw_json = open(cache_json).read()
        try:
            data = json.loads(raw_json)
            if (data.__class__ is dict) and data.get('invalid'):
                return None
        except json.decoder.JSONDecodeError:
            logging.warn("Error decoding JSON. Cache probably corrupted.")
            return None
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))

        # This is --regular minus --heartbleed
        # See: https://github.com/nabla-c0d3/sslyze/issues/217
        raw_response = utils.scan([
            command,
            "--sslv2", "--sslv3", "--tlsv1", "--tlsv1_1", "--tlsv1_2",
            "--reneg", "--resum", "--certinfo",
            "--http_get", "--hide_rejected_ciphers",
            "--compression", "--openssl_ccs",
            "--fallback",
            "--quiet",
            scan_domain,
            "--json_out=%s" % cache_json
        ])

        if raw_response is None:
            # TODO: save standard invalid JSON data...?
            utils.write(utils.invalid({}), cache_json)
            logging.warn("\tBad news scanning, sorry!")
            return None

        raw_json = utils.scan(["cat", cache_json])
        if not raw_json:
            logging.warn("\tBad news reading JSON, sorry!")
            return None

        utils.write(raw_json, cache_json)

    data = parse_sslyze(raw_json)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    yield [
        scan_domain,

        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1_served'),
        data['certs'].get('any_sha1_constructed'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),
        data['certs'].get('constructed_issuer'),

        data.get('errors')
    ]
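# The JSON version above writes an 'invalid' marker into the cache after a
# failed scan, and checks for it when reading the cache back. A minimal
# sketch of the utils.invalid helper inferred from that round-trip; this is
# an assumption, not the project's actual code.

import json

def invalid(data=None):
    if data is None:
        data = {}
    data['invalid'] = True
    # Serialized so it survives the json.loads cache check above.
    return json.dumps(data, indent=2)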
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if pshtt data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_xml):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version

        # Really un-ideal, but calling out to Python 2 from Python 3 is a
        # nightmare. I don't think this tool's threat model includes
        # untrusted CSV, either.
        raw = utils.unsafe_execute("%s && %s --regular --quiet %s --xml_out=%s" % (
            pyenv_init, command, scan_domain, cache_xml))

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),
        data['config'].get('ocsp_stapling'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),

        data.get('errors')
    ]
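# Unlike utils.scan's argument list, utils.unsafe_execute above has to run a
# full shell string so the `eval "$(pyenv init -)"` dance works. A minimal
# sketch, assuming it returns stdout on success and None on failure; the
# real helper isn't shown here.

import subprocess

def unsafe_execute(command_string):
    try:
        # shell=True is what makes this "unsafe": the string is interpreted
        # by the shell, so it must never contain untrusted input.
        return subprocess.check_output(
            command_string, shell=True, universal_newlines=True
        )
    except subprocess.CalledProcessError:
        return None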
def main():
    options = utils.options()
    debug = options.get('debug', False)
    encoding = options.get('encoding', 'latin-1')

    name = options.get('name', 'hostnames')
    filter_name = options.get('filter', name)
    filter = filters.get(filter_name, None)
    if filter is None:
        print("No filter by that name. Specify one with --filter.")
        exit(1)

    # Drop output in a directory next to the script.
    this_dir = os.path.dirname(__file__)
    output = os.path.join(this_dir, "hostnames")
    utils.mkdir_p(output)

    out_filename = "%s.csv" % name
    out_file = open(os.path.join(output, out_filename), 'w', newline='')
    out_writer = csv.writer(out_file)

    if len(options["_"]) < 1:
        print("Provide the name of an input file.")
        exit(1)

    input_filename = options["_"][0]
    if not os.path.exists(input_filename):
        print("Input file doesn't exist.")
        exit(1)

    suffix = options.get("suffix", ".gov")
    # if it has a ., make sure the . is escaped
    if suffix.startswith("."):
        suffix = "\\%s" % suffix
    pattern = re.compile("%s\n?$" % suffix)

    max = int(options.get("max", -1))

    # Proceed
    missed = 0
    matched = 0
    name_map = {}
    curr = 0

    with open(input_filename, encoding=encoding) as f:
        try:
            for line in f:
                if pattern.search(line):
                    hostname = filter(line)
                    if debug:
                        print("Match!!!! %s" % hostname)
                    matched += 1
                    name_map[hostname] = None
                else:
                    if debug:
                        print("Didn't match: %s" % line.strip())
                    missed += 1

                curr += 1
                if (max > 0) and (curr >= max):
                    print("Stopping at %i." % curr)
                    break
                if (curr % 1000000) == 0:
                    print("Processing: %i" % curr)
        except UnicodeDecodeError:
            print(curr)
            print(utils.format_last_exception())
            exit(1)

    hostnames = list(name_map.keys())
    hostnames.sort()

    print("Matched %i (%i unique), missed on %i." % (matched, len(hostnames), missed))

    print("Writing out CSV.")
    for hostname in hostnames:
        out_writer.writerow([hostname])

    print("Done.")
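# main() looks a callable up in a `filters` dict and applies it to each
# matching line to extract a hostname. A hypothetical entry, only to show
# the expected shape; the name and normalization are illustrative, not from
# the original.

def plain_hostname(line):
    # Trim whitespace/newline and lowercase, since the dedup map is
    # case-sensitive.
    return line.strip().lower()

filters = {
    'hostnames': plain_hostname,
}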
#!/usr/bin/env python

from scanners import utils
import csv
import os
import re
import time

# pip install censys
import censys.certificates

# Drop output in a directory next to the script.
this_dir = os.path.dirname(__file__)
output = os.path.join(this_dir, "hostnames")
utils.mkdir_p(output)

out_filename = "censys.csv"
out_file = open(os.path.join(output, out_filename), 'w', newline='')
out_writer = csv.writer(out_file)

options = utils.options()
debug = options.get("debug", False)

suffix = options.get("suffix", ".gov")
# matches a literal "*." wildcard prefix
wildcard_pattern = re.compile(r"\*\.")

# time to sleep between requests (defaults to 5s)
delay = int(options.get("delay", 5))

# Censys page size, fixed
page_size = 100
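# The constants above set up a paged, rate-limited walk of Censys results.
# A sketch of the loop they imply, with fetch_page standing in for the real
# Censys API call (a hypothetical helper; the actual client, query, and
# result shape are not shown in this snippet).

def gather(fetch_page):
    page = 1
    while True:
        names = fetch_page(page)  # assumed to return up to page_size names
        for name in names:
            # Strip wildcard prefixes like "*." before writing hostnames out.
            hostname = wildcard_pattern.sub("", name)
            out_writer.writerow([hostname])
        if len(names) < page_size:
            break  # short page means we've hit the last one
        page += 1
        time.sleep(delay)  # stay under the API rate limit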