def gather(suffix, options):
    url = options.get("url")
    if url is None:
        logging.warn("A --url is required. (Can be a local path.)")
        exit(1)

    # remote URL
    if url.startswith("http:") or url.startswith("https:"):
        # Though it's saved in cache/, it will be downloaded every time.
        remote_path = os.path.join(utils.cache_dir(), "url.csv")

        try:
            response = requests.get(url)
            utils.write(response.text, remote_path)
        except:
            logging.error("Remote URL not downloaded successfully.")
            print(utils.format_last_exception())
            exit(1)

    # local path
    else:
        remote_path = url

    for domain in utils.load_domains(remote_path):
        yield domain
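
# Minimal usage sketch (hypothetical URL): the "url" option can point at a remote CSV,
# which gets downloaded into cache/, or at a local file, mirroring the --url handling above.
def example_url_gather():
    options = {"url": "https://example.com/domains.csv"}  # or a local path
    return list(gather(".gov", options))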
def scan_serial(scanner, server_info, data, options):
    logging.debug("\tRunning scans in serial.")

    logging.debug("\t\tSSLv2 scan.")
    sslv2 = scanner.run_scan_command(server_info, Sslv20ScanCommand())
    logging.debug("\t\tSSLv3 scan.")
    sslv3 = scanner.run_scan_command(server_info, Sslv30ScanCommand())
    logging.debug("\t\tTLSv1.0 scan.")
    tlsv1 = scanner.run_scan_command(server_info, Tlsv10ScanCommand())
    logging.debug("\t\tTLSv1.1 scan.")
    tlsv1_1 = scanner.run_scan_command(server_info, Tlsv11ScanCommand())
    logging.debug("\t\tTLSv1.2 scan.")
    tlsv1_2 = scanner.run_scan_command(server_info, Tlsv12ScanCommand())

    certs = None
    if options.get("sslyze-certs", True) is True:
        try:
            logging.debug("\t\tCertificate information scan.")
            certs = scanner.run_scan_command(server_info, CertificateInfoScanCommand())
        # Let generic exceptions bubble up.
        except idna.core.InvalidCodepoint:
            logging.warn(utils.format_last_exception())
            data['errors'].append("Invalid certificate/OCSP for this domain.")
            certs = None
    else:
        certs = None

    logging.debug("\tDone scanning.")

    return sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs
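
# The scanner and server_info arguments are constructed elsewhere. Below is a minimal
# setup sketch, assuming sslyze 1.x's ServerConnectivityInfo / SynchronousScanner API;
# the "example.com" hostname is illustrative, and later sslyze releases use a different API.
def example_scan_serial(hostname="example.com"):
    from sslyze.server_connectivity import ServerConnectivityInfo
    from sslyze.synchronous_scanner import SynchronousScanner

    server_info = ServerConnectivityInfo(hostname=hostname, port=443)
    server_info.test_connectivity_to_server()  # raises if the host is unreachable

    scanner = SynchronousScanner()
    data = {'errors': []}
    return scan_serial(scanner, server_info, data, {"sslyze-certs": True})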
def gather(suffixes, options, extra={}):
    # Returns a parsed, processed Google service credentials object.
    credentials = load_credentials()

    if credentials is None:
        logging.warn("No BigQuery credentials provided.")
        logging.warn("Set BIGQUERY_CREDENTIALS or BIGQUERY_CREDENTIALS_PATH environment variables.")
        exit(1)

    # When using this form of instantiation, the client won't pull
    # the project_id out of the creds, so it has to be set explicitly.
    client = bigquery.Client(project=credentials.project_id, credentials=credentials)

    # Allow override of default timeout (in seconds).
    timeout = int(options.get("timeout", default_timeout))

    # Construct the query.
    query = query_for(suffixes)
    logging.debug("Censys query:\n%s\n" % query)

    # Plan to store in cache/censys/export.csv.
    download_path = utils.cache_path("export", "censys", ext="csv")

    # Reuse of cached data can be turned on with --cache.
    cache = options.get("cache", False)
    if (cache is True) and os.path.exists(download_path):
        logging.warn("Using cached download data.")

    # But by default, fetch new data from the BigQuery API,
    # and write it to the expected download location.
    else:
        logging.warn("Kicking off SQL query job.")

        rows = None

        # Actually execute the query.
        try:
            # Executes query and loads all results into memory.
            query_job = client.query(query)
            iterator = query_job.result(timeout=timeout)
            rows = list(iterator)
        except google.api_core.exceptions.Forbidden:
            logging.warn("Access denied to Censys' BigQuery tables.")
            exit(1)
        except:
            logging.warn(utils.format_last_exception())
            logging.warn("Error talking to BigQuery, aborting.")
            exit(1)

        # At this point the query is complete: cache the returned rows
        # as a CSV at the expected download location.
        logging.warn("Caching results of SQL query.")

        download_file = open(download_path, 'w', newline='')
        download_writer = csv.writer(download_file)
        download_writer.writerow(["Domain"])  # will be skipped on read

        # Parse the rows and write them out as they were returned (dupes
        # and all), to be de-duped by the central gathering script.
        for row in rows:
            domains = row['common_name'] + row['dns_names']
            for domain in domains:
                download_writer.writerow([domain])

        # End CSV writing.
        download_file.close()

    # Whether we downloaded it fresh or not, read from the cached data.
    for domain in utils.load_domains(download_path):
        if domain:
            yield domain
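
# load_credentials() is defined elsewhere. A minimal sketch of what it might do, assuming
# google-auth's service_account loader and the environment variables named above
# (here BIGQUERY_CREDENTIALS is assumed to hold the service account JSON itself, and
# BIGQUERY_CREDENTIALS_PATH a path to it):
def example_load_credentials():
    import json
    import os
    from google.oauth2 import service_account

    raw = os.environ.get("BIGQUERY_CREDENTIALS")
    path = os.environ.get("BIGQUERY_CREDENTIALS_PATH")
    if raw:
        return service_account.Credentials.from_service_account_info(json.loads(raw))
    if path:
        return service_account.Credentials.from_service_account_file(path)
    return None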
def paginated_mode(suffix, options, uid, api_key):
    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    certificate_api = certificates.CensysCertificates(uid, api_key)

    if 'query' in options and options['query']:
        query = options['query']
    else:
        query = "parsed.subject.common_name:\"%s\" or parsed.extensions.subject_alt_name.dns_names:\"%s\"" % (suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    # Time to sleep between requests (defaults to 5s).
    delay = int(options.get("delay", 5))

    # Censys page size, fixed.
    page_size = 100

    # Start page defaults to 1.
    start_page = int(options.get("start", 1))

    # End page defaults to whatever the API says is the last one.
    end_page = options.get("end", None)
    if end_page is None:
        end_page = get_end_page(query, certificate_api)
        if end_page is None:
            logging.warn("Error looking up number of pages.")
            exit(1)
    else:
        end_page = int(end_page)

    max_records = ((end_page - start_page) + 1) * page_size

    fields = [
        "parsed.subject.common_name",
        "parsed.extensions.subject_alt_name.dns_names"
    ]

    current_page = start_page

    logging.warn("Fetching up to %i records, starting at page %i." % (max_records, start_page))
    last_cached = False
    force = options.get("force", False)

    while current_page <= end_page:
        if (not last_cached) and (current_page > start_page):
            logging.debug("(Waiting %is before fetching page %i.)" % (delay, current_page))
            last_cached = False
            time.sleep(delay)

        logging.debug("Fetching page %i." % current_page)

        cache_page = utils.cache_path(str(current_page), "censys")
        if (force is False) and (os.path.exists(cache_page)):
            logging.warn("\t[%i] Cached page." % current_page)
            last_cached = True

            certs_raw = open(cache_page).read()
            certs = json.loads(certs_raw)
            if (certs.__class__ is dict) and certs.get('invalid'):
                # Skip pages cached as invalid, and move on to the next page.
                current_page += 1
                continue
        else:
            try:
                certs = list(certificate_api.search(query, fields=fields, page=current_page, max_records=page_size))
                utils.write(utils.json_for(certs), cache_page)
            except censys.base.CensysException:
                logging.warn(utils.format_last_exception())
                logging.warn("Censys error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                current_page += 1
                continue
            except:
                logging.warn(utils.format_last_exception())
                logging.warn("Unexpected error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                exit(1)

        for cert in certs:
            # Common name + SANs
            names = cert.get('parsed.subject.common_name', []) + cert.get('parsed.extensions.subject_alt_name.dns_names', [])
            logging.debug(names)

            for name in names:
                hostnames_map[sanitize_name(name)] = None

        current_page += 1

    logging.debug("Done fetching from API.")

    return hostnames_map
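
# sanitize_name() isn't defined in this excerpt. A sketch of what it likely does, mirroring
# the inline wildcard/redacted-prefix stripping in the gather() function further below
# (the real helper may differ):
def example_sanitize_name(name):
    import re
    wildcard_pattern = re.compile(r"^\*\.")     # e.g. "*.example.gov"
    redacted_pattern = re.compile(r"^(\?\.)+")  # e.g. "?.?.example.gov"
    name = wildcard_pattern.sub('', name).lower().strip()
    name = redacted_pattern.sub('', name).lower().strip()
    return name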
def export_mode(suffix, options, uid, api_key):
    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    # Timeout, in seconds, defaults to 20 hours.
    timeout = int(options.get("timeout", (60 * 60 * 20)))

    # Wait 5 seconds between checking on the job.
    between_jobs = 5

    try:
        export_api = export.CensysExport(uid, api_key)
    except censys.base.CensysUnauthorizedException:
        logging.warn("The Censys.io Export API rejected the provided Censys credentials. The credentials may be inaccurate, or you may need to request access from the Censys.io team.")
        exit(1)

    # Uses a FLATTEN command in order to work around a BigQuery
    # error around multiple "repeated" fields. *shrug*
    query = "SELECT parsed.subject.common_name, parsed.extensions.subject_alt_name.dns_names from FLATTEN([certificates.certificates], parsed.extensions.subject_alt_name.dns_names) where parsed.subject.common_name LIKE \"%%%s\" OR parsed.extensions.subject_alt_name.dns_names LIKE \"%%%s\";" % (suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    download_file = utils.cache_path("export", "censys", ext="csv")

    force = options.get("force", False)
    if (force is False) and os.path.exists(download_file):
        logging.warn("Using cached download data.")
    else:
        logging.warn("Kicking off SQL query job.")

        results_url = None

        try:
            job = export_api.new_job(query, format='csv', flatten=True)
            job_id = job['job_id']

            started = datetime.datetime.now()
            while True:
                elapsed = (datetime.datetime.now() - started).seconds
                status = export_api.check_job(job_id)

                if status['status'] == 'error':
                    logging.warn("Error from Censys: %s" % status['error'])
                    exit(1)

                # Not expected, but better to explicitly handle.
                elif status['status'] == 'expired':
                    logging.warn("Results are somehow expired, bailing.")
                    exit(1)

                elif status['status'] == 'pending':
                    logging.debug("[%is] Job still pending." % elapsed)
                    time.sleep(between_jobs)

                elif status['status'] == 'success':
                    logging.warn("[%is] Job complete!" % elapsed)
                    results_url = status['download_paths'][0]
                    break

                if (elapsed > timeout):
                    logging.warn("Timeout waiting for job to complete.")
                    exit(1)

        except censys.base.CensysException:
            logging.warn(utils.format_last_exception())
            logging.warn("Censys error, aborting.")
            exit(1)

        # At this point, the job is complete and we need to download
        # the resulting CSV URL in results_url.
        logging.warn("Downloading results of SQL query.")
        utils.download(results_url, download_file)

    # Read in the downloaded CSV file, run any hostnames in each line
    # through the sanitizer, and de-dupe using the map.
    with open(download_file, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if (not row[0]) or (row[0].lower().startswith("parsed_subject_common_name")):
                continue

            names = [row[0].lower(), row[1].lower()]
            # logging.debug(names)

            for name in names:
                if name:
                    hostnames_map[sanitize_name(name)] = None

    return hostnames_map
def main():
    options = utils.options()

    debug = options.get('debug', False)
    encoding = options.get('encoding', 'latin-1')

    name = options.get('name', 'hostnames')
    filter_name = options.get('filter', name)
    filter = filters.get(filter_name, None)
    if filter is None:
        print("No filter by that name. Specify one with --filter.")
        exit(1)

    # Drop output in a directory next to the script.
    this_dir = os.path.dirname(__file__)
    output = os.path.join(this_dir, "hostnames")
    utils.mkdir_p(output)

    out_filename = "%s.csv" % name
    out_file = open(os.path.join(output, out_filename), 'w', newline='')
    out_writer = csv.writer(out_file)

    if len(options["_"]) < 1:
        print("Provide the name to an input file.")
        exit(1)

    input_filename = options["_"][0]
    if not os.path.exists(input_filename):
        print("Input file doesn't exist.")
        exit(1)

    suffix = options.get("suffix", ".gov")
    # If the suffix has a leading ., make sure the . is escaped.
    if suffix.startswith("."):
        suffix = "\\%s" % suffix
    pattern = re.compile("%s\n?$" % suffix)

    max = int(options.get("max", -1))

    # Proceed
    missed = 0
    matched = 0
    name_map = {}

    curr = 0
    with open(input_filename, encoding=encoding) as f:
        try:
            for line in f:
                if pattern.search(line):
                    hostname = filter(line)
                    if debug:
                        print("Match!!!! %s" % hostname)
                    matched += 1
                    name_map[hostname] = None
                else:
                    if debug:
                        print("Didn't match: %s" % line.strip())
                    missed += 1

                curr += 1
                if (max > 0) and (curr >= max):
                    print("Stopping at %i." % curr)
                    break

                if (curr % 1000000) == 0:
                    print("Processing: %i" % curr)
        except UnicodeDecodeError as e:
            print(curr)
            print(utils.format_last_exception())
            exit(1)

    hostnames = list(name_map.keys())
    hostnames.sort()

    print("Matched %i (%i unique), missed on %i." % (matched, len(hostnames), missed))

    print("Writing out CSV.")
    for hostname in hostnames:
        out_writer.writerow([hostname])
    out_file.close()

    print("Done.")
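
# The filters dict is populated elsewhere. A hypothetical entry for the default
# "hostnames" filter, assuming each matching input line is already a bare hostname:
def example_hostnames_filter(line):
    # Normalize a raw input line down to a lowercased hostname with no trailing newline.
    return line.strip().lower()

# filters = {"hostnames": example_hostnames_filter}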
def gather(suffix, options):
    # Register a (free) Censys.io account to get a UID and API key.
    uid = options.get("censys_id", None)
    api_key = options.get("censys_key", None)

    if (uid is None) or (api_key is None):
        uid = os.environ.get("CENSYS_UID", None)
        api_key = os.environ.get("CENSYS_API_KEY", None)

    if (uid is None) or (api_key is None):
        logging.warn("No Censys credentials set. API key required to use the Censys API.")
        exit(1)

    certificate_api = certificates.CensysCertificates(uid, api_key)

    query = "parsed.subject.common_name:\"%s\" or parsed.extensions.subject_alt_name.dns_names:\"%s\"" % (suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    # Hostnames beginning with a wildcard prefix will have the prefix stripped.
    wildcard_pattern = re.compile(r"^\*\.")
    redacted_pattern = re.compile(r"^(\?\.)+")

    # Time to sleep between requests (defaults to 5s).
    delay = int(options.get("delay", 5))

    # Censys page size, fixed.
    page_size = 100

    # Start page defaults to 1.
    start_page = int(options.get("start", 1))

    # End page defaults to whatever the API says is the last one.
    end_page = options.get("end", None)
    if end_page is None:
        end_page = get_end_page(query, certificate_api)
        if end_page is None:
            logging.warn("Error looking up number of pages.")
            exit(1)
    else:
        end_page = int(end_page)

    max_records = ((end_page - start_page) + 1) * page_size

    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    fields = [
        "parsed.subject.common_name",
        "parsed.extensions.subject_alt_name.dns_names"
    ]

    current_page = start_page

    logging.warn("Fetching up to %i records, starting at page %i." % (max_records, start_page))
    last_cached = False
    force = options.get("force", False)

    while current_page <= end_page:
        if (not last_cached) and (current_page > start_page):
            logging.debug("(Waiting %is before fetching page %i.)" % (delay, current_page))
            last_cached = False
            time.sleep(delay)

        logging.debug("Fetching page %i." % current_page)

        cache_page = utils.cache_path(str(current_page), "censys")
        if (force is False) and (os.path.exists(cache_page)):
            logging.warn("\t[%i] Cached page." % current_page)
            last_cached = True

            certs_raw = open(cache_page).read()
            certs = json.loads(certs_raw)
            if (certs.__class__ is dict) and certs.get('invalid'):
                # Skip pages cached as invalid, and move on to the next page.
                current_page += 1
                continue
        else:
            try:
                certs = list(certificate_api.search(query, fields=fields, page=current_page, max_records=page_size))
                utils.write(utils.json_for(certs), cache_page)
            except censys.base.CensysException:
                logging.warn(utils.format_last_exception())
                logging.warn("Censys error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                current_page += 1
                continue
            except:
                logging.warn(utils.format_last_exception())
                logging.warn("Unexpected error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                exit(1)

        for cert in certs:
            # Common name + SANs
            names = cert.get('parsed.subject.common_name', []) + cert.get('parsed.extensions.subject_alt_name.dns_names', [])
            logging.debug(names)

            for name in names:
                # Strip off any wildcard prefix.
                name = re.sub(wildcard_pattern, '', name).lower().strip()
                # Strip off any redacted ? prefixes. (Ugh.)
                name = re.sub(redacted_pattern, '', name).lower().strip()
                hostnames_map[name] = None

        current_page += 1

    logging.debug("Done fetching from API.")

    # The iterator doesn't buy much efficiency, since we paginated already.
    # Necessary evil to de-dupe before returning hostnames, though.
    for hostname in hostnames_map.keys():
        yield hostname
def scan_parallel(scanner, server_info, data, options):
    logging.debug("\tRunning scans in parallel.")

    def queue(command):
        try:
            return scanner.queue_scan_command(server_info, command)
        except OSError as err:
            text = ("OSError - likely too many processes and open files.")
            data['errors'].append(text)
            logging.warn("%s\n%s" % (text, utils.format_last_exception()))
            return None, None, None, None, None, None
        except Exception as err:
            text = ("Unknown exception queueing sslyze command.\n%s" % utils.format_last_exception())
            data['errors'].append(text)
            logging.warn(text)
            return None, None, None, None, None, None

    # Initialize commands and result containers
    sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs = None, None, None, None, None, None

    # Queue them all up
    queue(Sslv20ScanCommand())
    queue(Sslv30ScanCommand())
    queue(Tlsv10ScanCommand())
    queue(Tlsv11ScanCommand())
    queue(Tlsv12ScanCommand())

    if options.get("sslyze-certs", True) is True:
        queue(CertificateInfoScanCommand())

    # Reassign them back to predictable places after they're all done
    was_error = False
    for result in scanner.get_results():
        try:
            if isinstance(result, PluginRaisedExceptionScanResult):
                error = ("Scan command failed: %s" % result.as_text())
                logging.warn(error)
                data['errors'].append(error)
                return None, None, None, None, None, None

            if type(result.scan_command) == Sslv20ScanCommand:
                sslv2 = result
            elif type(result.scan_command) == Sslv30ScanCommand:
                sslv3 = result
            elif type(result.scan_command) == Tlsv10ScanCommand:
                tlsv1 = result
            elif type(result.scan_command) == Tlsv11ScanCommand:
                tlsv1_1 = result
            elif type(result.scan_command) == Tlsv12ScanCommand:
                tlsv1_2 = result
            elif type(result.scan_command) == CertificateInfoScanCommand:
                certs = result
            else:
                error = "Couldn't match scan result with command! %s" % result
                logging.warn("\t%s" % error)
                data['errors'].append(error)
                was_error = True

        except Exception as err:
            was_error = True
            text = ("Exception inside async scanner result processing.\n%s" % utils.format_last_exception())
            data['errors'].append(text)
            logging.warn("\t%s" % text)

    # There was an error during async processing.
    if was_error:
        return None, None, None, None, None, None

    logging.debug("\tDone scanning.")

    return sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs
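
# scan_parallel() expects a scanner whose queue_scan_command()/get_results() pair runs
# commands asynchronously. A minimal setup sketch, assuming sslyze 1.x's ConcurrentScanner;
# the hostname is illustrative, and later sslyze releases use a different API.
def example_scan_parallel(hostname="example.com"):
    from sslyze.concurrent_scanner import ConcurrentScanner
    from sslyze.server_connectivity import ServerConnectivityInfo

    server_info = ServerConnectivityInfo(hostname=hostname, port=443)
    server_info.test_connectivity_to_server()

    scanner = ConcurrentScanner()
    data = {'errors': []}
    return scan_parallel(scanner, server_info, data, {"sslyze-certs": True})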