def init(options): """ Download the Chrome preload list at the beginning of the scan, and re-use it for each scan. It is unnecessary to re-download the list for each scan because it changes infrequently. """ global exclude_list global parents_list exclude_path = options.get("subdomains-exclude", None) parents_path = options.get("subdomains-parents", None) if (exclude_path is None) or (parents_path is None): logging.warn( "Specify CSVs with --subdomains-exclude and --subdomains-parents.") return False # list of subdomains to manually exclude exclude_list = utils.load_domains(exclude_path) # make a map of {'domain.gov': 'name of owner'} parents_list = utils.load_domains(parents_path, whole_rows=True) for domain_info in parents_list: domain_map[domain_info[0]] = domain_info[2] return True
def gather(suffix, options):
    url = options.get("url")
    if url is None:
        logging.warn("A --url is required. (Can be a local path.)")
        exit(1)

    # remote URL
    if url.startswith("http:") or url.startswith("https:"):
        # Though it's saved in cache/, it will be downloaded every time.
        remote_path = os.path.join(utils.cache_dir(), "url.csv")

        try:
            response = requests.get(url)
            utils.write(response.text, remote_path)
        except:
            logging.error("Remote URL not downloaded successfully.")
            print(utils.format_last_exception())
            exit(1)

    # local path
    else:
        remote_path = url

    for domain in utils.load_domains(remote_path):
        yield domain
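# A usage sketch for the gather() generator above, assuming a hypothetical
# remote CSV URL; a local file path works as well via the "local path" branch.
# The suffix argument is accepted but unused by this particular gatherer.
def _example_url_gather():
    options = {"url": "https://example.com/domains.csv"}
    return list(gather(".gov", options))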
def init(options):
    global analytics_domains

    analytics_file = options.get("analytics")
    if (not analytics_file) or (not analytics_file.endswith(".csv")):
        no_csv = "--analytics should point to the file path or URL to a CSV of participating domains."
        logging.error(no_csv)
        return False

    # It's a URL, download it first.
    if analytics_file.startswith("http:") or analytics_file.startswith("https:"):
        analytics_path = os.path.join(utils.cache_dir(), "analytics.csv")

        try:
            response = requests.get(analytics_file)
            utils.write(response.text, analytics_path)
        except:
            no_csv = "--analytics URL not downloaded successfully."
            logging.error(no_csv)
            return False

    # Otherwise, read it off the disk
    else:
        analytics_path = analytics_file

        if (not os.path.exists(analytics_path)):
            no_csv = "--analytics file not found."
            logging.error(no_csv)
            return False

    analytics_domains = utils.load_domains(analytics_path)
    return True
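# A sketch of how the loaded analytics_domains list might be consulted after
# init() succeeds. The URL and the membership check are illustrative
# assumptions; the downstream usage is not shown in this excerpt.
def _example_analytics_check(domain):
    if init({"analytics": "https://example.com/analytics-sites.csv"}):
        return domain.lower() in analytics_domains
    return False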
def init(options):
    global analytics_domains

    analytics_file = options.get("analytics")
    if ((not analytics_file) or (not analytics_file.endswith(".csv"))
            or (not os.path.exists(analytics_file))):
        no_csv = "--analytics should point to a CSV of participating domains."
        logging.error(no_csv)
        return False

    analytics_domains = utils.load_domains(analytics_file)
    return True
def init(options): """ Download the Chrome preload list at the beginning of the scan, and re-use it for each scan. It is unnecessary to re-download the list for each scan because it changes infrequently. """ global exclude_list global parents_list exclude_path = options.get("subdomains-exclude", None) parents_path = options.get("subdomains-parents", None) if (exclude_path is None) or (parents_path is None): logging.warn("Specify CSVs with --subdomains-exclude and --subdomains-parents.") return False # list of subdomains to manually exclude exclude_list = utils.load_domains(exclude_path) # make a map of {'domain.gov': 'name of owner'} parents_list = utils.load_domains(parents_path, whole_rows=True) for domain_info in parents_list: domain_map[domain_info[0]] = domain_info[2] return True
def gather(suffixes, options, extra={}):
    # Returns a parsed, processed Google service credentials object.
    credentials = load_credentials()

    if credentials is None:
        logging.warn("No BigQuery credentials provided.")
        logging.warn("Set BIGQUERY_CREDENTIALS or BIGQUERY_CREDENTIALS_PATH environment variables.")
        exit(1)

    # When using this form of instantiation, the client won't pull
    # the project_id out of the creds, has to be set explicitly.
    client = bigquery.Client(project=credentials.project_id, credentials=credentials)

    # Allow override of default timeout (in seconds).
    timeout = int(options.get("timeout", default_timeout))

    # Construct the query.
    query = query_for(suffixes)
    logging.debug("Censys query:\n%s\n" % query)

    # Plan to store in cache/censys/export.csv.
    download_path = utils.cache_path("export", "censys", ext="csv")

    # Reuse of cached data can be turned on with --cache.
    cache = options.get("cache", False)
    if (cache is True) and os.path.exists(download_path):
        logging.warn("Using cached download data.")

    # But by default, fetch new data from the BigQuery API,
    # and write it to the expected download location.
    else:
        logging.warn("Kicking off SQL query job.")

        rows = None

        # Actually execute the query.
        try:
            # Executes query and loads all results into memory.
            query_job = client.query(query)
            iterator = query_job.result(timeout=timeout)
            rows = list(iterator)
        except google.api_core.exceptions.Forbidden:
            logging.warn("Access denied to Censys' BigQuery tables.")
            exit(1)
        except:
            logging.warn(utils.format_last_exception())
            logging.warn("Error talking to BigQuery, aborting.")
            exit(1)

        # At this point, the query is complete and the rows are in memory,
        # so cache them to the expected CSV location.
        logging.warn("Caching results of SQL query.")

        download_file = open(download_path, 'w', newline='')
        download_writer = csv.writer(download_file)
        download_writer.writerow(["Domain"])  # will be skipped on read

        # Parse the rows and write them out as they were returned (dupes
        # and all), to be de-duped by the central gathering script.
        for row in rows:
            domains = row['common_name'] + row['dns_names']
            for domain in domains:
                download_writer.writerow([domain])

        # End CSV writing.
        download_file.close()

    # Whether we downloaded it fresh or not, read from the cached data.
    for domain in utils.load_domains(download_path):
        if domain:
            yield domain
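# A usage sketch for the Censys/BigQuery gather() above. The list-of-suffixes
# form, timeout, and cache values are assumptions for illustration; real runs
# also need credentials via BIGQUERY_CREDENTIALS or BIGQUERY_CREDENTIALS_PATH,
# as checked at the top of gather().
def _example_censys_gather():
    options = {"timeout": 60 * 20, "cache": False}
    return list(gather([".gov"], options))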