def insert_record(self, result, source_name):
    """
    Insert the provided result as a record from the provided source name.

    :param result: The result of a DNS lookup as a JSON object including the
                   fqdn, type, value, zone, and created values.
    :param source_name: The DNS record source ("ssl", "virustotal", "sonar_dns", "common_crawl")
    """
    query = {
        "fqdn": result["fqdn"],
        "type": result["type"],
        "value": result["value"],
    }
    check = self.mongo_connector.perform_find_one(self.all_dns_collection, query)

    if check is None:
        result["sources"] = []
        result["sources"].append({})
        result["sources"][0]["source"] = source_name
        result["sources"][0]["updated"] = datetime.now()
        result["updated"] = datetime.now()
        self.mongo_connector.perform_insert(self.all_dns_collection, result)
    else:
        source_index = -1
        for i in range(0, len(check["sources"])):
            if check["sources"][i]["source"] == source_name:
                source_index = i

        if source_index != -1:
            name = "sources." + str(source_index) + ".updated"
            entry = {}
            entry[name] = datetime.now()
            self.all_dns_collection.update_one(
                {"_id": ObjectId(check["_id"])}, {"$set": entry})
            self.all_dns_collection.update_one(
                {"_id": ObjectId(check["_id"])},
                {"$set": {"updated": datetime.now()}},
            )
        else:
            entry = {}
            entry["source"] = source_name
            entry["updated"] = datetime.now()
            self.all_dns_collection.update_one(
                {"_id": ObjectId(check["_id"])},
                {"$push": {"sources": entry}})
            self.all_dns_collection.update_one(
                {"_id": ObjectId(check["_id"])},
                {"$set": {"updated": datetime.now()}},
            )

    if result["type"] == "a" or result["type"] == "aaaa":
        ip_manager = IPManager.IPManager(self.mongo_connector)
        ip_manager.insert_record(result["value"], source_name)
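# Usage sketch (assumption, not part of the original module): this assumes the
# method above is DNSManager.insert_record, matching the DNSManager(mongo_connector)
# construction used by the callers later in this section. The record shape follows
# the docstring (fqdn, type, value, zone, created); the hostname, zone, and IP are
# placeholders for illustration only.
def _example_insert_record_usage():
    """Hypothetical illustration of inserting an A record from the sonar_dns source."""
    from datetime import datetime  # same import the surrounding module relies on

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)

    record = {
        "fqdn": "www.example.org",   # placeholder hostname
        "type": "a",
        "value": "203.0.113.10",     # documentation-range IP
        "zone": "example.org",       # placeholder tracked zone
        "created": datetime.now(),
    }

    # Because the type is "a", insert_record also forwards the value to
    # IPManager.insert_record, as shown at the end of the method above.
    dns_manager.insert_record(record, "sonar_dns")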
def create_network_data_sets(groups, mongo_connector):
    """
    Group results based on network type ("Tracked", "AWS", "Azure", "GCP", or "Akamai")
    """
    group_data = {}
    group_data['aws_count'] = 0
    group_data['tracked_count'] = 0
    group_data['akamai_count'] = 0
    group_data['azure_count'] = 0
    group_data['gcp_count'] = 0

    ip_manager = IPManager.IPManager(mongo_connector)

    for group in groups:
        cidr = group.replace(REPLACE_CHAR, ".")
        fake_ip = cidr + ".1"
        group_data[group] = {}
        group_data[group]['class_c'] = cidr

        if ip_manager.is_aws_ip(fake_ip):
            group_data[group]['aws'] = True
            group_data['aws_count'] = group_data['aws_count'] + 1
        else:
            group_data[group]['aws'] = False

        if ip_manager.is_azure_ip(fake_ip):
            group_data[group]['azure'] = True
            group_data['azure_count'] = group_data['azure_count'] + 1
        else:
            group_data[group]['azure'] = False

        if ip_manager.is_akamai_ip(fake_ip):
            group_data['akamai_count'] = group_data['akamai_count'] + 1
            group_data[group]['akamai'] = True
        else:
            group_data[group]['akamai'] = False

        if ip_manager.is_tracked_ip(fake_ip):
            group_data[group]['tracked'] = True
            group_data['tracked_count'] = group_data['tracked_count'] + 1
        else:
            group_data[group]['tracked'] = False

        if ip_manager.is_gcp_ip(fake_ip):
            group_data[group]['gcp'] = True
            group_data['gcp_count'] = group_data['gcp_count'] + 1
        else:
            group_data[group]['gcp'] = False

    return group_data
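# Usage sketch (assumption, not part of the original module): the group keys are
# class-C prefixes with "." swapped for the module's REPLACE_CHAR, so the function
# above can rebuild a representative ".1" address per group. The prefix below is
# the 203.0.113.0/24 documentation range, used purely as an illustrative value.
def _example_create_network_data_sets_usage():
    """Hypothetical illustration of the group-key encoding and the returned counts."""
    mongo_connector = MongoConnector.MongoConnector()

    # Encode "203.0.113" using the same REPLACE_CHAR convention the function expects.
    groups = ["203.0.113".replace(".", REPLACE_CHAR)]

    group_data = create_network_data_sets(groups, mongo_connector)

    # Per-group flags plus the aggregate counters populated above.
    print(group_data[groups[0]]["class_c"])             # "203.0.113"
    print(group_data["tracked_count"], group_data["aws_count"])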
def main(): """ Begin Main... """ # The sources for which to remove expired entries # Infoblox is handled separately # {"source_name": date_difference_in_months} sources = [{ "name": "sonar_dns", "diff": -2 }, { "name": "sonar_dns_saved", "diff": -2 }, { "name": "sonar_rdns", "diff": -2 }, { "name": "sonar_rdns_saved", "diff": -2 }, { "name": "ssl", "diff": -2 }, { "name": "ssl_saved", "diff": -2 }, { "name": "virustotal", "diff": -2 }, { "name": "virustotal_saved", "diff": -2 }, { "name": "UltraDNS", "diff": -2 }, { "name": "UltraDNS_saved", "diff": -2 }, { "name": "marinus", "diff": -2 }, { "name": "marinus_saved", "diff": -2 }, { "name": "mx", "diff": -2 }, { "name": "mx_saved", "diff": -2 }, { "name": "common_crawl", "diff": -4 }, { "name": "common_crawl_saved", "diff": -4 }] amass_diff = -2 now = datetime.now() print("Starting: " + str(now)) mongo_connector = MongoConnector.MongoConnector() all_dns_collection = mongo_connector.get_all_dns_connection() dns_manager = DNSManager.DNSManager(mongo_connector) GDNS = GoogleDNS.GoogleDNS() ip_manager = IPManager.IPManager(mongo_connector) jobs_manager = JobsManager.JobsManager(mongo_connector, 'remove_expired_entries') jobs_manager.record_job_start() zones = ZoneManager.get_distinct_zones(mongo_connector) # Get the date for today minus two months d_minus_2m = monthdelta(datetime.now(), -2) print("Removing SRDNS as of: " + str(d_minus_2m)) # Remove the old records srdns_collection = mongo_connector.get_sonar_reverse_dns_connection() srdns_collection.remove({'updated': {"$lt": d_minus_2m}}) ip_manager.delete_records_by_date(d_minus_2m) # Before completely removing old entries, make an attempt to see if they are still valid. # Occasionally, a host name will still be valid but, for whatever reason, is no longer tracked by a source. # Rather than throw away valid information, this will archive it. for entry in sources: removal_date = monthdelta(datetime.now(), entry['diff']) source = entry['name'] print("Removing " + source + " as of: " + str(removal_date)) last_domain = "" results = all_dns_collection.find({ 'sources': { "$size": 1 }, 'sources.source': source, 'sources.updated': { "$lt": removal_date } }) for result in results: if result['fqdn'] != last_domain: last_domain = result['fqdn'] lookup_int = get_lookup_int(result, GDNS) dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int) if dns_result != []: insert_current_results(dns_result, dns_manager, zones, result, source) dns_manager.remove_all_by_source_and_date(source, entry['diff']) # Process amass entries temp_sources = mongo_connector.perform_distinct(all_dns_collection, 'sources.source') amass_sources = [] for entry in temp_sources: if entry.startswith("amass:"): amass_sources.append(entry) for source in amass_sources: removal_date = monthdelta(datetime.now(), amass_diff) print("Removing " + source + " as of: " + str(removal_date)) last_domain = "" results = mongo_connector.perform_find( all_dns_collection, { 'sources': { "$size": 1 }, 'sources.source': source, 'sources.updated': { "$lt": removal_date } }) for result in results: if result['fqdn'] != last_domain: last_domain = result['fqdn'] lookup_int = get_lookup_int(result, GDNS) dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int) if dns_result != []: insert_current_results(dns_result, dns_manager, zones, result, source) dns_manager.remove_all_by_source_and_date(source, amass_diff) # Record status jobs_manager.record_job_complete() now = datetime.now() print("Complete: " + str(now))
def main(): """ Beging Main... """ global global_exit_flag global global_retest_list global global_sleep_time global global_queue_size global global_zgrab_path logger = LoggingUtil.create_log(__name__) global_retest_list = [] parser = argparse.ArgumentParser( description="Launch zgrab against IPs using port 22, 25, 443, or 465.") parser.add_argument( "-p", choices=["22", "25", "443", "465"], metavar="port", help="The port to scan: 22, 25, 443, or 465", ) parser.add_argument("-t", default=5, type=int, metavar="threadCount", help="The number of threads") parser.add_argument( "--mx", action="store_true", help="Scan only IPs from MX records. Useful for SMTP scans.", ) parser.add_argument( "-s", default=0, type=int, metavar="sleepTime", help="Sleep time in order to spread out the batches", ) parser.add_argument( "--qs", default=0, type=int, metavar="queueSize", help="How many hosts to scan in a batch", ) parser.add_argument("--zones_only", action="store_true", help="Scan only IPs from IP zones.") parser.add_argument( "--zgrab_path", default=global_zgrab_path, metavar="zgrabVersion", help="The version of ZGrab to use", ) args = parser.parse_args() if args.p == None: logger.error("A port value (22, 25, 443, or 465) must be provided.") exit(1) if is_running(os.path.basename(__file__)): """ Check to see if a previous attempt to parse is still running... """ now = datetime.now() logger.warning(str(now) + ": I am already running! Goodbye!") exit(0) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") rm_connector = RemoteMongoConnector.RemoteMongoConnector() all_dns_collection = rm_connector.get_all_dns_connection() ip_manager = IPManager.IPManager(rm_connector, True) jobs_manager = JobsManager.JobsManager(rm_connector, "zgrab_port_ip-" + args.p) jobs_manager.record_job_start() zones_struct = {} zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector) # Not pretty but works zones_struct["ip_manager"] = ip_manager if args.mx: (ips, ip_context) = get_mx_ips(zones_struct["zones"], ip_manager, all_dns_collection) elif args.zones_only: (ips, ip_context) = get_only_ipzones(ip_manager.Tracked_CIDRs) else: (ips, ip_context) = get_ips(ip_manager, all_dns_collection) if args.s and int(args.s) > 0: global_sleep_time = int(args.s) if args.qs and int(args.qs) > 0: global_queue_size = int(args.qs) logger.info("Got IPs: " + str(len(ips))) zones_struct["ip_context"] = ip_context zgrab_collection = rm_connector.get_zgrab_port_data_connection() if args.p == "443": run_command = run_port_443_command elif args.p == "22": run_command = run_port_22_command elif args.p == "25": run_command = run_port_25_command elif args.p == "465": run_command = run_port_465_command check_save_location("./json_p" + args.p) global_zgrab_path = args.zgrab_path threads = [] logger.debug("Creating " + str(args.t) + " threads") for thread_id in range(1, args.t + 1): thread = ZgrabThread( thread_id, global_work_queue, args.p, run_command, zones_struct, zgrab_collection, ) thread.start() threads.append(thread) thread_id += 1 logger.info("Populating Queue") global_queue_lock.acquire() for ip in ips: global_work_queue.put(ip) global_queue_lock.release() # Wait for queue to empty while not global_work_queue.empty(): pass # Notify threads it's time to exit global_exit_flag = 1 # Wait for all threads to complete for t in threads: t.join() logger.info("Exiting Main Thread") logger.info("Global retest list: " + str(len(global_retest_list))) # Retest any SMTP hosts that did not respond to the StartTLS handshake if 
args.p == "25" and len(global_retest_list) > 0: process_thread( logger, global_retest_list, args.p, run_port_25_no_tls_command, zones_struct, zgrab_collection, "retest", ) # Remove old entries from before the scan if args.p == "443": other_results = zgrab_collection.find({ "data.tls": { "$exists": True }, "data.tls.timestamp": { "$lt": now } }) for result in other_results: zgrab_collection.update_one({"_id": ObjectId(result["_id"])}, {"$unset": { "data.tls": "" }}) elif args.p == "22": if "zgrab2" in global_zgrab_path: other_results = zgrab_collection.find({ "data.ssh": { "$exists": True }, "data.ssh.timestamp": { "$lt": now } }) for result in other_results: zgrab_collection.update_one({"_id": ObjectId(result["_id"])}, {"$unset": { "data.ssh": "" }}) else: other_results = zgrab_collection.find({ "data.xssh": { "$exists": True }, "data.xssh.timestamp": { "$lt": now } }) for result in other_results: zgrab_collection.update_one({"_id": ObjectId(result["_id"])}, {"$unset": { "data.xssh": "" }}) elif args.p == "25": other_results = zgrab_collection.find({ "data.smtp": { "$exists": True }, "data.smtp.timestamp": { "$lt": now } }) for result in other_results: zgrab_collection.update_one({"_id": ObjectId(result["_id"])}, {"$unset": { "data.smtp": "" }}) elif args.p == "465": other_results = zgrab_collection.find({ "data.smtps": { "$exists": True }, "data.smtps.timestamp": { "$lt": now } }) for result in other_results: zgrab_collection.update_one({"_id": ObjectId(result["_id"])}, {"$unset": { "data.smtps": "" }}) # Remove any completely empty entries zgrab_collection.delete_many({"data": {}}) jobs_manager.record_job_complete() now = datetime.now() print("Complete: " + str(now)) logger.info("Complete.")
def main(): """ Begin main... """ logger = LoggingUtil.create_log(__name__) if is_running("get_censys_files.py"): """ Check to see if a download is in process... """ logger.warning("Can't run due to get_files running. Goodbye!") exit(0) if is_running(os.path.basename(__file__)): """ Check to see if a previous attempt to parse is still running... """ logger.warning("I am already running! Goodbye!") exit(0) # Make the relevant database connections RMC = RemoteMongoConnector.RemoteMongoConnector() ip_manager = IPManager.IPManager(RMC) # Verify that the get_files script has a recent file in need of parsing. jobs_collection = RMC.get_jobs_connection() status = jobs_collection.find_one({"job_name": "censys"}) if status["status"] != "DOWNLOADED": logger.warning("The status is not set to DOWNLOADED. Goodbye!") exit(0) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") # Collect the list of available zones zones = ZoneManager.get_distinct_zones(RMC) logger.info("Zones: " + str(len(zones))) # Get the current configuration information for Marinus. config_collection = RMC.get_config_connection() configs = config_collection.find({}) orgs = [] for org in configs[0]["SSL_Orgs"]: orgs.append(org) logger.info("Orgs: " + str(len(orgs))) # Obtain the name of the decompressed file. filename_f = open(FILENAME_FILE, "r") decompressed_file = filename_f.readline() filename_f.close() # For manual testing: decompressed_file = "ipv4.json" logger.info("Beginning file processing...") # Remove old results from the database results_collection = RMC.get_results_connection() results_collection.delete_many({}) all_dns_collection = RMC.get_all_dns_connection() try: with open(decompressed_file, "r") as dec_f: for line in dec_f: try: entry = json.loads(line) """ Does the SSL certificate match a known organization? Is the IP address in a known CIDR? Is the IP address recorded in Splunk? """ if (check_in_org(entry, orgs) or ip_manager.is_tracked_ip(entry["ip"]) or ip_manager.find_splunk_data(entry["ip"], "AWS") is not None or ip_manager.find_splunk_data( entry["ip"], "AZURE") is not None): entry["zones"] = check_in_zone(entry, zones) entry["aws"] = ip_manager.is_aws_ip(entry["ip"]) entry["azure"] = ip_manager.is_azure_ip(entry["ip"]) (domains, zones) = lookup_domain(entry, zones, all_dns_collection) if len(domains) > 0: entry["domains"] = domains if len(zones) > 0: for zone in zones: if zone not in entry["zones"]: entry["zones"].append(zone) insert_result(entry, results_collection) # else: # #This will add days to the amount of time necessary to scan the file. 
# matched_zones = check_in_zone(entry, zones) # if matched_zones != []: # entry['zones'] = matched_zones # entry['aws'] = ip_manager.is_aws_ip(entry['ip']) # entry['azure'] = ip_manager.is_azure_ip(entry['ip']) # insert_result(entry, results_collection) except ValueError as err: logger.error("Value Error!") logger.error(str(err)) except: logger.error("Line unexpected error: " + str(sys.exc_info()[0])) logger.error("Line unexpected error: " + str(sys.exc_info()[1])) except IOError as err: logger.error("I/O error({0}): {1}".format(err.errno, err.strerror)) exit(1) except: logger.error("Unexpected error: " + str(sys.exc_info()[0])) logger.error("Unexpected error: " + str(sys.exc_info()[1])) exit(1) # Indicate that the processing of the job is complete and ready for download to Marinus jobs_collection.update_one( {"job_name": "censys"}, { "$currentDate": { "updated": True }, "$set": { "status": "COMPLETE" } }, ) now = datetime.now() print("Ending: " + str(now)) logger.info("Complete.")
def main(): """ Begin Main... """ logger = LoggingUtil.create_log(__name__) if is_running(os.path.basename(__file__)): logger.warning("Already running...") exit(0) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") r7 = Rapid7.Rapid7() mongo_connection = MongoConnector.MongoConnector() dns_manager = DNSManager.DNSManager(mongo_connection) ip_manager = IPManager.IPManager(mongo_connection) rdns_collection = mongo_connection.get_sonar_reverse_dns_connection() zones = ZoneManager.get_distinct_zones(mongo_connection) logger.info("Zone length: " + str(len(zones))) save_directory = "./files/" parser = argparse.ArgumentParser( description='Parse Sonar files based on CIDRs.') parser.add_argument('--sonar_file_type', required=True, help='Specify "dns" or "rdns"') args = parser.parse_args() check_save_location(save_directory) # A session is necessary for the multi-step log-in process s = requests.Session() if args.sonar_file_type == "rdns": jobs_manager = JobsManager.JobsManager(mongo_connection, 'get_data_by_cidr_rdns') jobs_manager.record_job_start() try: html_parser = r7.find_file_locations(s, "rdns", jobs_manager) if html_parser.rdns_url == "": logger.error("Unknown Error") jobs_manager.record_job_error() exit(0) unzipped_rdns = download_remote_files(logger, s, html_parser.rdns_url, save_directory, jobs_manager) update_rdns(logger, unzipped_rdns, rdns_collection, dns_manager, ip_manager, zones) except Exception as ex: logger.error("Unexpected error: " + str(ex)) jobs_manager.record_job_error() exit(0) logger.info("RDNS Complete") jobs_manager.record_job_complete() elif args.sonar_file_type == "dns": jobs_manager = JobsManager.JobsManager(mongo_connection, 'get_data_by_cidr_dns') jobs_manager.record_job_start() try: html_parser = r7.find_file_locations(s, "fdns", jobs_manager) if html_parser.any_url != "": unzipped_dns = download_remote_files(logger, s, html_parser.any_url, save_directory, jobs_manager) update_dns(logger, unzipped_dns, dns_manager, ip_manager, zones) if html_parser.a_url != "": unzipped_dns = download_remote_files(logger, s, html_parser.a_url, save_directory, jobs_manager) update_dns(logger, unzipped_dns, dns_manager, ip_manager, zones) if html_parser.aaaa_url != "": unzipped_dns = download_remote_files(logger, s, html_parser.aaaa_url, save_directory, jobs_manager) update_dns(logger, unzipped_dns, dns_manager, ip_manager, zones) except Exception as ex: logger.error("Unexpected error: " + str(ex)) jobs_manager.record_job_error() exit(0) logger.info("DNS Complete") jobs_manager.record_job_complete() else: logger.error("Unrecognized sonar_file_type option. Exiting...") now = datetime.now() print("Complete: " + str(now)) logger.info("Complete.")
def main(): """ Begin Main... """ global global_exit_flag global global_zgrab_path logger = LoggingUtil.create_log(__name__) parser = argparse.ArgumentParser( description="Launch zgrab against IPs using port 80 or 443.") parser.add_argument("-p", choices=["443", "80"], metavar="port", help="The web port: 80 or 443") parser.add_argument("-t", default=5, type=int, metavar="threadCount", help="The number of threads") parser.add_argument( "--zgrab_path", default=global_zgrab_path, metavar="zgrabVersion", help="The version of ZGrab to use", ) args = parser.parse_args() if args.p == None: logger.error("A port value (80 or 443) must be provided.") exit(1) if is_running(os.path.basename(__file__)): """ Check to see if a previous attempt to parse is still running... """ now = datetime.now() logger.warning(str(now) + ": I am already running! Goodbye!") exit(0) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") rm_connector = RemoteMongoConnector.RemoteMongoConnector() all_dns_collection = rm_connector.get_all_dns_connection() ip_manager = IPManager.IPManager(rm_connector, True) jobs_manager = JobsManager.JobsManager(rm_connector, "zgrab_http_ip-" + args.p) jobs_manager.record_job_start() zones_struct = {} zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector) # Not pretty but cleaner than previous method zones_struct["ip_manager"] = ip_manager (ips, ip_context) = get_ips(ip_manager, all_dns_collection) logger.info("Got IPs: " + str(len(ips))) zones_struct["ip_context"] = ip_context if args.p == "443": zgrab_collection = rm_connector.get_zgrab_443_data_connection() run_command = run_port_443_command else: zgrab_collection = rm_connector.get_zgrab_80_data_connection() run_command = run_port_80_command check_save_location("./json_p" + args.p) global_zgrab_path = args.zgrab_path threads = [] logger.debug("Creating " + str(args.t) + " threads") for thread_id in range(1, args.t + 1): thread = ZgrabThread( thread_id, global_work_queue, args.p, run_command, zones_struct, zgrab_collection, ) thread.start() threads.append(thread) thread_id += 1 logger.info("Populating Queue") global_queue_lock.acquire() for ip in ips: global_work_queue.put(ip) global_queue_lock.release() # Wait for queue to empty while not global_work_queue.empty(): pass # Notify threads it's time to exit global_exit_flag = 1 # Wait for all threads to complete for t in threads: t.join() logger.info("Exiting Main Thread") # Remove last week's old entries lastweek = datetime.now() - timedelta(days=7) zgrab_collection.delete_many({ "ip": { "$ne": "<nil>" }, "timestamp": { "$lt": lastweek } }) jobs_manager.record_job_complete() now = datetime.now() print("Complete: " + str(now)) logger.info("Complete.")
def main(): """ Begin Main... """ global global_exit_flag global global_zgrab_path logger = LoggingUtil.create_log(__name__) parser = argparse.ArgumentParser( description='Launch zgrab against domains using port 80 or 443.') parser.add_argument('-p', choices=['443', '80'], metavar="port", help='The web port: 80 or 443') parser.add_argument('-t', default=5, type=int, metavar="threadCount", help='The number of threads') parser.add_argument('--zgrab_path', default=global_zgrab_path, metavar='zgrabVersion', help='The version of ZGrab to use') args = parser.parse_args() if args.p == None: logger.error("A port value (80 or 443) must be provided.") exit(1) if is_running(os.path.basename(__file__)): """ Check to see if a previous attempt to parse is still running... """ now = datetime.now() logger.warning(str(now) + ": I am already running! Goodbye!") exit(0) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") rm_connector = RemoteMongoConnector.RemoteMongoConnector() all_dns_collection = rm_connector.get_all_dns_connection() jobs_manager = JobsManager.JobsManager(rm_connector, "zgrab_http_domain-" + args.p) jobs_manager.record_job_start() if args.p == "443": zgrab_collection = rm_connector.get_zgrab_443_data_connection() run_command = run_port_443_command else: zgrab_collection = rm_connector.get_zgrab_80_data_connection() run_command = run_port_80_command check_save_location("./json_p" + args.p) global_zgrab_path = args.zgrab_path zones = ZoneManager.get_distinct_zones(rm_connector) ip_manager = IPManager.IPManager(rm_connector) for zone in zones: global_exit_flag = 0 domains = get_domains(all_dns_collection, ip_manager, zone) if len(domains) == 0: continue num_threads = args.t if len(domains) < args.t: num_threads = len(domains) logger.debug("Creating " + str(num_threads) + " threads") threads = [] for thread_id in range(1, num_threads + 1): thread = ZgrabThread(thread_id, global_work_queue, args.p, run_command, zone, zgrab_collection) thread.start() threads.append(thread) thread_id += 1 logger.debug(zone + " length: " + str(len(domains))) logger.info("Populating Queue") global_queue_lock.acquire() for domain in domains: global_work_queue.put(domain) global_queue_lock.release() # Wait for queue to empty while not global_work_queue.empty(): pass logger.info("Queue empty") # Notify threads it's time to exit global_exit_flag = 1 # Wait for all threads to complete for t in threads: t.join() # Remove last week's old entries lastweek = datetime.now() - timedelta(days=7) zgrab_collection.remove({ 'domain': { "$ne": "<nil>" }, 'timestamp': { "$lt": lastweek } }) jobs_manager.record_job_complete() now = datetime.now() print("Complete: " + str(now)) logger.info("Complete.")
def main(): """ Begin Main """ logger = LoggingUtil.create_log(__name__) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") mongo_connector = MongoConnector.MongoConnector() dns_manager = DNSManager.DNSManager(mongo_connector) graphs_collection = mongo_connector.get_graphs_connection() graphs_data_collection = mongo_connector.get_graphs_data_connection() graphs_links_collection = mongo_connector.get_graphs_links_connection() graphs_docs_collection = mongo_connector.get_graphs_docs_connection() ip_manager = IPManager.IPManager(mongo_connector) jobs_manager = JobsManager.JobsManager(mongo_connector, 'create_graphs2') jobs_manager.record_job_start() zones = ZoneManager.get_distinct_zones(mongo_connector) for zone in zones: groups = [] graph = nx.Graph() add_to_list(zone, groups) graph.add_node(zone, data_type="tld", type=0, depends=[], dependedOnBy=[], docs="<h1>Parent</h1>") find_all_dns_by_zone(graph, zone, groups, dns_manager, ip_manager) find_srdns_by_zone(graph, zone, groups, mongo_connector, ip_manager) data = json_graph.node_link_data(graph) reformat_data(data, zone, groups) new_data = {} new_data['directed'] = data['directed'] new_data['graph'] = data['graph'] new_data['multigraph'] = data['multigraph'] new_data['errs'] = [] config = {} config['title'] = zone + " Network Map" config['graph'] = {} config['graph']['linkDistance'] = 150 config['graph']['charge'] = -400 config['graph']['height'] = 800 config['graph']['numColors'] = len(groups) config['graph']['labelPadding'] = { "left": 3, "right": 3, "top": 2, "bottom": 2 } config['graph']['labelMargin'] = { "left": 3, "right": 3, "top": 2, "bottom": 2 } config['graph']['ticksWithoutCollisions'] = 50 config['graph_type'] = "tracked_domain" config['types'] = {} regex_str = "^[0-9]+\\.[0-9]+\\.[0-9]+$" regx = re.compile(regex_str) for tgroup in groups: group = tgroup.replace(REPLACE_CHAR, ".") data_type = "tpd" if group in zones: data_type = "tracked_domain" elif re.match(regx, group): data_type = "cidr" config['types'][tgroup] = { "short": group, "long": "A group from the network: " + group, "data_type": data_type } config['constraints'] = [] tmp = int(math.ceil(math.sqrt(len(groups)))) + 1 x = [] y = [] for i in range(1, tmp): val = round((i * 1.0) / tmp, 2) x.append(str(val)) y.append(str(val)) x_pos = 0 y_pos = 0 for group in groups: config['constraints'].append({ "has": { "type": group }, "type": "position", "x": x[x_pos], "y": y[y_pos] }) x_pos = x_pos + 1 if x_pos >= len(x): x_pos = 0 y_pos = y_pos + 1 config['jsonUrl'] = "/api/v1.0/graphs/" + zone new_data['config'] = config new_data['created'] = datetime.now() new_data['zone'] = zone new_docs_data = {} new_docs_data['docs'] = {} new_docs_data['zone'] = zone new_docs_data['created'] = datetime.now() new_graph_data = {} new_graph_data['data'] = {} for i in range(0, len(data['nodes'])): new_graph_data['data'][data['nodes'][i]['id'].replace( ".", REPLACE_CHAR)] = data['nodes'][i] new_docs_data['docs'][data['nodes'][i]['id'].replace( ".", REPLACE_CHAR)] = data['nodes'][i]['docs'] del new_graph_data['data'][data['nodes'][i]['id'].replace( ".", REPLACE_CHAR)]['docs'] new_graph_data['created'] = datetime.now() new_graph_data['zone'] = zone new_graph_data['directed'] = data['directed'] new_graph_data['multigraph'] = data['multigraph'] new_graph_data['errs'] = [] new_links_data = {} new_links_data['links'] = data['links'] new_links_data['created'] = datetime.now() new_links_data['zone'] = zone new_links_data['directed'] = data['directed'] 
new_links_data['multigraph'] = data['multigraph'] new_links_data['errs'] = [] try: graphs_collection.remove({'zone': zone}) graphs_collection.insert_one(new_data) graphs_data_collection.remove({'zone': zone}) graphs_data_collection.insert_one(new_graph_data) graphs_links_collection.remove({'zone': zone}) graphs_links_collection.insert_one(new_links_data) graphs_docs_collection.remove({'zone': zone}) graphs_docs_collection.insert_one(new_docs_data) except: logger.error("ERROR: Can't insert: " + zone) time.sleep(1) # Remove last week's old entries # In theory, shouldn't do anything but being complete lastweek = datetime.now() - timedelta(days=7) graphs_collection.remove({'created': {"$lt": lastweek}}) graphs_data_collection.remove({'created': {"$lt": lastweek}}) graphs_links_collection.remove({'created': {"$lt": lastweek}}) graphs_docs_collection.remove({'created': {"$lt": lastweek}}) # Record status jobs_manager.record_job_complete() now = datetime.now() print("Ending: " + str(now)) logger.info("Complete")
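# Standalone sketch (extracted for illustration; not part of the original module) of
# the grid placement used when building config['constraints'] above: groups are laid
# out on a roughly square ceil(sqrt(n)) grid, with x advancing first and y advancing
# each time x wraps, and coordinates expressed as fraction strings of the canvas.
def _example_constraint_positions(groups):
    """Return the (x, y) fraction strings that the loop above assigns to each group."""
    import math

    positions = {}
    tmp = int(math.ceil(math.sqrt(len(groups)))) + 1
    fractions = [str(round((i * 1.0) / tmp, 2)) for i in range(1, tmp)]

    x_pos = 0
    y_pos = 0
    for group in groups:
        positions[group] = (fractions[x_pos], fractions[y_pos])
        x_pos += 1
        if x_pos >= len(fractions):
            x_pos = 0
            y_pos += 1

    return positions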
def main(): """ Begin Main... """ logger = LoggingUtil.create_log(__name__) now = datetime.now() print("Starting: " + str(now)) logger.info("Starting...") mongo_connector = MongoConnector.MongoConnector() all_dns_collection = mongo_connector.get_all_dns_connection() dns_manager = DNSManager.DNSManager(mongo_connector) GDNS = GoogleDNS.GoogleDNS() ip_manager = IPManager.IPManager(mongo_connector) jobs_manager = JobsManager.JobsManager(mongo_connector, 'remove_expired_entries') jobs_manager.record_job_start() zones = ZoneManager.get_distinct_zones(mongo_connector) # The sources for which to remove expired entries results = mongo_connector.perform_distinct(all_dns_collection, 'sources.source') sources = [] for source in results: temp = {} temp['name'] = source if "common_crawl" in source: temp['diff'] = -4 else: temp['diff'] = -2 sources.append(temp) # Before completely removing old entries, make an attempt to see if they are still valid. # Occasionally, a host name will still be valid but, for whatever reason, is no longer tracked by a source. # Rather than throw away valid information, this will archive it. for entry in sources: removal_date = monthdelta(datetime.now(), entry['diff']) source = entry['name'] logger.debug("Removing " + source + " as of: " + str(removal_date)) last_domain = "" results = all_dns_collection.find({ 'sources': { "$size": 1 }, 'sources.source': source, 'sources.updated': { "$lt": removal_date } }) for result in results: if result['fqdn'] != last_domain: last_domain = result['fqdn'] lookup_int = get_lookup_int(logger, result, GDNS) dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int) if dns_result != []: insert_current_results(dns_result, dns_manager, zones, result, source) dns_manager.remove_all_by_source_and_date(source, entry['diff']) # Get the date for today minus two months d_minus_2m = monthdelta(datetime.now(), -2) logger.info("Removing SRDNS as of: " + str(d_minus_2m)) # Remove the old records srdns_collection = mongo_connector.get_sonar_reverse_dns_connection() srdns_collection.remove({'updated': {"$lt": d_minus_2m}}) ip_manager.delete_records_by_date(d_minus_2m) # Record status jobs_manager.record_job_complete() now = datetime.now() print("Complete: " + str(now)) logger.info("Complete")