Example #1
    def insert_record(self, result, source_name):
        """
        Insert the provided DNS lookup result as a record attributed to the provided source name.
        :param result: The result of a DNS lookup as a JSON object including
                       the fqdn, type, value, zone, and created values.
        :param source_name: The DNS record source ("ssl","virustotal","sonar_dns","common_crawl")
        """
        query = {
            "fqdn": result["fqdn"],
            "type": result["type"],
            "value": result["value"],
        }
        check = self.mongo_connector.perform_find_one(self.all_dns_collection,
                                                      query)

        if check is None:
            result["sources"] = []
            result["sources"].append({})
            result["sources"][0]["source"] = source_name
            result["sources"][0]["updated"] = datetime.now()
            result["updated"] = datetime.now()
            self.mongo_connector.perform_insert(self.all_dns_collection,
                                                result)
        else:
            source_index = -1
            for i in range(0, len(check["sources"])):
                if check["sources"][i]["source"] == source_name:
                    source_index = i
            if source_index != -1:
                name = "sources." + str(source_index) + ".updated"
                entry = {}
                entry[name] = datetime.now()
                self.all_dns_collection.update_one(
                    {"_id": ObjectId(check["_id"])}, {"$set": entry})
                self.all_dns_collection.update_one(
                    {"_id": ObjectId(check["_id"])},
                    {"$set": {
                        "updated": datetime.now()
                    }},
                )
            else:
                entry = {}
                entry["source"] = source_name
                entry["updated"] = datetime.now()
                self.all_dns_collection.update_one(
                    {"_id": ObjectId(check["_id"])},
                    {"$push": {
                        "sources": entry
                    }})
                self.all_dns_collection.update_one(
                    {"_id": ObjectId(check["_id"])},
                    {"$set": {
                        "updated": datetime.now()
                    }},
                )

        if result["type"] == "a" or result["type"] == "aaaa":
            ip_manager = IPManager.IPManager(self.mongo_connector)
            ip_manager.insert_record(result["value"], source_name)
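A hypothetical caller of this method, assuming it is the insert_record method of the DNSManager class used in the later examples, might look like the following sketch; the record values are purely illustrative.

from datetime import datetime

# Hypothetical usage sketch. MongoConnector and DNSManager are the project
# helper modules used throughout these examples (their imports are omitted
# here, as in the examples above); the field values are made up.
mongo_connector = MongoConnector.MongoConnector()
dns_manager = DNSManager.DNSManager(mongo_connector)

result = {
    "fqdn": "www.example.org",
    "type": "a",
    "value": "192.0.2.10",
    "zone": "example.org",
    "created": datetime.now(),
}

# Records are deduplicated on (fqdn, type, value): a new record gets a fresh
# "sources" array, while an existing one only has its matching source entry
# and its "updated" timestamp refreshed.
dns_manager.insert_record(result, "sonar_dns")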
Example #2
    def insert_record(self, result, source_name):
        """
        Insert the provided DNS lookup result as a record attributed to the provided source name.
        :param result: The result of a DNS lookup as a JSON object including
                       the fqdn, type, value, zone, and created values.
        :param source_name: The DNS record source ("ssl","virustotal","sonar_dns","common_crawl")
        """
        query = {
            'fqdn': result['fqdn'],
            'type': result['type'],
            'value': result['value']
        }
        check = self.mongo_connector.perform_find_one(self.all_dns_collection,
                                                      query)

        if check is None:
            result['sources'] = []
            result['sources'].append({})
            result['sources'][0]['source'] = source_name
            result['sources'][0]['updated'] = datetime.now()
            result['updated'] = datetime.now()
            self.all_dns_collection.insert(result)
        else:
            source_index = -1
            for i in range(0, len(check['sources'])):
                if check['sources'][i]['source'] == source_name:
                    source_index = i
            if source_index != -1:
                name = 'sources.' + str(source_index) + '.updated'
                entry = {}
                entry[name] = datetime.now()
                self.all_dns_collection.update({'_id': ObjectId(check['_id'])},
                                               {"$set": entry})
                self.all_dns_collection.update(
                    {'_id': ObjectId(check['_id'])},
                    {"$set": {
                        'updated': datetime.now()
                    }})
            else:
                entry = {}
                entry['source'] = source_name
                entry['updated'] = datetime.now()
                self.all_dns_collection.update({'_id': ObjectId(check['_id'])},
                                               {'$push': {
                                                   'sources': entry
                                               }})
                self.all_dns_collection.update(
                    {'_id': ObjectId(check['_id'])},
                    {"$set": {
                        'updated': datetime.now()
                    }})

        if result['type'] == 'a' or result['type'] == 'aaaa':
            ip_manager = IPManager.IPManager(self.mongo_connector)
            ip_manager.insert_record(result['value'], source_name)
def create_network_data_sets(groups, mongo_connector):
    """
    Group results based on network type ("Tracked", "AWS", "Akamai", "Azure", or "GCP")
    """
    group_data = {}
    group_data['aws_count'] = 0
    group_data['tracked_count'] = 0
    group_data['akamai_count'] = 0
    group_data['azure_count'] = 0
    group_data['gcp_count'] = 0

    ip_manager = IPManager.IPManager(mongo_connector)

    for group in groups:
        cidr = group.replace(REPLACE_CHAR, ".")
        fake_ip = cidr + ".1"
        group_data[group] = {}
        group_data[group]['class_c'] = cidr
        if ip_manager.is_aws_ip(fake_ip):
            group_data[group]['aws'] = True
            group_data['aws_count'] = group_data['aws_count'] + 1
        else:
            group_data[group]['aws'] = False

        if ip_manager.is_azure_ip(fake_ip):
            group_data[group]['azure'] = True
            group_data['azure_count'] = group_data['azure_count'] + 1
        else:
            group_data[group]['azure'] = False

        if ip_manager.is_akamai_ip(fake_ip):
            group_data['akamai_count'] = group_data['akamai_count'] + 1
            group_data[group]['akamai'] = True
        else:
            group_data[group]['akamai'] = False

        if ip_manager.is_tracked_ip(fake_ip):
            group_data[group]['tracked'] = True
            group_data['tracked_count'] = group_data['tracked_count'] + 1
        else:
            group_data[group]['tracked'] = False

        if ip_manager.is_gcp_ip(fake_ip):
            group_data[group]['gcp'] = True
            group_data['gcp_count'] = group_data['gcp_count'] + 1
        else:
            group_data[group]['gcp'] = False

    return group_data
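A short hypothetical call of this helper. REPLACE_CHAR stands in for the module-level constant that encodes dots in group names; the value assigned below is only an assumption for the sketch.

# Hypothetical usage sketch; group names are /24 prefixes with "." replaced
# by the module's REPLACE_CHAR constant (value assumed here).
REPLACE_CHAR = "_"
groups = ["192_0_2", "198_51_100"]

mongo_connector = MongoConnector.MongoConnector()  # project module, as above
group_data = create_network_data_sets(groups, mongo_connector)

# Per-group flags plus aggregate counters:
print(group_data["192_0_2"]["class_c"], group_data["192_0_2"]["tracked"])
print(group_data["aws_count"], group_data["akamai_count"])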
Example #4
def main():
    """
    Begin Main...
    """

    # The sources for which to remove expired entries
    # Infoblox is handled separately
    # {"source_name": date_difference_in_months}
    sources = [{
        "name": "sonar_dns",
        "diff": -2
    }, {
        "name": "sonar_dns_saved",
        "diff": -2
    }, {
        "name": "sonar_rdns",
        "diff": -2
    }, {
        "name": "sonar_rdns_saved",
        "diff": -2
    }, {
        "name": "ssl",
        "diff": -2
    }, {
        "name": "ssl_saved",
        "diff": -2
    }, {
        "name": "virustotal",
        "diff": -2
    }, {
        "name": "virustotal_saved",
        "diff": -2
    }, {
        "name": "UltraDNS",
        "diff": -2
    }, {
        "name": "UltraDNS_saved",
        "diff": -2
    }, {
        "name": "marinus",
        "diff": -2
    }, {
        "name": "marinus_saved",
        "diff": -2
    }, {
        "name": "mx",
        "diff": -2
    }, {
        "name": "mx_saved",
        "diff": -2
    }, {
        "name": "common_crawl",
        "diff": -4
    }, {
        "name": "common_crawl_saved",
        "diff": -4
    }]

    amass_diff = -2

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    GDNS = GoogleDNS.GoogleDNS()
    ip_manager = IPManager.IPManager(mongo_connector)

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'remove_expired_entries')
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Get the date for today minus two months
    d_minus_2m = monthdelta(datetime.now(), -2)

    print("Removing SRDNS as of: " + str(d_minus_2m))

    # Remove the old records
    srdns_collection = mongo_connector.get_sonar_reverse_dns_connection()
    srdns_collection.remove({'updated': {"$lt": d_minus_2m}})

    ip_manager.delete_records_by_date(d_minus_2m)

    # Before completely removing old entries, make an attempt to see if they are still valid.
    # Occasionally, a host name will still be valid but, for whatever reason, is no longer tracked by a source.
    # Rather than throw away valid information, this will archive it.
    for entry in sources:
        removal_date = monthdelta(datetime.now(), entry['diff'])
        source = entry['name']
        print("Removing " + source + " as of: " + str(removal_date))

        last_domain = ""
        results = all_dns_collection.find({
            'sources': {
                "$size": 1
            },
            'sources.source': source,
            'sources.updated': {
                "$lt": removal_date
            }
        })
        for result in results:
            if result['fqdn'] != last_domain:
                last_domain = result['fqdn']

                lookup_int = get_lookup_int(result, GDNS)
                dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int)

                if dns_result != []:
                    insert_current_results(dns_result, dns_manager, zones,
                                           result, source)

        dns_manager.remove_all_by_source_and_date(source, entry['diff'])

    # Process amass entries
    temp_sources = mongo_connector.perform_distinct(all_dns_collection,
                                                    'sources.source')
    amass_sources = []
    for entry in temp_sources:
        if entry.startswith("amass:"):
            amass_sources.append(entry)

    for source in amass_sources:
        removal_date = monthdelta(datetime.now(), amass_diff)
        print("Removing " + source + " as of: " + str(removal_date))

        last_domain = ""
        results = mongo_connector.perform_find(
            all_dns_collection, {
                'sources': {
                    "$size": 1
                },
                'sources.source': source,
                'sources.updated': {
                    "$lt": removal_date
                }
            })
        for result in results:
            if result['fqdn'] != last_domain:
                last_domain = result['fqdn']

                lookup_int = get_lookup_int(result, GDNS)
                dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int)

                if dns_result != []:
                    insert_current_results(dns_result, dns_manager, zones,
                                           result, source)

        dns_manager.remove_all_by_source_and_date(source, amass_diff)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
Example #5
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_retest_list
    global global_sleep_time
    global global_queue_size
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    global_retest_list = []

    parser = argparse.ArgumentParser(
        description="Launch zgrab against IPs using port 22, 25, 443, or 465.")
    parser.add_argument(
        "-p",
        choices=["22", "25", "443", "465"],
        metavar="port",
        help="The port to scan: 22, 25, 443, or 465",
    )
    parser.add_argument("-t",
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help="The number of threads")
    parser.add_argument(
        "--mx",
        action="store_true",
        help="Scan only IPs from MX records. Useful for SMTP scans.",
    )
    parser.add_argument(
        "-s",
        default=0,
        type=int,
        metavar="sleepTime",
        help="Sleep time in order to spread out the batches",
    )
    parser.add_argument(
        "--qs",
        default=0,
        type=int,
        metavar="queueSize",
        help="How many hosts to scan in a batch",
    )
    parser.add_argument("--zones_only",
                        action="store_true",
                        help="Scan only IPs from IP zones.")
    parser.add_argument(
        "--zgrab_path",
        default=global_zgrab_path,
        metavar="zgrabVersion",
        help="The version of ZGrab to use",
    )
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (22, 25, 443, or 465) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    ip_manager = IPManager.IPManager(rm_connector, True)

    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_port_ip-" + args.p)
    jobs_manager.record_job_start()

    zones_struct = {}
    zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector)

    # Not pretty but works
    zones_struct["ip_manager"] = ip_manager

    if args.mx:
        (ips, ip_context) = get_mx_ips(zones_struct["zones"], ip_manager,
                                       all_dns_collection)
    elif args.zones_only:
        (ips, ip_context) = get_only_ipzones(ip_manager.Tracked_CIDRs)
    else:
        (ips, ip_context) = get_ips(ip_manager, all_dns_collection)

    if args.s and int(args.s) > 0:
        global_sleep_time = int(args.s)

    if args.qs and int(args.qs) > 0:
        global_queue_size = int(args.qs)

    logger.info("Got IPs: " + str(len(ips)))
    zones_struct["ip_context"] = ip_context

    zgrab_collection = rm_connector.get_zgrab_port_data_connection()
    if args.p == "443":
        run_command = run_port_443_command
    elif args.p == "22":
        run_command = run_port_22_command
    elif args.p == "25":
        run_command = run_port_25_command
    elif args.p == "465":
        run_command = run_port_465_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    threads = []

    logger.debug("Creating " + str(args.t) + " threads")
    for thread_id in range(1, args.t + 1):
        thread = ZgrabThread(
            thread_id,
            global_work_queue,
            args.p,
            run_command,
            zones_struct,
            zgrab_collection,
        )
        thread.start()
        threads.append(thread)
        thread_id += 1

    logger.info("Populating Queue")
    global_queue_lock.acquire()
    for ip in ips:
        global_work_queue.put(ip)
    global_queue_lock.release()

    # Wait for queue to empty
    while not global_work_queue.empty():
        pass

    # Notify threads it's time to exit
    global_exit_flag = 1

    # Wait for all threads to complete
    for t in threads:
        t.join()

    logger.info("Exiting Main Thread")

    logger.info("Global retest list: " + str(len(global_retest_list)))

    # Retest any SMTP hosts that did not respond to the StartTLS handshake
    if args.p == "25" and len(global_retest_list) > 0:
        process_thread(
            logger,
            global_retest_list,
            args.p,
            run_port_25_no_tls_command,
            zones_struct,
            zgrab_collection,
            "retest",
        )

    # Remove old entries from before the scan
    if args.p == "443":
        other_results = zgrab_collection.find({
            "data.tls": {
                "$exists": True
            },
            "data.tls.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.tls": ""
                                        }})
    elif args.p == "22":
        if "zgrab2" in global_zgrab_path:
            other_results = zgrab_collection.find({
                "data.ssh": {
                    "$exists": True
                },
                "data.ssh.timestamp": {
                    "$lt": now
                }
            })
            for result in other_results:
                zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                            {"$unset": {
                                                "data.ssh": ""
                                            }})
        else:
            other_results = zgrab_collection.find({
                "data.xssh": {
                    "$exists": True
                },
                "data.xssh.timestamp": {
                    "$lt": now
                }
            })
            for result in other_results:
                zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                            {"$unset": {
                                                "data.xssh": ""
                                            }})
    elif args.p == "25":
        other_results = zgrab_collection.find({
            "data.smtp": {
                "$exists": True
            },
            "data.smtp.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.smtp": ""
                                        }})
    elif args.p == "465":
        other_results = zgrab_collection.find({
            "data.smtps": {
                "$exists": True
            },
            "data.smtps.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.smtps": ""
                                        }})

    # Remove any completely empty entries
    zgrab_collection.delete_many({"data": {}})

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
def main():
    """
    Begin main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running("get_censys_files.py"):
        """
        Check to see if a download is in process...
        """
        logger.warning("Can't run due to get_files running. Goodbye!")
        exit(0)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        logger.warning("I am already running! Goodbye!")
        exit(0)

    # Make the relevant database connections
    RMC = RemoteMongoConnector.RemoteMongoConnector()

    ip_manager = IPManager.IPManager(RMC)

    # Verify that the get_files script has a recent file in need of parsing.
    jobs_collection = RMC.get_jobs_connection()

    status = jobs_collection.find_one({"job_name": "censys"})
    if status["status"] != "DOWNLOADED":
        logger.warning("The status is not set to DOWNLOADED. Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Collect the list of available zones
    zones = ZoneManager.get_distinct_zones(RMC)

    logger.info("Zones: " + str(len(zones)))

    # Get the current configuration information for Marinus.
    config_collection = RMC.get_config_connection()

    configs = config_collection.find({})
    orgs = []
    for org in configs[0]["SSL_Orgs"]:
        orgs.append(org)

    logger.info("Orgs: " + str(len(orgs)))

    # Obtain the name of the decompressed file.
    filename_f = open(FILENAME_FILE, "r")
    decompressed_file = filename_f.readline()
    filename_f.close()

    # For manual testing: decompressed_file = "ipv4.json"

    logger.info("Beginning file processing...")

    # Remove old results from the database
    results_collection = RMC.get_results_connection()
    results_collection.delete_many({})
    all_dns_collection = RMC.get_all_dns_connection()

    try:
        with open(decompressed_file, "r") as dec_f:
            for line in dec_f:
                try:
                    entry = json.loads(line)
                    """
                    Does the SSL certificate match a known organization?
                    Is the IP address in a known CIDR?
                    Is the IP address recorded in Splunk?
                    """
                    if (check_in_org(entry, orgs)
                            or ip_manager.is_tracked_ip(entry["ip"])
                            or ip_manager.find_splunk_data(entry["ip"], "AWS")
                            is not None or ip_manager.find_splunk_data(
                                entry["ip"], "AZURE") is not None):
                        entry["zones"] = check_in_zone(entry, zones)
                        entry["aws"] = ip_manager.is_aws_ip(entry["ip"])
                        entry["azure"] = ip_manager.is_azure_ip(entry["ip"])
                        (domains,
                         zones) = lookup_domain(entry, zones,
                                                all_dns_collection)
                        if len(domains) > 0:
                            entry["domains"] = domains
                            if len(zones) > 0:
                                for zone in zones:
                                    if zone not in entry["zones"]:
                                        entry["zones"].append(zone)
                        insert_result(entry, results_collection)
                    # else:
                    #     #This will add days to the amount of time necessary to scan the file.
                    #     matched_zones = check_in_zone(entry, zones)
                    #     if matched_zones != []:
                    #         entry['zones'] = matched_zones
                    #         entry['aws'] = ip_manager.is_aws_ip(entry['ip'])
                    #         entry['azure'] = ip_manager.is_azure_ip(entry['ip'])
                    #         insert_result(entry, results_collection)
                except ValueError as err:
                    logger.error("Value Error!")
                    logger.error(str(err))
                except:
                    logger.error("Line unexpected error: " +
                                 str(sys.exc_info()[0]))
                    logger.error("Line unexpected error: " +
                                 str(sys.exc_info()[1]))
    except IOError as err:
        logger.error("I/O error({0}): {1}".format(err.errno, err.strerror))
        exit(1)
    except:
        logger.error("Unexpected error: " + str(sys.exc_info()[0]))
        logger.error("Unexpected error: " + str(sys.exc_info()[1]))
        exit(1)

    # Indicate that the processing of the job is complete and ready for download to Marinus
    jobs_collection.update_one(
        {"job_name": "censys"},
        {
            "$currentDate": {
                "updated": True
            },
            "$set": {
                "status": "COMPLETE"
            }
        },
    )

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running(os.path.basename(__file__)):
        logger.warning("Already running...")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    r7 = Rapid7.Rapid7()

    mongo_connection = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connection)
    ip_manager = IPManager.IPManager(mongo_connection)
    rdns_collection = mongo_connection.get_sonar_reverse_dns_connection()

    zones = ZoneManager.get_distinct_zones(mongo_connection)
    logger.info("Zone length: " + str(len(zones)))

    save_directory = "./files/"

    parser = argparse.ArgumentParser(
        description='Parse Sonar files based on CIDRs.')
    parser.add_argument('--sonar_file_type',
                        required=True,
                        help='Specify "dns" or "rdns"')
    args = parser.parse_args()

    check_save_location(save_directory)

    # A session is necessary for the multi-step log-in process
    s = requests.Session()

    if args.sonar_file_type == "rdns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_rdns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "rdns", jobs_manager)
            if html_parser.rdns_url == "":
                logger.error("Unknown Error")
                jobs_manager.record_job_error()
                exit(0)

            unzipped_rdns = download_remote_files(logger, s,
                                                  html_parser.rdns_url,
                                                  save_directory, jobs_manager)
            update_rdns(logger, unzipped_rdns, rdns_collection, dns_manager,
                        ip_manager, zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        logger.info("RDNS Complete")
        jobs_manager.record_job_complete()

    elif args.sonar_file_type == "dns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_dns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.any_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.any_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.a_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.a_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.aaaa_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.aaaa_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))

            jobs_manager.record_job_error()
            exit(0)

        logger.info("DNS Complete")

        jobs_manager.record_job_complete()

    else:
        logger.error("Unrecognized sonar_file_type option. Exiting...")

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #8
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    parser = argparse.ArgumentParser(
        description="Launch zgrab against IPs using port 80 or 443.")
    parser.add_argument("-p",
                        choices=["443", "80"],
                        metavar="port",
                        help="The web port: 80 or 443")
    parser.add_argument("-t",
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help="The number of threads")
    parser.add_argument(
        "--zgrab_path",
        default=global_zgrab_path,
        metavar="zgrabVersion",
        help="The version of ZGrab to use",
    )
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (80 or 443) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    ip_manager = IPManager.IPManager(rm_connector, True)

    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_http_ip-" + args.p)
    jobs_manager.record_job_start()

    zones_struct = {}
    zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector)

    # Not pretty but cleaner than previous method
    zones_struct["ip_manager"] = ip_manager

    (ips, ip_context) = get_ips(ip_manager, all_dns_collection)
    logger.info("Got IPs: " + str(len(ips)))
    zones_struct["ip_context"] = ip_context

    if args.p == "443":
        zgrab_collection = rm_connector.get_zgrab_443_data_connection()
        run_command = run_port_443_command
    else:
        zgrab_collection = rm_connector.get_zgrab_80_data_connection()
        run_command = run_port_80_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    threads = []

    logger.debug("Creating " + str(args.t) + " threads")
    for thread_id in range(1, args.t + 1):
        thread = ZgrabThread(
            thread_id,
            global_work_queue,
            args.p,
            run_command,
            zones_struct,
            zgrab_collection,
        )
        thread.start()
        threads.append(thread)
        thread_id += 1

    logger.info("Populating Queue")
    global_queue_lock.acquire()
    for ip in ips:
        global_work_queue.put(ip)
    global_queue_lock.release()

    # Wait for queue to empty
    while not global_work_queue.empty():
        pass

    # Notify threads it's time to exit
    global_exit_flag = 1

    # Wait for all threads to complete
    for t in threads:
        t.join()

    logger.info("Exiting Main Thread")

    # Remove last week's old entries
    lastweek = datetime.now() - timedelta(days=7)
    zgrab_collection.delete_many({
        "ip": {
            "$ne": "<nil>"
        },
        "timestamp": {
            "$lt": lastweek
        }
    })

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    parser = argparse.ArgumentParser(
        description='Launch zgrab against domains using port 80 or 443.')
    parser.add_argument('-p',
                        choices=['443', '80'],
                        metavar="port",
                        help='The web port: 80 or 443')
    parser.add_argument('-t',
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help='The number of threads')
    parser.add_argument('--zgrab_path',
                        default=global_zgrab_path,
                        metavar='zgrabVersion',
                        help='The version of ZGrab to use')
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (80 or 443) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_http_domain-" + args.p)
    jobs_manager.record_job_start()

    if args.p == "443":
        zgrab_collection = rm_connector.get_zgrab_443_data_connection()
        run_command = run_port_443_command
    else:
        zgrab_collection = rm_connector.get_zgrab_80_data_connection()
        run_command = run_port_80_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    zones = ZoneManager.get_distinct_zones(rm_connector)
    ip_manager = IPManager.IPManager(rm_connector)

    for zone in zones:
        global_exit_flag = 0

        domains = get_domains(all_dns_collection, ip_manager, zone)

        if len(domains) == 0:
            continue

        num_threads = args.t
        if len(domains) < args.t:
            num_threads = len(domains)

        logger.debug("Creating " + str(num_threads) + " threads")

        threads = []
        for thread_id in range(1, num_threads + 1):
            thread = ZgrabThread(thread_id, global_work_queue, args.p,
                                 run_command, zone, zgrab_collection)
            thread.start()
            threads.append(thread)
            thread_id += 1

        logger.debug(zone + " length: " + str(len(domains)))

        logger.info("Populating Queue")
        global_queue_lock.acquire()
        for domain in domains:
            global_work_queue.put(domain)
        global_queue_lock.release()

        # Wait for queue to empty
        while not global_work_queue.empty():
            pass

        logger.info("Queue empty")
        # Notify threads it's time to exit
        global_exit_flag = 1

        # Wait for all threads to complete
        for t in threads:
            t.join()

    # Remove last week's old entries
    lastweek = datetime.now() - timedelta(days=7)
    zgrab_collection.remove({
        'domain': {
            "$ne": "<nil>"
        },
        'timestamp': {
            "$lt": lastweek
        }
    })

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #10
def main():
    """
    Begin Main
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    graphs_collection = mongo_connector.get_graphs_connection()
    graphs_data_collection = mongo_connector.get_graphs_data_connection()
    graphs_links_collection = mongo_connector.get_graphs_links_connection()
    graphs_docs_collection = mongo_connector.get_graphs_docs_connection()
    ip_manager = IPManager.IPManager(mongo_connector)

    jobs_manager = JobsManager.JobsManager(mongo_connector, 'create_graphs2')
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    for zone in zones:
        groups = []
        graph = nx.Graph()
        add_to_list(zone, groups)
        graph.add_node(zone,
                       data_type="tld",
                       type=0,
                       depends=[],
                       dependedOnBy=[],
                       docs="<h1>Parent</h1>")
        find_all_dns_by_zone(graph, zone, groups, dns_manager, ip_manager)
        find_srdns_by_zone(graph, zone, groups, mongo_connector, ip_manager)

        data = json_graph.node_link_data(graph)

        reformat_data(data, zone, groups)

        new_data = {}
        new_data['directed'] = data['directed']
        new_data['graph'] = data['graph']
        new_data['multigraph'] = data['multigraph']
        new_data['errs'] = []

        config = {}
        config['title'] = zone + " Network Map"
        config['graph'] = {}
        config['graph']['linkDistance'] = 150
        config['graph']['charge'] = -400
        config['graph']['height'] = 800
        config['graph']['numColors'] = len(groups)
        config['graph']['labelPadding'] = {
            "left": 3,
            "right": 3,
            "top": 2,
            "bottom": 2
        }
        config['graph']['labelMargin'] = {
            "left": 3,
            "right": 3,
            "top": 2,
            "bottom": 2
        }
        config['graph']['ticksWithoutCollisions'] = 50
        config['graph_type'] = "tracked_domain"

        config['types'] = {}
        regex_str = "^[0-9]+\\.[0-9]+\\.[0-9]+$"
        regx = re.compile(regex_str)
        for tgroup in groups:
            group = tgroup.replace(REPLACE_CHAR, ".")
            data_type = "tpd"
            if group in zones:
                data_type = "tracked_domain"
            elif re.match(regx, group):
                data_type = "cidr"
            config['types'][tgroup] = {
                "short": group,
                "long": "A group from the network: " + group,
                "data_type": data_type
            }

        config['constraints'] = []
        tmp = int(math.ceil(math.sqrt(len(groups)))) + 1
        x = []
        y = []
        for i in range(1, tmp):
            val = round((i * 1.0) / tmp, 2)
            x.append(str(val))
            y.append(str(val))
        x_pos = 0
        y_pos = 0
        for group in groups:
            config['constraints'].append({
                "has": {
                    "type": group
                },
                "type": "position",
                "x": x[x_pos],
                "y": y[y_pos]
            })
            x_pos = x_pos + 1
            if x_pos >= len(x):
                x_pos = 0
                y_pos = y_pos + 1

        config['jsonUrl'] = "/api/v1.0/graphs/" + zone

        new_data['config'] = config
        new_data['created'] = datetime.now()
        new_data['zone'] = zone

        new_docs_data = {}
        new_docs_data['docs'] = {}
        new_docs_data['zone'] = zone
        new_docs_data['created'] = datetime.now()

        new_graph_data = {}
        new_graph_data['data'] = {}
        for i in range(0, len(data['nodes'])):
            new_graph_data['data'][data['nodes'][i]['id'].replace(
                ".", REPLACE_CHAR)] = data['nodes'][i]
            new_docs_data['docs'][data['nodes'][i]['id'].replace(
                ".", REPLACE_CHAR)] = data['nodes'][i]['docs']
            del new_graph_data['data'][data['nodes'][i]['id'].replace(
                ".", REPLACE_CHAR)]['docs']
        new_graph_data['created'] = datetime.now()
        new_graph_data['zone'] = zone
        new_graph_data['directed'] = data['directed']
        new_graph_data['multigraph'] = data['multigraph']
        new_graph_data['errs'] = []

        new_links_data = {}
        new_links_data['links'] = data['links']
        new_links_data['created'] = datetime.now()
        new_links_data['zone'] = zone
        new_links_data['directed'] = data['directed']
        new_links_data['multigraph'] = data['multigraph']
        new_links_data['errs'] = []

        try:
            graphs_collection.remove({'zone': zone})
            graphs_collection.insert_one(new_data)

            graphs_data_collection.remove({'zone': zone})
            graphs_data_collection.insert_one(new_graph_data)

            graphs_links_collection.remove({'zone': zone})
            graphs_links_collection.insert_one(new_links_data)

            graphs_docs_collection.remove({'zone': zone})
            graphs_docs_collection.insert_one(new_docs_data)
        except:
            logger.error("ERROR: Can't insert: " + zone)

        time.sleep(1)

    # Remove last week's old entries
    # In theory this should not match anything, but it is included for completeness
    lastweek = datetime.now() - timedelta(days=7)
    graphs_collection.remove({'created': {"$lt": lastweek}})
    graphs_data_collection.remove({'created': {"$lt": lastweek}})
    graphs_links_collection.remove({'created': {"$lt": lastweek}})
    graphs_docs_collection.remove({'created': {"$lt": lastweek}})

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete")
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    GDNS = GoogleDNS.GoogleDNS()
    ip_manager = IPManager.IPManager(mongo_connector)

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'remove_expired_entries')
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # The sources for which to remove expired entries
    results = mongo_connector.perform_distinct(all_dns_collection,
                                               'sources.source')

    sources = []
    for source in results:
        temp = {}
        temp['name'] = source
        if "common_crawl" in source:
            temp['diff'] = -4
        else:
            temp['diff'] = -2

        sources.append(temp)

    # Before completely removing old entries, make an attempt to see if they are still valid.
    # Occasionally, a host name will still be valid but, for whatever reason, is no longer tracked by a source.
    # Rather than throw away valid information, this will archive it.
    for entry in sources:
        removal_date = monthdelta(datetime.now(), entry['diff'])
        source = entry['name']
        logger.debug("Removing " + source + " as of: " + str(removal_date))

        last_domain = ""
        results = all_dns_collection.find({
            'sources': {
                "$size": 1
            },
            'sources.source': source,
            'sources.updated': {
                "$lt": removal_date
            }
        })
        for result in results:
            if result['fqdn'] != last_domain:
                last_domain = result['fqdn']

                lookup_int = get_lookup_int(logger, result, GDNS)
                dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int)

                if dns_result != []:
                    insert_current_results(dns_result, dns_manager, zones,
                                           result, source)

        dns_manager.remove_all_by_source_and_date(source, entry['diff'])

    # Get the date for today minus two months
    d_minus_2m = monthdelta(datetime.now(), -2)

    logger.info("Removing SRDNS as of: " + str(d_minus_2m))

    # Remove the old records
    srdns_collection = mongo_connector.get_sonar_reverse_dns_connection()
    srdns_collection.remove({'updated': {"$lt": d_minus_2m}})

    ip_manager.delete_records_by_date(d_minus_2m)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete")