Example #1
def main():
    """
    Begin main...
    """
    logger = LoggingUtil.create_log(__name__)

    # Make database connections
    mongo_connector = MongoConnector.MongoConnector()

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    jobs_manager = JobsManager.JobsManager(mongo_connector, 'get_aws_data')
    jobs_manager.record_job_start()

    # Download the JSON file
    req = requests.get(JSON_LOCATION)

    if req.status_code != 200:
        logger.error("Bad Request")
        jobs_manager.record_job_error()
        exit(1)

    # Convert the response to JSON
    json_data = json.loads(req.text)

    # Replace the old entries with the new entries
    aws_collection = mongo_connector.get_aws_ips_connection()
    aws_collection.delete_many({})
    aws_collection.insert_one(json_data)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #2
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Obtain the list of known email addresses from the config collection
    MC = MongoConnector.MongoConnector()
    PT = PassiveTotal.PassiveTotal()
    zi = ZoneIngestor.ZoneIngestor()
    config_collection = MC.get_config_connection()
    config = config_collection.find_one({})

    jobs_manager = JobsManager.JobsManager(MC, 'get_passivetotal_data')
    jobs_manager.record_job_start()

    # Perform a search for each known email address, org, and name server
    for email in config['DNS_Admins']:
        search_pt_email(logger, email, PT, zi, jobs_manager)

    for org in config['Whois_Orgs']:
        search_pt_org(logger, org, PT, zi, jobs_manager)

    for name_server in config['Whois_Name_Servers']:
        search_pt_nameserver(logger, name_server, config['Whois_Orgs'],
                             PT, zi, jobs_manager)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #3
def main():
    """
    Begin main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))

    parser = argparse.ArgumentParser(
        description="Search Splunk logs for IP address")
    parser.add_argument(
        "--collection_name",
        choices=["http_80", "http_443"],
        metavar="COLLECTION",
        required=True,
        help="The collection to upload to Splunk",
    )
    args = parser.parse_args()

    mongo_connector = MongoConnector.MongoConnector()
    splunk_manager = SplunkHECManager.SplunkHECManager()

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           "splunk_headers_upload")
    jobs_manager.record_job_start()

    if args.collection_name == "http_443":
        upload_zgrab_443(logger, splunk_manager, mongo_connector)
    elif args.collection_name == "http_80":
        upload_zgrab_80(logger, splunk_manager, mongo_connector)

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #4
class APIHelper(object):

    _logger = logging.getLogger(__name__)

    MC = MongoConnector.MongoConnector()

    INCORRECT_RESPONSE_JSON_ALLOWED = 20

    def handle_api_error(self, err, job_name):
        """
        Exits the script after recording the error status in the database.
        :param err: Exception causing script exit.
        :param job_name: The name of the job whose error status is recorded before exit.
        """
        self._logger.error(err)
        self._logger.error('Exiting script execution.')
        jobs_manager = JobsManager.JobsManager(self.MC, job_name)
        jobs_manager.record_job_error()
        exit(1)

    def connection_error_retry(self, details):
        self._logger.error(
            'Connection Error encountered. Retrying in {wait:0.1f} seconds'.
            format(**details))
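
# A minimal usage sketch (not part of the original source) of how APIHelper's
# connection_error_retry is meant to be wired up. The backoff package calls
# on_backoff handlers with a `details` dict containing keys such as 'wait' and
# 'tries'; that is what the format(**details) call above consumes. Assumes the
# APIHelper class defined above is in scope and backoff/requests are installed.
import backoff
import requests

API_HELPER = APIHelper()

@backoff.on_exception(backoff.expo,
                      requests.exceptions.ConnectionError,
                      max_tries=4,
                      on_backoff=API_HELPER.connection_error_retry)
def fetch_with_retry(url):
    # Hypothetical call site: each ConnectionError triggers the retry hook.
    return requests.get(url, timeout=30)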
Example #5
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    parser = argparse.ArgumentParser(
        description="Send specific collections to the remote MongoDB. If no arguments are provided, then all data is mirrored."
    )
    parser.add_argument(
        "--send_zones", action="store_true", required=False, help="Send IP zones"
    )
    parser.add_argument(
        "--send_ip_zones", action="store_true", required=False, help="Send IP zones"
    )
    parser.add_argument(
        "--send_third_party_zones",
        action="store_true",
        required=False,
        help="Send AWS, Azure, etc.",
    )
    parser.add_argument(
        "--send_config", action="store_true", required=False, help="Send configs"
    )
    parser.add_argument(
        "--send_dns_records",
        action="store_true",
        required=False,
        help="Replace all DNS records",
    )
    parser.add_argument(
        "--send_dns_diff",
        action="store_true",
        required=False,
        help="Send new DNS records",
    )
    parser.add_argument(
        "--date_diff",
        default=2,
        type=int,
        help="The number of days used for identifying new records in send_dns_diff",
    )
    args = parser.parse_args()

    send_all = False
    if len(sys.argv) == 1:
        send_all = True

    mongo_connector = MongoConnector.MongoConnector()
    remote_mongo_connector = RemoteMongoConnector.RemoteMongoConnector()

    jobs_manager = JobsManager.JobsManager(mongo_connector, "send_remote_server")
    jobs_manager.record_job_start()

    if send_all or args.send_zones:
        try:
            zone_list = update_zones(
                logger, mongo_connector, remote_mongo_connector, True
            )
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending zones"
            )
            jobs_manager.record_job_error()
            exit(1)
    else:
        zone_list = update_zones(logger, mongo_connector, remote_mongo_connector, False)

    if send_all or args.send_ip_zones:
        try:
            update_ip_zones(logger, mongo_connector, remote_mongo_connector)
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending IP zones"
            )
            jobs_manager.record_job_error()
            exit(1)

    if send_all or args.send_third_party_zones:
        try:
            update_aws_cidrs(logger, mongo_connector, remote_mongo_connector)
            update_azure_cidrs(logger, mongo_connector, remote_mongo_connector)
            update_akamai_cidrs(logger, mongo_connector, remote_mongo_connector)
            update_gcp_cidrs(logger, mongo_connector, remote_mongo_connector)
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending third-party zones"
            )
            jobs_manager.record_job_error()
            exit(1)

    if send_all or args.send_config:
        try:
            update_config(logger, mongo_connector, remote_mongo_connector)
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending config data"
            )
            jobs_manager.record_job_error()
            exit(1)

    # This will completely repopulate the DNS records table.
    if args.send_dns_records:
        try:
            update_all_dns(logger, mongo_connector, remote_mongo_connector, zone_list)
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending DNS records"
            )
            jobs_manager.record_job_error()
            exit(1)

    # If you have a large data set, then you may only want to send updated records
    if send_all or args.send_dns_diff:
        try:
            update_all_dns_diff_mode(
                logger,
                mongo_connector,
                remote_mongo_connector,
                zone_list,
                args.date_diff,
            )
        except Exception:
            logger.error(
                "Could not communicate with the remote database when sending DNS diff records"
            )
            jobs_manager.record_job_error()
            exit(1)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #6
def main():
    """
    Begin Main()
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    mongo_ct = mongo_connector.get_certificate_transparency_connection()
    cert_graphs_collection = mongo_connector.get_cert_graphs_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           "create_cert_graphs")
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    parser = argparse.ArgumentParser(
        description=
        "Creates and stores certificate graphs in the database based on one or more sources."
    )
    parser.add_argument(
        "--check_censys",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the Censys collection in the database",
    )
    parser.add_argument(
        "--check_443_scans",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the zgrab collection in the database",
    )
    parser.add_argument(
        "--check_ct_scans",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the CT collection in the database",
    )
    parser.add_argument(
        "--zgrab_version",
        default=2,
        type=int,
        choices=[1, 2],
        metavar="version",
        help="The version of ZGrab used to collect data",
    )
    args = parser.parse_args()

    if args.check_censys:
        censys_collection = mongo_connector.get_censys_connection()

    if args.check_443_scans:
        zgrab_collection = mongo_connector.get_zgrab_443_data_connection()

    for zone in zones:
        logger.info("Creating: " + zone)
        graph = nx.DiGraph()

        certs_list = {}

        if args.check_ct_scans:
            certs_list = get_current_ct_certificates(mongo_ct, zone)
        if args.check_censys:
            certs_list = add_censys_certificates(censys_collection, zone,
                                                 certs_list)
        if args.check_443_scans:
            if args.zgrab_version == 1:
                certs_list = add_terminal_zgrab_certificates(
                    zgrab_collection, zone, certs_list)
                certs_list = add_initial_zgrab_certificates(
                    zgrab_collection, zone, certs_list)
            else:
                certs_list = add_terminal_zgrab2_certificates(
                    zgrab_collection, zone, certs_list)
                certs_list = add_initial_zgrab2_certificates(
                    zgrab_collection, zone, certs_list)

        graph = create_nodes(graph, mongo_connector, zone, certs_list)
        data = json_graph.node_link_data(graph)

        my_data = {}
        my_data["links"] = data["links"]
        my_data["nodes"] = data["nodes"]
        my_data["zone"] = zone
        my_data["created"] = datetime.now()

        cert_graphs_collection.delete_one({"zone": zone})
        mongo_connector.perform_insert(cert_graphs_collection, my_data)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Example #7
class InfobloxZone(object):
    alphabets = list(string.ascii_lowercase + string.digits)
    alphabet_queried = None
    APIH = APIHelper.APIHelper()
    IH = InfobloxHelper.InfobloxHelper()

    _logger = None

    # Connect to the database
    MC = MongoConnector.MongoConnector()
    zone_collection = MC.get_zone_connection()
    ip_collection = MC.get_ipzone_connection()
    job_manager = None

    ZI = ZoneIngestor.ZoneIngestor()

    next_page_id = None
    source = 'Infoblox'

    def __get_base_url(self):
        """
        Returns the Infoblox zone API URL
        :return: Infoblox zone API URL
        """
        return 'https://' + self.IH.IBLOX_HOST + '/wapi/v' + self.IH.IBLOX_VERSION + '/zone_auth'

    def __get_previous_zones(self):
        """
        Fetches the currently present zones/sub-zones in the zone collection with source 'Infoblox'.
        The result is a dictionary with the zones as keys. The value of the key is True if the zone
        is sub_zone.
        """
        zones = self.zone_collection.find(
            {
                '$or': [{
                    'reporting_sources.source': self.source
                }, {
                    'sub_zones.source': self.source
                }]
            }, {
                'reporting_sources': 1,
                'zone': 1,
                'sub_zones': 1
            })
        self.previous_zones = {}
        for zone in zones:
            for reporting_source in zone['reporting_sources']:
                if reporting_source['source'] == self.source:
                    self.previous_zones[zone['zone']] = False
            for sub_zone in zone['sub_zones']:
                if sub_zone['source'] == self.source:
                    self.previous_zones[sub_zone['sub_zone']] = True

    def __clean_collection(self):
        """
        Cleans the zone collection of the zones which were earlier seen in the Infoblox API
        but are not seen now. Such zones/sub-zones are marked with source 'Infoblox-Retired'.
        """
        parent_zones = []
        sub_zones = []
        for zone_name, is_sub_zone in self.previous_zones.items():
            if is_sub_zone:
                sub_zones.append(zone_name)
            else:
                parent_zones.append(zone_name)

        # Update the sub_zones from 'Infoblox' to 'Infoblox-Retired'
        self.zone_collection.update_many(
            {
                'sub_zones': {
                    '$elemMatch': {
                        'sub_zone': {
                            '$in': sub_zones
                        },
                        'source': self.source
                    }
                }
            }, {'$set': {
                'sub_zones.$.source': 'Infoblox-Retired'
            }})

        self.zone_collection.update_many(
            {
                'zone': {
                    '$in': parent_zones
                },
                'reporting_sources.source': self.source
            }, {'$set': {
                'reporting_sources.$.source': 'Infoblox-Retired'
            }})

    def __insert_zone(self, zone):
        """
        Inserts the zone into the zone collection or into ip_zones collection in case
        it is an IP.
        :param zone: Zone value to be inserted into collections. This is a dictionary
                     with keys 'fqdn' and 'parent'.
        """
        # Some zones are actually IP addresses.
        # If the IP address is new, add it.
        # Change the update date if it already exists
        utf8_zone = zone['fqdn'].encode('utf-8').decode('utf-8')
        # Match IPv4 CIDR values such as 10.0.0.0/8 or 192.168.0.0/24
        if re.match(r"^([0-9]{1,3}\.){3}[0-9]{1,3}\/\d{1,2}$",
                    utf8_zone) is not None:
            if self.ip_collection.count_documents({"zone": zone['fqdn']}) == 0:
                insert_text = dict()
                insert_text['zone'] = utf8_zone
                insert_text['source'] = 'Infoblox'
                insert_text['status'] = 'unconfirmed'
                insert_text['created'] = datetime.now()
                insert_text['updated'] = datetime.now()
                self.ip_collection.insert_one(insert_text)
                self._logger.info("Added IP: " + utf8_zone)
            else:
                self.ip_collection.update_one(
                    {'zone': zone['fqdn']},
                    {'$currentDate': {
                        "updated": True
                    }})
                self._logger.info("Updated IP: " + utf8_zone)
        else:
            # Remove the zone from the set of previously seen zones. Anything
            # left in previous_zones afterwards is retired by __clean_collection.
            if zone['fqdn'] in self.previous_zones:
                del self.previous_zones[zone['fqdn']]
            self.ZI.add_zone(zone['fqdn'], self.source, zone['parent'])

    def __infoblox_response_handler(self, response):
        """
        Handles the API response. Incorrect JSON parsing is allowed up to 20 times, after which
        the script exits. If a 'next_page_id' is received in the response, it is stored as the
        identifier for the next page of the API to be queried.
        :param response: Response object
        """
        try:
            response_data = response.json()
            response_result = response_data['result']
        except (ValueError, AttributeError) as err:
            if self.incorrect_response_json_allowed > 0:
                self._logger.warning(
                    'Unable to parse response JSON for alphabet ' +
                    self.alphabet_queried)
                self.incorrect_response_json_allowed -= 1
            else:
                self.APIH.handle_api_error(
                    'Unable to parse response JSON for 20 alphabets: ' +
                    repr(err),
                    self.job_manager,
                )
        else:
            for entry in response_result:
                zone = dict()
                zone['fqdn'] = entry['fqdn']
                zone['parent'] = entry['parent']
                self.__insert_zone(zone)

            if 'next_page_id' in response_data:
                self.next_page_id = response_data['next_page_id']

    @backoff.on_exception(backoff.expo,
                          requests.exceptions.ConnectionError,
                          max_tries=4,
                          factor=10,
                          on_backoff=APIH.connection_error_retry)
    def __backoff_api_retry(self):
        """
        Makes API calls to Infoblox with exponential retry capabilities using 'backoff'. The API is
        retried up to 3 times on ConnectionError before the script exits.
        """
        url = self.__get_base_url()
        params = {
            'view': 'External',
            'fqdn~': '.*' + self.alphabet_queried + '$',
            '_return_fields': 'parent,fqdn',
        }
        if not self.next_page_id:
            params.update({
                '_paging': '1',
                '_return_as_object': '1',
                '_max_results': '1500'
            })
        else:
            params.update({'_page_id': self.next_page_id})

        return requests.get(url,
                            params,
                            auth=HTTPBasicAuth(self.IH.IBLOX_UNAME,
                                               self.IH.IBLOX_PASSWD),
                            verify=False)

    def __infoblox_paginated_request(self):
        """
        Makes paginated API calls to Infoblox. The API is retried 3 times on ConnectionError
        before the script exits. The script also exits on encountering an HTTPError or any other
        RequestException.
        """
        try:
            response = self.__backoff_api_retry()
            response.raise_for_status()
        except requests.exceptions.HTTPError as herr:
            self.APIH.handle_api_error(herr, self.job_manager)
        except requests.exceptions.RequestException as err:
            self.APIH.handle_api_error(err, self.job_manager)
        else:
            self.next_page_id = None
            self.__infoblox_response_handler(response)

    def get_infoblox_zones(self):
        """
        Extracts the Infoblox zones using paginated requests.
        """
        print("Starting: " + str(datetime.now()))
        self._logger.info("Starting....")
        self.job_manager = JobsManager.JobsManager(self.MC,
                                                   'get_iblox_alpha_zones')
        self.job_manager.record_job_start()

        self.__get_previous_zones()
        for alphabet in self.alphabets:
            self.alphabet_queried = alphabet
            self.next_page_id = None
            self.__infoblox_paginated_request()
            while self.next_page_id:
                self.__infoblox_paginated_request()

        self.__clean_collection()

        # Record status
        self.job_manager.record_job_complete()

        print("Ending: " + str(datetime.now()))
        self._logger.info("Complete")

    def __init__(self):
        self._logger = LoggingUtil.create_log(__name__)
        self.incorrect_response_json_allowed = self.APIH.INCORRECT_RESPONSE_JSON_ALLOWED
        self.get_infoblox_zones()
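
# For reference (an assumption inferred from the handler above, not an official
# Infoblox API reference): __infoblox_response_handler expects pages shaped
# roughly like this, with 'result' holding the zone entries and 'next_page_id'
# present only while further pages remain.
EXAMPLE_IBLOX_PAGE = {
    "result": [
        {"fqdn": "sub.example.org", "parent": "example.org"},
    ],
    "next_page_id": "789c4bcd4b29",  # hypothetical opaque paging token
}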
Example #8
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    google_dns = GoogleDNS.GoogleDNS()
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'extract_ssl_domains')
    jobs_manager.record_job_start()

    parser = argparse.ArgumentParser(
        description='Search TLS certificates for additional DNS names')
    parser.add_argument('--zgrab_version',
                        default=2,
                        type=int,
                        choices=[1, 2],
                        metavar="version",
                        help='The version of ZGrab used to collect data')
    args = parser.parse_args()

    dns_names = []
    round_two = []

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Collect the list of domains from the SSL Certificates
    extract_ct_certificate_names(dns_names, mongo_connector)
    # extract_censys_certificate_names(dns_names, mongo_connector)
    if args.zgrab_version == 1:
        extract_zgrab_certificate_names(logger, dns_names, mongo_connector)
    else:
        extract_zgrab2_certificate_names(logger, dns_names, mongo_connector)

    input_list = []

    # Some SSL certificates are for multiple domains.
    # The tracked company may not own all domains.
    # Therefore, we filter to only the root domains that belong to the tracked company.
    logger.info("Pre-filter list: " + str(len(dns_names)))
    for hostname in dns_names:
        if not hostname.startswith("*"):
            zone = get_tracked_zone(hostname, zones)
            if zone is not None:
                ips = google_dns.fetch_DNS_records(hostname)

                # Pause to prevent DoS-ing of Google's HTTPS DNS Service
                time.sleep(1)

                if ips:
                    for ip_addr in ips:
                        temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                        if temp_zone is not None:
                            record = {"fqdn": ip_addr['fqdn']}
                            record['zone'] = temp_zone
                            record['created'] = datetime.now()
                            record['type'] = ip_addr['type']
                            record['value'] = ip_addr['value']
                            record['status'] = 'unknown'
                            input_list.append(record)

                        if ip_addr['type'] == "cname" and is_tracked_zone(
                                ip_addr['value'], zones):
                            add_to_round_two(ip_addr['value'], round_two)

                else:
                    logger.warning("Failed IP Lookup for: " + hostname)
            else:
                logger.warning("Failed match on zone for: " + hostname)
        else:
            logger.warning("Skipping wildcard: " + hostname)

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    # Some DNS records will be CNAME records pointing to other tracked domains.
    # This is a single level recursion to lookup those domains.
    logger.info("Round Two list: " + str(len(round_two)))
    for hostname in round_two:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)
            else:
                logger.warning("Failed IP Lookup for: " + hostname)
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "ssl")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert_one(original_record)
        else:
            logger.warning("Failed match on zone for: " + hostname)

    # Record all the results.
    dns_manager.remove_by_source("ssl")
    logger.info("List length: " + str(len(input_list)))
    for final_result in input_list:
        dns_manager.insert_record(final_result, "ssl")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete")
Example #9
def main():
    """
    Begin main...
    """
    parser = argparse.ArgumentParser(
        description='Search the Common Crawl graph dataset for new domains')
    parser.add_argument('--url',
                        metavar="URL",
                        help='The URL for the latest vertices file')
    args = parser.parse_args()

    if args.url is not None:
        CURRENT_FILE_LIST = args.url

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_collection = mongo_connector.get_jobs_connection()

    reversed_zones = ZoneManager.get_reversed_zones(mongo_connector)

    alphabet = list(string.digits + string.ascii_lowercase)

    # Create a dictionary of the zones grouped by their first letter
    # This will allow us to reduce the number of comparisons in the alphabetized CC files.
    grouped_zones = {}
    for letter in alphabet:
        grouped_zones[letter] = []

    for zone in reversed_zones:
        first_letter = zone[0]
        grouped_zones[first_letter].append(zone)

    compressed_download_list = download_file(CURRENT_FILE_LIST)
    subprocess.check_call(["gunzip", "-f", compressed_download_list])

    download_list = compressed_download_list.split(".")[:-1]
    list_file = ".".join(download_list)

    vertices_file_entries = open(list_file, "r")

    for entry in vertices_file_entries:
        # Download file
        vert_file_url = "http://commoncrawl.s3.amazonaws.com/" + entry.rstrip(
            "\n")
        compressed_vertices_file = download_file(vert_file_url)

        # Decompress file
        subprocess.check_call(["gunzip", "-f", compressed_vertices_file])
        vertices_list = compressed_vertices_file.split(".")[:-1]
        vertices_file = ".".join(vertices_list)

        # Get the first and last line of the file
        (first_line, last_line) = get_first_and_last_line(vertices_file)

        # Get the first and last domain
        parts = first_line.split("\t")
        first_domain = parts[1].rstrip("\n")
        first_char = first_domain[0]

        parts = last_line.split("\t")
        last_domain = parts[1].rstrip("\n")
        last_char = last_domain[0]

        # Get the list of zones relevant to that range
        searchable_zones = get_zone_sublist(first_char, last_char,
                                            grouped_zones)

        # Parse file and insert matches
        parse_file(vertices_file, searchable_zones, dns_manager)
        subprocess.check_call(["rm", vertices_file])

    # Remove all entries more than two months old
    # Note: This is commented out because Common Crawl graph data is not additive.
    # dns_manager.remove_all_by_source_and_date("common_crawl", -4)

    jobs_collection.update_one({'job_name': 'common_crawl_graph'}, {
        '$currentDate': {
            "updated": True
        },
        "$set": {
            'status': 'COMPLETE'
        }
    })

    now = datetime.now()
    print("Ending: " + str(now))
Example #10
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    parser = argparse.ArgumentParser(
        description=
        'Send specific collections to the remote MongoDB. If no arguments are provided, then all data is mirrored.'
    )
    parser.add_argument('--send_zones',
                        action="store_true",
                        required=False,
                        help='Send zones')
    parser.add_argument('--send_ip_zones',
                        action="store_true",
                        required=False,
                        help='Send IP zones')
    parser.add_argument('--send_third_party_zones',
                        action="store_true",
                        required=False,
                        help='Send AWS, Azure, etc.')
    parser.add_argument('--send_config',
                        action="store_true",
                        required=False,
                        help='Send configs')
    parser.add_argument('--send_dns_records',
                        action="store_true",
                        required=False,
                        help='Send DNS records')
    args = parser.parse_args()

    send_all = False
    if len(sys.argv) == 1:
        send_all = True

    mongo_connector = MongoConnector.MongoConnector()
    remote_mongo_connector = RemoteMongoConnector.RemoteMongoConnector()

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'send_remote_server')
    jobs_manager.record_job_start()

    if send_all or args.send_zones:
        zone_list = update_zones(logger, mongo_connector,
                                 remote_mongo_connector, True)
    else:
        zone_list = update_zones(logger, mongo_connector,
                                 remote_mongo_connector, False)

    if send_all or args.send_ip_zones:
        update_ip_zones(logger, mongo_connector, remote_mongo_connector)

    if send_all or args.send_third_party_zones:
        update_aws_cidrs(logger, mongo_connector, remote_mongo_connector)
        update_azure_cidrs(logger, mongo_connector, remote_mongo_connector)
        update_akamai_cidrs(logger, mongo_connector, remote_mongo_connector)
        update_gcp_cidrs(logger, mongo_connector, remote_mongo_connector)

    if send_all or args.send_config:
        update_config(logger, mongo_connector, remote_mongo_connector)

    if send_all or args.send_dns_records:
        update_all_dns(logger, mongo_connector, remote_mongo_connector,
                       zone_list)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #11
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'sonar_round_two')
    google_dns = GoogleDNS.GoogleDNS()
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    results = dns_manager.find_multiple({'type': 'cname'}, "sonar_dns")

    round_two = []
    round_three = []

    # Get all the CNAME values from all_dns and append them to round_two
    for result in results:
        if is_tracked_zone(result['value'], zones):
            round_two.append(result['value'])

    print("Round two pre-list: " + str(len(round_two)))

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    for value in round_two:
        is_present = dns_manager.find_count({'fqdn': value}, "sonar_dns")
        if is_present == 0:
            print(value + " not found")
            time.sleep(1)
            result = google_dns.fetch_DNS_records(value)
            if not result:
                print("Unable to resolve")
                original_records = dns_manager.find_multiple({"value": value},
                                                             "sonar_dns")
                for record in original_records:
                    check = dead_dns_collection.count_documents(
                        {'fqdn': record['fqdn']})
                    if check == 0:
                        record.pop("_id")
                        dead_dns_collection.insert_one(record)
            else:
                for entry in result:
                    if is_tracked_zone(entry['fqdn'], zones):
                        new_record = entry
                        new_record['status'] = 'unconfirmed'
                        new_record['zone'] = get_fld_from_value(value, '')
                        new_record['created'] = datetime.now()
                        if result[0]['type'] == "cname" and is_tracked_zone(
                                entry['value'], zones):
                            add_to_list(entry['value'], round_three)
                        print("Found: " + value)
                        if new_record['zone'] != '':
                            dns_manager.insert_record(new_record, "marinus")

    # For each tracked CName result found in the first pass across Sonar DNS
    print("Round Three length: " + str(len(round_three)))
    for hostname in round_three:
        zone = get_fld_from_value(hostname, '')
        if zone is not None and zone != '':
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips:
                for ip_addr in ips:
                    if is_tracked_zone(ip_addr['fqdn'], zones):
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = get_fld_from_value(
                            ip_addr['fqdn'], '')
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unconfirmed'
                        dns_manager.insert_record(record, "marinus")
            else:
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "marinus")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert_one(original_record)
                print("Failed IP Lookup for: " + hostname)
        else:
            print("Failed match on zone for: " + hostname)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
Example #12
def main():

    mongo_connector = MongoConnector.MongoConnector()

    now = datetime.now()
    print("Starting: " + str(now))

    jobs_manager = JobsManager.JobsManager(mongo_connector, 'owasp_amass')

    zones = ZoneManager.get_distinct_zones(mongo_connector)
    dns_manager = DNSManager.DNSManager(mongo_connector)

    output_dir = "./amass_files/"

    arg_parser = argparse.ArgumentParser(
        description=
        'Run the OWASP Amass tool and store the results in the database.')
    arg_parser.add_argument(
        '--config_file',
        required=False,
        help='An optional Amass config file. Otherwise, defaults will be used.'
    )
    arg_parser.add_argument('--amass_path',
                            required=True,
                            help='The path to the amass binary')
    arg_parser.add_argument('--output_dir',
                            default=output_dir,
                            help="The path where to save Amass files.")
    arg_parser.add_argument('--amass_version',
                            type=int,
                            default=3,
                            help='The version of OWASP Amass being used.')
    arg_parser.add_argument(
        '--sleep',
        type=int,
        default=5,
        help=
        'Sleep time in seconds between amass runs so as not to overuse service limits.'
    )
    args = arg_parser.parse_args()

    if not os.path.isfile(args.amass_path):
        print("ERROR: Incorrect amass_path argument provided")
        exit(1)

    if args.config_file is not None and not os.path.isfile(args.config_file):
        print("ERROR: Incorrect config_file location")
        exit(1)

    output_dir = args.output_dir
    if not output_dir.endswith("/"):
        output_dir = output_dir + "/"

    check_save_location(output_dir)

    jobs_manager.record_job_start()

    for zone in zones:
        # Pace out calls to the Amass services
        time.sleep(args.sleep)

        command_line = []

        command_line.append(args.amass_path)

        if int(args.amass_version) >= 3:
            command_line.append("enum")

        if args.config_file:
            command_line.append("-config")
            command_line.append(args.config_file)

        command_line.append("-d")
        command_line.append(zone)
        command_line.append("-src")
        command_line.append("-ip")
        command_line.append("-do")
        command_line.append(output_dir + zone + "-do.json")

        try:
            subprocess.check_call(command_line)
        except subprocess.CalledProcessError as e:
            # Even when there is an error, there will likely still be results.
            # We can continue with the data that was collected thus far.
            print("ERROR: Amass run exited with a non-zero status: " + str(e))

        if os.path.isfile(output_dir + zone + "-do.json"):
            json_data = []
            with open(output_dir + zone + "-do.json", "r") as output:
                for line in output:
                    try:
                        json_data.append(json.loads(line))
                    except ValueError:
                        print("Amass wrote an incomplete line: " + str(line))

            for finding in json_data:
                if finding.get('type') in ('infrastructure', 'domain'):
                    # Not currently recording these types
                    continue
                elif is_tracked_zone(finding['domain'], zones):
                    record_finding(dns_manager, finding)
                else:
                    # print("Skipping: " + finding['domain'] + " type: " + finding['type'])
                    pass

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
Example #13
class InfobloxExtattrManager(object):
    # Make database connections
    MC = MongoConnector.MongoConnector()
    APIH = APIHelper.APIHelper()
    IH = InfobloxHelper.InfobloxHelper()

    iblox_extattr_collection = MC.get_infoblox_extattr_connection()
    zone_queried = None
    record_type = None
    next_page_id = None
    _logger = None


    def _log(self):
        """
        Get the log
        """
        return logging.getLogger(__name__)


    def __get_record_type_url(self):
        """
        Returns the URL to be queried at Infoblox for the extattr information.
        Paging parameters are appended to the URL as needed.
        :return: string: URL to be queried
        """
        paging_info = self.IH.get_pagination_params(self.next_page_id)

        return_fields = '&_return_fields=extattrs,zone'
        if self.record_type == 'a':
            return_fields += ',ipv4addr'
        elif self.record_type == 'aaaa':
            return_fields += ',ipv6addr'
        elif self.record_type == 'zone':
            return_fields = '&_return_fields=extattrs'

        url = self.IH.get_infoblox_base_url(
            self.zone_queried,
            self.record_type,
        ).format(
            return_fields=return_fields,
            paging_info=paging_info,
        )
        return url

    def __insert_extattrs(self, insert_object):
        """
        Inserts/Updates the extattr information in the database. '_ref' uniquely identifies the
        resource.
        :param insert_object: Dictionary containing the details of the resource.
        """
        if insert_object['_ref'] not in self.previous_records:
            insert_object['created'] = datetime.now()
            insert_object['updated'] = datetime.now()
            self.iblox_extattr_collection.insert_one(insert_object)
        else:
            self.previous_records.remove(insert_object['_ref'])
            self.iblox_extattr_collection.update_one({'_ref': insert_object['_ref']},
                                                    {"$set": {
                                                        'updated': datetime.now(),
                                                        'extattrs': insert_object['extattrs']
                                                    }})

    def __get_previous_records(self):
        """
        Retrieve the current data related to the zone and record_type. This is evaluated
        against the data that we receive in the latest script run to determine stale records.
        The data is stored as a list of '_ref' values.
        """
        self.previous_records = []
        previous_records = self.iblox_extattr_collection.find({'zone': self.zone_queried,
                                                              'record_type': self.record_type,
                                                              }, {'_ref': 1})
        for record in previous_records:
            self.previous_records.append(record['_ref'])

    def __sanitise_response(self, response_object):
        """
        For record_type 'zone', we extract 'infoblox_zone' from the '_ref'.
        For record_type 'a' the 'value' comes from the 'ipv4addr' key ('ipv6addr' for 'aaaa');
        for other record_types it is extracted from '_ref'.
        The general format of _ref is: {record_type_iden}/{hash_iden}:{value}/External
        The valid record_type_iden values are 'zone_auth', 'record:cname', 'record:host', 'record:a'
        :param response_object: Value of 'result' key of response in JSON format.
        """
        insert_object = {
            'record_type': self.record_type,
            'zone': self.zone_queried,
        }

        if self.record_type == 'zone':
            response_object['infoblox_zone'] = response_object['_ref'].split(':')[1].split('/')[0]
        else:
            response_object['infoblox_zone'] = response_object['zone']
            response_object.pop('zone')

        if self.record_type == 'a':
            response_object['value'] = response_object['ipv4addr']
            response_object.pop('ipv4addr')
        elif self.record_type == 'aaaa':
            response_object['value'] = response_object['ipv6addr']
            response_object.pop('ipv6addr')
        else:
            response_object['value'] = response_object['_ref'].split('/')[1].split(':')[1]

        response_object.update(insert_object)

    def __infoblox_response_handler(self, response):
        """
        Handles the API response. Incorrect JSON parsing is allowed up to 20 times, after which
        the script exits. No action is performed when 'extattrs' is an empty dictionary.
        :param response: Response object
        """
        try:
            response_data = response.json()
            response_result = response_data['result']
        except (ValueError, AttributeError) as err:
            if self.incorrect_response_json_allowed > 0:
                self._logger.warning('Unable to parse response JSON for zone ' + self.zone_queried)
                self.incorrect_response_json_allowed -= 1
            else:
                self.APIH.handle_api_error(
                    'Unable to parse response JSON for 20 zones: ' + repr(err),
                    'get_infoblox_' + self.record_type.lower() + '_extattr',
                )
        else:
            for response_object in response_result:
                if not response_object['extattrs']:
                    continue

                # Adding the exception handling for the scenario when the '_ref' format
                # changes and leads to 'split' not working as expected.
                try:
                    self.__sanitise_response(response_object)
                except IndexError as err:
                    self.APIH.handle_api_error(err, 'get_infoblox_' + self.record_type.lower() + '_extattr')
                else:
                    self.__insert_extattrs(response_object)

            if "next_page_id" in response_data:
                self.next_page_id = response_data['next_page_id']

    @backoff.on_exception(backoff.expo,
                          requests.exceptions.ConnectionError,
                          max_tries=4,
                          factor=10,
                          on_backoff=APIH.connection_error_retry)
    def __backoff_api_retry(self):
        """
        Makes API calls to Infoblox with exponential retry capabilities using 'backoff'. The API is
        retried up to 3 times on ConnectionError before the script exits.
        :return: Response object from the Infoblox API.
        """
        return requests.get((self.__get_record_type_url()),
                            auth=HTTPBasicAuth(self.IH.IBLOX_UNAME, self.IH.IBLOX_PASSWD), verify='/etc/ssl/certs/ca-bundle.crt')

    def __infoblox_paginated_request(self):
        """
        Makes paginated API calls to Infoblox. The API is retried 3 times on ConnectionError
        before the script exits. The script also exits on encountering an HTTPError or any other
        RequestException. On success, the next_page_id is reset to None before handling the response.
        """
        try:
            response = self.__backoff_api_retry()
            response.raise_for_status()
        except requests.exceptions.HTTPError as herr:
            self.APIH.handle_api_error(herr, 'get_infoblox_' + self.record_type.lower() + '_extattr')
        except requests.exceptions.RequestException as err:
            self.APIH.handle_api_error(err, 'get_infoblox_' + self.record_type.lower() + '_extattr')
        else:
            self.next_page_id = None
            self.__infoblox_response_handler(response)

    def get_infoblox_extattr(self):
        """
        Extracts the zones from the zone collection to query Infoblox. API calls continue for
        each zone until next_page_id is None, indicating no further results to fetch.
        After all data is retrieved, stale records for the zone and record_type are purged.
        """
        zones = ZoneManager.get_zones_by_source(self.MC, 'Infoblox')
        for zone in zones:
            self.zone_queried = zone
            self.next_page_id = None
            self.__get_previous_records()
            self.__infoblox_paginated_request()
            while self.next_page_id:
                self.__infoblox_paginated_request()
            self.IH.clean_collection(self.previous_records, self.iblox_extattr_collection)


    def __init__(self, record_type):
        self.record_type = record_type
        self.incorrect_response_json_allowed = self.APIH.INCORRECT_RESPONSE_JSON_ALLOWED
        self._logger = self._log()
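
# A worked sketch of the '_ref' parsing done by __sanitise_response above, using
# hypothetical sample values that follow the documented format
# {record_type_iden}/{hash_iden}:{value}/External:
zone_ref = 'zone_auth/ZG5zLnpvbmUk:example.org/External'
infoblox_zone = zone_ref.split(':')[1].split('/')[0]   # -> 'example.org'

cname_ref = 'record:cname/ZG5zLmJpbmQk:www.example.org/External'
cname_value = cname_ref.split('/')[1].split(':')[1]    # -> 'www.example.org'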
Example #14
class ZoneIngestor(object):

    # Connect to the database
    MC = MongoConnector.MongoConnector()
    zone_collection = MC.get_zone_connection()
    zone_manager = ZoneManager.ZoneManager(MC)

    _logger = logging.getLogger(__name__)

    def __check_parent_zone(self, zone):
        """
        For the provided zone, find the existing parent zone record.
        :param zone: Zone for which parent record needs to be found.
        :return: Parent zone record found or None
        """
        zones_present = list(self.zone_collection.find({}, {'zone': 1}))
        zone_segments = zone.split('.')
        segment_length = len(zone_segments)
        parent_zone = zone_segments[segment_length - 1]

        for segment in zone_segments[segment_length - 2:0:-1]:
            parent_zone = segment + '.' + parent_zone
            for zone_present in zones_present:
                if zone_present['zone'] == parent_zone:
                    return zone_present
        return None

    def __check_sub_zone(self, zone):
        """
        Return sub-zones as a list of the zone provided
        :param zone: Zone for which sub-zones need to be found.
        :return: List of the sub-zones found.
        """
        zones_present = list(self.zone_collection.find({}, {'_id': 0}))
        zone = '.' + zone
        sub_zones_matched = []
        for zone_present in zones_present:
            if zone_present['zone'].rfind(zone) > 0:
                sub_zones_matched.append(zone_present)
        return sub_zones_matched

    @staticmethod
    def __create_sub_zone_entries(sub_zone):
        """
        Iterate recursively through the sub_zone list to create the sub-zones list.
        :param sub_zone: Sub-zone record to be iterated to create the sub-zone list
        :return: List of sub-zones prepared.
        """
        temp_sub_zone_list = []
        temp_sub_zone = dict()
        temp_sub_zone['sub_zone'] = sub_zone['zone']
        temp_sub_zone['source'] = sub_zone['reporting_sources'][0]['source']
        temp_sub_zone['created'] = sub_zone['created']
        temp_sub_zone['updated'] = datetime.now()
        temp_sub_zone['status'] = sub_zone['reporting_sources'][0]['status']
        temp_sub_zone_list.append(temp_sub_zone)
        for sz in sub_zone['sub_zones']:
            temp_sub_zone = dict()
            temp_sub_zone['sub_zone'] = sz['sub_zone']
            temp_sub_zone['source'] = sz['source']
            temp_sub_zone['created'] = sz['created']
            temp_sub_zone['updated'] = datetime.now()
            temp_sub_zone['status'] = sz['status']
            temp_sub_zone_list.append(temp_sub_zone)
        return temp_sub_zone_list

    def __update_parent_sub_zones(self, sub_zone_records, source, parent):
        """
        Add a new document for the parent record with the source provided. Add the
        sub_zone_records found as sub_zones to the parent zone.
        :param sub_zone_records: Sub-zone records found.
        :param source: Source of the parent
        :param parent: Parent value to be added
        """
        sub_zones = []
        for sub_zone in sub_zone_records:
            sub_zones.extend(self.__create_sub_zone_entries(sub_zone))

        insert_zone = dict()
        insert_zone['zone'] = parent
        insert_zone['reporting_sources'] = list()
        insert_zone['reporting_sources'].append({
            'created': datetime.now(),
            'updated': datetime.now(),
            'status': 'unconfirmed',
            'source': source,
        })
        insert_zone['created'] = datetime.now()
        insert_zone['updated'] = datetime.now()
        insert_zone['status'] = 'unconfirmed'
        insert_zone['sub_zones'] = sub_zones
        self.zone_collection.insert_one(insert_zone)

    def __add_sub_zone(self, zone, source, parent_record):
        """
        Add the zone as sub-zone for the parent_record provided with the provided source.
        The updated time of the parent_record will be updated also.

        :param zone: Sub-zone value to be added.
        :param source: Source value of the sub-zone.
        :param parent_record: Parent document to which the sub-zone needs to be added.
        """
        sub_zone = dict()
        sub_zone['sub_zone'] = zone
        sub_zone['source'] = source
        sub_zone['created'] = datetime.now()
        sub_zone['updated'] = datetime.now()
        sub_zone['status'] = 'unconfirmed'

        self.zone_collection.update_one(
            {'_id': ObjectId(parent_record['_id'])}, {
                '$push': {
                    'sub_zones': sub_zone
                },
                '$set': {
                    'updated': datetime.now()
                }
            })

    def __add_new_zone(self, zone, source, parent, custom_fields):
        """
        Add a new record with the parent zone and the sub-zone.
        The source value is as provided in the initial function call.
        The zone value can be None indicating we are adding only TLD.
        :param zone: Sub-zone value to be added.
        :param source: Source of the parent and the zone.
        :param parent: Parent zone value to be added.
        :param custom_fields: Optional dict of extra fields stored on the reporting source.
        """
        sub_zones = list()
        # zone value can be None
        if zone:
            sub_zones.append({})
            sub_zones[0]['sub_zone'] = zone
            sub_zones[0]['source'] = source
            sub_zones[0]['created'] = datetime.now()
            sub_zones[0]['updated'] = datetime.now()
            sub_zones[0]['status'] = 'unconfirmed'

        insert_zone = dict()
        insert_zone['zone'] = parent
        insert_zone['reporting_sources'] = list()
        sources_data = {
            'created': datetime.now(),
            'updated': datetime.now(),
            'status': 'unconfirmed',
            'source': source,
        }

        if custom_fields is not None:
            for key_value in custom_fields.keys():
                sources_data[key_value] = custom_fields[key_value]

        insert_zone['reporting_sources'].append(sources_data)
        insert_zone['created'] = datetime.now()
        insert_zone['updated'] = datetime.now()
        insert_zone['status'] = 'unconfirmed'
        insert_zone['sub_zones'] = sub_zones
        self.zone_collection.insert_one(insert_zone)

    def __update_source_time(self, record, source, custom_fields):
        """
        Append the source to the list of sources of parent zone if not previously present.
        Update the updated time of the parent zone entry.
        :param record: Document which needs to be updated.
        :param source: Source value which needs to be added.
        :param custom_fields: Optional dict of extra fields stored on the reporting source.
        """
        source_contained = False
        for reporting_source in record['reporting_sources']:
            if reporting_source['source'] == source:
                source_contained = True

        if not source_contained:
            # the source does not exist in the zone so push one.
            source_data = dict()
            source_data['created'] = datetime.now()
            source_data['updated'] = datetime.now()
            source_data['status'] = 'unconfirmed'
            source_data['source'] = source

            if custom_fields is not None:
                for key_value in custom_fields.keys():
                    source_data[key_value] = custom_fields[key_value]

            self.zone_collection.update_one({'_id': ObjectId(record['_id'])}, {
                '$push': {
                    'reporting_sources': source_data
                },
                '$set': {
                    'updated': datetime.now()
                }
            })

        else:
            self.zone_collection.update_one(
                {
                    '_id': ObjectId(record['_id']),
                    'reporting_sources.source': source
                }, {
                    '$set': {
                        'reporting_sources.$.updated': datetime.now(),
                        'updated': datetime.now()
                    }
                })
            if custom_fields is not None:
                # Build a single $set document rather than one update per key.
                update_fields = {'updated': datetime.now()}
                for key, value in custom_fields.items():
                    update_fields['reporting_sources.$.' + key] = value
                self.zone_collection.update_one(
                    {
                        '_id': ObjectId(record['_id']),
                        'reporting_sources.source': source
                    }, {'$set': update_fields})

    def __update_time(self, record, zone):
        """
        Update the time of the zone record and that of the sub-zone.

        :param record: Document which needs to be updated.
        :param zone: Sub-zone value of document whose time needs to be updated.
        """
        self.zone_collection.update_one(
            {
                '_id': ObjectId(record['_id']),
                'sub_zones.sub_zone': zone
            }, {
                '$set': {
                    'sub_zones.$.updated': datetime.now(),
                    'updated': datetime.now()
                }
            })

    def __delete_zone(self, zone):
        """
        Delete the zone record.
        :param zone: Zone value to be deleted.
        """
        self.zone_collection.remove({'zone': zone})

    def __zone_previously_not_present(self, zone, source, parent,
                                      custom_fields):
        """
        Handling of the zone when it does not already exist.
        1. Check if the parent value has been provided in the parameters.
        2. If yes:
                    -- if the parent is present as a zone: return if more than one document is found.
                                                           Else add the zone to the parent document as a sub-zone with
                                                           the source value provided.
                    -- if the parent is not present as a zone: create a new zone and parent entry with the source.
        3. If no:
                    -- if a parent zone is already present: add the zone as a sub-zone of the parent zone record.
                    -- if no parent zone is present: if existing records should be sub-zones of this zone, add the zone
                                                     as a parent zone with those records folded in as its sub-zones and
                                                     delete their old records. Otherwise, add the zone as a parent zone
                                                     with no sub-zones.
        :param zone: Zone to be added which is not previously present.
        :param source: Source of the zone provided.
        :param parent: Parent value of the zone.
        :param custom_fields: Optional dictionary of custom fields to add to the source record.
        """
        if parent:
            # check if the parent is present as zone.
            # If yes, add zone as sub-zone
            # If no, add zone and parent as new entry.
            parent_record = self.zone_collection.find({'zone': parent})
            # A pymongo cursor is always truthy, so check the count explicitly.
            parent_count = parent_record.count()
            if parent_count > 1:
                self._logger.error(
                    'Error: Too many records for the parent zone:{parent}.'
                    .format(parent=parent))
                return False
            elif parent_count == 1:
                self.__add_sub_zone(zone, source, parent_record[0])
            else:
                self.__add_new_zone(zone, source, parent, custom_fields)

        else:
            # check for a previously present parent.
            # If yes, add zone as sub-zone
            # If no, add zone as new entry.
            parent_zone_record = self.__check_parent_zone(zone)

            if parent_zone_record:
                self.__add_sub_zone(zone, source, parent_zone_record)
            else:
                # check for sub-zone existing for this zone.
                # This could be the case when the sub-zone was ingested before parent zone
                sub_zone_records = self.__check_sub_zone(zone)
                if sub_zone_records:
                    # zone as parent zone and source provided. Previous sub_zone record to be taken.
                    # call delete sub-zone also.
                    self.__update_parent_sub_zones(sub_zone_records, source,
                                                   zone)
                    for sub_zone in sub_zone_records:
                        self.__delete_zone(sub_zone['zone'])
                else:
                    self.__add_new_zone(None, source, zone, custom_fields)

    def __zone_previously_present(self, zone, source, parent, cursor,
                                  custom_fields):
        """
        Handling of the zone when it already exists in the collection as a zone/sub-zone. The function returns
        in case multiple documents of the zone are discovered.
        1. Return if more than one existing record of the zone is discovered.
        2. Check if the zone exists as a parent zone.
        3. If yes:
                    -- if the parent value is not provided: update the date and source of the zone.
                    -- if the parent is provided: return if the current zone entry has more than one reporting source
                                                  or the current source is not equal to the provided source, since one
                                                  sub-zone can have only one source.
                                                  Else create a new zone and parent entry with the source and delete
                                                  the older entry.
        4. If no (i.e. the zone exists as a sub-zone):
                    -- Return if the parent provided is not the same as the parent zone value.
                    -- Return if the source provided is not the same as the source of the sub-zone.
                    -- Else update the date and time of the sub-zone and zone.
        :param zone: Zone value to be added.
        :param source: Source value of the zone.
        :param parent: Parent of the zone to be added.
        :param cursor: Existing record of the zone provided.
        :param custom_fields: Optional dictionary of custom fields to add to the source record.
        """

        if cursor.count() > 1:
            self._logger.error(
                'Error: The zone:{zone} is present in multiple records. Rectify.'
                .format(zone=zone))
            return

        record = cursor[0]
        # if record['status'] == 'false_positive':
        #     self._logger.error('False positive encountered in collection for zone:{zone}. No action required.'.format(zone=zone))
        #     return

        if record['zone'] == zone:
            if not parent:
                self.__update_source_time(record, source, custom_fields)

                if record['status'] == self.zone_manager.EXPIRED:
                    self.zone_manager.set_status(zone,
                                                 self.zone_manager.UNCONFIRMED,
                                                 source)
            else:
                # Return in case the zone is present with another source since sub-zones cannot have two sources.
                if len(record['reporting_sources']) > 1 or \
                        record['reporting_sources'][0]['source'] != source:
                    self._logger.error(
                        'Error: The zone:{zone} has multiple sources'.format(
                            zone=zone))
                    return

                self.__add_new_zone(zone, source, parent, custom_fields)
                self.__delete_zone(zone)
        else:
            record_zone = None
            for sub_zone in record['sub_zones']:
                if sub_zone['sub_zone'] == zone:
                    record_zone = sub_zone
            # if record_zone['status'] == 'false_positive':
            #     self._logger.error('False positive encountered in collection for zone:{zone}. No action required.'.format(zone=zone))
            #     return
            if parent and record['zone'] != parent:
                self._logger.error(
                    'Error: The zone:{zone} pre-exists as a sub-zone of another parent zone apart from parent:{parent}.'
                    .format(zone=zone, parent=parent))
                return
            if record_zone['source'] != source:
                self._logger.error(
                    'Error: The zone:{zone} pre-exists as a sub-zone from another source:{source}.'
                    .format(zone=zone, source=source))
                return

            self.__update_time(record, zone)

            if record['status'] == self.zone_manager.EXPIRED:
                self.zone_manager.set_status(zone,
                                             self.zone_manager.UNCONFIRMED,
                                             source)

    def add_zone(self, zone, source='Manual', parent=None, custom_fields=None):
        """
        Publicly exposed function responsible to ingest the zone into zone collection
        :param zone: Zone value.
        :param source: Source of the zone being ingested. Default value is Manual.
        :param parent: Parent zone of the zone being ingested if any. Default value is None
        :param custom_fields: An optional dictionary of custom fields to add to the source record.
        """
        if not zone:
            self._logger.error('Error: Provide zone value.')
            return

        # Reject any zone which does not contain a TLD.
        if '.' not in zone:
            self._logger.error('Error: Invalid zone entry: ' + zone)
            return

        cursor = self.zone_collection.find(
            {'$or': [{
                'sub_zones.sub_zone': zone
            }, {
                'zone': zone
            }]})

        if cursor.count() == 0:
            self.__zone_previously_not_present(zone, source, parent,
                                               custom_fields)
        else:
            self.__zone_previously_present(zone, source, parent, cursor,
                                           custom_fields)
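
# Illustrative usage of add_zone (hypothetical domains and custom_fields),
# covering each of its ingestion paths; kept as comments so the module body
# above is unchanged.
#
#   zi = ZoneIngestor.ZoneIngestor()
#   zi.add_zone('example.org')                                     # parent zone; source defaults to 'Manual'
#   zi.add_zone('sub.example.org', 'PassiveTotal', 'example.org')  # sub-zone under a known parent
#   zi.add_zone('example.net', 'Manual', custom_fields={'notes': 'internal'})
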
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Make database connections
    mongo_connector = MongoConnector.MongoConnector()
    ct_collection = mongo_connector.get_certificate_transparency_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector, "facebook_certs")

    jobs_manager.record_job_start()

    file_path = "/mnt/workspace/ct_facebook/"

    fb_connector = FacebookConnector.FacebookConnector()
    access_token = fb_connector.get_facebook_access_token()

    zones = ZoneManager.get_distinct_zones(mongo_connector)
    x509_parser = X509Parser.X509Parser()

    parser = argparse.ArgumentParser(
        description='Download certificate information from the Facebook CT API.')
    parser.add_argument(
        '--fetch_cert_records',
        choices=['dbAndSave', 'dbOnly'],
        default="dbAndSave",
        help=
        'Indicates whether to download the raw files or just record in the database'
    )
    parser.add_argument(
        '--cert_save_location',
        required=False,
        default=file_path,
        help=
        'Indicates where to save the certificates on disk when choosing dbAndSave'
    )
    args = parser.parse_args()

    check_save_location(args.cert_save_location)

    save_location = args.cert_save_location
    if not save_location.endswith("/"):
        save_location = save_location + "/"

    for zone in zones:
        time.sleep(15)
        results = fetch_domain(logger, fb_connector, access_token, zone)

        if results is None:
            logger.warning("ERROR looking up: " + zone)
            continue

        logger.info(zone + ": " + str(len(results)))

        for result in results:
            if args.fetch_cert_records == "dbAndSave":
                with open(save_location + zone + "_" + result['id'] + ".pem",
                          "w") as cert_f:
                    cert_f.write(result['certificate_pem'])

            cert = x509_parser.parse_data(result['certificate_pem'],
                                          "facebook")
            cert['facebook_id'] = result['id']

            if ct_collection.find({
                    'fingerprint_sha256':
                    cert['fingerprint_sha256']
            }).count() == 0:
                ct_collection.insert(cert)
            else:
                if ct_collection.find({
                        'fingerprint_sha256':
                        cert['fingerprint_sha256'],
                        'facebook_id':
                        result['id'],
                        'zones':
                        zone
                }).count() == 0:
                    ct_collection.update(
                        {'fingerprint_sha256': cert['fingerprint_sha256']}, {
                            "$set": {
                                'marinus_updated': datetime.now(),
                                'facebook_id': result['id']
                            },
                            "$addToSet": {
                                'zones': zone
                            }
                        })

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemple #16
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector, "sonar_round_two")
    google_dns = GoogleDNS.GoogleDNS()
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    results = dns_manager.find_multiple({"type": "cname"}, "sonar_dns")

    round_two = []
    round_three = []

    # Get all the CNAME values from sonar_dns and append them to round_two
    for result in results:
        if is_tracked_zone(result["value"], zones):
            round_two.append(result["value"])

    logger.info("Round two pre-list: " + str(len(round_two)))

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    for value in round_two:
        is_present = dns_manager.find_count({"fqdn": value}, "sonar_dns")
        if is_present == 0:
            logger.debug(value + " not found")
            time.sleep(1)
            result = google_dns.fetch_DNS_records(value)
            if result == []:
                logger.debug("Unable to resolve")
                original_records = dns_manager.find_multiple({"value": value},
                                                             "sonar_dns")
                for record in original_records:
                    check = dead_dns_collection.count_documents(
                        {"fqdn": record["fqdn"]})
                    if check == 0:
                        record.pop("_id")
                        dead_dns_collection.insert(record)
            else:
                for entry in result:
                    if is_tracked_zone(entry["fqdn"], zones):
                        new_record = entry
                        new_record["status"] = "unconfirmed"
                        new_record["zone"] = get_fld_from_value(value, "")
                        new_record["created"] = datetime.now()
                        if result[0]["type"] == "cname" and is_tracked_zone(
                                entry["value"], zones):
                            add_to_list(entry["value"], round_three)
                        logger.debug("Found: " + value)
                        if new_record["zone"] != "":
                            dns_manager.insert_record(new_record, "marinus")

    # For each tracked CNAME result found in the first pass across Sonar DNS
    logger.info("Round Three length: " + str(len(round_three)))
    for hostname in round_three:
        zone = get_fld_from_value(hostname, "")
        if zone is not None and zone != "":
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips != []:
                for ip_addr in ips:
                    if is_tracked_zone(ip_addr["fqdn"], zones):
                        record = {"fqdn": ip_addr["fqdn"]}
                        record["zone"] = get_fld_from_value(
                            ip_addr["fqdn"], "")
                        record["created"] = datetime.now()
                        record["type"] = ip_addr["type"]
                        record["value"] = ip_addr["value"]
                        record["status"] = "unconfirmed"
                        dns_manager.insert_record(record, "marinus")
            else:
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "marinus")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert(original_record)
                logger.debug("Failed IP Lookup for: " + hostname)
        else:
            logger.debug("Failed match on zone for: " + hostname)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Exemple #17
0
def main():
    """
    Begin Main()
    """

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    jobs_collection = mongo_connector.get_jobs_connection()
    mongo_ct = mongo_connector.get_certificate_transparency_connection()
    cert_graphs_collection = mongo_connector.get_cert_graphs_connection()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    parser = argparse.ArgumentParser(description='Creates and stores certificate graphs in the database based on one or more sources.')
    parser.add_argument('--check_censys', action='store_true', default=False, required=False, help='Whether to check the Censys collection in the database')
    parser.add_argument('--check_443_scans', action='store_true', default=False, required=False, help='Whether to check the zgrab collection in the database')
    parser.add_argument('--check_ct_scans', action='store_true', default=False, required=False, help='Whether to check the CT collection in the database')
    args = parser.parse_args()

    if args.check_censys is True:
        censys_collection = mongo_connector.get_censys_connection()

    if args.check_443_scans is True:
        zgrab_collection = mongo_connector.get_zgrab_443_data_connection()

    for zone in zones:
        print("Creating: " + zone)
        graph = nx.DiGraph()

        certs_list = {}

        if args.check_ct_scans:
            certs_list = get_current_ct_certificates(mongo_ct, zone)
        if args.check_censys:
            certs_list = add_censys_certificates(censys_collection, zone, certs_list)
        if args.check_443_scans:
            certs_list = add_terminal_zgrab_certificates(zgrab_collection, zone, certs_list)
            certs_list = add_initial_zgrab_certificates(zgrab_collection, zone, certs_list)

        graph = create_nodes(graph, mongo_connector, zone, certs_list)
        data = json_graph.node_link_data(graph)

        my_data = {}
        my_data['links'] = data['links']
        my_data['nodes'] = data['nodes']
        my_data['zone'] = zone
        my_data['created'] = datetime.now()

        cert_graphs_collection.remove({'zone': zone})
        cert_graphs_collection.insert(my_data)

    # Record status
    jobs_collection.update_one({'job_name': 'create_cert_graphs'},
                               {'$currentDate': {"updated": True},
                                "$set": {'status': 'COMPLETE'}})

    now = datetime.now()
    print("Ending: " + str(now))
Exemple #18
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Make database connections
    mongo_connector = MongoConnector.MongoConnector()
    ct_collection = mongo_connector.get_certificate_transparency_connection()
    config_collection = mongo_connector.get_config_connection()
    x509parser = X509Parser.X509Parser()

    zones = ZoneManager.get_distinct_zones(mongo_connector)
    result = config_collection.find_one({}, {'SSL_Orgs': 1, "_id": 0})
    ssl_orgs = result['SSL_Orgs']

    # Defaults
    save_location = '/mnt/workspace/'
    download_method = 'dbAndSave'
    save_type = "PEM"

    parser = argparse.ArgumentParser(
        description='Download certificate information from the provided CT log.'
    )
    parser.add_argument(
        '--log_source',
        required=True,
        help=
        'Indicates which log to query based on values in the x509Parser library'
    )
    parser.add_argument(
        '--include_precerts',
        action="store_true",
        help='Include pre-certificates which are not finalized')
    parser.add_argument(
        '--download_methods',
        choices=['dbAndSave', 'dbOnly'],
        default=download_method,
        help=
        'Indicates whether to download the raw files or just save to the database'
    )
    parser.add_argument(
        '--starting_index',
        required=False,
        default=-1,
        type=int,
        help='Force the script to start at a specific index within the log.')
    parser.add_argument(
        '--cert_save_location',
        required=False,
        default=save_location,
        help=
        'Indicates where to save the certificates on disk when choosing dbAndSave'
    )
    parser.add_argument(
        '--save_type',
        choices=['PEM', 'ASN1'],
        default=save_type,
        help='Indicates which format to use for the data. The default is PEM')
    args = parser.parse_args()

    source = args.log_source
    try:
        ct_log_map = x509parser.CT_LOG_MAP[source]
    except KeyError:
        logger.error("ERROR: UNKNOWN LOG SOURCE: " + source)
        exit(1)

    if args.cert_save_location:
        save_location = args.cert_save_location
        if not save_location.endswith("/"):
            save_location = save_location + "/"

    if args.download_methods:
        download_method = args.download_methods
        check_save_location(save_location, source)

    if args.save_type:
        save_type = args.save_type

    jobs_manager = JobsManager.JobsManager(mongo_connector, "ct_log-" + source)
    jobs_manager.record_job_start()

    if args.starting_index == -1:
        starting_index = fetch_starting_index(ct_collection, source)
    else:
        starting_index = args.starting_index
    logger.info("Starting Index: " + str(starting_index))

    sth_data = fetch_sth(logger, "https://" + ct_log_map['url'])
    logger.info("Tree size: " + str(sth_data['tree_size']))

    current_index = starting_index
    while current_index < sth_data['tree_size']:
        ending_index = current_index + 256
        if ending_index > sth_data['tree_size']:
            ending_index = sth_data['tree_size']

        logger.debug("Checking from index: " + str(current_index) +
                     " to index " + str(ending_index))
        certs = fetch_certificate_batch(logger, "https://" + ct_log_map['url'],
                                        current_index, ending_index)

        for entry in certs['entries']:
            der_cert, cert_type = get_cert_from_leaf(logger,
                                                     entry['leaf_input'])
            if der_cert is None and cert_type == 1 and not args.include_precerts:
                current_index = current_index + 1
                continue
            elif der_cert is None and cert_type == 0:
                current_index = current_index + 1
                continue
            elif der_cert is None and cert_type == 1:
                der_cert = get_cert_from_extra_data(entry['extra_data'])

            cert = x509parser.parse_data(der_cert, source)
            if cert is None:
                logger.warning("Skipping certificate index: " +
                               str(current_index))
                current_index = current_index + 1
                continue

            if cert_type == 1:
                cert['ct_log_type'] = "PRE-CERTIFICATE"
            else:
                cert['ct_log_type'] = "CERTIFICATE"

            cert_zones = check_zone_relevancy(cert, zones)

            if check_org_relevancy(cert, ssl_orgs) or cert_zones != []:
                cert[source + "_id"] = current_index
                cert['zones'] = cert_zones
                logger.info("Adding " + source + " id: " + str(current_index) +
                            " SHA256: " + cert['fingerprint_sha256'])
                insert_certificate(cert, source, ct_collection, cert_zones)

                if download_method == 'dbAndSave':
                    write_file(logger, cert, save_location, save_type, source)

            current_index = current_index + 1

    # Set isExpired for any entries that have recently expired.
    ct_collection.update(
        {
            "not_after": {
                "$lt": datetime.utcnow()
            },
            "isExpired": False
        }, {"$set": {
            "isExpired": True
        }},
        multi=True)

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Exemple #19
0
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    # Obtain the list of known email addresses from the config collection
    mongo_connector = MongoConnector.MongoConnector()
    whois_collection = mongo_connector.get_whois_connection()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    zones_collection = mongo_connector.get_zone_connection()
    jobs_collection = mongo_connector.get_jobs_connection()

    # Grab all zones that are not expired or false positives
    # Also exclude any that were recently created since they won't have data yet
    date_delta = datetime.today() - timedelta(days=30)
    zones = zones_collection.distinct('zone', {'created': {"$lt": date_delta}, 'status': {"$nin": [ZoneManager.EXPIRED, ZoneManager.FALSE_POSITIVE]}})

    # The Python Whois library is hit and miss with some international zones.
    # For now, this script focuses on the most popular TLDs.
    new_zones = get_primary_zones(zones)

    expired_list = []
    for zone in new_zones:
        if whois_collection.find({'zone': zone}).count() == 0:
            # Assume it is expired if there is no longer a whois record present
            expired_list.append(zone)

    # Iterate over a copy since entries may be removed from the list.
    for zone in list(expired_list):
        if all_dns_collection.find({'zone': zone}).count() > 0:
            # This may be a case where the Python Whois library failed
            # and the zone is still active.
            print("DNS records still exist for " + zone)
            expired_list.remove(zone)

    zone_manager = ZoneManager(mongo_connector)

    # Need to get this list before setting zones to expired in order to avoid a recursion problem.
    already_expired = zone_manager.get_zones_by_status(ZoneManager.EXPIRED)

    possibly_renewed = []
    for zone in already_expired:
        if whois_collection.find({'zone': zone}).count() == 1:
            possibly_renewed.append(zone)


    for zone in expired_list:
        print("Expiring: " + zone)
        zone_manager.set_status(zone, ZoneManager.EXPIRED, "mark_expired.py")


    # Get the list of known registering entities.
    # This will only work for some whois lookups since Python Whois doesn't get
    # a valid org for all lookups and some have privacy enabled.
    config_collection = mongo_connector.get_config_connection()
    result = config_collection.find({}, {'Whois_Orgs': 1})
    orgs = result[0]['Whois_Orgs']

    for zone in possibly_renewed:
        # We need to be careful of automatically marking something renewed
        # since it could have been registered by someone else.
        if whois_collection.find({'zone': zone, 'org': {"$in": orgs}}).count() == 1:
            print("ATTENTION: " + zone + "has been renewed")
            zone_manager.set_status(zone, ZoneManager.UNCONFIRMED, "mark_expired.py")
        else:
            print("WARNING: " + zone + " has been renewed by an unknown entity")


    # Record status
    jobs_collection.update_one({'job_name': 'mark_expired'},
                               {'$currentDate': {"updated": True},
                                "$set": {'status': 'COMPLETE'}})


    now = datetime.now()
    print("Ending: " + str(now))
Exemple #20
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Create an instance of the VirusTotal class
    vt_instance = VirusTotal.VirusTotal()

    # Get collections for the queries
    mongo_connector = MongoConnector.MongoConnector()
    vt_collection = mongo_connector.get_virustotal_connection()

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           "get_virustotal_data")
    jobs_manager.record_job_start()

    # Collect the list of tracked TLDs
    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # For each tracked TLD
    for zone in zones:
        logger.debug("Checking " + zone)
        results = vt_instance.get_domain_report(zone)

        if results is None:
            logger.warning("Error querying zone " + zone)
        elif results["response_code"] == -1:
            logger.warning("VT unhappy with " + zone)
        elif results["response_code"] == 0:
            logger.warning("VT doesn't have " + zone)
        else:
            logger.debug("Matched " + zone)

            results["zone"] = zone
            results["created"] = datetime.now()

            # Mongo doesn't allow key names with periods in them
            # Re-assign to an undotted key name
            if "Dr.Web category" in results:
                results["Dr Web category"] = results.pop("Dr.Web category")
            elif "alphaMountain.ai category" in results:
                results["alphaMountain_ai category"] = results.pop(
                    "alphaMountain.ai category")

            vt_collection.delete_one({"zone": zone})

            if "last_https_certificate" in results:
                if "extensions" in results["last_https_certificate"]:
                    if ("1.3.6.1.4.1.11129.2.4.2" in
                            results["last_https_certificate"]["extensions"]):
                        results["last_https_certificate"]["extensions"][
                            "sct_list"] = results["last_https_certificate"][
                                "extensions"].pop("1.3.6.1.4.1.11129.2.4.2")

            mongo_connector.perform_insert(vt_collection, results)

        # This sleep command is so that we don't exceed the daily limit on the free API
        # This setting results in this script taking several days to complete
        time.sleep(25)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemple #21
0
def main():
    """
    Begin Main...
    """

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'extract_mx_domains')
    google_dns = GoogleDNS.GoogleDNS()

    jobs_manager.record_job_start()

    dns_names = []
    round_two = []

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Collect the list of domains from the MX Records
    extract_mx_names(dns_names, dns_manager)

    input_list = []

    # Some MX records point to third-party domains.
    # Therefore, we filter to only the root domains that belong to the tracked company.
    print("Pre-filter list: " + str(len(dns_names)))
    for hostname in dns_names:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)

            # Pause to prevent DoS-ing of Google's HTTPS DNS Service
            time.sleep(1)

            if ips != []:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)

                    if ip_addr['type'] == "cname" and is_tracked_zone(
                            ip_addr['value'], zones):
                        add_to_round_two(ip_addr['value'], round_two)
            else:
                print("Failed IP Lookup for: " + hostname)
        else:
            print("Failed match on zone for: " + hostname)

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    # Some DNS records will be CNAME records pointing to other tracked domains.
    # This is a single level recursion to lookup those domains.
    print("Round Two list: " + str(len(round_two)))
    for hostname in round_two:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips != []:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)
            else:
                print("Failed IP Lookup for: " + hostname)
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "mx")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert(original_record)
        else:
            print("Failed match on zone for: " + hostname)

    # Record all the results.
    dns_manager.remove_by_source("mx")
    print("List length: " + str(len(input_list)))
    for final_result in input_list:
        dns_manager.insert_record(final_result, "mx")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
Exemple #22
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running(os.path.basename(__file__)):
        logger.warning("Already running...")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    parser = argparse.ArgumentParser(
        description="Parse Sonar files based on domain zones.")
    parser.add_argument(
        "--sonar_file_type",
        choices=["dns-any", "dns-a", "rdns"],
        required=True,
        help='Specify "dns-any", "dns-a", or "rdns"',
    )
    parser.add_argument(
        "--database",
        choices=["local", "remote"],
        required=False,
        default="local",
        help="Whether to use the local or remote DB",
    )
    args = parser.parse_args()

    r7 = Rapid7.Rapid7()

    if args.database == "remote":
        mongo_connector = RemoteMongoConnector.RemoteMongoConnector()
        dns_manager = DNSManager.DNSManager(mongo_connector,
                                            "get_sonar_data_dns")
    else:
        mongo_connector = MongoConnector.MongoConnector()
        dns_manager = DNSManager.DNSManager(mongo_connector)

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    save_directory = "./files/"
    check_save_location(save_directory)

    # A session is necessary for the multi-step log-in process
    s = requests.Session()

    if args.sonar_file_type == "rdns":
        logger.info("Updating RDNS records")
        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_rdns")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "rdns", jobs_manager)
            if html_parser.rdns_url == "":
                logger.error("Unknown Error")
                jobs_manager.record_job_error()
                exit(0)

            unzipped_rdns = download_remote_files(logger, s,
                                                  html_parser.rdns_url,
                                                  save_directory, jobs_manager)
            update_rdns(logger, unzipped_rdns, zones, dns_manager,
                        mongo_connector)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    elif args.sonar_file_type == "dns-any":
        logger.info("Updating DNS ANY records")

        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_dns-any")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.any_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.any_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    elif args.sonar_file_type == "dns-a":
        logger.info("Updating DNS A, AAAA, and CNAME records")

        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_dns-a")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.a_url != "":
                logger.info("Updating A records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.a_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
            if html_parser.aaaa_url != "":
                logger.info("Updating AAAA records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.aaaa_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
            if html_parser.cname_url != "":
                logger.info("Updating CNAME records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.cname_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    else:
        logger.error("Unrecognized sonar_file_type option. Exiting...")

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemple #23
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Obtain the list of known email addresses from the config collection
    mongo_connector = MongoConnector.MongoConnector()
    whois_collection = mongo_connector.get_whois_connection()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    zones_collection = mongo_connector.get_zone_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'mark_expired')
    jobs_manager.record_job_start()

    # Grab all zones that are not expired or false positives
    # Also exclude any that were recently created since they won't have data yet
    date_delta = datetime.today() - timedelta(days=30)
    zones = zones_collection.distinct(
        'zone', {
            'created': {
                "$lt": date_delta
            },
            'status': {
                "$nin": [ZoneManager.EXPIRED, ZoneManager.FALSE_POSITIVE]
            }
        })

    # The Python Whois library is hit and miss with some international zones.
    # For now, this script focuses on the most popular TLDs.
    new_zones = get_primary_zones(logger, zones)

    expired_list = []
    for zone in new_zones:
        if whois_collection.find({'zone': zone}).count() == 0:
            # Assume it is expired if there is no longer a whois record present
            expired_list.append(zone)

    # Iterate over a copy since entries may be removed from the list.
    for zone in list(expired_list):
        if all_dns_collection.find({'zone': zone}).count() > 0:
            # This may be a case where the Python Whois library failed
            # and the zone is still active.
            logger.debug("DNS records still exist for " + zone)
            expired_list.remove(zone)

    zone_manager = ZoneManager(mongo_connector)

    # Need to get this list before setting zones to expired in order to avoid a recursion problem.
    already_expired = zone_manager.get_zones_by_status(ZoneManager.EXPIRED)

    possibly_renewed = []
    for zone in already_expired:
        if whois_collection.find({'zone': zone}).count() == 1:
            possibly_renewed.append(zone)

    for zone in expired_list:
        logger.debug("Expiring: " + zone)
        zone_manager.set_status(zone, ZoneManager.EXPIRED, "mark_expired.py")

    # Get the list of known registering entities.
    # This will only work for some whois lookups since Python Whois doesn't get
    # a valid org for all lookups and some have privacy enabled.
    config_collection = mongo_connector.get_config_connection()
    result = config_collection.find({}, {
        'Whois_Orgs': 1,
        'Whois_Name_Servers': 1
    })
    orgs = result[0]['Whois_Orgs']
    name_servers = []
    if 'Whois_Name_Servers' in result[0]:
        name_servers = result[0]['Whois_Name_Servers']

    logger.debug(str(name_servers))

    for zone in possibly_renewed:
        # We need to be careful of automatically marking something renewed
        # since it could have been registered by someone else.
        if whois_collection.find({
                'zone': zone,
                'org': {
                    "$in": orgs
                }
        }).count() == 1:
            logger.warning("ATTENTION: " + zone +
                           " has been renewed based on org")
            zone_manager.set_status(zone, ZoneManager.UNCONFIRMED,
                                    "mark_expired.py")
        else:
            result = whois_collection.find({'zone': zone}, {
                'name_servers': 1,
                "_id": 0
            })
            found = 0
            if result is not None and 'name_servers' in result[0] and result[
                    0]['name_servers'] is not None:
                for entry in result[0]['name_servers']:
                    if entry.lower() in name_servers:
                        logger.warning(
                            "ATTENTION: " + zone +
                            " has been renewed based on name servers")
                        zone_manager.set_status(zone, ZoneManager.UNCONFIRMED,
                                                "mark_expired.py")
                        found = 1
                        break
            if found == 0:
                result = whois_collection.find({'zone': zone}, {
                    'name_server_groups': 1,
                    "_id": 0
                })
                if result is not None and 'name_server_groups' in result[
                        0] and result[0]['name_server_groups'] is not None:
                    for entry in result[0]['name_server_groups']:
                        if entry.lower() in name_servers:
                            logger.warning(
                                "ATTENTION: " + zone +
                                " has been renewed based on name server groups"
                            )
                            zone_manager.set_status(zone,
                                                    ZoneManager.UNCONFIRMED,
                                                    "mark_expired.py")
                            found = 1
                            break
            if found == 0:
                logger.warning(zone + " has been renewed by an unknown entity")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Exemple #24
0
def main():

    now = datetime.now()
    print("Starting: " + str(now))

    azure_connector = AzureConnector.AzureConnector()
    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    zone_ingestor = ZoneIngestor.ZoneIngestor()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'fetch_azure_dns')
    jobs_manager.record_job_start()

    current_zones = ZoneManager.get_distinct_zones(mongo_connector)

    resource_client = azure_connector.get_resources_client()
    resources = []

    # The resource list is not currently used.
    for item in resource_client.resource_groups.list():
        resources.append(item.name)

    dns_client = azure_connector.get_dns_client()

    zones = dns_client.zones.list()

    # The types of records that Azure DNS will let you configure
    record_types = {
        'A': 'arecords',
        'AAAA': 'aaaa_records',
        'MX': 'mx_records',
        'NS': 'ns_records',
        'PTR': 'ptr_records',
        'SRV': 'srv_records',
        'TXT': 'txt_records',
        'CNAME': 'cname_record',
        'SOA': 'soa_record'
    }

    for zone in zones:
        print("Zone: " + zone.name)
        data = split_id(zone.id)

        if zone.zone_type == ZoneType.public:
            print(zone.name + " is public:")

            if zone.name not in current_zones:
                print("Creating zone: " + zone.name)
                zone_ingestor.add_zone(zone.name,
                                       "azure:" + data["resourceGroups"])

            try:
                print("ResourceGroup: " + data["resourceGroups"])
                records = dns_client.record_sets.list_all_by_dns_zone(
                    data["resourceGroups"], zone.name)
                for entry in records:
                    # The record_data id value ends in rtype/rvalue so you must guess the rtype
                    record_data = split_id(entry.id)
                    for rtype in record_types:
                        if rtype in record_data:
                            results = extract_record_set_value(rtype, entry)
                            for result in results:
                                result['zone'] = zone.name
                                result['created'] = datetime.now()
                                result['status'] = 'confirmed'
                                dns_manager.insert_record(
                                    result, "azure:" + data["resourceGroups"])
            except Exception:
                print("No records found")

    jobs_manager.record_job_complete()
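
split_id is not shown here. Azure resource IDs are '/'-delimited key/value
paths, so a plausible implementation (an assumption, not the project's actual
code) pairs alternating path segments:

def split_id(resource_id):
    # e.g. /subscriptions/<sub>/resourceGroups/<group>/providers/...
    parts = resource_id.strip("/").split("/")
    return dict(zip(parts[0::2], parts[1::2]))

With that shape, split_id(zone.id)["resourceGroups"] yields the resource
group name used in the record-set queries above.
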
Exemple #25
0
def main():
    """
    Begin the main function.
    """
    parser = argparse.ArgumentParser(description='Setup utility for the Marinus MongoDB instance.')
    parser.add_argument("--create_collections", help="Initialize the collections in the database.", action="store_true")
    parser.add_argument("--add_zone", metavar="ROOT_DOMAIN", help="Add a new domain zone to Marinus", action="store", type=str)
    parser.add_argument("--add_IPv4_network", metavar="IPv4_CIDR", help="Add an IPv4 CIDR zone to Marinus", action="store", type=str)
    parser.add_argument("--add_IPv6_network", metavar="IPv6_CIDR", help="Add an IPv6 CIDR zone to Marinus", action="store", type=str)
    parser.add_argument("--add_tls_org", metavar="TLS_ORGANIZATION_VALUE", help="Add a TLS organization to the Marinus config", action="store", type=str)
    parser.add_argument("--add_whois_org", metavar="WHOIS_ORGANIZATION_VALUE", help="Add a Whois organization to the Marinus config", action="store", type=str)
    parser.add_argument("--add_dns_admin", metavar="DNS_ADMIN_EMAIL", help="Add a DNS administrator to the Marinus config", action="store", type=str)
    parser.add_argument("--add_user", help="Add a SSO userid to Marinus", action="store_true")
    parser.add_argument("--add_user_to_group", help="Assign a user to a group", action="store_true")
    parser.add_argument("--add_group_admin", help="Assign another admin to a group", action="store_true")
    parser.add_argument("--username", metavar="USERNAME", help="The username for add_user or add_user_to_group", action="store", type=str)
    parser.add_argument("--group", metavar="GROUP", choices=['admin','data_admin'], help="The group_value for add_user_to_group", action="store", type=str)
    parser.add_argument("--add_new_job", metavar="PYTHON_SCRIPT_NAME", help="Add a new tracked script to the jobs table", action="store", type=str)


    args = parser.parse_args()

    mongo_connector = MongoConnector.MongoConnector()

    if args.create_collections:
        create_collections(mongo_connector.m_connection)
        create_job_collection(mongo_connector)
        create_config_collection(mongo_connector)
        create_user(mongo_connector, "marinus")
        create_first_groups(mongo_connector, "marinus")
    elif args.add_user_to_group:
        if args.username is None or args.group is None:
            print("A username and group value must be provided")
            exit(1)
        add_user_to_group(mongo_connector, args.username, args.group)
    elif args.add_group_admin:
        if args.username is None or args.group is None:
            print("A username and group value must be provided")
            exit(1)
        add_admin_to_group(mongo_connector, args.username, args.group)
    elif args.add_user:
        if args.username is None:
            print("A username must be provided!")
            exit(1)
        create_user(mongo_connector, args.username)
    elif args.add_zone:
        create_zone(args.add_zone)
    elif args.add_IPv4_network:
        create_IPv4_zone(mongo_connector, args.add_IPv4_network)
    elif args.add_IPv6_network:
        create_IPv6_zone(mongo_connector, args.add_IPv6_network)
    elif args.add_tls_org is not None:
        add_tls_org(mongo_connector, args.add_tls_org)
    elif args.add_whois_org is not None:
        add_whois_org(mongo_connector, args.add_whois_org)
    elif args.add_dns_admin is not None:
        add_dns_admin(mongo_connector, args.add_dns_admin)
    elif args.add_new_job is not None:
        add_new_job(mongo_connector, args.add_new_job)
    else:
        print("ERROR: Unrecognized action")
Exemple #26
0
def main():
    now = datetime.now()
    print("Starting: " + str(now))

    dns_types = {
        "a": 1,
        "ns": 2,
        "cname": 5,
        "soa": 6,
        "ptr": 12,
        "hinfo": 13,
        "mx": 15,
        "txt": 16,
        "aaaa": 28,
        "srv": 33,
        "naptr": 35,
        "ds": 43,
        "rrsig": 46,
        "dnskey": 48
    }

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'marinus_dns')
    jobs_manager.record_job_start()

    dns_manager = DNSManager.DNSManager(mongo_connector)

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    google_dns = GoogleDNS.GoogleDNS()

    for zone in zones:
        time.sleep(1)
        for dtype, dnum in dns_types.items():
            result = google_dns.fetch_DNS_records(zone, dnum)

            if result == []:
                print("No " + dtype + " records found for " + zone)
            else:
                new_record = result[0]
                new_record['status'] = 'confirmed'
                new_record['zone'] = zone
                new_record['created'] = datetime.now()
                print("Found " + dtype + " for: " + zone)
                dns_manager.insert_record(new_record, "marinus")

    print("Starting SOA Search")

    soa_searches = find_sub_zones(all_dns_collection)
    for entry in soa_searches:
        time.sleep(1)
        result = google_dns.fetch_DNS_records(entry, dns_types['soa'])
        if result != []:
            new_record = result[0]
            new_record['status'] = 'confirmed'
            new_record['zone'] = get_fld_from_value(entry, '')
            new_record['created'] = datetime.now()
            print("Found SOA: " + entry)
            if new_record['zone'] != '':
                dns_manager.insert_record(new_record, "marinus")

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
"""
This script downloads updates from the remote MongoDB server that is used for larger jobs.
This script is only necessary if a remote MongoDB is set up.

This script can be run daily.
"""

import logging

from datetime import datetime, timedelta
from libs3 import MongoConnector, RemoteMongoConnector, JobsManager
from libs3.LoggingUtil import LoggingUtil

# Connect to the remote databases
mongo_connector = MongoConnector.MongoConnector()
rm_connector = RemoteMongoConnector.RemoteMongoConnector()


def download_censys_scan_info(censys_collection, remote_censys_collection):
    """
    Download the latest censys scan information
    """
    # Grab the new results from the remote server.
    results = remote_censys_collection.find({}, {"_id": 0})

    # Remove the previous results from the local Censys collection
    censys_collection.remove({})

    # Insert the new results from the remote server into the local server
    for result in results:
        censys_collection.insert(result)

def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running(os.path.basename(__file__)):
        logger.warning("Already running...")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    r7 = Rapid7.Rapid7()

    mongo_connection = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connection)
    ip_manager = IPManager.IPManager(mongo_connection)
    rdns_collection = mongo_connection.get_sonar_reverse_dns_connection()

    zones = ZoneManager.get_distinct_zones(mongo_connection)
    logger.info("Zone length: " + str(len(zones)))

    save_directory = "./files/"

    parser = argparse.ArgumentParser(
        description='Parse Sonar files based on CIDRs.')
    parser.add_argument('--sonar_file_type',
                        required=True,
                        help='Specify "dns" or "rdns"')
    args = parser.parse_args()

    check_save_location(save_directory)

    # A session is necessary for the multi-step log-in process
    s = requests.Session()

    if args.sonar_file_type == "rdns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_rdns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "rdns", jobs_manager)
            if html_parser.rdns_url == "":
                logger.error("Unknown Error")
                jobs_manager.record_job_error()
                exit(0)

            unzipped_rdns = download_remote_files(logger, s,
                                                  html_parser.rdns_url,
                                                  save_directory, jobs_manager)
            update_rdns(logger, unzipped_rdns, rdns_collection, dns_manager,
                        ip_manager, zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        logger.info("RDNS Complete")
        jobs_manager.record_job_complete()

    elif args.sonar_file_type == "dns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_dns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.any_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.any_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.a_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.a_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.aaaa_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.aaaa_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))

            jobs_manager.record_job_error()
            exit(0)

        logger.info("DNS Complete")

        jobs_manager.record_job_complete()

    else:
        logger.error("Unrecognized sonar_file_type option. Exiting...")

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")