def run(self):
    # Worker loop of a scan thread.
    #
    # Tests are popped one by one from the list shared among the threads and
    # sent to the target. The loop ends when the shared switch is turned OFF
    # or when the shared test list is empty. Hits are issued as results; when
    # a hit carries a non-empty 'TRIGGER', it is appended to the shared
    # trigger list.
    #
    # Test entry fields:
    # OSVDB | SERVER | METHOD | PATH | HEADER | DATA | MATCH_CODE | MATCH |
    # MATCH_AND | MATCH_OR | FAIL_CODE | FAIL | FAIL_OR | DESCRIPTION | TRIGGER
    while True:
        with self.__switch_lock:
            on = self.__switch[0]
        if not on:
            break

        # Only the pop() is guarded: an IndexError here means the shared list
        # is exhausted. Guarding the whole body would make any IndexError
        # raised while handling a test silently terminate the thread.
        try:
            test = self.__tests.pop(0)
        except IndexError:
            break

        request = core_http.HTTPRequest()
        request.set_method(test[u'METHOD'])
        request.set_path(self.__root + test[u'PATH'], False)
        if test[u'DATA'] != u'':
            request.set_data(test[u'DATA'])
        if test[u'HEADER'] != u'':
            # Split on the FIRST colon only, so that values containing a
            # colon (e.g. URLs) are kept whole and an entry without any
            # colon yields an empty value instead of raising.
            # Note: the header provided in the database entry is set last in
            # order to overwrite a previously set default header.
            header_name, _, header_value = test[u'HEADER'].partition(u':')
            request.set_header(header_name.strip(), header_value.strip())

        response = self.__httpClient.perform_request(request)
        if response is None:
            issue_result(self.__results, self.__results_lock, self.__cli,
                         {u'ERROR': u'Request failed. Server may be overloaded.'})
            continue

        result = self.__test_hit(test, request, response)
        if result is None:
            # Misses are only displayed once every MISS_DISPLAY_RATE.
            if self.__counter % MISS_DISPLAY_RATE == 0:
                issue_result(self.__results, self.__results_lock, self.__cli,
                             {u'MISS': request.get_path(True)})
            continue

        issue_result(self.__results, self.__results_lock, self.__cli, result)
        if u'TRIGGER' in result and result[u'TRIGGER'] != u'':
            self.__triggers.append(result[u'TRIGGER'])
def update_custom_database():
    # Return: boolean - True if the custom database was updated
    #
    # Downloads the custom tests and banners files from HOST_CUSTOM (port 80)
    # and hands them to core_file.db_custom_update(). False is returned when
    # the HTTP test of the host fails, when a request fails, when one of the
    # answers is not a 200, or when the update raises an IOError.
    if not core_http.test_http(HOST_CUSTOM, 80)[0]:
        return False

    client = core_http.HTTPClient(HOST_CUSTOM, 80)

    def fetch(path):
        # Send a GET request for 'path' through the shared client.
        query = core_http.HTTPRequest()
        query.set_method('GET')
        query.set_path(path)
        return client.perform_request(query)

    # Both files are requested before any of the answers is inspected.
    tests_reply = fetch(PATH_CUSTOM_TESTS)
    banners_reply = fetch(PATH_CUSTOM_BANNERS)

    if tests_reply is None or banners_reply is None:
        return False
    if tests_reply.get_code() != u'200' or banners_reply.get_code() != u'200':
        return False

    banners_data = banners_reply.get_data()
    tests_data = tests_reply.get_data()
    try:
        core_file.db_custom_update(tests_data, banners_data)
    except IOError:
        # typically: permission denied....
        return False
    return True
def update_nikto_database():
    # Return: boolean - True if the Nikto database was updated
    #
    # Downloads the Nikto tests and variables files from HOST_CIRT (port 80)
    # and hands them to core_file.db_nikto_update(). False is returned when
    # the HTTP test of the host fails, when a request fails, when one of the
    # answers is not a 200, or when the update raises an IOError.
    if not core_http.test_http(HOST_CIRT, 80)[0]:
        return False

    client = core_http.HTTPClient(HOST_CIRT, 80)

    def fetch(path):
        # Send a GET request for 'path' through the shared client.
        query = core_http.HTTPRequest()
        query.set_method('GET')
        query.set_path(path)
        return client.perform_request(query)

    # Both files are requested before any of the answers is inspected.
    tests_reply = fetch(PATH_NIKTO_TESTS)
    vars_reply = fetch(PATH_NIKTO_VARIABLES)

    if tests_reply is None or vars_reply is None:
        return False
    if tests_reply.get_code() != u'200' or vars_reply.get_code() != u'200':
        return False

    vars_data = vars_reply.get_data()
    tests_data = tests_reply.get_data()
    try:
        core_file.db_nikto_update(tests_data, vars_data)
    except IOError:
        # typically: permission denied....
        return False
    return True
def perform(hosts, ports, roots, server, skip_string, cli, results, results_lock, switch, switch_lock):
    # Input: hosts - [unicode] - list of target hosts or IP addresses
    #        ports - [integer] - list of target ports
    #        roots - [unicode] - list of root directories
    #        server - unicode - force this as the web server
    #        skip_string - unicode - user-defined false positive indicator
    #        cli - boolean - set verbosity on STDOUT to ON/OFF
    #        results - [dict] - (shared) output results list
    #        results_lock - threading.Lock - (shared) lock on the output list
    #        switch - [boolean] - (shared) switch controlling scan life-cycle (ON/OFF)
    #        switch_lock - threading.Lock - (shared) lock on switch
    #
    # Return: (void)
    #
    # This function performs a URL scan on the target hosts/ports based on test
    # files. The results are appended to the (shared) results list provided as
    # parameter. The scan ends when all the applicable entries in the test
    # list(s) have been processed or as soon as the (shared) switch is 'turned
    # off' (i.e. switch = [False]) by the calling instance (e.g. GUI/CLI).
    # Duplicate hosts, ports and roots are processed only once.

    def switched_on():
        # Read the shared switch under its lock.
        with switch_lock:
            return switch[0]

    def report(target, extra):
        # Issue one result: the target identification completed with 'extra'.
        entry = dict(target)
        entry.update(extra)
        issue_result(results, results_lock, cli, entry)

    def http_get(client, path):
        # Send a GET request for 'path'; return the (request, response) pair.
        request = core_http.HTTPRequest()
        request.set_method(u'GET')
        request.set_path(path)
        return request, client.perform_request(request)

    def random_path(prefix):
        # 'prefix' followed by 8 random characters: a path that should not exist.
        return prefix + u''.join([choice(letters + digits) for i in range(8)])

    def take_fingerprint(target, request, response):
        # Fingerprint a response and issue the outcome as INFO or ERROR.
        fp = core_http.fingerprint_response(request, response)
        if fp is None:
            report(target, {u'ERROR': u'FP(' + request.get_path() + ') => failure!'})
        else:
            report(target, {u'INFO': u'FP(' + request.get_path() + ') => ' +
                            str(fp[0]) + u'#' + str(fp[1]) + u'#' +
                            str(fp[2]) + u'#' + str(fp[3])})
        return fp

    try:
        # loading the list of known webserver banners from database file
        known_banners = core_file.db_load_known_banners()

        # Loading and checking the config parameters. All of them are read
        # through the same parser, which is ended once they are loaded.
        cfParser = core_file.cfg_start_get()
        use_db_nikto = core_file.cfg_get_use_db_nikto(cfParser)
        use_db_custom = core_file.cfg_get_use_db_custom(cfParser)
        threads = core_file.cfg_get_scan_threads(cfParser)
        scan_show_codes_str = core_file.cfg_get_scan_show_codes(cfParser)
        core_file.cfg_end_get(cfParser)

        if not core_utilities.check_threads(threads) or not core_utilities.check_http_codes(scan_show_codes_str):
            issue_result(results, results_lock, cli, {u'ERROR': u'Invalid configuration settings.'})
            return

        # Nikto tests can be loaded here as they are not dependent on the target;
        # they are thus only loaded once (minimizing file access)
        if use_db_nikto:
            nikto_tests = core_file.db_load_nikto_tests()
        else:
            nikto_tests = []
        scan_show_codes = [c.strip() for c in scan_show_codes_str.split(u',')]

        hosts_done = []
        for host in hosts:
            if not switched_on():
                break
            if host in hosts_done:
                continue
            hosts_done.append(host)

            ipaddress = core_utilities.get_ip_address(host)
            ports_done = []
            for port in ports:
                if not switched_on():
                    break
                if port in ports_done:
                    continue
                ports_done.append(port)

                target = {u'HOST': host, u'PORT': unicode(port), u'IPADDRESS': ipaddress}
                report(target, {u'TARGET': host + u' / ' + unicode(port)})

                # test if target is up and valid...
                http_test = core_http.test_http(host, port)
                if not http_test[0]:
                    # if the host/port is not valid, skip to next port or next host
                    report(target, {u'ERROR': http_test[1]})
                    continue

                # host/port respond to http: identify the server
                httpClient = core_http.HTTPClient(host, port)
                request_404, response_404 = http_get(httpClient, random_path(u'/'))
                if response_404 is None:
                    # if this request does not work, the following tests will
                    # cause trouble, thus consider that host/port is not valid...
                    report(target, {u'ERROR': u'HTTP Request to server failed. Scan aborted.'})
                    continue

                # server_known is returned by the helper but not used here
                server_known, server_banner, server_id = __identify_server(response_404, known_banners)
                if server != u'' and server in known_banners.values():
                    # the user wants to force the server
                    server_id = server
                    report(target, {u'SERVER': server_id + ' *FORCED', u'BANNER': server_banner})
                else:
                    report(target, {u'SERVER': server_id, u'BANNER': server_banner})

                # loading the tests from database
                tests_original = list(nikto_tests)
                if use_db_custom:
                    tests_original.extend(core_file.db_load_custom_tests([], host, True))

                roots_done = []
                for root in roots:
                    if not switched_on():
                        break
                    if root in roots_done:
                        continue
                    roots_done.append(root)

                    # fingerprint interesting messages: the three requests are
                    # sent first, the fingerprints are computed afterwards
                    request_root, response_root = http_get(httpClient, root)
                    request_indexphp, response_indexphp = http_get(httpClient, root + u'index.php')
                    # 404 message - or should be
                    request_404, response_404 = http_get(httpClient, random_path(root))

                    fingerprints = {}
                    fingerprints[u'root'] = take_fingerprint(target, request_root, response_root)
                    fingerprints[u'error404'] = take_fingerprint(target, request_404, response_404)
                    fingerprints[u'index.php'] = take_fingerprint(target, request_indexphp, response_indexphp)

                    # robots.txt: the request may fail (None response), in which
                    # case there is simply nothing to report
                    request_robots, response_robots = http_get(httpClient, root + u'robots.txt')
                    if response_robots is not None and response_robots.get_code() == u'200':
                        robots_fingerprint = core_http.fingerprint_response(request_robots, response_robots)
                        looks_known = any(
                            core_http.test_response_fingerprint(robots_fingerprint, fingerprints[key])
                            for key in (u'root', u'error404', u'index.php'))
                        if not looks_known:
                            report(target, {u'INFO': root + u'robots.txt found. It might be interesting to have a look inside.'})

                    tests = tests_original[:]
                    used_triggers = []
                    triggers = [server_id]
                    while triggers:
                        if not switched_on():
                            break
                        if use_db_custom:
                            tests.extend(core_file.db_load_custom_tests(triggers, host, False))
                        used_triggers.extend(triggers)

                        # fresh list, filled by the threads of this round
                        triggers = []
                        running = []
                        for i in range(0, threads):
                            scan_thread = ScanThread(host, port, ipaddress, root, fingerprints, tests,
                                                     triggers, skip_string, scan_show_codes, cli,
                                                     results, results_lock, switch, switch_lock)
                            scan_thread.start()
                            running.append(scan_thread)
                        for thread in running:
                            thread.join()

                        # Keep only the triggers that were not processed yet.
                        # A new list is built: removing items from a list while
                        # iterating over it skips elements. Duplicates reported
                        # by several hits are collapsed as well.
                        fresh_triggers = []
                        for trigger in triggers:
                            if trigger not in used_triggers and trigger not in fresh_triggers:
                                fresh_triggers.append(trigger)
                        triggers = fresh_triggers

    except core_error.Config_Error as e:
        issue_result(results, results_lock, cli, {u'ERROR': e.error_message})
def perform(hosts, ports, roots, mode, modecpl, cli, results, results_lock, switch, switch_lock):
    # Input: hosts - [unicode] - list of target hosts or IP addresses
    #        ports - [integer] - list of target ports
    #        roots - [unicode] - list of root directories
    #        mode - integer - 0 = listmode, 1 = filemode
    #        modecpl - unicode - recursive (list mode) or filename generator (gen mode)
    #        cli - boolean - set verbosity on STDOUT to ON/OFF
    #
    #        results - [dict] - (shared) output results list
    #        results_lock - threading.Lock - (shared) lock on the output list
    #        switch - [boolean] - (shared) switch controlling scan life-cycle (ON/OFF)
    #        switch_lock - threading.Lock - (shared) lock on switch
    #
    # Return: (void)
    #
    # This function performs a fuzzing scan on target hosts/ports. The encountered
    # results are appended to the (shared) results list provided as parameter. The
    # scan ends when all the entries in the fuzzing list(s) have been processed or
    # as soon as the (shared) switch is 'turned off' (i.e. switch = [False]) by the
    # calling instance (e.g. GUI/CLI). Note that fuzzing lists can be read from file
    # (list mode) or generated on the fly (generator mode) using a user defined
    # expression. In list mode, modecpl[0] == u'1' enables the directory search
    # and modecpl[1] == u'1' enables the file search.

    def switched_on():
        # Read the shared switch under its lock.
        with switch_lock:
            return switch[0]

    def report(target, extra):
        # Issue one result: the target identification completed with 'extra'.
        entry = dict(target)
        entry.update(extra)
        issue_result(results, results_lock, cli, entry)

    def http_get(client, path):
        # Send a GET request for 'path'; return the (request, response) pair.
        request = core_http.HTTPRequest()
        request.set_method(u'GET')
        request.set_path(path)
        return request, client.perform_request(request)

    def take_fingerprint(target, request, response):
        # Fingerprint a response and issue the outcome as INFO or ERROR.
        fp = core_http.fingerprint_response(request, response)
        if fp is None:
            report(target, {u'ERROR': u'FP(' + request.get_path() + ') => failure!'})
        else:
            report(target, {u'INFO': u'FP(' + request.get_path() + ') => ' +
                            str(fp[0]) + u'#' + str(fp[1]) + u'#' +
                            str(fp[2]) + u'#' + str(fp[3])})
        return fp

    try:
        # load parameters from config file and test them; the parameters are
        # all read through one parser (instead of re-opening the configuration
        # for each of them), which is ended once they are loaded
        cfParser = core_file.cfg_start_get()
        threads = core_file.cfg_get_fuzz_threads(cfParser)
        fuzz_show_codes_str = core_file.cfg_get_fuzz_show_codes(cfParser)
        fuzz_method = core_file.cfg_get_fuzz_method(cfParser)
        core_file.cfg_end_get(cfParser)

        # if some parameter is not valid, issue an 'error' result
        if (not core_utilities.check_threads(threads)
                or not core_utilities.check_http_codes(fuzz_show_codes_str)
                or not core_utilities.check_http_method(fuzz_method, True)):
            issue_result(results, results_lock, cli, {u'ERROR': u'Invalid configuration settings.'})
            return

        fuzz_show_codes = [c.strip() for c in fuzz_show_codes_str.split(u',')]

        hosts_done = []
        for host in hosts:
            # check if switch is still 'ON'
            if not switched_on():
                break
            if host in hosts_done:
                continue
            hosts_done.append(host)

            ipaddress = core_utilities.get_ip_address(host)
            ports_done = []
            for port in ports:
                # check if switch is still 'ON'
                if not switched_on():
                    break
                if port in ports_done:
                    continue
                ports_done.append(port)

                target = {u'HOST': host, u'PORT': unicode(port), u'IPADDRESS': ipaddress}
                # this 'informative' result is only for display of the new target on GUI/CLI
                report(target, {u'TARGET': host + ' / ' + unicode(port)})

                # test if target is up and valid...
                http_test = core_http.test_http(host, port, fuzz_method)
                if not http_test[0]:
                    # if target is down output an 'error' result and go to
                    # next port or next host
                    report(target, {u'ERROR': http_test[1]})
                    continue

                # if the target responds to HTTP, go on...
                httpClient = core_http.HTTPClient(host, port)
                roots_done = []
                for root in roots:
                    # check if switch is still 'ON'
                    if not switched_on():
                        break
                    if root in roots_done:
                        continue
                    roots_done.append(root)

                    # fingerprint interesting messages: both requests are sent
                    # first, the fingerprints are computed afterwards
                    request_root, response_root = http_get(httpClient, root)
                    # 404 message - or should be
                    request_404, response_404 = http_get(
                        httpClient,
                        root + u''.join([choice(letters + digits) for i in range(8)]))

                    fingerprints = {}
                    fingerprints[u'root'] = take_fingerprint(target, request_root, response_root)
                    fingerprints[u'error404'] = take_fingerprint(target, request_404, response_404)

                    if mode == 0:
                        # list mode: directories first, then files
                        if not switched_on():
                            break
                        if modecpl[0] == u'1':
                            directories = __find_directories(
                                host, port, ipaddress, root, fingerprints, fuzz_method,
                                threads, fuzz_show_codes, cli, results, results_lock,
                                switch, switch_lock)
                        else:
                            directories = []

                        if not switched_on():
                            break
                        if modecpl[1] == u'1':
                            __find_files(
                                host, port, ipaddress, root, fingerprints, fuzz_method,
                                threads, fuzz_show_codes, cli, results, results_lock,
                                switch, switch_lock, directories)
                    else:
                        # generator mode
                        if not switched_on():
                            break
                        __find_generator(
                            host, port, ipaddress, root, fingerprints, fuzz_method,
                            modecpl.strip(), threads, fuzz_show_codes, cli, results,
                            results_lock, switch, switch_lock)

    except core_error.Config_Error as e:
        issue_result(results, results_lock, cli, {u'ERROR': e.error_message})
def run(self):
    # Worker loop of a fuzzing thread.
    #
    # Loops until there are no more entries left in the list (shared among
    # threads) or until the shared switch is turned OFF. In directory mode
    # (no extensions set) each entry is requested once as 'root + entry + /';
    # in file mode it is requested once per extension.
    directory_mode = self.__extensions is None

    while True:
        with self.__switch_lock:
            on = self.__switch[0]
        # if switch is OFF, give up...
        if not on:
            break

        # pop the next entry from the (shared) list.
        # note that pop() is an atomic operation and is thus assumed to be
        # thread-safe. Only the pop() is guarded: an IndexError here means the
        # list is exhausted, whereas guarding the whole body would make any
        # IndexError raised while handling an entry silently end the thread.
        try:
            entry = self.__entries.pop(0)
        except IndexError:
            break

        if directory_mode:
            suffixes = [u'/']
        else:
            suffixes = self.__extensions

        for suffix in suffixes:
            # the switch is read again before every single request
            with self.__switch_lock:
                on = self.__switch[0]
            if not on:
                break

            # create a new HTTP request from the entry
            req = core_http.HTTPRequest()
            req.set_method(self.__fuzz_method)
            req.set_path(self.__root + entry + suffix, True)

            # perform the request through the HTTP client
            resp = self.__httpClient.perform_request(req)
            if resp is None:
                issue_result(self.__results, self.__results_lock, self.__cli,
                             {u'ERROR': u'Request failed. Server may be overloaded.'})
                continue

            self.__counter += 1
            code = resp.get_code()

            hit = code in self.__fuzz_show_codes
            if hit:
                # a response matching the fingerprint of the 404 page or of
                # the root page is treated as a miss
                fingerprint = core_http.fingerprint_response(req, resp)
                hit = not (
                    core_http.test_response_fingerprint(fingerprint, self.__fingerprints[u'error404'])
                    or core_http.test_response_fingerprint(fingerprint, self.__fingerprints[u'root']))

            if not hit:
                # misses are only displayed once every MISS_DISPLAY_RATE
                if self.__counter % MISS_DISPLAY_RATE == 0:
                    issue_result(self.__results, self.__results_lock, self.__cli,
                                 {u'MISS': req.get_path(True)})
                continue

            # the response indicates a hit: append it to the results list
            result = {
                u'HOST': self.__host,
                u'PORT': unicode(str(self.__port)),
                u'IPADDRESS': self.__ipaddress,
                u'PATH': req.get_path(True),
                u'CODE': code
            }
            issue_result(self.__results, self.__results_lock, self.__cli, result)

            if directory_mode:
                # when a directory is found, it is appended to the (internal)
                # list of directories (used as roots to fuzz files)
                self.__internal_results.append(result[u'PATH'])
def perform(target, cli, results, results_lock, switch, switch_lock):
    # Input: target - unicode - target machine (IPv4 or hostname)
    #        cli - boolean - set verbosity on STDOUT to ON/OFF
    #        results - [dict] - (shared) output results list
    #        results_lock - threading.Lock - (shared) lock on the output list
    #        switch - [boolean] - (shared) switch controlling scan life-cycle (ON/OFF)
    #        switch_lock - threading.Lock - (shared) lock on switch
    #
    # Return: (void)
    #
    # This function is in charge of retrieving all the referenced domains
    # that are hosted at given IP address (or at the IP address corresponding
    # to the given hostname). For this, the LIVE search webservice is used,
    # a valid Live AppID is needed (stored in the configuration file).
    # The lookup stops when the web service returns no more results, on the
    # first failed or non-200 answer, on a SOAP fault, when the iteration
    # limit is reached, or as soon as the (shared) switch is turned OFF.
    try:
        # load the Live ID from the configuration file
        live_id = core_file.cfg_get_live_id(core_file.cfg_start_get())
        if not core_utilities.check_live_id(live_id):
            __issue_result(results, results_lock, cli,
                           {u'ERROR': u'Invalid LIVE ID. Please verify configuration file.'})
            return

        domains = []
        # NOTE(review): 'iteration' starts at 1 and the loop runs while
        # iteration < limit_iteration, i.e. at most 9 requests - confirm that
        # this is the intended limit.
        limit_iteration = 10
        offset = 0
        count = 50
        iteration = 1

        # the end-point URL is parsed once
        endpoint = urlparse(WS_ENDPOINT)
        path = endpoint.path
        port = endpoint.port
        host = endpoint.netloc.replace(u':' + unicode(port), u'')
        ip_address = gethostbyname(target)
        httpClient = core_http.HTTPClient(host, port, True)

        # test if web service is up and valid...
        http_test = core_http.test_http(host, port, override_disable_ssl=True)
        if not http_test[0]:
            __issue_result(results, results_lock, cli,
                           {u'ERROR': u'Web service end-point: ' + http_test[1]})
            return

        __issue_result(results, results_lock, cli,
                       {u'IPADDRESS': ip_address, u'TARGET': ip_address})

        while iteration < limit_iteration:
            # honour the shared switch, so that the caller can cancel the
            # lookup between two requests
            with switch_lock:
                on = switch[0]
            if not on:
                break

            # crafting the HTTP request
            httpRequest = core_http.HTTPRequest()
            httpRequest.set_method(u'POST')
            httpRequest.set_path(path)
            httpRequest.set_data(__soap_live_request(live_id, ip_address, offset, count))

            # sending the HTTP request
            httpResponse = httpClient.perform_request(httpRequest)
            if httpResponse is None or httpResponse.get_code() != u'200':
                break

            parsedData = parseString(httpResponse.get_data().encode(WS_ENCODING))
            try:
                # checking if the request produced a SOAP error
                faults = parsedData.getElementsByTagName(u'soapenv:Fault')
                if len(faults) > 0:
                    errorMessage = faults[0].getElementsByTagName(u'detail')[0].firstChild.data.strip()
                    __issue_result(results, results_lock, cli,
                                   {u'ERROR': u'SOAP error: ' + errorMessage})
                    break

                # processing the results
                res = parsedData.getElementsByTagName(u'Result')
                if len(res) == 0:
                    # If there are no more results left, exit the loop,
                    # even if limit_iteration is not reached.
                    break

                for item in res:
                    for url_item in item.getElementsByTagName(u'Url'):
                        url = url_item.firstChild.data.strip()
                        domain = urlparse(url).netloc
                        # each domain is only reported once
                        if domain not in domains:
                            domains.append(domain)
                            __issue_result(results, results_lock, cli,
                                           {u'IPADDRESS': ip_address, u'VHOST': domain})
                    # the offset of the next request moves past this result
                    offset += 1
                iteration += 1
            finally:
                # release the DOM on every exit path (fault and end of
                # results included), not only after a processed page
                parsedData.unlink()

    except core_error.Config_Error as e:
        __issue_result(results, results_lock, cli, {u'ERROR': e.error_message})
def perform(host, port, root, cli, results, results_lock, switch, switch_lock):
    # Input: host - unicode - target host
    #        port - unicode - target port
    #        root - unicode - target root
    #        cli - boolean - set verbosity on STDOUT to ON/OFF
    #        results - [dict] - (shared) output results list
    #        results_lock - threading.Lock - (shared) lock on the output list
    #        switch - [boolean] - (shared) switch controlling scan life-cycle (ON/OFF)
    #        switch_lock - threading.Lock - (shared) lock on switch
    #
    # Return: (void)
    #
    # This function crawls a given website extracting all the encountered e-mail
    # addresses, external links and internal directories. The spider follows all
    # the internal links and redirections and parses all the pages having a
    # 'text/html' content-type. External links are extracted and returned as
    # result but not followed.
    try:
        # Loading and testing config parameters. Both parameters are read
        # through one parser (instead of re-opening the configuration for each
        # of them), which is ended once they are loaded.
        cfParser = core_file.cfg_start_get()
        threads = core_file.cfg_get_spider_threads(cfParser)
        use_robots = core_file.cfg_get_use_robots(cfParser)
        core_file.cfg_end_get(cfParser)

        if not core_utilities.check_threads(threads):
            issue_result(results, results_lock, cli, {
                u'ERROR': u'Invalid configuration parameters. Please verify configuration file.'
            })
            return

        ipaddress = core_utilities.get_ip_address(host)
        issue_result(results, results_lock, cli, {
            u'HOST': host,
            u'PORT': unicode(port),
            u'IPADDRESS': ipaddress,
            u'TARGET': host + ' / ' + unicode(port)
        })

        # test if target is up and valid...
        http_test = core_http.test_http(host, port)
        if not http_test[0]:
            issue_result(results, results_lock, cli, {
                u'HOST': host,
                u'PORT': unicode(port),
                u'IPADDRESS': ipaddress,
                u'ERROR': http_test[1]
            })
            return

        # 'known' maps each link to a flag: False = still to visit,
        # True = not to visit (anymore). The root initiates the crawl.
        known = {root: False}
        known_lock = threading.Lock()

        # cheat.... the directory of the root is issued as an internal
        # directory with the verbosity forced to OFF
        issue_result(results, results_lock, False, {
            u'HOST': host,
            u'PORT': unicode(port),
            u'IPADDRESS': ipaddress,
            u'INTERNAL': (dirname(root) + u'/').replace(u'//', u'/')
        })

        # grabbing robots.txt (ignored when disabled in the configuration,
        # when the request fails or when the answer is not a 200)
        if use_robots:
            httpClient = core_http.HTTPClient(host, port)
            request = core_http.HTTPRequest()
            request.set_method(u'GET')
            request.set_path(root + u'robots.txt')
            response = httpClient.perform_request(request)
            if response is not None and response.get_code() == u'200':
                robots_text = response.get_data()
                for rdir in ROBOTS_DIRS_REGEXP.findall(robots_text):
                    # note that the last '/' is removed before putting it as a
                    # key in 'known' to comply with os.path.dirname used below
                    # that returns dirnames without last slash
                    # NOTE(review): assumes every match ends with '/' - confirm
                    # against ROBOTS_DIRS_REGEXP
                    key = rdir[:-1]
                    if key in known:
                        continue
                    known[key] = False
                    issue_result(results, results_lock, cli, {
                        u'HOST': host,
                        u'PORT': unicode(port),
                        u'IPADDRESS': ipaddress,
                        u'INTERNAL': rdir
                    })

        # start the first thread to grab the first page and populate the
        # 'known' dictionary with a few results
        running = [SpiderThread(host, port, ipaddress, known, known_lock, cli,
                                results, results_lock, switch, switch_lock)]
        running[0].start()
        sleep(THREAD_DELAY)

        # after a delay, spawn the other threads
        for i in range(1, threads):
            spiderThread = SpiderThread(host, port, ipaddress, known, known_lock, cli,
                                        results, results_lock, switch, switch_lock)
            spiderThread.start()
            running.append(spiderThread)

        for thread in running:
            thread.join()

    except core_error.Config_Error as e:
        issue_result(results, results_lock, cli, {u'ERROR': e.error_message})
def run(self):
    # Worker loop of a spider thread.
    #
    # Picks unvisited links from the shared 'known' dictionary (False = to
    # visit, True = not to visit) until MAX_LINKS links were taken by this
    # thread, no unvisited link is left or the shared switch is turned OFF.
    # Each link is first probed with a HEAD request; accepted content answered
    # with a 200 is then fetched with GET and parsed, redirections are
    # recorded in 'known' (internal) or issued as results (external).
    while self.__links < MAX_LINKS:
        with self.__switch_lock:
            on = self.__switch[0]
        if not on:
            break

        # fetch an unvisited link and mark it as visited
        picked = False
        with self.__known_lock:
            for (link, visited) in self.__known.items():
                if visited == False:
                    picked = True
                    current = link
                    self.__known[link] = True
                    self.__links += 1
                    break
        if not picked:
            break

        # the extension may indicate that the document should be ignored
        if current.split('.')[-1] in IGNORE:
            continue

        # otherwise send a HEAD request to get the 'content-type' header
        httpRequest = core_http.HTTPRequest()
        httpRequest.set_method(u'HEAD')
        httpRequest.set_path(current)
        httpHead = self.__httpClient.perform_request(httpRequest)
        if httpHead is None:
            # request failed
            issue_result(self.__results, self.__results_lock, self.__cli, {
                u'ERROR': u'HEAD ' + httpRequest.get_path() + u' failed. Server may be overloaded.'
            })
            continue

        status = httpHead.get_code()
        content = httpHead.get_header(u'content-type')

        # if the content type is not provided or does not match the accepted
        # types, nothing more is done with this link
        if content is None or not ACCEPT_CONTENT_REGEXP.search(content):
            continue

        if status == u'200':
            # request the body with the same request object, switched to GET
            httpRequest.set_method(u'GET')
            httpResponse = self.__httpClient.perform_request(httpRequest)
            if httpResponse is None:
                issue_result(self.__results, self.__results_lock, self.__cli, {
                    u'ERROR': u'GET ' + httpRequest.get_path() + u' failed. Server may be overloaded.'
                })
            else:
                self.__extract_links(current, httpResponse)
                self.__extract_emails(httpResponse)
            continue

        if status in NOTIFY_STATUS:
            # empirical...
            issue_result(self.__results, self.__results_lock, self.__cli, {
                u'HOST': self.__host,
                u'PORT': unicode(self.__port),
                u'IPADDRESS': self.__ipaddress,
                u'ERROR': u'HEAD ' + current + ' => ' + status
            })
            continue

        if status not in REDIRECT:
            # other status code
            continue

        redirection = httpHead.get_header(u'location')
        if redirection is None:
            # no 'location' header
            continue

        location = urlparse.urlparse(redirection)
        redir_proto = location.scheme
        redir_base = location.netloc

        if redir_base != u'' and redir_base != self.__base:
            # external redirection: recorded as 'not to visit' and issued
            # once; it is never followed
            with self.__known_lock:
                if redir_base not in self.__known:
                    self.__known[redir_base] = True
                    issue_result(self.__results, self.__results_lock, self.__cli, {
                        u'HOST': self.__host,
                        u'PORT': unicode(self.__port),
                        u'IPADDRESS': self.__ipaddress,
                        u'EXTERNAL': redir_base
                    })
            continue

        # internal redirection: a relative one is joined as received, an
        # absolute one is reduced to its path first. Only the path of the
        # joined URL is kept (the query string is dropped).
        if redir_base == u'':
            joined = redirection
        else:
            joined = location.path
        origin = u'http://' + self.__base + urlparse.urlparse(current).path
        abs_redir = urlparse.urlparse(urlparse.urljoin(origin, joined)).path

        with self.__known_lock:
            if abs_redir not in self.__known:
                # to visit (False) only when the scheme is empty or http
                self.__known[abs_redir] = not (redir_proto == u'' or redir_proto == u'http')
                parent = dirname(abs_redir)
                if parent not in self.__known:
                    # the directory itself is issued but not visited
                    self.__known[parent] = True
                    issue_result(self.__results, self.__results_lock, self.__cli, {
                        u'HOST': self.__host,
                        u'PORT': unicode(self.__port),
                        u'IPADDRESS': self.__ipaddress,
                        u'INTERNAL': parent + u'/'
                    })