def execute():
    """ Fetch /.svn/entries and parse for target paths """
    textutils.output_info(' - Svn Plugin: Searching for /.svn/entries')
    target_url = conf.target_base_path + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)

    # 'in' instead of 'is' for status codes: 'is' only worked by CPython
    # small-int caching accident and warns on modern Python
    if response_code in (200, 302):
        if conf.allow_download:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling... (will download files to output/)')
        else:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling...'
                                  ' (use -a to download files instead of printing)')

        # Parse entries
        parse_svn_entries(conf.target_base_path)

        # Clean up display
        if conf.allow_download:
            textutils.output_info('')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    current_template = dict(conf.path_template)
    current_template['description'] = '/.svn/entries found directory'

    target_url = urljoin(conf.target_base_path, "/.svn/entries")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)

    # Parenthesized status test: with the bare 'or ... and content' form,
    # 'and' binds tighter, so the content check only guarded the 302 case
    if response_code in (200, 302) and content:
        added = 0
        try:
            tree = ElementTree.fromstring(content)
            # iter() yields every element in the tree; keep the ones marked kind="dir"
            for entry in tree.iter():
                if entry.attrib.get("kind") == "dir":
                    current_template = current_template.copy()
                    current_template['url'] = '/' + entry.attrib["name"]
                    database.paths.append(current_template)
                    added += 1
        except Exception:
            textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
        else:
            if added > 0:
                textutils.output_info(' - Svn Plugin: added ' + str(added) + ' base paths using /.svn/entries')
            else:
                textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
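# The XML variant above only applies to working copies whose entries file is
# XML (written by very old SVN clients). A minimal, self-contained sketch of
# the same ElementTree walk against an inline sample; the sample markup is an
# illustrative assumption, not a captured file:
from xml.etree import ElementTree

SAMPLE_ENTRIES = """
<wc-entries>
  <entry name="" kind="dir"/>
  <entry name="admin" kind="dir"/>
  <entry name="index.php" kind="file"/>
</wc-entries>
"""

tree = ElementTree.fromstring(SAMPLE_ENTRIES)
dirs = [e.attrib["name"] for e in tree.iter()
        if e.attrib.get("kind") == "dir" and e.attrib.get("name")]
print(dirs)  # ['admin'] -> candidate base paths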
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the url fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             database.latest_successful_request_time,
                                                                             limit_len=False)
                else:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 500:
                    textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                elif response_code in conf.expected_file_responses:
                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if match_string and re.search(re.escape(match_string), content, re.I):
                        textutils.output_found("String-Matched " + description + ' at: ' + conf.target_host + url)
                    elif test_valid_result(content):
                        textutils.output_found(description + ' at: ' + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
def execute():
    """ Fetch sitemap.xml and add each entry as a target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False,
                                                        add_headers={})

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    # Parenthesized status test so 'and content' guards both codes
    if response_code in (200, 302) and content:
        # Raw string so \s reaches the regex engine intact
        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            if not isinstance(match, str):
                match = match.decode('utf-8', 'ignore')
            parsed = urlparse(match)
            if parsed.path:
                new_path = parsed.path
            else:
                continue

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]

            if add_path(new_path):
                added += 1
                textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries in /sitemap.xml')
    else:
        textutils.output_info(' - SitemapXML Plugin: /sitemap.xml not found on target site')
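# A stand-alone check of the <loc> extraction used above, run against an
# inline sample sitemap (the sample XML is illustrative, not fetched):
import re
from urllib.parse import urlparse

SAMPLE_SITEMAP = """<?xml version="1.0"?>
<urlset>
  <url><loc>http://example.com/blog/</loc></url>
  <url><loc>http://example.com/app/login</loc></url>
</urlset>"""

locs = re.findall(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*', SAMPLE_SITEMAP)
paths = [urlparse(loc).path.rstrip('/') for loc in locs if urlparse(loc).path]
print(paths)  # ['/blog', '/app/login']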
def execute():
    """ Fetch /robots.txt and add the disallowed paths as targets """
    current_template = dict(conf.path_template)
    current_template['description'] = 'Robots.txt entry'

    target_url = urljoin(conf.target_base_path, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)

    # Normalize to str once; the original encoded to bytes and immediately decoded back
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:
        matches = re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', content)

        textutils.output_debug(content)
        added = 0
        for match in matches:
            # Filter out wildcard/whitespace characters; ''.join over a genexp
            # also works on Python 3, where a bare filter() object is always truthy
            match = ''.join(c for c in match if c not in ' *?.\n\r\t')
            if match:
                # Split on ':'
                splitted = match.split(':')
                if splitted[1]:
                    target_path = splitted[1]
                    textutils.output_debug(target_path)

                    # Remove trailing /
                    if target_path.endswith('/'):
                        target_path = target_path[:-1]

                    current_template = current_template.copy()
                    current_template['url'] = target_path
                    database.paths.append(current_template)
                    textutils.output_debug(' - Robots Plugin Added: ' + str(target_path) + ' from robots.txt')
                    added += 1

        if added > 0:
            textutils.output_info(' - Robots Plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            textutils.output_info(' - Robots Plugin: no usable entries in /robots.txt')
    else:
        textutils.output_info(' - Robots Plugin: /robots.txt not found on target site')
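# The Disallow parsing above, exercised stand-alone against a small inline
# robots.txt (sample content is illustrative):
import re

SAMPLE_ROBOTS = "User-agent: *\nDisallow: /admin/\nDisallow: /backup/\n"

paths = []
for match in re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', SAMPLE_ROBOTS):
    cleaned = ''.join(c for c in match if c not in ' *?.\n\r\t')
    path = cleaned.split(':')[1].rstrip('/')
    if path:
        paths.append(path)
print(paths)  # ['/admin', '/backup']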
def execute():
    """ Fetch sitemap.xml and add each entry as a target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False,
                                                        add_headers={})

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:
        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            # content was decoded above, so matches are already str;
            # the original's match.decode() would fail on Python 3
            new_path = match.split(conf.target_host)[1]

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]

            add_path(new_path)
            add_file(new_path)

            textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)
            added += 1

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries in /sitemap.xml')
    else:
        textutils.output_info(' - SitemapXML Plugin: /sitemap.xml not found on target site')
def get_session_cookies():
    """ Fetch initial session cookies """
    textutils.output_info('Fetching session cookie')
    path = conf.path_template.copy()
    path['url'] = '/'

    # We're not using the fetch cache for session cookie sampling
    fetcher = Fetcher()

    code, content, headers = fetcher.fetch_url('/', conf.user_agent, 10)
    if code == 200:
        cookies = headers.get('Set-Cookie')
        if cookies:
            database.session_cookie = cookies
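# Stand-alone illustration of the same sampling idea using only the standard
# library (urllib here is a stand-in for the project's Fetcher, and the base
# URL is a placeholder):
from urllib.request import urlopen

def sample_session_cookie(base_url):
    # Read the Set-Cookie header from a single GET of the site root
    with urlopen(base_url + '/') as response:
        return response.headers.get('Set-Cookie')

# cookie = sample_session_cookie('http://example.com')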
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for
    Ratcliff/Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         conf.fetch_timeout_secs)

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code in (0, 500):
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # The server responded with some code other than 404 or invalid stuff (500); take a sample
                    if len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Dequeue item
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    textutils.output_info(' - Svn Plugin: Searching for /.svn/entries')
    target_url = conf.target_base_path + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)
    svn_legacy = True

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in conf.expected_file_responses and content:
        if conf.allow_download:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling... (will download files to output/)')
        else:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling...'
                                  ' (use -a to download files instead of printing)')

        # Test for SVN 1.7+ (a single wc.db replaces the per-directory entries files)
        target_url = conf.target_base_path + "/.svn/wc.db"
        fetcher = Fetcher()
        response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                            conf.fetch_timeout_secs, limit_len=False)
        #if response_code in conf.expected_file_responses and content:
        #    textutils.output_info(' - Svn Plugin: SVN 1.7+ detected, parsing wc.db')
        #    svn_legacy = False
        #    save_file(conf.target_base_path + '/wc.db', content)

        # Process index
        if svn_legacy:
            # Parse entries
            parse_svn_entries(conf.target_base_path)
        #else:
        #    parse_svn_17_db(conf.target_base_path + '/wc.db')

        # Clean up display
        if conf.allow_download:
            textutils.output_info('')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
def parse_svn_entries(url):
    description_file = "SVN entries file"
    description_dir = "SVN entries Dir"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)

    # Parenthesized status test: with the bare 'or ... and content' form,
    # the content check only guarded the 302 case
    if response_code in (200, 302) and content:
        tokens = content.split("\n")
        if "dir" in tokens:
            for pos, token in enumerate(tokens):
                if token == "dir":
                    # Fetch more entries recursively
                    if tokens[pos - 1] != "":
                        textutils.output_debug(" - Svn Plugin: Found dir: " + url + "/" + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        else:
                            textutils.output_found(description_dir + " at: " + url + "/" + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == "file":
                    textutils.output_debug(" - Svn Plugin: Found file: " + url + "/" + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        # Fetch the pristine copy from .svn/text-base
                        path = url + "/.svn/text-base" + "/" + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + "/" + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + " at: " + url + "/" + tokens[pos - 1])
def parse_svn_entries(url):
    description_file = 'SVN entries file'
    description_dir = 'SVN entries Dir'
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False,
                                                        add_headers=base_headers)

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in conf.expected_file_responses and content:
        tokens = content.split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos - 1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' + url + '/' + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' + url + '/' + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' + url + '/' + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        # Fetch the pristine copy from .svn/text-base
                        path = url + "/.svn/text-base" + '/' + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + '/' + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' + url + '/' + tokens[pos - 1])
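# The legacy (pre-1.7) entries file is a plain-text record where an entry's
# name is followed on the next line by its kind ('dir' or 'file'), which is
# what the pos/pos-1 indexing above relies on. A quick stand-alone run over a
# fabricated sample:
SAMPLE_ENTRIES = "10\n\nadmin\ndir\n\nindex.php\nfile\n"

tokens = SAMPLE_ENTRIES.split('\n')
for pos, token in enumerate(tokens):
    if token in ('dir', 'file') and tokens[pos - 1]:
        print(token, '->', tokens[pos - 1])
# dir -> admin
# file -> index.php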
def execute():
    """ Fetch /robots.txt and add the disallowed paths as targets """
    worker_template = {'url': '', 'expected_response': [200, 302],
                       'timeout_count': 0, 'description': 'Robots.txt entry'}

    target_url = urljoin(conf.target_host, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, 'GET', conf.user_agent,
                                                        True, conf.fetch_timeout_secs)

    if response_code in (200, 302) and content:
        if conf.debug:
            utils.output_debug(content)

        matches = re.findall(r'Disallow:\s*[a-zA-Z0-9-/.]*', content)

        added = 0
        for match_obj in matches:
            if '?' not in match_obj and '.' not in match_obj:
                splitted = match_obj.split(':')
                if splitted[1]:
                    path = splitted[1].strip()
                    # Membership test instead of the original
                    # "path != '/' or path != ''", which was always true and
                    # let '/' and empty paths slip through
                    if path not in ('/', ''):
                        new_path = urljoin(conf.target_host, path)
                        current_template = dict(worker_template)
                        current_template['url'] = new_path
                        database.paths.append(current_template)
                        if conf.debug:
                            utils.output_debug(str(current_template))
                        added += 1

        if added > 0:
            utils.output_info('Robots plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            utils.output_info('Robots plugin: no usable entries in /robots.txt')
    else:
        utils.output_info('Robots plugin: /robots.txt not found on target site')
def parse_svn_entries(url):
    description_file = 'SVN entries file'
    description_dir = 'SVN entries Dir'
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False,
                                                        add_headers=base_headers)

    if response_code in conf.expected_file_responses and content:
        tokens = content.decode().split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos - 1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' + url + '/' + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' + url + '/' + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' + url + '/' + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        # Fetch the pristine copy from .svn/text-base
                        path = url + "/.svn/text-base" + '/' + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + '/' + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' + url + '/' + tokens[pos - 1])
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    textutils.output_info(' - Svn Plugin: Searching for /.svn/entries')
    target_url = conf.target_base_path + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                        conf.fetch_timeout_secs, limit_len=False)
    svn_legacy = True

    if response_code in conf.expected_file_responses and content:
        if conf.allow_download:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling... (will download files to output/)')
        else:
            textutils.output_info(' - Svn Plugin: /.svn/entries found! crawling...'
                                  ' (use -a to download files instead of printing)')

        # Test for SVN 1.7+ (a single wc.db replaces the per-directory entries files).
        # This detection block was commented out in the original, leaving the
        # parse_svn_17_db branch below unreachable; re-enabled so both paths work.
        target_url = conf.target_base_path + "/.svn/wc.db"
        fetcher = Fetcher()
        response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent,
                                                            conf.fetch_timeout_secs, limit_len=False)
        if response_code in conf.expected_file_responses and content:
            textutils.output_info(' - Svn Plugin: SVN 1.7+ detected, parsing wc.db')
            svn_legacy = False
            save_file(conf.target_base_path + '/wc.db', content)

        # Process index
        if svn_legacy:
            # Parse entries
            parse_svn_entries(conf.target_base_path)
        else:
            parse_svn_17_db(conf.target_base_path + '/wc.db')

        # Clean up display
        if conf.allow_download:
            textutils.output_info('')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
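# parse_svn_17_db is not shown in this corpus. A minimal sketch of what
# parsing the downloaded wc.db could look like: wc.db is a SQLite database
# whose NODES table carries a local_relpath and a kind ('file'/'dir') per
# entry. Table and column names reflect the SVN 1.7+ schema as commonly
# documented; verify against a real working copy before relying on this.
import sqlite3

def parse_svn_17_db_sketch(db_path):
    connection = sqlite3.connect(db_path)
    try:
        rows = connection.execute("SELECT local_relpath, kind FROM NODES")
        for relpath, kind in rows:
            if relpath:  # skip the working-copy root (empty relpath)
                print(kind, '->', '/' + relpath)
    finally:
        connection.close()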
class Scheduler(object):
    test = Tester()
    fetch = Fetcher()
    redis = RedisDataBase()

    def _test(self, queue):
        while True:
            if not self.redis.is_empty:
                print(getTime('Tester module starting'))
                self.test.run()
            else:
                print(getTime('Proxy pool exhausted; tester module forced to sleep'))
                queue.put('True')
            delay(60 * 60)

    def _fetch(self, queue, flag=False):
        while True:
            if not queue.empty():
                flag = queue.get()
            if flag:
                flag = False
                print(getTime('Proxy pool exhausted; fetcher module forced to start'))
                self.fetch.run()
            if times() in [6, 18]:  # run at 06:00 and 18:00
                print(getTime('Fetcher module starting'))
                self.fetch.run()
                print(getTime('Fetch done; fetcher module going to sleep'))

    def _app(self):
        app.run(host='0.0.0.0')

    def all_run(self):
        print(getTime('Proxy pool starting...'))
        tester = Process(target=self._test, args=(queue,))
        tester.start()
        fetcher = Process(target=self._fetch, args=(queue,))
        fetcher.start()
        app = Process(target=self._app)
        app.start()
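# Typical entry point for the scheduler above; all_run reads a module-level
# queue, so a minimal launch script (the Queue here is the assumed shared
# channel between the tester and fetcher processes) might look like:
from multiprocessing import Queue

queue = Queue()

if __name__ == '__main__':
    Scheduler().all_run()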
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the url fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                #if throttle.get_throttle() > 0:
                #    sleep(throttle.get_throttle())

                # Fetch the target url
                timeout = False
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             conf.fetch_timeout_secs,
                                                                             limit_len=False)
                else:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             conf.fetch_timeout_secs)

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if is_valid_result:
                        # Content test if match_string is provided
                        if match_string and re.search(re.escape(match_string), content, re.I):
                            # Add path to valid_path for future actions
                            database.valid_paths.append(queued)
                            textutils.output_found("String-Matched " + description + ' at: ' + conf.target_host + url)
                        elif not match_string:
                            if response_code == 500:
                                textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                            else:
                                textutils.output_found(description + ' at: ' + conf.target_host + url)

                            # Add path to valid_path for future actions
                            database.valid_paths.append(queued)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue
class TestPathExistsWorker(Thread):
    """ This worker tests if a path exists. Each path is matched against a fake
        generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Add trailing / for paths
                if not url.endswith('/') and url != '/':
                    url += '/'

                # Fetch directory
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time,
                                                                         limit_len=False)
                end_time = datetime.now()

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get('url') == '/':
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 404 and detect_tomcat_fake_404(content):
                    database.valid_paths.append(queued)
                    textutils.output_found('Tomcat redirect, ' + description + ' at: ' + conf.target_host + url)
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output result, but don't keep the url since we can't poke in a protected folder
                        textutils.output_found('Password Protected - ' + description + ' at: ' + conf.target_host + url)
                    elif is_valid_result:
                        # Add path to valid_path for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found('*Forbidden* ' + description + ' at: ' + conf.target_host + url)
                        else:
                            textutils.output_found(description + ' at: ' + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
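# The workers above all share the same consumption pattern: a non-blocking
# get() on a shared Queue, re-queuing items that need another try, and a
# task_done() for every item taken. A stripped-down stand-alone version of
# that loop (names here are illustrative, not the project's):
from queue import Queue, Empty
from threading import Thread

work_queue = Queue()

def worker_loop():
    while True:
        try:
            item = work_queue.get(False)  # non-blocking: the queue doubles as a ring buffer
        except Empty:
            break
        if item.get('retries', 0) < 1:    # give a failed item one more pass
            item['retries'] = item.get('retries', 0) + 1
            work_queue.put(item)
        work_queue.task_done()

for path in ('/admin', '/backup'):
    work_queue.put({'url': path})
Thread(target=worker_loop).start()
work_queue.join()  # returns once every put() has a matching task_done()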
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for
    Ratcliff/Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code in (0, 500):
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code in conf.expected_file_responses:
                    # Encoding edge case: must be a string to be compared to the 404 fingerprint
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')

                    # The server responded with some code other than 404 or invalid stuff (500); take a sample
                    if not len(content):
                        crafted_404 = ""  # Empty file, still a forged 404
                    elif len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
class TestPathExistsWorker(Thread):
    """ This worker tests if a path exists. Each path is matched against a fake
        generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output
        reset_behavior_database()

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(block=False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Add trailing / for paths
                if not url.endswith('/') and url != '/':
                    url += '/'

                # Fetch directory
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time,
                                                                         limit_len=False)
                end_time = datetime.now()

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get('url') == '/':
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 404 and detect_tomcat_fake_404(content):
                    database.valid_paths.append(queued)
                    textutils.output_found('Tomcat redirect, ' + description + ' at: ' + conf.base_url + url, {
                        "description": description,
                        "url": conf.base_url + url,
                        "code": response_code,
                        "special": "tomcat-redirect",
                        "severity": queued.get('severity'),
                    })
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    if is_valid_result:
                        # Test if behavior is ok
                        normal_behavior = test_behavior(content)
                    else:
                        # We don't compute behavior on invalid results
                        normal_behavior = True

                    if normal_behavior and database.behavior_error:
                        textutils.output_info('Normal behavior seems to be restored.')
                        database.behavior_error = False

                    if is_valid_result and not normal_behavior:
                        # We don't declare a behavior change until the current hit has
                        # exceeded the maximum chances it can get
                        if not database.behavior_error and queued.get('behavior_chances', 0) >= conf.max_behavior_tries:
                            textutils.output_info('Behavior change detected! Results may '
                                                  'be incomplete or tachyon may never exit.')
                            textutils.output_debug('Chances taken: ' + str(queued.get('behavior_chances', 0)))
                            textutils.output_debug(queued.get('url'))
                            database.behavior_error = True

                    # If we find a valid result but the behavior buffer is not full, we give the
                    # url a chance and increase its chance count. We consider this a false behavior
                    # test, since an incomplete behavior buffer could give false positives.
                    # Additionally, if the fetch queue is empty and we're still not in global behavior
                    # error, we consider all remaining hits valid, as they are hits that were given a chance.
                    if is_valid_result and len(database.behavioral_buffer) < conf.behavior_queue_size \
                            and not database.behavior_error and database.fetch_queue.qsize() != 0:
                        if not queued.get('behavior_chances'):
                            queued['behavior_chances'] = 1
                        else:
                            queued['behavior_chances'] += 1

                        if queued['behavior_chances'] < conf.max_behavior_tries:
                            textutils.output_debug('Time for a chance')
                            textutils.output_debug('Chance left to target ' + queued.get('url') + ', re-queuing ' +
                                                   ' qsize: ' + str(database.fetch_queue.qsize()) +
                                                   ' chances: ' + str(queued.get('behavior_chances')))
                            database.fetch_queue.put(queued)
                            database.fetch_queue.task_done()
                            continue
                        else:
                            textutils.output_debug('Chances count busted! ' + queued.get('url') +
                                                   ' qsize: ' + str(database.fetch_queue.qsize()))
                    elif response_code == 401:
                        # Output result, but don't keep the url since we can't poke in a protected folder
                        textutils.output_found('Password Protected - ' + description + ' at: ' + conf.base_url + url, {
                            "description": description,
                            "url": conf.base_url + url,
                            "code": response_code,
                            "severity": queued.get('severity'),
                        })
                    # At this point we have a valid result and the behavioral buffer is full.
                    # The behavior of the hit has been taken into account and the app is not
                    # in global behavior error
                    elif is_valid_result:
                        # Add path to valid_path for future actions
                        database.valid_paths.append(queued)

                        # If we reach this point, all edge cases should be handled and all
                        # subsequent requests should be benchmarked against this new behavior
                        reset_behavior_database()

                        if response_code == 500:
                            textutils.output_found('ISE, ' + description + ' at: ' + conf.base_url + url, {
                                "description": description,
                                "url": conf.base_url + url,
                                "code": response_code,
                                "severity": queued.get('severity'),
                            })
                        elif response_code == 403:
                            textutils.output_found('*Forbidden* ' + description + ' at: ' + conf.base_url + url, {
                                "description": description,
                                "url": conf.base_url + url,
                                "code": response_code,
                                "severity": queued.get('severity'),
                            })
                        else:
                            textutils.output_found(description + ' at: ' + conf.base_url + url, {
                                "description": description,
                                "url": conf.base_url + url,
                                "code": response_code,
                                "severity": queued.get('severity'),
                            })
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for
    Ratcliff/Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(block=False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code in (0, 500):
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code in conf.expected_file_responses:
                    # Encoding edge case: must be a string to be compared to the 404 fingerprint
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')

                    # The server responded with some code other than 404 or invalid stuff (500); take a sample
                    if len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
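# test_valid_result is not shown, but the docstrings name Ratcliff/Obershelp
# matching, which is what Python's difflib.SequenceMatcher implements. A
# stand-alone sketch of scoring fetched content against the stored crafted
# 404 samples (the threshold and helper name are illustrative):
from difflib import SequenceMatcher

def looks_like_crafted_404(content, crafted_404s, threshold=0.8):
    sample = content[:200].strip('\r\n ')
    for crafted in crafted_404s:
        if SequenceMatcher(None, sample, crafted).ratio() >= threshold:
            return True  # too similar to a known soft-404 page
    return False

print(looks_like_crafted_404("Oops, page not found!", ["Oops, page not found!"]))  # True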
class FetchUrlWorker(Thread):
    """ This worker gets a URL from the work queue and calls the url fetcher """
    def __init__(self, thread_id, output):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            # Don't wait for any items if empty
            if not database.fetch_queue.empty():
                queued = database.fetch_queue.get()
                url = urljoin(conf.target_host, queued.get('url'))
                expected = queued.get('expected_response')
                description = queued.get('description')
                content_type_blacklist = queued.get('blacklist_content_types')

                if not content_type_blacklist:
                    content_type_blacklist = []

                if conf.use_get:
                    method = 'GET'
                else:
                    method = 'HEAD'

                response_code, content, headers = self.fetcher.fetch_url(url, method, conf.user_agent,
                                                                         False, conf.fetch_timeout_secs)

                if conf.debug:
                    utils.output_info("Thread #" + str(self.thread_id) + ": " + str(queued))

                if response_code == 0:  # timeout
                    if queued.get('timeout_count') < conf.max_timeout_count:
                        new_timeout_count = queued.get('timeout_count') + 1
                        queued['timeout_count'] = new_timeout_count

                        if conf.debug:
                            utils.output_info('Thread #' + str(self.thread_id) + ': re-queuing ' + str(queued))

                        # Add back the timed-out item
                        database.fetch_queue.put(queued)
                    else:
                        utils.output_timeout(url)
                elif response_code in expected:
                    # Response content type; .get() avoids a KeyError when the header is absent
                    content_type = headers.get('content-type')
                    if not content_type:
                        content_type = ''

                    # Fuse with current url (/test becomes url.dom/test)
                    queued['url'] = urljoin(conf.target_host, queued['url'])

                    # If we don't blacklist, just show the result
                    if not conf.content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)
                    # If we DO blacklist but the content type is not blacklisted, show the result
                    elif content_type not in content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)

                # Mark item as processed
                database.fetch_queue.task_done()
class TestPathExistsWorker(Thread):
    """ This worker tests if a path exists. Each path is matched against a fake
        generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                #if throttle.get_throttle() > 0:
                #    sleep(throttle.get_throttle())

                # Add trailing / for paths; endswith() is the correct test
                # (the original compared url[:-1] to '/', which drops the last
                # character instead of inspecting it)
                if not url.endswith('/') and url != '/':
                    url += '/'

                # Fetch directory
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         conf.fetch_timeout_secs, limit_len=False)

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get('url') == '/':
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output result, but don't keep the url since we can't poke in a protected folder
                        textutils.output_found('Password Protected - ' + description + ' at: ' + conf.target_host + url)
                    elif is_valid_result:
                        # Add path to valid_path for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found('*Forbidden* ' + description + ' at: ' + conf.target_host + url)
                        else:
                            textutils.output_found(description + ' at: ' + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue
class TestFileExistsWorker(Thread):
    """ This worker gets a url from the work queue and calls the url fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output
        reset_behavior_database()

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(block=False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             database.latest_successful_request_time,
                                                                             limit_len=False)
                    # Make sure we always match the string against string content
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')
                else:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                             database.latest_successful_request_time)
                end_time = datetime.now()

                # handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 500:
                    textutils.output_found('ISE, ' + description + ' at: ' + conf.base_url + url, {
                        "description": description,
                        "url": conf.base_url + url,
                        "code": response_code,
                        "severity": queued.get('severity'),
                    })
                elif response_code in conf.expected_file_responses:
                    # Test if the result is valid
                    is_valid_result = test_valid_result(content, is_file=True)

                    if is_valid_result:
                        # Test if the behavior is ok
                        normal_behavior = test_behavior(content)
                        textutils.output_debug('Normal behavior ' + str(normal_behavior) + ' ' + str(response_code))
                    else:
                        normal_behavior = True

                    # Reset the behavior error state when we detect a new state
                    if normal_behavior and database.behavior_error:
                        textutils.output_info('Normal behavior seems to be restored.')
                        database.behavior_error = False

                    if is_valid_result and not normal_behavior:
                        # Looks like the new behavior is now the norm: it's a false positive.
                        # Additionally, we report a behavior change to the user at this point.
                        if not database.behavior_error:
                            textutils.output_info('Behavior change detected! Results may '
                                                  'be incomplete or tachyon may never exit.')
                            textutils.output_debug('Chances taken: ' + str(queued.get('behavior_chances', 0)))
                            textutils.output_debug(queued.get('url'))
                            database.behavior_error = True
                    # If we find a valid result but the behavior buffer is not full, we give the url a chance and
                    # increase its chance count. We consider this a false behavior test, since an incomplete
                    # behavior buffer could give false positives. Additionally, if the fetch queue is empty and
                    # we're still not in global behavior error, we consider all the remaining hits valid, as they
                    # are hits that were given a chance.
                    elif is_valid_result and len(database.behavioral_buffer) < conf.behavior_queue_size \
                            and not database.behavior_error and database.fetch_queue.qsize() != 0:
                        if not queued.get('behavior_chances'):
                            queued['behavior_chances'] = 1
                        else:
                            queued['behavior_chances'] += 1

                        if queued['behavior_chances'] < conf.max_behavior_tries:
                            textutils.output_debug('Chance left to target, re-queuing')
                            database.fetch_queue.put(queued)
                    elif is_valid_result:
                        # Make sure we base our next analysis on that positive hit
                        reset_behavior_database()

                        if len(content) == 0:
                            textutils.output_found('Empty ' + description + ' at: ' + conf.base_url + url, {
                                "description": "Empty " + description,
                                "url": conf.base_url + url,
                                "code": response_code,
                                "severity": 'info',
                            })
                        else:
                            textutils.output_found(description + ' at: ' + conf.base_url + url, {
                                "description": description,
                                "url": conf.base_url + url,
                                "code": response_code,
                                "severity": queued.get('severity'),
                            })
                    elif match_string and re.search(re.escape(match_string), content, re.I):
                        textutils.output_found("String-Matched " + description + ' at: ' + conf.base_url + url, {
                            "description": description,
                            "url": conf.base_url + url,
                            "code": response_code,
                            "string": match_string,
                            "severity": queued.get('severity'),
                        })
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue

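# For context, these workers are daemon-style threads fed from database.fetch_queue.
# A hedged usage sketch follows; spawn_workers and the queue pre-loading below are
# assumptions about the surrounding driver code, not taken from this module, and
# conf.thread_count is an assumed setting.
def spawn_workers(worker_class, count):
    """Start `count` workers of the given class and return them for later joining."""
    workers = []
    for thread_id in range(count):
        worker = worker_class(thread_id)
        worker.daemon = True
        worker.start()
        workers.append(worker)
    return workers


# Typical flow: enqueue the discovered paths, then let the workers drain the queue.
# for path in database.paths:
#     database.fetch_queue.put(path)
# workers = spawn_workers(TestPathExistsWorker, conf.thread_count)
# database.fetch_queue.join()  # blocks until every queued item is marked task_done()
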