Example #1
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    textutils.output_debug("-Ajusted timeout to: " + str(database.latest_successful_request_time))

    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output and not database.kill_received:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update stats
    database.total_timeouts += 1
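
A minimal standalone sketch of the re-queue-on-timeout pattern used above, with hypothetical stand-ins (MAX_TIMEOUT_COUNT, fetch_queue) for the conf and database globals:

from queue import Queue

MAX_TIMEOUT_COUNT = 3   # assumed stand-in for conf.max_timeout_count
fetch_queue = Queue()   # assumed stand-in for database.fetch_queue

def requeue_on_timeout(item):
    """ Give a timed-out item a limited number of extra chances. """
    item['timeout_count'] = item.get('timeout_count', 0)
    if item['timeout_count'] < MAX_TIMEOUT_COUNT:
        item['timeout_count'] += 1
        fetch_queue.put(item)   # back into the ring buffer for another try
        return True
    return False                # definitively timed out

requeue_on_timeout({'url': '/admin', 'description': 'Admin interface'})
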
Example #2
    def run(self):
        while not self.kill_received:
            try:
                # Non-Blocking get since we use the queue as a ringbuffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url,
                        conf.user_agent,
                        database.latest_successful_request_time,
                        limit_len=False)
                else:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent,
                        database.latest_successful_request_time)
                end_time = datetime.now()

                # handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued,
                                   url,
                                   self.thread_id,
                                   output=self.output)
                elif response_code == 500:
                    textutils.output_found('ISE, ' + description + ' at: ' +
                                           conf.target_host + url)
                elif response_code in conf.expected_file_responses:
                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if match_string and re.search(re.escape(match_string),
                                                  content, re.I):
                        textutils.output_found("String-Matched " +
                                               description + ' at: ' +
                                               conf.target_host + url)
                    elif test_valid_result(content):
                        textutils.output_found(description + ' at: ' +
                                               conf.target_host + url)

                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
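
The worker loop above follows the usual non-blocking queue pattern: get(False) raises Empty instead of blocking, and every processed item is balanced by task_done() so the manager's queue.join() can return. A minimal sketch of just that skeleton, without the fetching logic:

from queue import Queue, Empty
from threading import Thread

fetch_queue = Queue()
for item in ({'url': '/a'}, {'url': '/b'}):
    fetch_queue.put(item)

def worker():
    while True:
        try:
            queued = fetch_queue.get(False)   # non-blocking get
            print("processing", queued['url'])
            fetch_queue.task_done()           # balance the get for join()
        except Empty:
            break

Thread(target=worker).start()
fetch_queue.join()   # returns once every queued item is marked done
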
Example #3
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' + str(current_template))
Example #4
def decrease_throttle_delay():
    """ If we reach this code, a worker successfully completed a request, we reduce throttling for all threads."""
    if database.throttle_delay > 0:
        database.throttle_delay -= conf.throttle_increment
        if conf.debug:
            textutils.output_debug('Decreasing throttle limit: ' +
                                   str(database.throttle_delay))
Example #5
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    # Update pool timeout
    textutils.output_debug("-Ajusted timeout to: " +
                           str(database.latest_successful_request_time))

    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' +
                               str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output and not database.kill_received:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update stats
    database.total_timeouts += 1
Example #6
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' + str(current_template))
Example #7
def add_generated_path(path):
    current_template = dict(conf.path_template)
    current_template['description'] = 'Computer generated path'
    current_template['url'] = path
    if current_template not in database.files:
        textutils.output_debug(' - PathGenerator Plugin Generated: ' + str(current_template))
        database.files.append(current_template)
Example #8
    def wait_for_idle(self, workers, queue):
        """ Wait until fetch queue is empty and handle user interrupt """
        while not self.kill_received and not queue.empty():
            try:
                sleep(0.1)
            except KeyboardInterrupt:
                try:
                    stats.output_stats()
                    sleep(1)
                except KeyboardInterrupt:
                    textutils.output_info(
                        'Keyboard Interrupt Received, cleaning up threads')
                    self.kill_received = True

                    # Kill remaining workers but don't join the queue (we want to abort:))
                    for worker in workers:
                        worker.kill_received = True
                        if worker is not None and worker.isAlive():
                            worker.join(1)

                    # Kill the soft
                    sys.exit()

        # Make sure everything is done before sending control back to application
        textutils.output_debug("Threads: joining queue of size: " +
                               str(queue.qsize()))
        queue.join()
        textutils.output_debug("Threads: join done")

        for worker in workers:
            worker.kill_received = True
            worker.join()
Example #9
    def wait_for_idle(self, workers, queue):
        """ Wait until fetch queue is empty and handle user interrupt """
        while not self.kill_received and not queue.empty():
            try:
                sleep(0.1)
            except KeyboardInterrupt:
                try:
                    stats.output_stats()
                    sleep(1)
                except KeyboardInterrupt:
                    textutils.output_info('Keyboard Interrupt Received, cleaning up threads')
                    # Clean reference to sockets
                    database.connection_pool = None

                    self.kill_received = True

                    # Kill remaining workers but don't join the queue (we want to abort:))
                    for worker in workers:
                        worker.kill_received = True
                        if worker is not None and worker.isAlive():
                            worker.join(1)

                    # Kill the soft
                    sys.exit()

        # Make sure everything is done before sending control back to application
        textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
        queue.join()
        textutils.output_debug("Threads: join done")

        for worker in workers:
            worker.kill_received = True
            worker.join()
Example #10
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    textutils.output_debug("-Ajusted timeout to: " + str(database.latest_successful_request_time))

    if not queued["timeout_count"]:
        queued["timeout_count"] = 0

    if queued.get("timeout_count") < conf.max_timeout_count:
        new_timeout_count = queued.get("timeout_count") + 1
        queued["timeout_count"] = new_timeout_count
        textutils.output_debug("Thread #" + str(thread_id) + ": re-queuing " + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output:
        # We definitely timed out
        textutils.output_timeout(queued.get("description") + " at " + url)

    # update stats
    database.total_timeouts += 1
Example #11
def increase_throttle_delay():
    """ A worker encountered a timeout, we need to increase throttle time for all threads. """
    if database.throttle_delay < conf.max_throttle:
        database.throttle_delay += conf.throttle_increment
        if conf.debug:
            textutils.output_debug('Increasing throttle limit: ' +
                                   str(database.throttle_delay))
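
Together with decrease_throttle_delay from Example #4, this forms a simple additive increase/decrease throttle shared by all workers. A self-contained sketch of the same idea, with assumed constants in place of conf:

THROTTLE_INCREMENT = 0.05   # assumed stand-in for conf.throttle_increment
MAX_THROTTLE = 2.0          # assumed stand-in for conf.max_throttle

throttle_delay = 0.0        # shared delay, like database.throttle_delay

def on_timeout():
    """ A request timed out: slow every worker down a little. """
    global throttle_delay
    throttle_delay = min(throttle_delay + THROTTLE_INCREMENT, MAX_THROTTLE)

def on_success():
    """ A request succeeded: speed the workers back up, never below zero. """
    global throttle_delay
    throttle_delay = max(throttle_delay - THROTTLE_INCREMENT, 0.0)
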
Example #12
    def wait_for_idle(self, workers, queue):
        """ Wait until fetch queue is empty and handle user interrupt """
        while not database.kill_received and not queue.empty():
            try:
                # Make sure everything is done before sending control back to application
                textutils.output_debug("Threads: joining queue of size: " +
                                       str(queue.qsize()))
                queue.join()
                textutils.output_debug("Threads: join done")
            except KeyboardInterrupt:
                try:
                    stats.output_stats(workers)
                    sleep(1)  # The time you have to re-press ctrl+c to kill the app.
                except KeyboardInterrupt:
                    textutils.output_info(
                        'Keyboard Interrupt Received, waiting for blocking threads to exit'
                    )
                    # Clean reference to sockets
                    database.connection_pool = None
                    database.kill_received = True
                    self.kill_workers(workers)
                    sys.exit(0)

        # Make sure we get all the worker's results before continuing the next step
        self.kill_workers(workers)
Example #13
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' + str(current_template))
Example #14
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' +
                           str(current_template))
Example #15
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' + str(current_template))
Example #16
def add_generated_path(path):
    current_template = dict(conf.path_template)
    current_template['description'] = 'Computer generated path'
    current_template['url'] = path
    if current_template not in database.files:
        textutils.output_debug(' - PathGenerator Plugin Generated: ' +
                               str(current_template))
        database.files.append(current_template)
Example #17
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' +
                           str(current_template))
Example #18
def get_host_ip(host, port):
    """ Fetch the resolved ip addresses from the cache and return a random address if load-balanced """
    resolved = database.dns_cache.get(host)
    if not resolved:
        textutils.output_debug("Host entry not found in cache for host:" + str(host) + ", resolving")
        resolved = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        database.dns_cache[host] = resolved

    return _get_random_ip_from_cache(resolved), port
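
The helper _get_random_ip_from_cache is not shown in these examples; a plausible sketch of what it might do, assuming it simply picks one resolved address at random from the getaddrinfo() tuples stored in the cache:

import random
import socket

def _get_random_ip_from_cache(resolved):
    """ Hypothetical helper: pick one IP from cached getaddrinfo() results. """
    family, socktype, proto, canonname, sockaddr = random.choice(resolved)
    return sockaddr[0]

# Usage sketch (requires network access to resolve the name)
resolved = socket.getaddrinfo("example.com", 80, 0, socket.SOCK_STREAM)
print(_get_random_ip_from_cache(resolved))
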
Example #19
    def fetch_url(self,
                  url,
                  user_agent,
                  timeout,
                  limit_len=True,
                  add_headers=None):
        """ Fetch a given url, with a given user_agent and timeout """
        try:
            # Avoid the shared mutable default argument: build a fresh dict per call
            if add_headers is None:
                add_headers = {}
            if not add_headers.get('User-Agent'):
                add_headers['User-Agent'] = user_agent
            if not add_headers.get('Connection'):
                add_headers['Connection'] = 'Keep-Alive'
            if not add_headers.get('Host'):
                add_headers['Host'] = conf.target_host

            # Session cookie, priority to user-supplied.
            if conf.cookies:
                add_headers['Cookie'] = conf.cookies
            elif database.session_cookie:
                add_headers['Cookie'] = database.session_cookie

            # Limit request len on binary types
            if limit_len:
                content_range = 'bytes=0-' + str(conf.file_sample_len - 1)
                add_headers['Range'] = content_range
            else:
                if 'Range' in add_headers:
                    del add_headers['Range']

            if conf.proxy_url:
                url = conf.scheme + '://' + conf.target_host + ':' + str(
                    conf.target_port) + url
                textutils.output_debug(url)

            if conf.is_ssl:
                database.connection_pool.ConnectionCls = UnverifiedHTTPSConnection

            response = database.connection_pool.request('GET',
                                                        url,
                                                        headers=add_headers,
                                                        retries=0,
                                                        redirect=False,
                                                        release_conn=False,
                                                        assert_same_host=False,
                                                        timeout=timeout)

            content = response.data
            code = response.status
            headers = response.headers
        except Exception as e:
            #raise
            code = 0
            content = ''
            headers = dict()

        return code, content, headers
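
The Range header trick above (sampling only the first conf.file_sample_len bytes of binary files) can be reproduced with a plain urllib3 PoolManager outside the project's connection pool; a minimal sketch, assuming a 200-byte sample and noting that servers are free to ignore the Range header:

import urllib3

SAMPLE_LEN = 200   # assumed stand-in for conf.file_sample_len

http = urllib3.PoolManager()
response = http.request('GET', 'http://example.com/',
                        headers={'Range': 'bytes=0-' + str(SAMPLE_LEN - 1)})
# 206 Partial Content if the server honours the range, otherwise 200 with the full body
print(response.status, len(response.data))
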
Example #20
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    current_template[
        'handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' +
                           str(current_template))
Example #21
def get_host_ip(host, port):
    """ Fetch the resolved ip addresses from the cache and return a random address if load-balanced """
    resolved = database.dns_cache.get(host)
    if not resolved:
        textutils.output_debug("Host entry not found in cache for host:" +
                               str(host) + ", resolving")
        resolved = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        database.dns_cache[host] = resolved

    return _get_random_ip_from_cache(resolved), port
Example #22
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    current_template[
        'handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' +
                           str(current_template))
Example #23
def execute():
    """ Fetch sitemap.xml and add each entry as a target """

    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url,
        conf.user_agent,
        conf.fetch_timeout_secs,
        limit_len=False,
        add_headers={})

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:

        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            if not isinstance(match, str):
                match = match.decode('utf-8', 'ignore')
            parsed = urlparse(match)
            if parsed.path:
                new_path = parsed.path
            else:
                continue

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]

            if add_path(new_path):
                added += 1

            textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths '
                                  'using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries '
                                  'in /sitemap.xml')

    else:
        textutils.output_info(
            ' - SitemapXML Plugin: /sitemap.xml not found on '
            'target site')
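
The <loc> extraction can be checked offline; a quick sketch against a hand-written sitemap snippet instead of a live fetch:

import re

sample = """<?xml version="1.0" encoding="UTF-8"?>
<urlset>
  <url><loc>http://example.com/blog/</loc></url>
  <url><loc>http://example.com/admin</loc></url>
</urlset>"""

regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
print(re.findall(regexp, sample))
# ['http://example.com/blog/', 'http://example.com/admin']
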
Example #24
def test_paths_exists():
    """ 
    Test for path existence using http codes and computed 404
    Spawn workers and turn off output for now, it would be irrelevant at this point. 
    """
    manager = ThreadManager()

    # Fill work queue with fetch list
    for path in database.paths:
        dbutils.add_path_to_fetch_queue(path)

    # Consider some file target as potential path
    for file in database.files:
        if not file.get('no_suffix'):
            file_as_path = file.copy()
            file_as_path['url'] = '/' + file_as_path['url']
            dbutils.add_path_to_fetch_queue(file_as_path)

    done_paths = []
    recursion_depth = 0

    textutils.output_debug('Cached: ' + str(database.path_cache))
    while database.fetch_queue.qsize() > 0:
        textutils.output_info('Probing ' + str(database.fetch_queue.qsize()) +
                              ' paths')

        # Wait for initial valid path lookup
        workers = manager.spawn_workers(conf.thread_count,
                                        TestPathExistsWorker)
        manager.wait_for_idle(workers, database.fetch_queue)

        recursion_depth += 1

        if not conf.recursive:
            break

        if recursion_depth >= conf.recursive_depth_limit:
            break

        for validpath in database.valid_paths:

            if validpath['url'] == '/' or validpath['url'] in done_paths:
                continue

            done_paths.append(validpath['url'])

            for path in database.paths:
                if path['url'] in ('/', ''):
                    continue
                path = path.copy()
                path['url'] = validpath['url'] + path['url']
                dbutils.add_path_to_fetch_queue(path)

    textutils.output_info('Found ' + str(len(database.valid_paths)) +
                          ' valid paths')
Example #25
def parse_svn_entries(url):
    description_file = 'SVN entries file at'
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()

    response_code, content, headers = fetcher.fetch_url(
        target_url,
        conf.user_agent,
        conf.fetch_timeout_secs,
        limit_len=False,
        add_headers=base_headers)

    if response_code in conf.expected_file_responses and content:
        tokens = content.decode().split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos - 1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' +
                                               url + '/' + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(
                                ' - Svn Plugin: Downloading: ' + url + '/' +
                                tokens[pos - 1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' +
                                                   url + '/' + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])

                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' +
                                           url + '/' + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' +
                                              url + '/' + tokens[pos - 1] +
                                              '\r')
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + '/' + tokens[
                            pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(
                            path,
                            conf.user_agent,
                            conf.fetch_timeout_secs,
                            limit_len=False)
                        save_file(url + '/' + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' +
                                               url + '/' + tokens[pos - 1])
Example #26
def test_paths_exists():
    """ 
    Test for path existence using http codes and computed 404
    Spawn workers and turn off output for now, it would be irrelevant at this point. 
    """
    manager = ThreadManager()
    
    # Fill work queue with fetch list
    for path in database.paths:
        dbutils.add_path_to_fetch_queue(path)

    # Consider some file target as potential path
    for file in database.files:
        if not file.get('no_suffix'):
            file_as_path = file.copy()
            file_as_path['url'] = '/' + file_as_path['url']
            dbutils.add_path_to_fetch_queue(file_as_path)

    done_paths = []
    recursion_depth = 0

    textutils.output_debug('Cached: ' + str(database.path_cache))
    while database.fetch_queue.qsize() > 0:
        textutils.output_info('Probing ' + str(database.fetch_queue.qsize()) + ' paths')

        # Wait for initial valid path lookup
        workers = manager.spawn_workers(conf.thread_count, TestPathExistsWorker)
        manager.wait_for_idle(workers, database.fetch_queue)

        recursion_depth += 1
        
        if not conf.recursive:
            break
        
        if recursion_depth >= conf.recursive_depth_limit:
            break    
        
        for validpath in database.valid_paths:
            
            if validpath['url'] == '/' or validpath['url'] in done_paths:
                continue
            
            done_paths.append(validpath['url'])
            
            for path in database.paths:
                if path['url'] in ('/', ''):
                    continue
                path = path.copy()
                path['url'] = validpath['url'] + path['url']
                dbutils.add_path_to_fetch_queue(path)

    textutils.output_info('Found ' + str(len(database.valid_paths)) + ' valid paths')
Example #27
def execute():
    """ Fetch sitemap.xml and add each entry as a target """

    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url,
                                                        conf.user_agent,
                                                        conf.fetch_timeout_secs,
                                                        limit_len=False,
                                                        add_headers={}
                                                        )

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:

        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            new_path = match.split(conf.target_host)[1]

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]   

            add_path(new_path)
            add_file(new_path)

            textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)

            added += 1

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths '
                                  'using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries '
                                  'in /sitemap.xml')
               
    else:
        textutils.output_info(' - SitemapXML Plugin: /sitemap.xml not found on '
                              'target site')
Example #28
def execute():
    """ Fetch /robots.txt and add the disallowed paths as target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'Robots.txt entry'

    target_url = urljoin(conf.target_base_path, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)

    if response_code in (200, 302) and content:
        matches = re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', content)
        added = 0
        for match in matches:
            # Filter out some characters
            match = ''.join(c for c in match if c not in ' *?.\n\r\t')

            if conf.debug:
                textutils.output_debug(match)

            # Split on ':'
            splitted = match.split(':')
            if splitted[1]:
                target_path = splitted[1]

                # Remove trailing /
                if target_path.endswith('/'):
                    target_path = target_path[:-1]

                current_template = dict(current_template)
                current_template['url'] = target_path

                if current_template not in database.paths:
                    database.paths.append(current_template)

                textutils.output_debug(' - Robots Plugin Added: ' +
                                       str(target_path) + ' from robots.txt')

                added += 1

        if added > 0:
            textutils.output_info(' - Robots Plugin: added ' + str(added) +
                                  ' base paths using /robots.txt')
        else:
            textutils.output_info(
                ' - Robots Plugin: no usable entries in /robots.txt')

    else:
        textutils.output_info(
            ' - Robots Plugin: /robots.txt not found on target site')
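
The Disallow parsing can likewise be exercised offline; a minimal sketch against a hand-written robots.txt body, using the same regex and character filter as above:

import re

sample = "User-agent: *\nDisallow: /private/\nDisallow: /tmp/cache/\n"

for match in re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', sample):
    match = ''.join(c for c in match if c not in ' *?.\n\r\t')
    target_path = match.split(':')[1]
    if target_path.endswith('/'):
        target_path = target_path[:-1]
    print(target_path)
# /private
# /tmp/cache
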
Example #29
    def run(self):
        while not self.kill_received:
            try:
                # Non-Blocking get since we use the queue as a ringbuffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get("url")
                description = queued.get("description")
                match_string = queued.get("match_string")

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time, limit_len=False
                    )
                else:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time
                    )
                end_time = datetime.now()

                # handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 500:
                    textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
                elif response_code in conf.expected_file_responses:
                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if match_string and re.search(re.escape(match_string), content, re.I):
                        textutils.output_found("String-Matched " + description + " at: " + conf.target_host + url)
                    elif test_valid_result(content):
                        textutils.output_found(description + " at: " + conf.target_host + url)

                elif response_code in conf.redirect_codes:
                    location = headers.get("location")
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
Example #30
    def fetch_url(self, url, user_agent, timeout, limit_len=True, add_headers=None):
        """ Fetch a given url, with a given user_agent and timeout """
        response = None
        try:
            # Avoid the shared mutable default argument: build a fresh dict per call
            if add_headers is None:
                add_headers = {}
            if not add_headers.get('User-Agent'):
                add_headers['User-Agent'] = user_agent
            if not add_headers.get('Connection'):
                add_headers['Connection'] = 'Keep-Alive'
            if not add_headers.get('Host'):
                add_headers['Host'] = conf.target_host

            # Session cookie, priority to user-supplied.
            if conf.cookies:
                add_headers['Cookie'] = conf.cookies
            elif database.session_cookie:
                add_headers['Cookie'] = database.session_cookie

            # Limit request len on binary types
            if limit_len:
                content_range = 'bytes=0-' + str(conf.file_sample_len-1)
                add_headers['Range'] = content_range
            else:
                if 'Range' in add_headers:
                    del add_headers['Range']
            
            if conf.proxy_url:
                url = conf.scheme + '://' + conf.target_host + ':' + str(conf.target_port) + url
                textutils.output_debug(url)
            
            if conf.is_ssl:
                database.connection_pool.ConnectionCls = UnverifiedHTTPSConnection

            # Dynamic timeout
            request_timeout = Timeout(connect=timeout, read=timeout)

            response = database.connection_pool.request('GET', url, headers=add_headers, retries=0, redirect=False,
                                                        release_conn=True, assert_same_host=False,
                                                        timeout=request_timeout, preload_content=False)

            content = response.data
            code = response.status
            headers = response.headers
            response.release_conn()  # return the connection back to the pool
        except Exception as e:
            code = 0
            content = ''
            headers = dict()

        return code, content, headers
Example #31
def sample_404_from_found_path():
    """ For all existing path, compute the 404 CRC so we don't get trapped in a tarpit """
    manager = ThreadManager()
    
    for path in database.valid_paths:
        textutils.output_debug("Path in valid path table: " + str(path))
        for ext in conf.crafted_404_extensions:
            path_clone = dict(path)
            random_file = str(uuid.uuid4())
            
            # We don't benchmark / since we do it first before path discovery
            if path_clone['url'] != '/':
                path_clone['url'] = path_clone['url'] + '/' + random_file + ext   
                database.fetch_queue.put(path_clone)    

    workers = manager.spawn_workers(conf.thread_count, FetchCrafted404Worker)
    manager.wait_for_idle(workers, database.fetch_queue)
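
A standalone look at the crafted-404 urls this produces, assuming a couple of discovered paths and extensions in place of database.valid_paths and conf.crafted_404_extensions:

import uuid

valid_paths = [{'url': '/blog'}, {'url': '/static'}]
crafted_404_extensions = ['.html', '.php']

for path in valid_paths:
    for ext in crafted_404_extensions:
        probe = dict(path)
        probe['url'] = probe['url'] + '/' + str(uuid.uuid4()) + ext
        print(probe['url'])
# e.g. /blog/3f2b8a1c-....html -- urls that should not exist, used to fingerprint the 404 behaviour
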
Example #32
def execute():
    """ Fetch /robots.txt and add the disallowed paths as target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'Robots.txt entry'

    target_url = urljoin(conf.target_base_path, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)

    if response_code in (200, 302) and content:
        matches = re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', content)
        added = 0
        for match in matches:
            # Filter out some characters
            match = ''.join(c for c in match if c not in ' *?.\n\r\t')
            
            if conf.debug:
                textutils.output_debug(match)
                
            # Split on ':'               
            splitted = match.split(':')
            if splitted[1]:
                target_path = splitted[1]
                
                # Remove trailing /
                if target_path.endswith('/'):
                    target_path = target_path[:-1]   

                current_template = dict(current_template)
                current_template['url'] = target_path
                
                if current_template not in database.paths: 
                    database.paths.append(current_template)

                textutils.output_debug(' - Robots Plugin Added: ' + str(target_path) + ' from robots.txt')
                    
                added += 1
                    
        if added > 0:
            textutils.output_info(' - Robots Plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            textutils.output_info(' - Robots Plugin: no usable entries in /robots.txt')
               
    else:
        textutils.output_info(' - Robots Plugin: /robots.txt not found on target site')
Example #33
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update timeout count
    stats.update_timeouts()
Example #34
def compute_request_time(start_time, end_time):
    """
     Compute the average request time and set pessimistically (math.ceil) the request timeout value based on it
     This call will mostly decrease the timeout time.
    """
    # Adjust dynamic timeout level:
    completed_time = (end_time - start_time).seconds
    textutils.output_debug("Completed in: " + str(completed_time))

    database.latest_successful_request_time = completed_time + 1

    # We still need to have a max timeout in seconds
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    elif database.latest_successful_request_time < 1:
        database.latest_successful_request_time = 1

    textutils.output_debug("+Ajusted timeout to: " + str(database.latest_successful_request_time))
Example #35
def compute_request_time(start_time, end_time):
    """
     Compute the average request time and set pessimistically (math.ceil) the request timeout value based on it
     This call will mostly decrease the timeout time.
    """
    # Adjust dynamic timeout level:
    completed_time = (end_time - start_time).seconds
    textutils.output_debug("Completed in: " + str(completed_time))

    database.latest_successful_request_time = completed_time + 1

    # We still need to have a max timeout in seconds
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    elif database.latest_successful_request_time < 1:
        database.latest_successful_request_time = 1

    textutils.output_debug("+Ajusted timeout to: " +
                           str(database.latest_successful_request_time))
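
The timing math above relies on timedelta.seconds truncating to whole seconds, then adds one second of slack and clamps the result. A small demonstration of that clamp-and-pad behaviour, with an assumed bound standing in for conf.max_timeout_secs:

from datetime import datetime, timedelta

MAX_TIMEOUT_SECS = 10   # assumed stand-in for conf.max_timeout_secs

def padded_timeout(start_time, end_time):
    """ Whole seconds elapsed, plus one second of slack, clamped to [1, MAX_TIMEOUT_SECS]. """
    completed_time = (end_time - start_time).seconds
    return min(max(completed_time + 1, 1), MAX_TIMEOUT_SECS)

start = datetime.now()
print(padded_timeout(start, start + timedelta(seconds=2.4)))   # 3
print(padded_timeout(start, start + timedelta(seconds=42)))    # 10 (clamped)
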
Example #36
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' +
                               str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update timeout count
    stats.update_timeouts()
Example #37
def test_behavior(content):
    """ Test if a given valid hit has an improbable behavior. Mainly, no url should have the same return content
    As the previous one if it's already deemed valid by the software (non error, unique content)
    Some identical content should be expected during the runtime, but not the same in X consecutive hits"""

    # Assume normal behavior
    normal = True
    textutils.output_debug('Testing behavior')

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if len(database.behavioral_buffer) <= (conf.behavior_queue_size-1):
        database.behavioral_buffer.append(content)

    # If the queue is full, start testing; if not, give the entries a "chance".
    if len(database.behavioral_buffer) >= conf.behavior_queue_size:
        textutils.output_debug('Testing for sameness with bufsize:' + str(len(database.behavioral_buffer)))
        # Check if all results in the buffer are the same
        same = all(SequenceMatcher(isjunk=None, a=content, b=saved_content, autojunk=False).ratio() > 0.80
                   for saved_content in database.behavioral_buffer)
        if same:
            textutils.output_debug('Same!')
            normal = False

    # Kick out only the first item in the queue if the queue is full so we can detect if behavior restores
    if not normal and len(database.behavioral_buffer):
        database.behavioral_buffer.pop(0)

    return normal
Example #38
def test_behavior(content):
    """ Test if a given valid hit has an improbable behavior. Mainly, no url should have the same return content
    As the previous one if it's already deemed valid by the software (non error, unique content)
    Some identical content should be expected during the runtime, but not the same in X consecutive hits"""

    # Assume normal behavior
    normal = True
    textutils.output_debug('Testing behavior')

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if len(database.behavioral_buffer) <= (conf.behavior_queue_size - 1):
        database.behavioral_buffer.append(content)

    # If the queue is full, start testing; if not, give the entries a "chance".
    if len(database.behavioral_buffer) >= conf.behavior_queue_size:
        textutils.output_debug('Testing for sameness with bufsize:' +
                               str(len(database.behavioral_buffer)))
        # Check if all results in the buffer are the same
        same = all(
            SequenceMatcher(
                isjunk=None, a=content, b=saved_content,
                autojunk=False).ratio() > 0.80
            for saved_content in database.behavioral_buffer)
        if same:
            textutils.output_debug('Same!')
            normal = False

    # Kick out only the first item in the queue if the queue is full so we can detect if behavior restores
    if not normal and len(database.behavioral_buffer):
        database.behavioral_buffer.pop(0)

    return normal
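
A quick feel for the 0.80 SequenceMatcher threshold that drives this sameness check, on two toy response bodies (exact ratios depend on the strings, but near-identical pages score well above the threshold):

from difflib import SequenceMatcher

page_a = "<html><body>Welcome to the site</body></html>"
page_b = "<html><body>Welcome to the blog</body></html>"
page_c = "<html><body>404 - nothing here</body></html>"

print(SequenceMatcher(isjunk=None, a=page_a, b=page_b, autojunk=False).ratio())  # high, near-identical
print(SequenceMatcher(isjunk=None, a=page_a, b=page_c, autojunk=False).ratio())  # noticeably lower
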
Example #39
def test_valid_result(content):
    is_valid_result = True

    # Tweak the content len
    if len(content) > conf.file_sample_len:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip(b'\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content.encode('hex') + "]" +
                               " against Fingerprint: [" +
                               fingerprint.encode('hex') + "]")
        matcher = SequenceMatcher(isjunk=None,
                                  a=fingerprint,
                                  b=content,
                                  autojunk=False)

        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
Example #40
def handle_redirects(queued, target):
    """ This call is used to determine if a suggested redirect is valid.
    if it happens to be, we change the url entry with the redirected location and add it back
    to the call stack. """
    retry_count = queued.get('retries')
    if retry_count and retry_count > 1:
        return
    elif not retry_count:
        queued['retries'] = 0

    parsed_target = urlparse(target)
    target_path = parsed_target.path

    source_path = conf.target_base_path + queued.get('url')
    textutils.output_debug("Handling redirect from: " + source_path + " to " +
                           target_path)

    matcher = SequenceMatcher(isjunk=None,
                              a=target_path,
                              b=source_path,
                              autojunk=False)
    if matcher.ratio() > 0.8:
        queued['url'] = target_path
        queued['retries'] += 1
        # Add back the timed-out item
        textutils.output_debug("Following redirect! " + str(matcher.ratio()))
        database.fetch_queue.put(queued)
    else:
        textutils.output_debug("Bad redirect! " + str(matcher.ratio()))
Example #41
    def wait_for_idle(self, workers, queue):
        """ Wait until fetch queue is empty and handle user interrupt """
        while not database.kill_received and not queue.empty():
            try:
                sleep(0.1)
            except KeyboardInterrupt:
                try:
                    stats.output_stats()
                    sleep(1)
                except KeyboardInterrupt:
                    textutils.output_info(
                        'Keyboard Interrupt Received, cleaning up threads')
                    # Clean reference to sockets
                    database.connection_pool = None
                    database.kill_received = True

                    # Kill remaining workers but don't join the queue (we want to abort:))
                    for worker in workers:
                        if worker is not None and worker.isAlive():
                            worker.kill_received = True
                            worker.join(0)

                    # Set leftover done in cas of a kill.
                    while not queue.empty():
                        queue.get()
                        queue.task_done()

                    break

        # Make sure everything is done before sending control back to application
        textutils.output_debug("Threads: joining queue of size: " +
                               str(queue.qsize()))
        queue.join()
        textutils.output_debug("Threads: join done")

        # Make sure we get all the worker's results before continuing the next step
        for worker in workers:
            if worker is not None and worker.isAlive():
                worker.kill_received = True
                worker.join()
Example #42
def test_valid_result(content):
    is_valid_result = True

    # Tweak the content len
    if len(content) > conf.file_sample_len:
        content = content[0 : conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip("\r\n ")

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug(
            "Testing [" + content.encode("hex") + "]" + " against Fingerprint: [" + fingerprint.encode("hex") + "]"
        )
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)

        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
Example #43
def handle_redirects(queued, target):
    """ This call is used to determine if a suggested redirect is valid.
    if it happens to be, we change the url entry with the redirected location and add it back
    to the call stack. """
    retry_count = queued.get("retries")
    if retry_count and retry_count > 1:
        return
    elif not retry_count:
        queued["retries"] = 0

    parsed_target = urlparse(target)
    target_path = parsed_target.path

    source_path = conf.target_base_path + queued.get("url")
    textutils.output_debug("Handling redirect from: " + source_path + " to " + target_path)

    matcher = SequenceMatcher(isjunk=None, a=target_path, b=source_path, autojunk=False)
    if matcher.ratio() > 0.8:
        queued["url"] = target_path
        queued["retries"] += 1
        # Add back the timed-out item
        textutils.output_debug("Following redirect! " + str(matcher.ratio()))
        database.fetch_queue.put(queued)
    else:
        textutils.output_debug("Bad redirect! " + str(matcher.ratio()))
Example #44
def test_valid_result(content):
    is_valid_result = True

    # Encoding edge case
    # Must be a string to be compared to the 404 fingerprint
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if not len(content):
        content = ""  # empty file, still a forged 404
    elif len(content) < conf.file_sample_len:
        content = content[0:len(content) - 1]
    else:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip('\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content + "]" + " against Fingerprint: [" + fingerprint + "]")
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)

        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
Example #45
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    cache_test = dict()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = dict(filename)
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = path['url'] + filename['url']
                else:
                    new_filename['url'] = path['url'] + '/' + filename['url']

                if not cache_test.get(new_filename['url']):
                    work_list.append(new_filename)
                    cache_test[new_filename['url']] = True
                    textutils.output_debug("No Suffix file added: " + str(new_filename))

            else:
                for suffix in conf.file_suffixes:
                    new_filename = dict(filename)
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = path['url'] + filename['url'] + suffix
                    else:
                        new_filename['url'] = path['url'] + '/' + filename['url'] + suffix

                    if not cache_test.get(new_filename['url']):
                        work_list.append(new_filename)
                        cache_test[new_filename['url']] = True
                        textutils.output_debug("File added: " + str(new_filename))


    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
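
A toy run of the combination logic, assuming two discovered paths, one filename and two suffixes in place of the database and conf globals:

valid_paths = [{'url': '/'}, {'url': '/blog'}]
files = [{'url': 'backup'}]
file_suffixes = ['.zip', '.tar.gz']

work_list, cache_test = [], {}
for path in valid_paths:
    for filename in files:
        for suffix in file_suffixes:
            new_filename = dict(filename)
            new_filename['is_file'] = True
            if path['url'] == '/':
                new_filename['url'] = path['url'] + filename['url'] + suffix
            else:
                new_filename['url'] = path['url'] + '/' + filename['url'] + suffix
            if not cache_test.get(new_filename['url']):
                work_list.append(new_filename)
                cache_test[new_filename['url']] = True

print([entry['url'] for entry in work_list])
# ['/backup.zip', '/backup.tar.gz', '/blog/backup.zip', '/blog/backup.tar.gz']
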
Example #46
def parse_hostname(hostname):
    ssl = False
    if not re.search(r'http://', hostname, re.I) and not re.search(r'https://', hostname, re.I):
        hostname = 'http://' + hostname

    if re.search(r'https://', hostname, re.I):
        ssl = True

    parsed = urlparse(hostname)
    parsed_path = parsed.path

    if parsed_path.endswith('/'):
        parsed_path = parsed_path[0:-1]

    if not parsed.port:
        parsed_port = 80
    else:
        parsed_port = parsed.port


    textutils.output_debug("Starting scan on: " + parsed.hostname + " base: " + parsed_path + " ssl: " + str(ssl))
    return parsed.hostname, parsed_port, parsed_path, ssl
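
Note that parse_hostname falls back to port 80 whenever no explicit port is given, even for https urls. If a scheme-aware default is wanted, it is easy to derive from the parsed url; a small sketch, not part of the original code:

from urllib.parse import urlparse

def default_port(hostname):
    parsed = urlparse(hostname if '://' in hostname else 'http://' + hostname)
    if parsed.port:
        return parsed.port
    return 443 if parsed.scheme == 'https' else 80

print(default_port('https://example.com'))   # 443
print(default_port('example.com:8080'))      # 8080
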
Example #47
    def wait_for_idle(self, workers, queue):
        """ Wait until fetch queue is empty and handle user interrupt """
        while not database.kill_received and not queue.empty():
            try:
                # Make sure everything is done before sending control back to application
                textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
                queue.join()
                textutils.output_debug("Threads: join done")
            except KeyboardInterrupt:
                try:
                    stats.output_stats(workers)
                    sleep(1)  # The time you have to re-press ctrl+c to kill the app.
                except KeyboardInterrupt:
                    textutils.output_info('Keyboard Interrupt Received, waiting for blocking threads to exit')
                    # Clean reference to sockets
                    database.connection_pool = None
                    database.kill_received = True
                    self.kill_workers(workers)
                    sys.exit(0)

        # Make sure we get all the worker's results before continuing the next step
        self.kill_workers(workers)
Example #48
def parse_svn_entries(url):
    description_file = "SVN entries file at"
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
    )

    if response_code in (200, 302) and content:
        tokens = content.split("\n")
        if "dir" in tokens:
            for pos, token in enumerate(tokens):
                if token == "dir":
                    # Fetch more entries recursively
                    if tokens[pos - 1] != "":
                        textutils.output_debug(" - Svn Plugin: Found dir: " + url + "/" + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        else:
                            textutils.output_found(description_dir + " at: " + url + "/" + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])

                elif token == "file":
                    textutils.output_debug(" - Svn Plugin: Found file: " + url + "/" + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + "/" + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(
                            path, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
                        )
                        save_file(url + "/" + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + " at: " + url + "/" + tokens[pos - 1])
Example #49
def parse_svn_entries(url):
    description_file = 'SVN entries file at'
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()

    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False, add_headers=base_headers)
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in conf.expected_file_responses and content:
        tokens = content.split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos-1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' + url + '/' + tokens[pos-1])

                        if conf.allow_download:
                            textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos-1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' + url + '/' + tokens[pos-1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos-1])

                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' + url + '/' + tokens[pos-1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos-1] + '\r')
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + '/' + tokens[pos-1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + '/' + tokens[pos-1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' + url + '/' + tokens[pos-1])
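
Both SVN examples assume the pre-1.7 Subversion working-copy layout, in which .svn/entries is a newline-delimited text file where each entry's kind marker ("dir" or "file") sits on the line immediately after the entry's name; that is why the loops above read tokens[pos - 1]. A standalone sketch of that indexing against a mocked entries fragment (the sample content is illustrative, not a captured file):

# Mocked fragment of a pre-1.7 .svn/entries file: a name line, then a kind line.
mock_entries = "\n".join([
    "10",          # format number
    "",            # the root entry has an empty name
    "dir",
    "admin",       # a sub-directory entry
    "dir",
    "config.php",  # a file entry
    "file",
])

tokens = mock_entries.split("\n")
for pos, token in enumerate(tokens):
    if token in ("dir", "file") and tokens[pos - 1]:
        print(token, "->", tokens[pos - 1])
# dir -> admin
# file -> config.php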
Example #50
0
    def run(self):
        while not self.kill_received:
            try:
                # Non-Blocking get since we use the queue as a ringbuffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(
                    url, conf.user_agent,
                    database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued,
                                   url,
                                   self.thread_id,
                                   output=self.output)
                elif response_code in conf.expected_file_responses:
                    # The server responded with something other than a 404 or an error (500), so take a content sample
                    if not len(content):
                        crafted_404 = ""  # empty file, still a forged 404
                    elif len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    # Edge case control
                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug(
                        "Computed and saved a sample 404 for: " + str(queued) +
                        ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()

            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example #51
0
    def wait_for_idle(self, workers, queue):
            """ Wait until fetch queue is empty and handle user interrupt """
            while not database.kill_received and not queue.empty():
                try:
                    sleep(0.1)
                except KeyboardInterrupt:
                    try:
                        stats.output_stats()
                        sleep(1)  
                    except KeyboardInterrupt:
                        textutils.output_info('Keyboard Interrupt Received, cleaning up threads')
                        # Clean reference to sockets
                        database.connection_pool = None
                        database.kill_received = True
                        
                        # Kill remaining workers but don't join the queue (we want to abort:))
                        for worker in workers:
                            if worker is not None and worker.is_alive():
                                worker.kill_received = True
                                worker.join(0)

                        # Mark leftover items done in case of a kill.
                        while not queue.empty():
                            queue.get()
                            queue.task_done()

                        break

            # Make sure everything is done before sending control back to application
            textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
            queue.join()
            textutils.output_debug("Threads: join done")

            # Make sure we get all the worker's results before continuing the next step
            for worker in workers:
                if worker is not None and worker.is_alive():
                    worker.kill_received = True
                    worker.join()
Example #52
0
def parse_hostname(hostname):
    ssl = False
    if not re.search(r'http://', hostname, re.I) and not re.search(
            r'https://', hostname, re.I):
        hostname = 'http://' + hostname

    if re.search(r'https://', hostname, re.I):
        ssl = True

    parsed = urlparse(hostname)
    parsed_path = parsed.path

    if parsed_path.endswith('/'):
        parsed_path = parsed_path[0:-1]

    if not parsed.port:
        parsed_port = 80
    else:
        parsed_port = parsed.port

    textutils.output_debug("Starting scan on: " + parsed.hostname + " base: " +
                           parsed_path + " ssl: " + str(ssl))
    return parsed.hostname, parsed_port, parsed_path, ssl
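
A quick usage note on the helper above: urlparse lower-cases the hostname, the trailing slash is trimmed from the base path, and in this variant the port defaults to 80 unless it is given explicitly, even when ssl is True. A standalone illustration of the urlparse behaviour the function relies on (Python 3 import path shown):

from urllib.parse import urlparse

parsed = urlparse("https://Example.COM:8443/app/")
print(parsed.hostname, parsed.port, parsed.path)
# example.com 8443 /app/
# With the trailing slash trimmed and ssl detected from the scheme,
# parse_hostname would return ('example.com', 8443, '/app', True).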
Example #53
0
    def run(self):
        while not self.kill_received:
            try:
                # Non-Blocking get since we use the queue as a ringbuffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code in conf.expected_file_responses:
                    # Encoding edge case
                    # Must be a string to be compared to the 404 fingerprint
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')

                    # The server responded with something other than a 404 or an error (500), so take a content sample
                    if not len(content):
                        crafted_404 = ""  # empty file, still a forged 404
                    elif len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()

            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example #54
0
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = filename.copy()
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = ''.join(
                        [path['url'], filename['url']])
                else:
                    new_filename['url'] = ''.join(
                        [path['url'], '/', filename['url']])

                work_list.append(new_filename)
                textutils.output_debug("No Suffix file added: " +
                                       str(new_filename))
            elif filename.get('executable'):
                for executable_suffix in conf.executables_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join(
                            [path['url'], filename['url'], executable_suffix])
                    else:
                        new_filename['url'] = ''.join([
                            path['url'], '/', filename['url'],
                            executable_suffix
                        ])

                    work_list.append(new_filename)
                    textutils.output_debug("Executable File added: " +
                                           str(new_filename))
            else:
                for suffix in conf.file_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join(
                            [path['url'], filename['url'], suffix])
                    else:
                        new_filename['url'] = ''.join(
                            [path['url'], '/', filename['url'], suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Regular File added: " +
                                           str(new_filename))

    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
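
The function above is essentially a cross product of the discovered paths with the file wordlist and the configured suffixes, with the root path special-cased so generated URLs do not start with a double slash. A standalone sketch of that expansion with mocked inputs (the paths, filenames and suffixes below are illustrative, not the tool's real wordlists):

valid_paths = [{'url': '/'}, {'url': '/admin'}]
files = [{'url': 'config'}, {'url': 'backup'}]
suffixes = ['.php', '.zip']

work_list = []
for path in valid_paths:
    for filename in files:
        for suffix in suffixes:
            if path['url'] == '/':
                url = path['url'] + filename['url'] + suffix
            else:
                url = path['url'] + '/' + filename['url'] + suffix
            work_list.append(url)

print(work_list)
# ['/config.php', '/config.zip', '/backup.php', '/backup.zip',
#  '/admin/config.php', '/admin/config.zip', '/admin/backup.php', '/admin/backup.zip']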
Example #55
0
    def run(self):
        while not self.kill_received:
            try:
                # Non-Blocking get since we use the queue as a ringbuffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if timeout count is under max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # The server responded with something other than a 404 or an error (500), so take a content sample
                    if len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Dequeue item
                stats.update_processed_items()
                database.fetch_queue.task_done()

            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example #56
0
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = filename.copy()
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = ''.join([path['url'], filename['url']])
                else:
                    new_filename['url'] = ''.join([path['url'], '/', filename['url']])

                work_list.append(new_filename)
                textutils.output_debug("No Suffix file added: " + str(new_filename))
            elif filename.get('executable'):
                for executable_suffix in conf.executables_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], executable_suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], executable_suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Executable File added: " + str(new_filename))
            else:
                for suffix in conf.file_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Regular File added: " + str(new_filename))

    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
Example #57
0
def test_valid_result(content, is_file=False):
    is_valid_result = True

    # Encoding edge case
    # Must be a string to be compared to the 404 fingerprint
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if not len(content):
        content = ""  # empty file, still a forged 404
    elif len(content) < conf.file_sample_len:
        content = content[0:len(content) - 1]
    else:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip('\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content + "]" +
                               " against Fingerprint: [" + fingerprint + "]")
        matcher = SequenceMatcher(isjunk=None,
                                  a=fingerprint,
                                  b=content,
                                  autojunk=False)

        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is very similar to a generated 404, so treat it as a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    # An empty file can still be evidence of a hidden structure
    if is_file and content == "":
        is_valid_result = True

    return is_valid_result
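
The false-positive filter above hinges on difflib's similarity ratio: a content sample that is more than 80% similar to a previously captured soft-404 fingerprint is discarded. A standalone illustration (both strings are made up for the example):

from difflib import SequenceMatcher

fingerprint = "<html><body>Sorry, the page you requested was not found.</body></html>"
candidate = "<html><body>Sorry, the page you asked for was not found.</body></html>"

matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=candidate, autojunk=False)
print(matcher.ratio() > 0.8)   # True -> test_valid_result would flag this as a false positive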
Example #58
0
    if len(sys.argv) <= 1:
        parser.print_help()
        print('')
        sys.exit()

    # Spawn synchronized print output worker
    print_worker = PrintWorker()
    print_worker.daemon = True
    print_worker.start()

    # Ensure the host is of the right format and set it in config
    parsed_host, parsed_port, parsed_path, is_ssl = netutils.parse_hostname(
        args[1])
    textutils.output_debug("Parsed: " + parsed_host + " port: " +
                           str(parsed_port) + " " + parsed_path + " SSL:" +
                           str(is_ssl))

    # Set conf values
    conf.target_host = parsed_host
    conf.target_base_path = parsed_path
    conf.is_ssl = is_ssl

    textutils.output_debug('Version: ' + str(conf.version))
    textutils.output_debug('Max timeouts per url: ' +
                           str(conf.max_timeout_count))
    textutils.output_debug('Worker threads: ' + str(conf.thread_count))
    textutils.output_debug('Target Host: ' + str(conf.target_host))
    textutils.output_debug('Using Tor: ' + str(conf.use_tor))
    textutils.output_debug('Eval-able output: ' + str(conf.eval_output))
    textutils.output_debug('Using User-Agent: ' + str(conf.user_agent))