Example #1
0
 def test_url_join_case01(self):
     """
     Joining a file name onto a URL whose path is a file in the web root
     yields the same result for a relative and a root-anchored reference.
     """
     base = URL('http://w3af.com/foo.bar')
     expected = u'http://w3af.com/abc.html'

     for reference in ('abc.html', '/abc.html'):
         joined = base.url_join(reference)
         self.assertEqual(joined.url_string, expected)
Example #2
0
 def test_websocket_secure_proto(self):
     """
     URLs using the wss (secure websocket) protocol are parsed: both the
     domain and the protocol can be read back.
     """
     secure_ws = URL('wss://w3af.com')

     domain = secure_ws.get_domain()
     self.assertEqual(domain, 'w3af.com')

     protocol = secure_ws.get_protocol()
     self.assertEqual(protocol, 'wss')
Example #3
0
    def test_can_be_pickled(self):
        """
        Serializing a URL, and the URL returned by get_domain_path(), must
        not raise.
        """
        # get_domain_path() is called before pickling so that the URL being
        # serialized is one that contains a cache
        cached_url = URL('http://www.w3af.com/')
        derived_url = cached_url.get_domain_path()

        for picklable in (cached_url, derived_url):
            cPickle.dumps(picklable)
Example #4
0
 def test_encode_decode(self):
     """
     Decoding the output of url_encode() must give back a URL equal to
     the one we started with.
     """
     source_url = URL(u'https://w3af.com:443/file.asp?id=1%202')

     round_tripped = URL(source_url.url_encode()).url_decode()

     self.assertEqual(source_url, round_tripped)
Example #5
0
 def test_default_proto(self):
     """
     A URL string without a protocol is accepted and reports http as its
     protocol.
     """
     bare_domain = URL('w3af.com')

     domain = bare_domain.get_domain()
     self.assertEqual(domain, 'w3af.com')

     protocol = bare_domain.get_protocol()
     self.assertEqual(protocol, 'http')
Example #6
0
 def test_simplest_url(self):
     """
     Check the path, scheme, file name and extension parsed from a plain
     http URL.
     """
     simple = URL('http://w3af.com/foo/bar.txt')

     # Attributes first, accessor methods second
     self.assertEqual(simple.path, '/foo/bar.txt')
     self.assertEqual(simple.scheme, 'http')

     file_name = simple.get_file_name()
     self.assertEqual(file_name, 'bar.txt')

     extension = simple.get_extension()
     self.assertEqual(extension, 'txt')
Example #7
0
 def test_url_join_case03(self):
     """
     Joining onto a directory URL: a root-anchored reference replaces the
     whole path, a relative one is appended to the directory.
     """
     base = URL('http://w3af.com/def/jkl/')
     cases = (
         ('/def/abc.html', u'http://w3af.com/def/abc.html'),
         ('def/abc.html', u'http://w3af.com/def/jkl/def/abc.html'),
     )

     for reference, expected in cases:
         joined = base.url_join(reference)
         self.assertEqual(joined.url_string, expected)
Example #8
0
    def from_httplib_resp(cls, httplibresp, original_url=None):
        """
        Alternate constructor: build a HTTPResponse from a
        httplib.HTTPResponse instance.

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance. When omitted,
                             the URL reported by the response is used as
                             the original URL too.

        :return: A HTTPResponse instance
        """
        status_code = httplibresp.code
        reason = httplibresp.msg
        raw_headers = httplibresp.info()
        body = httplibresp.read()
        headers = Headers(raw_headers.items())

        if not original_url:
            # Nothing supplied by the caller: one URL plays both roles
            original_url = URL(httplibresp.geturl())
            final_url = original_url
        else:
            # Parse using the original URL's encoding, then decode
            final_url = URL(httplibresp.geturl(), original_url.encoding)
            final_url = final_url.url_decode()

        # get_wait_time() is defined in the keep alive http response object
        if hasattr(httplibresp, 'get_wait_time'):
            elapsed = httplibresp.get_wait_time()
        else:
            elapsed = DEFAULT_WAIT_TIME

        # For urllib2.HTTPError the encoding lives in the wrapped fp; this is
        # possible because in errors.py I do:
        # err = urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
        # In any other case the encoding attribute is only set on
        # CachedResponse instances
        if isinstance(httplibresp, urllib2.HTTPError):
            encoding_holder = httplibresp.fp
        else:
            encoding_holder = httplibresp
        charset = getattr(encoding_holder, 'encoding', None)

        return cls(status_code, body, headers, final_url, original_url,
                   reason, charset=charset, time=elapsed)
Example #9
0
    def test_from_url_keep_form(self):
        """
        from_URL() must copy the query string: same type and equal content,
        but not the very same object.
        """
        original = URL('http://w3af.com/foo/bar.txt')
        original.querystring = URLEncodedForm()

        clone = URL.from_URL(original)

        self.assertIsInstance(clone.querystring, URLEncodedForm)
        self.assertIsNot(clone.querystring, original.querystring)
        self.assertEqual(clone.querystring, original.querystring)
Example #10
0
    def http_request(self, req):
        """
        Build a new HTTPRequest for the same URL as `req`, after applying
        self._url_parameter to it with set_param().

        :param req: The request to copy headers, origin host, verifiability
                    and remaining retries from
        :return: The newly created HTTPRequest
        """
        target = URL(req.get_full_url())
        target.set_param(self._url_parameter)

        return HTTPRequest(target,
                           headers=req.headers,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=req.is_unverifiable(),
                           retries=req.retries_left)
Example #11
0
    def do_follow_redirect(self, req, fp, code, msg, headers):
        """
        Follow a redirect response by opening the URL it points to.

        :param req: The request that received the redirect response
        :param fp: File-like object of the redirect response; it is read and
                   closed once the redirect has been accepted
        :param code: Response status code
        :param msg: Response status message
        :param headers: Response headers; the redirect target is taken from
                        the LOCATION header or, if absent, the URI header
        :return: The result of self.parent.open() for the redirect request
        :raise: The error built by create_error_from_parts() when the
                redirect is not allowed by the RFC, has no target header,
                has a target which can not be joined to the current URL,
                points to a protocol other than HTTP/HTTPS, or exceeds the
                loop / redirection limits
        """
        # Check if we can redirect according to the RFC
        if not self.redirect_allowed_by_rfc(req, code):
            raise self.create_error_from_parts(req, code, msg, headers, fp)

        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        if LOCATION in headers:
            new_url_raw = headers.getheaders(LOCATION)[0]
        elif URI in headers:
            new_url_raw = headers.getheaders(URI)[0]
        else:
            raise self.create_error_from_parts(req, code, msg, headers, fp)

        # Calculate the target URL. The join is performed only once and the
        # string form is read from the resulting object.
        try:
            current_url = URL(req.get_full_url())
            new_url_obj = current_url.url_join(new_url_raw)
            new_url_str = new_url_obj.url_string
        except ValueError:
            raise self.create_error_from_parts(req, code, msg, headers, fp)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP or HTTPS
        if not new_url_str.lower().startswith(('http://', 'https://')):
            raise self.create_error_from_parts(req, code, msg, headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new_request = self.create_redirect_request(req, fp, code, msg,
                                                   headers, new_url_str,
                                                   new_url_obj)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new_request.redirect_dict = req.redirect_dict
            if (visited.get(new_url_str, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise self.create_error_from_parts(req, code, msg, headers, fp)
        else:
            # First redirect of the chain: the same dict is shared by the
            # original and the new request
            visited = new_request.redirect_dict = req.redirect_dict = {}

        visited[new_url_str] = visited.get(new_url_str, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new_request, timeout=req.timeout)
Example #12
0
        def get_response(self, http_request, uri, response_headers):
            """
            Answer with a JSONP-style body that calls the function named in
            the "callback" query string parameter, or "default" when that
            parameter is not present.

            :return: Tuple of (status, response headers, body)
            """
            parsed_uri = URL(uri)

            try:
                callback_name = parsed_uri.get_querystring()['callback'][0]
            except KeyError:
                callback_name = 'default'

            jsonp_body = '%s({})' % callback_name
            response_headers['Content-Type'] = 'application/javascript'

            return self.status, response_headers, jsonp_body
Example #13
0
    def test_memoized(self):
        """
        uri2url() stores its result in the URL's _cache and hands back the
        very same object on the next call.
        """
        uri = URL('http://www.w3af.com/')
        self.assertEqual(uri._cache, {})

        first = uri.uri2url()
        self.assertNotEqual(uri._cache, {})
        self.assertIn(first, uri._cache.values())

        second = uri.uri2url()
        self.assertIs(first, second)

        for result in (first, second):
            self.assertIsInstance(result, URL)
Example #14
0
 def test_from_url(self):
     """
     URL.from_URL() returns a URL with the same path, scheme, file name
     and extension, also for a source URL created without a protocol.
     """
     full_copy = URL.from_URL(URL('http://w3af.com/foo/bar.txt'))

     self.assertEqual(full_copy.path, '/foo/bar.txt')
     self.assertEqual(full_copy.scheme, 'http')
     self.assertEqual(full_copy.get_file_name(), 'bar.txt')
     self.assertEqual(full_copy.get_extension(), 'txt')

     # Source URL built from a bare domain
     bare_copy = URL.from_URL(URL('w3af.com'))

     self.assertEqual(bare_copy.get_domain(), 'w3af.com')
     self.assertEqual(bare_copy.get_protocol(), 'http')
Example #15
0
 def test_set_params(self):
     """
     set_param() replaces the URL params (the ";..." section) and leaves
     the query string as it was.
     """
     params_only = URL('http://w3af.com/;id=1')
     params_only.set_param('file=2')
     self.assertEqual(params_only.get_params_string(), 'file=2')

     # Same parameter name in the query string: only the params change
     with_query = URL('http://w3af.com/xyz.txt;id=1?file=2')
     with_query.set_param('file=3')
     self.assertEqual(with_query.get_params_string(), 'file=3')
     self.assertEqual(with_query.get_path_qs(), '/xyz.txt;file=3?file=2')
    def test_redirect_uri_relative(self):
        """
        A relative value in the "uri" response header is yielded by
        _headers_url_generator() joined to the response URL.
        """
        spider = web_spider()
        base_url = URL('http://www.w3af.org')
        relative_target = '/redir'
        response_headers = Headers([('content-type', 'text/html'),
                                    ('uri', relative_target)])
        response = HTTPResponse(200, '', response_headers, base_url, base_url)

        extracted = list(spider._headers_url_generator(response, None))
        expected = [(base_url.url_join(relative_target), None, response, False)]

        self.assertEqual(extracted, expected)
Example #17
0
    def test_phishtank_match_last_url(self):
        """
        Crawling the URL returned by get_last_vulnerable_url() with the
        phishtank plugin must store exactly one "Phishing scam" finding of
        MEDIUM severity for that URL's domain.
        """
        plugin = self.w3afcore.plugins.get_plugin_inst('crawl', 'phishtank')

        vulnerable_url = URL(self.get_last_vulnerable_url())
        plugin.crawl(FuzzableRequest(vulnerable_url))

        findings = self.kb.get('phishtank', 'phishtank')
        self.assertEqual(len(findings), 1, findings)

        finding = findings[0]
        self.assertEqual(finding.get_name(), 'Phishing scam')
        self.assertEqual(finding.get_severity(), MEDIUM)
        self.assertEqual(finding.get_url().get_domain(),
                         vulnerable_url.get_domain())
Example #18
0
    def met_search(self, query):
        """
        Query a Public Key Server.

        This method is based from the pks.py file from the massive enumeration
        toolset, coded by pdp and released under GPL v2.

        :param query: The domain to search for; only e-mail addresses whose
                      domain part is equal to it are reported
        :return: A list of PKSResult instances, one per distinct account.
                 An empty list is returned when the HTTP request fails.
        """
        url = URL(u'http://pgp.mit.edu:11371/pks/lookup')
        url.querystring = [(u'op', [u'index']), (u'search', [query])]

        try:
            response = self._uri_opener.GET(url, headers=self._headers,
                                            cache=True, grep=False)
        except HTTPRequestException:
            # Very naive exception handling for the case where we can't reach
            # the PKS server (it's down, blocking us, bad internet connection)
            return []

        # Remove tags and the escaped angle brackets before tokenizing
        content = re.sub('(<.*?>|&lt;|&gt;)', '', response.get_body())

        results = []
        # A set gives constant time "already reported" checks
        seen_accounts = set()

        # The first two lines of the cleaned response are skipped
        for line in content.split('\n')[2:]:
            tokens = line.split()

            # Blank lines produce no tokens, so this also filters them out
            if len(tokens) < 5:
                continue

            email = tokens[-1]
            if not SGMLParser.EMAIL_RE.match(email):
                continue

            email_parts = email.split('@')
            account = email_parts[0]
            domain = email_parts[1]

            if domain != query or account in seen_accounts:
                continue

            seen_accounts.add(account)

            # Everything between the third token and the e-mail is the name
            name = ' '.join(tokens[3:-1])
            results.append(PKSResult(name, account, domain, response.id))

        return results
Example #19
0
    def get_response(self, http_request, uri, response_headers):
        """
        Mock response which sleeps before answering; the delay grows with
        the length of the first value of the "redos" query string parameter
        (length / 13.0 seconds).

        :return: Tuple of (status, response headers, body)
        """
        response_headers.update({'status': self.status})
        response_headers.update(self.headers)

        redos_value = URL(uri).get_querystring().get('redos')[0]

        time.sleep(len(redos_value) / 13.0)

        return self.status, response_headers, self.body
Example #20
0
 def test_from_parts(self):
     """
     URL.from_parts() builds a URL whose path, scheme, file name and
     extension match the parts that were supplied.
     """
     parts = ('http', 'w3af.com', '/foo/bar.txt', None, 'a=b', 'frag')
     built = URL.from_parts(*parts)

     self.assertEqual(built.path, '/foo/bar.txt')
     self.assertEqual(built.scheme, 'http')
     self.assertEqual(built.get_file_name(), 'bar.txt')
     self.assertEqual(built.get_extension(), 'txt')
Example #21
0
    def test_ssl_fail_when_requesting_moth_http(self):
        """
        https://github.com/andresriancho/w3af/issues/7989

        This test takes considerable time to run since it needs to timeout the
        SSL connection for each SSL protocol
        """
        # The target is built with httpS <<---- "S" on purpose, pointing to
        # the net location (host:port) where a plain HTTP server listens.
        plain_http_url = URL(get_moth_http())
        https_url = URL('https://%s' % plain_http_url.get_net_location())

        self.uri_opener.settings.set_max_http_retries(0)

        with self.assertRaises(HTTPRequestException):
            self.uri_opener.GET(https_url, timeout=1)
Example #22
0
    def http_request(self, req):
        """
        Build a new HTTPRequest for the same URL as `req`, after applying
        self._url_parameter to it with set_param().

        :param req: The request whose method, data, headers and settings are
                    copied into the new request
        :return: The newly created HTTPRequest
        """
        url_with_param = URL(req.get_full_url())
        url_with_param.set_param(self._url_parameter)

        # Everything but the URL is taken as-is from the incoming request
        copied_settings = {
            'method': req.get_method(),
            'data': req.get_data(),
            'headers': req.get_headers(),
            'origin_req_host': req.get_origin_req_host(),
            'unverifiable': req.is_unverifiable(),
            'retries': req.retries_left,
            'cookies': req.cookies,
            'cache': req.get_from_cache,
            'new_connection': req.new_connection,
            'follow_redirects': req.follow_redirects,
            'use_basic_auth': req.use_basic_auth,
            'use_proxy': req.use_proxy,
            'timeout': req.timeout,
        }

        return HTTPRequest(url_with_param, **copied_settings)
Example #23
0
 def test_get_path_qs(self):
     """
     get_path() and get_path_qs() return the path (plus the query string
     for get_path_qs) without the protocol, domain and port.
     """
     directory = URL(u'https://w3af.com:443/xyz/123/456/789/')
     self.assertEqual(directory.get_path(), u'/xyz/123/456/789/')

     path_qs_cases = (
         (u'https://w3af.com:443/xyz/123/456/789/', u'/xyz/123/456/789/'),
         (u'https://w3af.com:443/xyz/file.asp', u'/xyz/file.asp'),
         (u'https://w3af.com:443/xyz/file.asp?id=1', u'/xyz/file.asp?id=1'),
     )

     for url_string, expected in path_qs_cases:
         self.assertEqual(URL(url_string).get_path_qs(), expected)
Example #24
0
    def do_ALL(self):
        """
        Handle a request received by the spider_man proxy.

        The first request ever handled is announced to the user. Requests
        for TERMINATE_FAVICON_URL are ignored, a request for TERMINATE_URL
        stops the proxy, and any other request is registered as a fuzzable
        request and sent to the server. Errors raised while sending are
        reported through self._send_error().
        """
        global global_first_request
        if global_first_request:
            global_first_request = False
            msg = 'The user is navigating through the spider_man proxy.'
            om.out.information(msg)

        # convert relative URL to absolute if request came from CONNECT
        if hasattr(self.server, 'chainedHandler'):
            base_path = "https://" + self.server.chainedHandler.path
            path = base_path + self.path
        else:
            path = self.path

        # Convert to url_object
        path = URL(path)

        # Ignore favicon.ico requests
        # https://github.com/andresriancho/w3af/issues/9135
        if path == TERMINATE_FAVICON_URL:
            return

        if path == TERMINATE_URL:
            om.out.information('The user terminated the spider_man session.')
            self._send_end()
            self._spider_man.stop_proxy()
            return

        msg = '[spider_man] Handling request: %s %s'
        om.out.debug(msg % (self.command, path))

        # Send this information to the plugin so it can send it to the core
        freq = self._create_fuzzable_request()
        self._spider_man.append_fuzzable_request(freq)

        # Only grep the traffic which belongs to the target domain
        grep = True
        if path.get_domain() != self.server.w3afLayer.target_domain:
            grep = False

        try:
            response = self._send_to_server(grep=grep)
        except Exception as e:
            # "as" form: valid on Python 2.6+ and Python 3
            self._send_error(e)
Example #25
0
    def __init__(self):
        """
        Initialize the plugin and the xssed.com constants it works with:
        the site URL, the "UNFIXED" marker and the regular expressions used
        to extract mirror links and URLs from the site's HTML.
        """
        InfrastructurePlugin.__init__(self)

        #
        #   Depend on xssed.com
        #
        self.XSSED_URL = URL("http://www.xssed.com")
        self.UNFIXED = "UNFIXED"

        # Raw strings: "\d" must reach the regex engine as-is instead of
        # relying on Python leaving an unknown string escape untouched
        self.XSSED_DOMAIN_RE = re.compile(
            r"<a href='(/mirror/\d*/)' target='_blank'>")
        self.XSSED_URL_RE = re.compile(r"URL: (.*?)</th>")
Example #26
0
    def search(self, query, start, count=10):
        """
        Search the web with Bing.

        This method is based from the msn.py file from the massive enumeration
        toolset, coded by pdp and released under GPL v2.

        :param query: The search query
        :param start: Zero based index of the first result; it is sent to
                      Bing as start + 1
        :param count: Not used by this implementation, kept so that the
                      signature stays the same for callers
        :return: A set of BingResult instances for the result links whose
                 domain does not contain any of self.BLACKLISTED_DOMAINS
        """
        encoded_params = urllib.urlencode({'q': query,
                                           'first': start + 1,
                                           'FORM': 'PERE'})
        search_url = URL('http://www.bing.com/search?' + encoded_params)
        response = self._uri_opener.GET(search_url, headers=self._headers,
                                        cache=True, grep=False,
                                        follow_redirects=True)

        # This regex might become outdated, but the good thing is that we have
        # test_bing.py which is going to fail and tell us that it's outdated
        re_match = re.findall('<a href="((http|https)(.*?))" h="ID=SERP,',
                              response.get_body())

        results = set()

        for link, _, _ in re_match:
            try:
                result_url = URL(link)
            except ValueError:
                # Links which can not be parsed are skipped
                continue

            domain = result_url.get_domain()

            # A substring test also covers the exact match: a domain equal
            # to a blacklisted entry contains that entry
            if any(blacklisted in domain
                   for blacklisted in self.BLACKLISTED_DOMAINS):
                continue

            results.add(BingResult(result_url))

        return results
Example #27
0
    def __init__(self, url, body, content_type='text/html', status=200,
                 method='GET', headers=None, delay=None):
        """
        Store the mock response definition and sanity check it.

        :param url: The URL to answer for, a string or a compiled regular
                    expression
        :param body: The response body
        :param content_type: Value for the Content-Type response header
        :param status: The response status
        :param method: HTTP method, must be one of self.KNOWN_METHODS
        :param headers: Optional mapping of extra response headers; they are
                        applied on top of the Content-Type header
        :param delay: Stored as-is in self.delay
        """
        self.url = url
        self.body = body
        self.status = status
        self.method = method
        self.delay = delay
        self.content_type = content_type

        response_headers = {'Content-Type': content_type}
        self.headers = response_headers
        if headers is not None:
            response_headers.update(headers)

        assert method in self.KNOWN_METHODS, self.NO_MOCK
        assert isinstance(url, (basestring, RE_COMPILE_TYPE))

        # String URLs need to have a domain
        if isinstance(url, basestring):
            assert URL(url).get_domain(), 'Need to specify the MockResponse domain'
Example #28
0
    def setUp(self):
        """
        Clean the knowledge base and create a fresh w3afCore. When the test
        case defines MOCK_RESPONSES, httpretty is enabled and registered for
        the protocol, domain and port of self.target_url.

        :raise Exception: When MOCK_RESPONSES is used and target_url is not
                          a valid URL
        """
        self.kb.cleanup()
        self.w3afcore = w3afCore()

        if not self.MOCK_RESPONSES:
            return

        httpretty.enable()

        try:
            url = URL(self.target_url)
        except ValueError as ve:
            # "as" form: valid on Python 2.6+ and Python 3
            msg = 'When using MOCK_RESPONSES you need to set the'\
                  ' target_url attribute to a valid URL, exception was:'\
                  ' "%s".'
            raise Exception(msg % ve)

        domain = url.get_domain()
        proto = url.get_protocol()
        port = url.get_port()

        self._register_httpretty_uri(proto, domain, port)
Example #29
0
    def url_matches(self, request_uri):
        """
        Decide whether a request is the one this mock response answers.

        A string self.url matches when both its domain and its path plus
        query string are equal to the ones in request_uri; a compiled
        regular expression matches when its match() succeeds on request_uri.

        :param request_uri: The http request URI sent by the plugin
        :return: True if the request_uri matches this mock_response
        """
        if isinstance(self.url, basestring):
            requested = URL(request_uri)
            mocked = URL(self.url)

            requested_key = (requested.get_domain(), requested.get_path_qs())
            mocked_key = (mocked.get_domain(), mocked.get_path_qs())

            return requested_key == mocked_key

        if isinstance(self.url, RE_COMPILE_TYPE):
            return bool(self.url.match(request_uri))

        return False
Example #30
0
    def get_uri(self):
        """
        Query the spec / operation and return the URI (with query string
        parameters included).

        Only the parameters whose location is 'query' are sent. Parameters
        with a None value are not included in the query string.

        :return: A URL instance with the query string set
        """
        request_dict = self._bravado_construct_request()
        url = request_dict['url']

        parameters = self._get_filled_parameters()

        for param_name, param_def in self.operation.params.iteritems():
            # We only send in the query string the parameters that belong
            # there. Removed parameters are not inspected any further, so an
            # array defined outside the query string can not trigger a
            # KeyError below.
            if param_def.location != 'query':
                parameters.pop(param_name, None)
                continue

            if 'type' not in param_def.param_spec:
                continue

            if param_def.param_spec['type'] != 'array':
                continue

            # If the parameter type is an array, we only send the first item
            # TODO: Handle collectionFormat from the param_spec to know if
            #       we should send comma separated (csv) or multiple
            #       parameters with the same name and different values
            value = parameters.get(param_name)
            if value is None:
                # Missing / unfilled array, it is dropped when formatting
                continue

            # An empty array has no first item; None makes the parameter be
            # skipped below instead of raising IndexError here
            parameters[param_name] = value[0] if value else None

        if parameters:
            formatted_params = [(k, [str(v)]) for k, v in parameters.items()
                                if v is not None]
            query_string = QueryString(formatted_params)
        else:
            # If there are no parameters, we create an empty query string, which is
            # not going to be shown in the HTTP request in any way since it is
            # serialized to an empty string.
            query_string = QueryString()

        uri = URL(url)
        uri.set_querystring(query_string)

        return uri
Example #31
0
def start_scan():
    """
    Starts a new w3af scan

    Receive a JSON containing:
        - A list with the target URLs
        - The profile (eg. the content of fast_scan.pw3af)

    The request is rejected with a 400 when the JSON lacks scan_profile or
    target_urls, when another scan is still registered, when the profile can
    not be loaded, or when the target URLs are missing or invalid.

    :return: A JSON containing:
        - The URL to the newly created scan (eg. /scans/1)
        - The newly created scan ID (eg. 1)
    """
    payload = request.json

    if not payload or 'scan_profile' not in payload:
        abort(400, 'Expected scan_profile in JSON object')

    if 'target_urls' not in payload:
        abort(400, 'Expected target_urls in JSON object')

    scan_profile = payload['scan_profile']
    target_urls = payload['target_urls']

    #
    # First make sure that there are no other scans running, remember that this
    # REST API is an MVP and we can only run one scan at the time (for now)
    #
    if any(scan_info is not None for scan_info in SCANS.values()):
        abort(
            400, 'This version of the REST API does not support'
            ' concurrent scans. Remember to DELETE finished scans'
            ' before starting a new one.')

    #
    # Before trying to start a new scan we verify that the scan profile is
    # valid and return an informative error if it's not
    #
    scan_profile_file_name, profile_path = create_temp_profile(scan_profile)
    w3af_core = w3afCore()

    try:
        w3af_core.profiles.use_profile(scan_profile_file_name,
                                       workdir=profile_path)
    except BaseFrameworkException as bfe:
        abort(400, str(bfe))
    finally:
        # The temporary profile is removed on success and on failure
        remove_temp_profile(scan_profile_file_name)

    #
    # Now that we know that the profile is valid I verify the scan target info
    #
    if not target_urls:
        abort(400, 'No target URLs specified')

    # Each target is parsed only once: the parsed instances are validated
    # here and then handed to the target option below
    parsed_targets = []
    for target_url in target_urls:
        try:
            parsed_targets.append(URL(target_url))
        except ValueError:
            abort(400, 'Invalid URL: "%s"' % target_url)

    target_options = w3af_core.target.get_options()
    target_option = target_options['target']
    try:
        target_option.set_value(parsed_targets)
        w3af_core.target.set_options(target_options)
    except BaseFrameworkException as bfe:
        abort(400, str(bfe))

    scan_id = get_new_scan_id()
    scan_info = ScanInfo()
    scan_info.w3af_core = w3af_core
    scan_info.target_urls = target_urls
    scan_info.profile_path = scan_profile_file_name
    scan_info.output = RESTAPIOutput()
    SCANS[scan_id] = scan_info

    #
    # Finally, start the scan in a different thread
    #
    args = (scan_info, )
    t = Process(target=start_scan_helper, name='ScanThread', args=args)
    t.daemon = True

    t.start()

    return jsonify({
        'message': 'Success',
        'id': scan_id,
        'href': '/scans/%s' % scan_id
    }), 201
Example #32
0
    def test_get_path_qs(self):
        """
        get_path() and get_path_qs() return the path (plus the query string
        for get_path_qs) without the protocol, domain and port.
        """
        directory_url = u'https://w3af.com:443/xyz/123/456/789/'
        self.assertEqual(URL(directory_url).get_path(), u'/xyz/123/456/789/')

        expected_path_qs = (
            (directory_url, u'/xyz/123/456/789/'),
            (u'https://w3af.com:443/xyz/file.asp', u'/xyz/file.asp'),
            (u'https://w3af.com:443/xyz/file.asp?id=1', u'/xyz/file.asp?id=1'),
        )

        for url_string, path_qs in expected_path_qs:
            parsed = URL(url_string)
            self.assertEqual(parsed.get_path_qs(), path_qs)
Example #33
0
    def test_set_extension(self):
        """
        set_extension() is expected to raise for a file name without an
        extension and to replace only the last extension otherwise, leaving
        the query string in place.
        """
        no_extension = URL('https://www.w3af.com/xyz/foo')
        self.assertRaises(Exception, no_extension.set_extension, 'xml')

        replaceable = ('https://w3af.com/xyz/d.html',
                       'https://w3af.com/xyz/d.html?id=3')

        for url_string in replaceable:
            changed = URL(url_string)
            changed.set_extension('xml')
            self.assertEqual(changed.get_extension(), 'xml')

        # Two extensions: only the trailing one is replaced
        double_extension = URL('https://w3af.com/xyz/d.html.foo?id=3')
        double_extension.set_extension('xml')
        self.assertEqual(double_extension.get_extension(), 'xml')
        self.assertEqual(double_extension.url_string,
                         u'https://w3af.com/xyz/d.html.xml?id=3')
Example #34
0
 def test_hash_equal(self):
     """
     Two URLs created from the same string collapse into one set member.
     """
     first = URL('http://w3af.com/')
     second = URL('http://w3af.com/')

     unique_urls = set([first, second])

     self.assertEqual(len(unique_urls), 1)
Example #35
0
    def test_set_params(self):
        """
        set_param() overwrites the URL params (the ";..." section) without
        touching the query string.
        """
        only_params = URL('http://w3af.com/;id=1')
        only_params.set_param('file=2')
        self.assertEqual(only_params.get_params_string(), 'file=2')

        # The query string uses the same parameter name and must survive
        params_and_qs = URL('http://w3af.com/xyz.txt;id=1?file=2')
        params_and_qs.set_param('file=3')
        self.assertEqual(params_and_qs.get_params_string(), 'file=3')
        self.assertEqual(params_and_qs.get_path_qs(),
                         '/xyz.txt;file=3?file=2')
Example #36
0
 def create_response(self, body, content_type=None):
     """
     Build a 200 HTTPResponse for http://www.w3af.org/ with the given body.

     :param body: The response body
     :param content_type: Content-Type header value, 'text/html' when None
     :return: The HTTPResponse instance
     """
     if content_type is None:
         content_type = 'text/html'

     response_headers = Headers([('Content-Type', content_type)])
     response_url = URL('http://www.w3af.org/')

     return HTTPResponse(200, body, response_headers,
                         response_url, response_url)
Example #37
0
 def test_contains_false(self):
     """
     A string which is not part of the URL must not be reported as
     contained in it.
     """
     url_with_params = URL('http://w3af.com/xyz.txt;id=1?file=2')
     missing_text = 'hello!'

     self.assertNotIn(missing_text, url_with_params)
Example #38
0
    # Load the scan profile; a profile which can not be used is reported to
    # the client as a 400
    try:
        w3af_core.profiles.use_profile(scan_profile_file_name,
                                       workdir=profile_path)
    except BaseFrameworkException as bfe:
        abort(400, str(bfe))

    #
    # Now that we know that the profile is valid I verify the scan target info
    #
    if target_urls is None or not len(target_urls):
        abort(400, 'No target URLs specified')

    # Each target is parsed only once: the parsed instances are validated
    # here and then handed to the target option below
    parsed_targets = []
    for target_url in target_urls:
        try:
            parsed_targets.append(URL(target_url))
        except ValueError:
            abort(400, 'Invalid URL: "%s"' % target_url)

    target_options = w3af_core.target.get_options()
    target_option = target_options['target']
    try:
        target_option.set_value(parsed_targets)
        w3af_core.target.set_options(target_options)
    except BaseFrameworkException as bfe:
        abort(400, str(bfe))

    scan_id = get_new_scan_id()
    scan_info = ScanInfo()
    scan_info.w3af_core = w3af_core
    scan_info.target_urls = target_urls
Example #39
0
    def set_options(self, options_list):
        """
        This method sets all the options that are configured using the user
        interface generated by the framework using the result of get_options().

        Targets containing file:/// are opened and each non-empty line which
        is not a comment is read as one more target URL.

        :param options_list: A dictionary with the options for the plugin.
        :return: No value is returned.
        :raise BaseFrameworkException: When a target file can not be opened,
                                       when more than one target domain was
                                       specified, or when the target
                                       operating system / programming
                                       framework is unknown.
        """
        configured_target_urls = options_list['target'].get_value()
        target_urls = []

        for target_url in configured_target_urls:

            self._verify_url(target_url)

            if not target_url.url_string.count('file:///'):
                # It's a common URL just like http://w3af.com/
                target_urls.append(target_url)
                continue

            try:
                f = urllib2.urlopen(target_url.url_string)
            except Exception:
                # Exception instead of a bare except: KeyboardInterrupt and
                # SystemExit are not turned into a framework error
                msg = 'Cannot open target file: "%s"'
                raise BaseFrameworkException(msg % str(target_url))

            # The file is closed even if one of its lines fails to parse or
            # to verify
            try:
                for line in f:
                    target_in_file = line.strip()

                    # Empty lines are allowed
                    if not target_in_file:
                        continue

                    # Comments starting with # are allowed too
                    if target_in_file.startswith('#'):
                        continue

                    target_in_file_inst = URL(target_in_file)
                    self._verify_url(target_in_file_inst,
                                     file_target=False)
                    target_urls.append(target_in_file_inst)
            finally:
                f.close()

        # Now we perform a check to see if the user has specified more than
        # one target domain, for example: "http://google.com, http://yahoo.com".
        domain_list = list(set(target_url.get_net_location()
                               for target_url in target_urls))

        if len(domain_list) > 1:
            msg = 'You specified more than one target domain: %s.'\
                  ' And w3af can only scan one target domain at a time.'
            raise BaseFrameworkException(msg % ', '.join(domain_list))

        # Save in the config, the target URLs, this may be useful for some
        # plugins
        cf.cf.save('targets', target_urls)
        cf.cf.save('target_domains',
                   list(set(u.get_domain() for u in target_urls)))
        cf.cf.save('baseURLs', [i.base_url() for i in target_urls])

        # Advanced target selection
        # (local is not named "os" to avoid shadowing the stdlib module)
        target_os = options_list['target_os'].get_value_str()
        if target_os.lower() in self._operating_systems:
            cf.cf.save('target_os', target_os.lower())
        else:
            msg = u'Unknown target operating system: "%s"'
            raise BaseFrameworkException(msg % target_os)

        pf = options_list['target_framework'].get_value_str()
        if pf.lower() in self._programming_frameworks:
            cf.cf.save('target_framework', pf.lower())
        else:
            msg = u'Unknown target programming framework: "%s"'
            raise BaseFrameworkException(msg % pf)
Example #40
0
 def _create_http_response(self, domain, body, is_404):
     """
     Build a 200 HTTPResponse with empty headers for the given domain.

     :param domain: The domain used to build the response URL
     :param body: The response body
     :param is_404: When true the URL path is FAILED_FILENAME, otherwise
                    the URL points to the web root
     :return: The HTTPResponse instance
     """
     file_name = FAILED_FILENAME if is_404 else ''
     response_url = URL('http://%s/%s' % (domain, file_name))

     return HTTPResponse(200, body, self.empty_headers,
                         response_url, response_url)
Example #41
0
class TestGeneric(PluginTest):
    """
    Scan tests that use the ``generic`` auth plugin: one against the moth
    target returned by get_moth_http() and one against demo.testfire.net.
    """

    base_url = get_moth_http('/auth/auth_1/')
    demo_testfire = 'http://demo.testfire.net/bank/'

    # Scan configuration used by test_post_auth_xss
    _run_config = {
        'target': base_url,
        'plugins': {
            'crawl': (PluginConfig('web_spider',
                                   ('only_forward', True, PluginConfig.BOOL),
                                   ('ignore_regex', '.*logout.*',
                                    PluginConfig.STR)),),
            'audit': (PluginConfig('xss'),),
            'auth': (PluginConfig('generic',
                                  ('username', '*****@*****.**', PluginConfig.STR),
                                  ('password', 'passw0rd', PluginConfig.STR),
                                  ('username_field', 'username',
                                   PluginConfig.STR),
                                  ('password_field', 'password',
                                   PluginConfig.STR),
                                  ('auth_url', URL(base_url + 'login_form.py'),
                                   PluginConfig.URL),
                                  ('check_url',
                                   URL(base_url + 'post_auth_xss.py'),
                                   PluginConfig.URL),
                                  ('check_string', 'read your input',
                                   PluginConfig.STR)),),
        }
    }

    # Scan configuration used by test_demo_testfire_net (no audit plugins)
    demo_testfire_net = {
        'target': demo_testfire,
        'plugins': {
            'crawl': (PluginConfig('web_spider',
                                   ('only_forward', True, PluginConfig.BOOL),
                                   ('ignore_regex', '.*logout.*',
                                    PluginConfig.STR),
                                   ('follow_regex', '.*queryxpath.*',
                                    PluginConfig.STR)),),
            'auth': (PluginConfig('generic',
                                  ('username', 'admin', PluginConfig.STR),
                                  ('password', 'admin', PluginConfig.STR),
                                  ('username_field', 'uid', PluginConfig.STR),
                                  ('password_field', 'passw', PluginConfig.STR),
                                  ('auth_url', URL(demo_testfire + 'login.aspx'),
                                   PluginConfig.URL),
                                  ('check_url', URL(demo_testfire + 'main.aspx'),
                                   PluginConfig.URL),
                                  ('check_string', 'View Recent Transactions',
                                   PluginConfig.STR)),),
        }
    }

    @attr('smoke')
    def test_post_auth_xss(self):
        """
        Scan the moth target and expect exactly one XSS vulnerability, found
        in the ``text`` token of post_auth_xss.py.
        """
        self._scan(self._run_config['target'], self._run_config['plugins'])

        vulns = self.kb.get('xss', 'xss')

        self.assertEqual(len(vulns), 1, vulns)

        vuln = vulns[0]
        self.assertEqual(vuln.get_name(), 'Cross site scripting vulnerability')
        self.assertEqual(vuln.get_token_name(), 'text')
        self.assertEqual(vuln.get_url().get_path(),
                         '/auth/auth_1/post_auth_xss.py')

    @attr('internet')
    @attr('fails')
    def test_demo_testfire_net(self):
        """
        Scan demo.testfire.net and expect the queryxpath URLs to be known.

        The test is skipped when the site is unreachable or when its login
        page no longer contains the expected text.
        """
        # We don't control the demo.testfire.net domain, so we'll check if its
        # up before doing anything else
        uri_opener = ExtendedUrllib()
        login_url = URL(self.demo_testfire + 'login.aspx')
        try:
            res = uri_opener.GET(login_url)
        except Exception:
            # Only real errors mean "unreachable"; a bare except would also
            # turn KeyboardInterrupt / SystemExit into a skipped test.
            raise SkipTest('demo.testfire.net is unreachable!')
        else:
            if 'Online Banking Login' not in res.body:
                raise SkipTest('demo.testfire.net has changed!')

        self._scan(self.demo_testfire_net['target'],
                   self.demo_testfire_net['plugins'])

        urls = self.kb.get_all_known_urls()
        url_strings = set(str(u) for u in urls)

        self.assertTrue(self.demo_testfire + 'queryxpath.aspx' in url_strings)
        self.assertTrue(
            self.demo_testfire + 'queryxpath.aspx.cs' in url_strings)
Example #42
0
 def setUp(self):
     """
     Clean the knowledge base and create the plugin instance, the target URL
     and a FuzzableRequest for that URL.
     """
     kb.kb.cleanup()

     plugin = ssn()
     plugin._already_inspected = set()
     self.plugin = plugin

     target = URL('http://www.w3af.com/')
     self.url = target
     self.request = FuzzableRequest(target)
Example #43
0
    def test_increasing_delay_on_errors(self):
        """
        Send 100 failing requests and verify that time.sleep() (patched) is
        called with growing delays, that every request raised
        HTTPRequestException and that the opener's _sleep_log is updated and
        later cleared.
        """
        # Before any request is sent nothing was slept on
        expected_log = {
            0: False,
            10: False,
            20: False,
            30: False,
            40: False,
            50: False,
            60: False,
            70: False,
            80: False,
            90: False,
            100: False
        }
        self.assertEqual(self.uri_opener._sleep_log, expected_log)

        # NOTE(review): presumably this daemon answers every connection with
        # an empty reply so that each GET fails -- confirm in EmptyTCPHandler
        return_empty_daemon = UpperDaemon(EmptyTCPHandler)
        return_empty_daemon.start()
        return_empty_daemon.wait_for_start()

        port = return_empty_daemon.get_port()

        # No retries means that the test is easier to read/understand
        self.uri_opener.settings.set_max_http_retries(0)

        # We want to keep going, don't test the _should_stop_scan here.
        self.uri_opener._should_stop_scan = lambda x: False
        self.uri_opener._rate_limit = lambda: True

        url = URL('http://127.0.0.1:%s/' % port)
        http_exception_count = 0
        loops = 100

        # Now check the delays
        with patch('w3af.core.data.url.extended_urllib.time.sleep') as sleepm:
            for i in xrange(loops):
                try:
                    self.uri_opener.GET(url, cache=False)
                except HTTPRequestException:
                    http_exception_count += 1
                except Exception as e:
                    msg = 'Not expecting: "%s"'
                    self.fail(msg % e.__class__.__name__)
                else:
                    self.fail('Expecting HTTPRequestException')

            # The loop ran to completion
            self.assertEqual(loops - 1, i)

            # Note that the timeouts are increasing based on the error rate and
            # SOCKET_ERROR_DELAY
            expected_calls = [
                call(1.5),
                call(3.0),
                call(4.5),
                call(6.0),
                call(7.5),
                call(9.0),
                call(10.5),
                call(12.0),
                call(13.5)
            ]

            # Every entry except the first and the last one was slept on
            expected_log = {
                0: False,
                10: True,
                20: True,
                30: True,
                40: True,
                50: True,
                60: True,
                70: True,
                80: True,
                90: True,
                100: False
            }
            self.assertEqual(expected_calls, sleepm.call_args_list)
            self.assertEqual(http_exception_count, loops)
            self.assertEqual(self.uri_opener._sleep_log, expected_log)

            # This one should also clear the log
            try:
                self.uri_opener.GET(url, cache=False)
            except HTTPRequestException:
                pass
            else:
                self.fail('Expected HTTPRequestException')

            # The log was cleared, all values should be False
            self.assertTrue(
                all(not v for v in self.uri_opener._sleep_log.values()))
Example #44
0
class TestHTMLRendering(PluginTest):
    """
    Render the html_file output plugin template with a fixed CONTEXT and
    write the result to the plugin's output file.
    """

    # Template context passed to the plugin's _render_html_file()
    CONTEXT = {
        'target_urls': ['http://w3af.com/', 'http://w3af.com/blog'],
        'target_domain':
        'w3af.com',
        'enabled_plugins': {
            'audit': ['xss'],
            'crawl': ['web_spider']
        },
        'findings': [
            MockVuln('SQL injection', None, 'High', 1, 'sqli'),
            MockVuln('XSS-2', None, 'Medium', [], 'xss'),
            MockVuln('XSS-3', None, 'Low', [], 'xss'),
            MockVuln('XSS-4', None, 'Information', 4, 'xss')
        ],
        'debug_log': [('Fri Mar 13 14:11:58 2015', 'debug', 'Log 1' * 40),
                      ('Fri Mar 13 14:11:59 2015', 'debug', 'Log 2'),
                      ('Fri Mar 13 14:11:59 2015', 'error', 'Log 3' * 5)],
        'known_urls': [
            URL('http://w3af.com'),
            URL('http://w3af.com/blog'),
            URL('http://w3af.com/oss')
        ]
    }

    def setUp(self):
        """
        Get the html_file plugin instance and store two request/response
        pairs (response ids 1 and 4) in the HTTP history.
        """
        super(TestHTMLRendering, self).setUp()
        self.plugin = self.w3afcore.plugins.get_plugin_inst(
            'output', 'html_file')

        HistoryItem().init()

        self._save_history_item('http://w3af.com/a/b/c.php', 'a=1',
                                '<html>', 1)
        self._save_history_item('http://w3af.com/foo.py', 'text=xss',
                                '<html>empty</html>', 4)

    def _save_history_item(self, url_string, post_data, body, response_id):
        """
        Save one request/response pair as a HistoryItem.

        :param url_string: URL used for both the request and the response
        :param post_data: Value passed as ``data`` to the HTTPRequest
        :param body: Body of the text/html HTTPResponse (code 200)
        :param response_id: Id assigned to the response via set_id()
        :return: None
        """
        url = URL(url_string)
        request = HTTPRequest(url, data=post_data)
        hdr = Headers([('Content-Type', 'text/html')])
        res = HTTPResponse(200, body, hdr, url, url)

        item = HistoryItem()
        item.request = request
        res.set_id(response_id)
        item.response = res
        item.save()

    def test_render(self):
        """
        Render the plugin template into memory, assert that rendering
        reported success and write the output to the plugin's output file.
        """
        output = StringIO()

        # Context managers make sure both files are closed, even when
        # rendering or writing raises.
        with open(self.plugin._template, 'r') as template:
            result = self.plugin._render_html_file(template, self.CONTEXT,
                                                   output)

        self.assertTrue(result)

        output.seek(0)
        output_path = os.path.expanduser(self.plugin._output_file_name)
        with open(output_path, 'w') as rendered:
            rendered.write(output.read())
Example #45
0
def add_url():
    """
    Queue the URL received in the JSON body of the current request.

    The ``url`` value is wrapped in a URL object and put (non-blocking) on
    urllist.url_queue; empty values are ignored.

    :return: A JSON response with ``{"status": True}``.
    """
    submitted = request.json["url"]

    if submitted:
        urllist.url_queue.put_nowait(URL(submitted))

    return jsonify({"status": True})
Example #46
0
 def setUp(self):
     """
     Create the URL, the text/html headers and the ParserCache instance
     stored on the test case.
     """
     content_type = (u'content-type', u'text/html')

     self.url = URL('http://w3af.com')
     self.headers = Headers([content_type])
     self.dpc = ParserCache()
Example #47
0
from w3af.core.data.request.fuzzable_request import FuzzableRequest
from w3af.core.controllers.daemons.proxy.templates.utils import render
from w3af.core.controllers.plugins.crawl_plugin import CrawlPlugin
from w3af.core.controllers.daemons.proxy import Proxy, ProxyHandler
from w3af.core.controllers.exceptions import RunOnce, ProxyException
from w3af.core.controllers.misc.decorators import runonce

from w3af.core.data.options.opt_factory import opt_factory
from w3af.core.data.options.option_list import OptionList
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.dc.headers import Headers

# URL constants for the spider_man plugin, both on the 127.7.7.7 host.
#
# Cohny changed the original http://w3af/spider_man?terminate
# to http://127.7.7.7/spider_man?terminate because in Opera we got
# an error if we used the original one! Thanks Cohny!
TERMINATE_URL = URL('http://127.7.7.7/spider_man?terminate')
# NOTE(review): presumably the favicon request a browser sends for the
# terminate host -- confirm how the proxy handler uses it.
TERMINATE_FAVICON_URL = URL('http://127.7.7.7/favicon.ico')


class spider_man(CrawlPlugin):
    """
    SpiderMan is a local proxy that will collect new URLs.

    :author: Andres Riancho ([email protected])
    :author: Alexander Berezhnoy < alexander.berezhnoy |at| gmail.com >
    """
    def __init__(self):
        CrawlPlugin.__init__(self)

        # No proxy instance is held when the plugin is created
        self._proxy = None

        # Starts as True; nothing has been captured at construction time
        self._first_captured_request = True
Example #48
0
 def setup_basic_authentication(self):
     """
     Configure basic authentication (user1 / password) for websocket.com
     in the uri_opener settings.
     """
     opener_settings = self.w3afcore.uri_opener.settings
     auth_url = URL('websocket.com')

     opener_settings.set_basic_auth(auth_url, 'user1', 'password')
Example #49
0
    def alert_if_target_is_301_all(self):
        """
        Alert the user when the configured target is set to a site which will
        301 redirect all requests to https://

        Each configured target is requested once (without cache); a 3xx
        response whose ``location`` / ``uri`` header points to a different
        protocol or domain marks the site as redirecting.

        :see: https://github.com/andresriancho/w3af/issues/14976
        :return: True if the site returns 301 for all resources. Also an Info
                 instance is saved to the KB in order to alert the user.
        :raises ScanMustStopByUserRequest: Re-raised untouched when the user
                                           stopped the scan.
        :raises ScanMustStopException: When the request to a target fails
                                       with any other exception.
        """
        site_does_redirect = False
        msg = ('The configured target domain redirects all HTTP requests to a'
               ' different location. The most common scenarios are:\n\n'
               ''
               '    * HTTP redirect to HTTPS\n'
               '    * domain.com redirect to www.domain.com\n\n'
               ''
               'While the scan engine can identify URLs and vulnerabilities'
               ' using the current configuration it might be wise to start'
               ' a new scan setting the target URL to the redirect target.')

        targets = cf.cf.get('targets')

        for url in targets:
            # We test if the target URLs are redirecting to a different protocol
            # or domain.
            try:
                http_response = self._w3af_core.uri_opener.GET(url, cache=False)
            except ScanMustStopByUserRequest:
                # Not a real error, the user stopped the scan
                raise
            except Exception as e:
                emsg = 'Exception found during alert_if_target_is_301_all(): "%s"'
                emsg %= e

                om.out.debug(emsg)
                raise ScanMustStopException(emsg)
            else:
                if 300 <= http_response.get_code() <= 399:

                    # Get the redirect target
                    lower_headers = http_response.get_lower_case_headers()
                    redirect_url = None

                    # Both headers are inspected; when both hold a valid URL
                    # the value from 'uri' is the one that is kept.
                    for header_name in ('location', 'uri'):
                        if header_name in lower_headers:
                            header_value = lower_headers[header_name]
                            header_value = header_value.strip()
                            try:
                                redirect_url = URL(header_value)
                            except ValueError:
                                # No special invalid URL handling required
                                continue

                    if not redirect_url:
                        continue

                    # Check if the protocol was changed:
                    target_proto = url.get_protocol()
                    redirect_proto = redirect_url.get_protocol()

                    if target_proto != redirect_proto:
                        site_does_redirect = True
                        break

                    # Check if the domain was changed:
                    target_domain = url.get_domain()
                    redirect_domain = redirect_url.get_domain()

                    if target_domain != redirect_domain:
                        site_does_redirect = True
                        break
Example #50
0
 def test_basic(self):
     """
     An HTTPRequest built from a URL without a path reports the URL with a
     trailing slash, both as full URL and as URI.
     """
     expected = 'http://www.w3af.com/'
     request = HTTPRequest(URL('http://www.w3af.com'))

     self.assertEqual(request.get_full_url(), expected)
     self.assertEqual(request.get_uri().url_string, expected)
Example #51
0
    def test_contains_true(self):
        """
        The ``in`` operator finds sub-strings of the URL, checked with a
        fresh URL instance for each fragment.
        """
        for fragment in ('1', 'file=2'):
            url = URL('http://w3af.com/xyz.txt;id=1?file=2')
            self.assertIn(fragment, url)
Example #52
0
 def test_target_post_data(self):
     """
     A Target built with URL and post-data yields both the --url and the
     --data parameters, in that order.
     """
     params = Target(URL(self.SQLI_GET), self.DATA_POST).to_params()

     expected_params = ["--url=%s" % self.SQLI_GET,
                        "--data=%s" % self.DATA_POST]
     self.assertEqual(params, expected_params)
Example #53
0
 def test_iter(self):
     """
     Iterating a URL object and joining the items gives back the original
     URL string.
     """
     url_string = u'http://w3af.com/xyz.txt;id=1?file=2'
     rebuilt = ''.join(char for char in URL(url_string))

     self.assertEqual(rebuilt, url_string)
Example #54
0
 def test_target_basic(self):
     """
     A Target built only with a URL yields just the --url parameter.
     """
     params = Target(URL(self.SQLI_GET)).to_params()

     expected_params = ["--url=%s" % self.SQLI_GET]
     self.assertEqual(params, expected_params)
Example #55
0
 def test_url_in_qs(self):
     """
     A URL inside the query string does not affect the parsed netloc.
     """
     parsed = URL('http://w3af.org/?foo=http://w3af.com')

     self.assertEqual(parsed.netloc, 'w3af.org')
Example #56
0
class TestAutocompleteInvalidCredentials(PluginTest):
    """
    Scan with the ``autocomplete`` auth plugin configured with an invalid
    password and verify the authentication error stored in the KB.
    """
    target_url = 'http://w3af.org/'

    login_form_url = URL(target_url + 'login_form.py')
    login_post_handler_url = URL(target_url + 'login_post.py')

    check_url = URL(target_url + 'admin')
    check_string = 'Logged in'

    # Mocked HTTP responses for the login form, the login POST handler, the
    # session check URL and a generic success page
    MOCK_RESPONSES = [
        MockResponse('http://w3af.org/login_form.py',
                     HTML_LOGIN_FORM,
                     status=200,
                     method='GET',
                     headers={'Set-Cookie': '__csrf=09876xyzxyz'}),

        LoginMockResponse('http://w3af.org/login_post.py',
                          '',
                          method='POST'),

        SessionCheckMockResponse('http://w3af.org/admin', ''),

        MockResponse('http://w3af.org/unittest',
                     'Success',
                     status=200,
                     method='GET'),
    ]

    _run_config = {
        'target': target_url,
        'plugins': {
            'audit': (PluginConfig('xss'),),
            'auth': (
                PluginConfig(
                    'autocomplete',
                    ('username', USER, PluginConfig.STR),
                    #
                    # The login process fails because of this invalid password
                    #
                    ('password', PASS + 'invalid', PluginConfig.STR),
                    ('login_form_url', login_form_url, PluginConfig.URL),
                    ('check_url', check_url, PluginConfig.URL),
                    ('check_string', check_string, PluginConfig.STR),
                ),
            ),
        }
    }

    def test_handle_invalid_credentials(self):
        """
        Exactly one 'authentication' / 'error' info is saved, carrying the
        expected name, description and HTTP response ids.
        """
        config = self._run_config
        self._scan(config['target'], config['plugins'])

        auth_errors = kb.kb.get('authentication', 'error')
        self.assertEqual(len(auth_errors), 1)

        auth_error = auth_errors[0]

        expected_desc = (
            'The authentication plugin failed to get a valid application session using the user-provided configuration settings.\n'
            '\n'
            'The plugin generated the following log messages:\n'
            '\n'
            'Logging into the application with user: [email protected]\n'
            'Login form with action http://w3af.org/login_post.py found in HTTP response with ID 21\n'
            'Login form sent to http://w3af.org/login_post.py in HTTP request ID 22\n'
            'Checking if session for user [email protected] is active\n'
            'User "*****@*****.**" is NOT logged into the application, the `check_string` was not found in the HTTP response with ID 23.'
        )

        self.assertEqual(auth_error.get_name(), 'Authentication failure')
        self.assertEqual(auth_error.get_desc(with_id=False), expected_desc)
        self.assertEqual(auth_error.get_id(), [21, 22, 23])
Example #57
0
 def test_path_root(self):
     """
     The path of a URL that ends with the domain and a slash is '/'.
     """
     root_url = URL('http://w3af.com/')

     self.assertEqual(root_url.path, '/')
class TestSerializedObject(unittest.TestCase):
    """
    Tests for the serialized_object grep plugin: serialized PHP objects are
    sent in the query string, headers, cookies and post-data and the number
    of findings saved to the KB is verified.
    """

    def setUp(self):
        """
        Clean the KB and create the plugin plus an empty text/html response
        that is passed to every grep() call.
        """
        kb.kb.cleanup()

        self.plugin = serialized_object()

        self.url = URL('http://www.w3af.com/')
        self.headers = Headers([('content-type', 'text/html')])
        self.response = HTTPResponse(200,
                                     '',
                                     self.headers,
                                     self.url,
                                     self.url,
                                     _id=1)

    def tearDown(self):
        self.plugin.end()

    def _assert_finding_count(self, expected_count):
        """
        Assert how many serialized_object findings were saved to the KB.
        """
        findings = kb.kb.get('serialized_object', 'serialized_object')
        self.assertEqual(len(findings), expected_count)

    def _create_form_request(self, post_data):
        """
        Build a FuzzableRequest to self.url whose post-data is the
        URL-encoded form parsed from ``post_data``.
        """
        headers = Headers([('Content-Type',
                            'application/x-www-form-urlencoded')])

        form = URLEncodedForm.from_postdata(headers, post_data)
        return FuzzableRequest(self.url, headers=headers, post_data=form)

    def test_php_serialized_objects_query_string(self):
        # Each object is sent in its own request, as the value of a query
        # string parameter named after its index
        for i, obj in enumerate(SERIALIZED_PHP_OBJECTS):
            url = self.url.copy()

            qs = QueryString([(str(i), [obj])])
            url.set_querystring(qs)

            request = FuzzableRequest(url)

            self.plugin.grep(request, self.response)

        self._assert_finding_count(2)

    def test_php_serialized_objects_query_string_b64(self):
        url = self.url.copy()

        b64obj = base64.b64encode(SERIALIZED_PHP_OBJECTS[0])
        qs = QueryString([('viewstate', [b64obj])])
        url.set_querystring(qs)

        request = FuzzableRequest(url)

        self.plugin.grep(request, self.response)

        self._assert_finding_count(1)

    def test_php_serialized_objects_headers(self):
        headers = Headers([('X-API-Key', SERIALIZED_PHP_OBJECTS[0])])
        request = FuzzableRequest(self.url, headers=headers)

        self.plugin.grep(request, self.response)

        self._assert_finding_count(1)

    def test_php_serialized_objects_cookies(self):
        cookie_value = 'state=%s' % base64.b64encode(SERIALIZED_PHP_OBJECTS[0])
        headers = Headers([('Cookie', cookie_value)])
        request = FuzzableRequest(self.url, headers=headers)

        self.plugin.grep(request, self.response)

        self._assert_finding_count(1)

    def test_php_serialized_objects_post_data(self):
        post_data = 'obj=%s' % base64.b64encode(SERIALIZED_PHP_OBJECTS[1])
        request = self._create_form_request(post_data)

        self.plugin.grep(request, self.response)

        self._assert_finding_count(1)

    def test_not_php_serialized_objects(self):
        # Note that I'm sending the serialized object in reverse string order
        post_data = 'obj=%s' % base64.b64encode(
            SERIALIZED_PHP_OBJECTS[1][::-1])
        request = self._create_form_request(post_data)

        self.plugin.grep(request, self.response)

        self._assert_finding_count(0)

    def test_mutated_request(self):
        # The serialized object is sent as-is (not reversed) next to an extra
        # `test` parameter; every mutant created from the request is passed
        # to the plugin and only one finding is expected in total.
        post_data = 'test=1&obj=%s' % base64.b64encode(
            SERIALIZED_PHP_OBJECTS[1])
        request = self._create_form_request(post_data)
        mutants = create_mutants(request, ['x'])

        for mutant in mutants:
            self.plugin.grep(mutant, self.response)

        self._assert_finding_count(1)
 def setUp(self):
     """
     Clean the knowledge base and create the plugin, URL, text/html headers
     and FuzzableRequest used by the tests.
     """
     kb.kb.cleanup()
     self.plugin = strange_parameters()

     target = URL('http://www.w3af.com/')
     content_type = ('content-type', 'text/html')

     self.url = target
     self.headers = Headers([content_type])
     self.request = FuzzableRequest(target)
Example #60
0
class retirejs(GrepPlugin):
    """
    Uses retirejs to identify javascript libraries with known vulnerabilities

    :author: Andres Riancho ([email protected])
    """

    # grep() ignores requests whose method is not listed here ...
    METHODS = ('GET', )
    # ... and responses whose HTTP code is not listed here
    HTTP_CODES = (200, )

    # Command line templates for the `retire` executable.
    # NOTE(review): the %s placeholders are filled in by code that is not
    # visible here -- confirm the argument order against the callers.
    RETIRE_CMD = 'retire -j --outputformat json --outputpath %s --jspath %s'
    RETIRE_CMD_VERSION = 'retire --version'
    RETIRE_CMD_JSREPO = 'retire -j --outputformat json --outputpath %s --jsrepo %s --jspath %s'

    # NOTE(review): presumably the prefix the retire version string must
    # start with -- confirm in the version check
    RETIRE_VERSION = '2.'

    # NOTE(review): presumably a timeout in seconds for running retire --
    # confirm where it is used
    RETIRE_TIMEOUT = 5
    # Default value for self._retire_db_url, the URL requested by
    # _download_retire_db()
    RETIRE_DB_URL = URL(
        'https://raw.githubusercontent.com/RetireJS/retire.js/master/repository/jsrepository.json'
    )
    # A batch is analyzed by grep() when it holds exactly this many responses
    BATCH_SIZE = 20

    def __init__(self):
        GrepPlugin.__init__(self)

        self._analyzed_hashes = ScalableBloomFilter()
        self._retirejs_path = self._get_retirejs_path()

        # User-configured parameters
        self._retire_db_url = self.RETIRE_DB_URL

        # Pending (uri, response id, filename) entries and related paths
        self._batch = []
        self._js_temp_directory = None
        self._retire_db_filename = None

        # State of the retirejs installation checks
        self._should_run_retirejs_install_check = True
        self._is_valid_retire_version = None
        self._is_valid_retirejs_exit_code = None

    def grep(self, request, response):
        """
        Queue javascript HTTP responses and run retirejs on them in batches.

        The response is ignored when retirejs is not installed, when the
        request method or response code are not in METHODS / HTTP_CODES, when
        the content type does not contain 'javascript', when
        _should_analyze() rejects it, or when the retirejs DB is not
        available after trying to download it.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        # The checks short-circuit left to right, in the same order in which
        # they are listed in the docstring
        must_skip = (not self._retirejs_is_installed()
                     or request.get_method() not in self.METHODS
                     or response.get_code() not in self.HTTP_CODES
                     or 'javascript' not in response.content_type
                     or not self._should_analyze(response))

        if must_skip:
            return

        self._download_retire_db()

        # The filename stays None when the DB download failed
        if self._retire_db_filename is None:
            return

        with self._plugin_lock:
            pending = self._add_response_to_batch(response)

            # NOTE(review): self._batch is not reset here after a batch is
            # analyzed (in the visible code only end() resets it) -- confirm
            # that this is intended.
            if self._should_analyze_batch(pending):
                self._analyze_batch(pending)
                self._remove_batch(pending)

    def _remove_batch(self, batch):
        """
        Call _remove_file() on the filename of every entry in the batch.

        :param batch: Iterable of (url, response id, filename) entries
        :return: None
        """
        for _url, _response_id, response_filename in batch:
            self._remove_file(response_filename)

    def _should_analyze_batch(self, batch):
        """
        :param batch: The batch to check
        :return: True when the batch holds exactly BATCH_SIZE entries
        """
        entry_count = len(batch)
        return entry_count == self.BATCH_SIZE

    def _add_response_to_batch(self, response):
        """
        Save the HTTP response to a file and append a
        (uri, response id, filename) entry to the batch.

        :param response: The HTTP response to add
        :return: A copy of the batch
        """
        filename = self._save_response_to_file(response)

        entry = (response.get_uri(), response.get_id(), filename)
        self._batch.append(entry)

        return list(self._batch)

    def _analyze_batch(self, batch):
        """
        Analyze a batch in two steps:

            * Run retirejs on the batch to get its JSON output
            * Hand the batch and that output to _json_to_kb()

        :param batch: The batch to run on
        :return: None, any vulnerabilities are saved to the KB.
        """
        retire_output = self._run_retire_on_batch(batch)
        self._json_to_kb(batch, retire_output)

    def end(self):
        """
        Analyze whatever is still pending in the batch, remove the files of
        those entries and start over with an empty batch.

        :return: None
        """
        pending = self._batch

        self._analyze_batch(pending)
        self._remove_batch(pending)

        self._batch = []

    def _download_retire_db(self):
        """
        Downloads the retirejs DB from self._retire_db_url, saves it to the
        w3af temp directory and saves the full path to the DB in
        self._retire_db_filename

        When the request raises or the response code is not 200 an error is
        logged and self._retire_db_filename is left untouched (None).

        :return: None
        """
        # Only download once (even when threads are used)
        with self._plugin_lock:

            if self._retire_db_filename is not None:
                return

            # w3af grep plugins shouldn't (by definition) perform HTTP requests
            # But in this case we're breaking that general rule to retrieve the
            # DB at the beginning of the scan
            try:
                http_response = self._uri_opener.GET(self._retire_db_url,
                                                     binary_response=True,
                                                     respect_size_limit=False)
            except Exception as e:
                msg = 'Failed to download the retirejs database: "%s"'
                om.out.error(msg % e)
                return

            if http_response.get_code() != 200:
                msg = ('Failed to download the retirejs database, unexpected'
                       ' HTTP response code %s')
                om.out.error(msg % http_response.get_code())
                return

            om.out.debug('Successfully downloaded the latest retirejs DB')

            # delete=False keeps the file on disk after it is closed; the
            # context manager closes it even when writing the body fails
            with tempfile.NamedTemporaryFile(dir=get_temp_dir(),
                                             prefix='retirejs-db-',
                                             suffix='.json',
                                             delete=False,
                                             mode='wb') as db:
                json_db = http_response.get_raw_body()
                db.write(json_db)

            self._retire_db_filename = db.name