def _url_path_url_generator(self, resp, fuzzable_req):
    """
    Generate one reference per parent directory of the response URL.

    Yields tuples containing:
        * Newly found URL
        * The FuzzableRequest instance passed as parameter
        * The HTTPResponse generated by the FuzzableRequest
        * Boolean indicating if we trust this reference or not

    For a URL such as http://localhost/a/b/c/f00.php this yields the
    URLs for /a/b/c/, /a/b/, /a/ and /, so that every directory level
    gets requested and analyzed as well.

    :param resp: HTTP response object
    :param fuzzable_req: The HTTP request that generated the response
    """
    directory_urls = resp.get_url().get_directories()

    # The last tuple item is always False for these synthesized
    # directory URLs (see the parallel generators in this plugin, where
    # it carries the "possibly broken" flag).
    for directory_url in unique_justseen(directory_urls):
        yield directory_url, fuzzable_req, resp, False
def end(self):
    """
    Called when the process ends, prints out the list of broken links.
    """
    if len(self._broken_links):
        header = ('The following is a list of broken links that '
                  'were found by the web_spider plugin:')
        om.out.information(header)

        broken_iter = unique_justseen(self._broken_links.ordered_iter())
        for broken_url, referrer in broken_iter:
            om.out.information('- %s [ referenced from: %s ]'
                               % (broken_url, referrer))

    # Always release the on-disk storage, even when nothing was broken
    self._broken_links.cleanup()
def end(self):
    """
    Called when the process ends, prints out the list of broken links.
    """
    if len(self._broken_links):
        om.out.information('The following is a list of broken links that'
                           ' were found by the web_spider plugin:')

        msg_fmt = '- %s [ referenced from: %s ]'
        for broken, where in unique_justseen(self._broken_links.ordered_iter()):
            om.out.information(msg_fmt % (broken, where))

    # Cleanup runs unconditionally so the backing store is released
    self._broken_links.cleanup()
def _body_url_generator(self, resp, fuzzable_req):
    """
    Extract new URLs from the HTTP response body and yield them.

    Yields tuples containing:
        * Newly found URL
        * The FuzzableRequest instance passed as parameter
        * The HTTPResponse generated by the FuzzableRequest
        * Boolean indicating if we trust this reference or not

    The newly found URLs are extracted from the http response body using
    one of the framework's parsers.

    :param resp: HTTP response object
    :param fuzzable_req: The HTTP request that generated the response
    """
    #
    # Note: I WANT to follow links that are in the 404 page, so there is
    # no early is_404() bail-out here.
    #
    try:
        document_parser = parser_cache.dpc.get_document_parser_for(resp)
    except BaseFrameworkException as bfe:
        om.out.debug('Failed to find a suitable document parser. '
                     'Exception "%s"' % bfe)
        return

    # parsed_refs come straight from the markup, so they are something
    # the developer really intended to add; regex_refs are regular
    # expression matches and in some cases are just false positives.
    parsed_refs, regex_refs = document_parser.get_references()

    directories = resp.get_url().get_directories()

    # References that ONLY showed up via regex (not in the parsed markup
    # and not a directory of the URL itself) are the untrusted ones.
    untrusted_refs = set(regex_refs) - set(directories + parsed_refs)

    # Hoisted out of the loop: one is_404() call per response
    response_is_404 = is_404(resp)

    every_ref = itertools.chain(parsed_refs, regex_refs)
    for reference in unique_justseen(sorted(every_ref)):
        suspect = response_is_404 or reference in untrusted_refs
        yield reference, fuzzable_req, resp, suspect
# I also want to analyze all directories, if the URL I just # fetched is: # http://localhost/a/b/c/f00.php I want to GET: # http://localhost/a/b/c/ # http://localhost/a/b/ # http://localhost/a/ # http://localhost/ # And analyze the responses... dirs = resp.get_url().get_directories() only_re_refs = set(re_refs) - set(dirs + parsed_refs) all_refs = itertools.chain(dirs, parsed_refs, re_refs) resp_is_404 = is_404(resp) for ref in unique_justseen(sorted(all_refs)): possibly_broken = resp_is_404 or (ref in only_re_refs) yield ref, fuzzable_req, resp, possibly_broken def _should_verify_extracted_url(self, ref, resp): """ :param ref: A newly found URL :param resp: The HTTP response where the URL was found :return: Boolean indicating if I should send this new reference to the core. """ # Ignore myself if ref == resp.get_uri(): return False
# I also want to analyze all directories, if the URL I just # fetched is: # http://localhost/a/b/c/f00.php I want to GET: # http://localhost/a/b/c/ # http://localhost/a/b/ # http://localhost/a/ # http://localhost/ # And analyze the responses... dirs = resp.get_url().get_directories() only_re_refs = set(re_refs) - set(dirs + parsed_refs) all_refs = itertools.chain(dirs, parsed_refs, re_refs) resp_is_404 = is_404(resp) for ref in unique_justseen(sorted(all_refs)): possibly_broken = resp_is_404 or (ref in only_re_refs) yield ref, fuzzable_req, resp, possibly_broken def _should_verify_extracted_url(self, ref, resp): """ :param ref: A newly found URL :param resp: The HTTP response where the URL was found :return: Boolean indicating if I should send this new reference to the core. """ # Ignore myself if ref == resp.get_uri(): return False
def _urls_to_verify_generator(self, resp, fuzzable_req):
    """
    Extract references from the response and yield the ones that pass
    the plugin's scope filters.

    Yields tuples containing:
        * Newly found URL
        * The FuzzableRequest instance passed as parameter
        * The original (pre-redirect) URI of the response
        * Boolean indicating if we trust this reference or not

    :param resp: HTTP response object
    :param fuzzable_req: The HTTP request that generated the response
    """
    #
    # Note: I WANT to follow links that are in the 404 page.
    #

    # Modified when I added the PDFParser
    # I had to add this x OR y stuff, just because I don't want
    # the SGML parser to analyze a image file, its useless and
    # consumes CPU power.
    if not (resp.is_text_or_html() or resp.is_pdf() or resp.is_swf()):
        return

    original_url = resp.get_redir_uri()

    try:
        doc_parser = parser_cache.dpc.get_document_parser_for(resp)
    except BaseFrameworkException as w3:
        # FIX: was the Python 2-only "except BaseFrameworkException, w3:"
        # form, which is a SyntaxError on Python 3 and inconsistent with
        # the "as" syntax used elsewhere in this file.
        om.out.debug('Failed to find a suitable document parser. '
                     'Exception "%s"' % w3)
        return

    # Note:
    # - With parsed_refs I'm 100% that it's really
    # something in the HTML that the developer intended to add.
    #
    # - The re_refs are the result of regular expressions,
    # which in some cases are just false positives.
    parsed_refs, re_refs = doc_parser.get_references()

    # I also want to analyze all directories, if the URL I just
    # fetched is:
    # http://localhost/a/b/c/f00.php I want to GET:
    # http://localhost/a/b/c/
    # http://localhost/a/b/
    # http://localhost/a/
    # http://localhost/
    # And analyze the responses...
    dirs = resp.get_url().get_directories()

    # References that ONLY came from a regex (not in the parsed markup,
    # not a directory of the URL) are the untrusted ones
    only_re_refs = set(re_refs) - set(dirs + parsed_refs)

    all_refs = itertools.chain(dirs, parsed_refs, re_refs)

    for ref in unique_justseen(sorted(all_refs)):
        # Ignore myself
        if ref == resp.get_uri():
            continue

        # I don't want w3af sending requests to 3rd parties!
        if ref.get_domain() != self._target_domain:
            continue

        # Filter the URLs according to the configured regular expressions
        urlstr = ref.url_string
        if not self._compiled_follow_re.match(urlstr) or \
                self._compiled_ignore_re.match(urlstr):
            continue

        # Merged the nested ifs into one guard clause (same behavior)
        if self._only_forward and not self._is_forward(ref):
            continue

        # Work with the parsed references and report broken links.
        # Then work with the regex references and DO NOT report
        # broken links.
        if self._need_more_variants(ref):
            self._known_variants.append(ref)
            possibly_broken = ref in only_re_refs
            yield ref, fuzzable_req, original_url, possibly_broken