def newmreq(self):
    """ Build and return a brand-new MassRequest instance.

    Workaround for the iFuzzer class using self.mreq: hand out a freshly
    constructed object configured exactly like the original one.
    """
    config = self.mreq_config_dict
    return MassRequest(**config)
def __init__(self, targets=None, payloads=None, num_threads=10,
             time_per_url=10, request_timeout=10, proxy_list=None,
             hadoop_reporting=False, depreciated=None):
    """ Set up this WebFuzzer.

    targets           list of Target objects. Default [].
    payloads          list of Payload objects. Default [].
    num_threads       Number of threads/processes to launch as an int.
                      Default 10.
    time_per_url      Time in seconds to spend on each Target. Default 10.
    request_timeout   Time in seconds to wait for a connection before
                      giving up. Default 10.
    proxy_list        list of proxies specified as dicts. Default empty.
    hadoop_reporting  Output info for hadoop if True. Default False.
    depreciated       UNUSED. Kept for backward compatibility with older
                      callers.
    """
    super(WebFuzzer, self).__init__()
    # Keep the constructor settings around: checks that need their own
    # MassRequest (e.g. bsqli) must build one configured identically.
    self.mreq_config_dict = {
        "num_threads": num_threads,
        "time_per_url": time_per_url,
        "request_timeout": request_timeout,
        "proxy_list": proxy_list or [{}],
        "hadoop_reporting": hadoop_reporting,
    }
    self.mreq = MassRequest(**self.mreq_config_dict)
    self.targets = targets or []
    self.payloads = payloads or []
    # One checker instance per supported vulnerability class.
    self.mxi_check = MXICheck()
    self.osci_check = OSCICheck()
    self.sqli_check = SQLICheck()
    self.trav_check = TravCheck()
    self.xpathi_check = XPathICheck()
    self.xss_check = XSSCheck()
    self.hadoop_reporting = hadoop_reporting
    if self.hadoop_reporting:
        logger.info("Hadoop reporting set in fuzzer")
    self.fuzzy_targets = []
def fetch(self, num_threads=10, time_per_url=10, request_timeout=10,
          proxy_list=None):
    """Fetch URLs and append them to the seed list.

    num_threads      Number of worker threads. Default 10.
    time_per_url     Seconds to spend per target. Default 10.
    request_timeout  Seconds to wait for a connection. Default 10.
    proxy_list       list of proxy dicts; defaults to one empty
                     (no-proxy) entry.
    """
    # BUG FIX: the original default was the mutable literal [{}], which
    # is shared across all calls; use None as the sentinel instead.
    if proxy_list is None:
        proxy_list = [{}]
    self.mreq = MassRequest(num_threads=num_threads,
                            time_per_url=time_per_url,
                            request_timeout=request_timeout,
                            proxy_list=proxy_list,
                            hadoop_reporting=True)
    unfetched_targets = [
        unfetched_target for unfetched_target in self.targets
        if unfetched_target.status == "unfetched"
    ]
    for ut in unfetched_targets:
        logger.info("Fetching %s", ut)
    # NB: this only fetches via GET, doesn't submit forms for more links
    self.mreq.get_targets(self.targets)
    self.results = self.mreq.results
    for target in self.targets:
        target.status = "fetched"
def fetch(self, num_threads=10, time_per_url=10, request_timeout=10,
          proxy_list=None):
    """Fetch URLs and append them to the seed list.

    num_threads      Number of worker threads. Default 10.
    time_per_url     Seconds to spend per target. Default 10.
    request_timeout  Seconds to wait for a connection. Default 10.
    proxy_list       list of proxy dicts; defaults to one empty
                     (no-proxy) entry.
    """
    # BUG FIX: the original default was the mutable literal [{}], which
    # is shared across all calls; use None as the sentinel instead.
    if proxy_list is None:
        proxy_list = [{}]
    self.mreq = MassRequest(num_threads=num_threads,
                            time_per_url=time_per_url,
                            request_timeout=request_timeout,
                            proxy_list=proxy_list,
                            hadoop_reporting=True)
    unfetched_targets = [unfetched_target
                         for unfetched_target in self.targets
                         if unfetched_target.status == "unfetched"]
    for ut in unfetched_targets:
        logger.info("Fetching %s", ut)
    # NB: this only fetches via GET, doesn't submit forms for more links
    self.mreq.get_targets(self.targets)
    self.results = self.mreq.results
    for target in self.targets:
        target.status = "fetched"
>>> targets = [target_1, target_2, target_3]
>>> mr = MassRequest()
>>> mr.request_targets(targets)
>>> for r in mr.results:
...     print r
...
(<massweb.targets.target.Target object at 0x15496d0>, <Response [200]>)
(<massweb.targets.target.Target object at 0x1549650>, <Response [200]>)
(<massweb.targets.target.Target object at 0x1549490>, <Response [200]>)
>>> for target, response in mr.results:
...     print target, response.status_code
...
http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/ 200
http://www.hyperiongray.com/ 200
http://course.hyperiongray.com/vuln1 200``
"""
from massweb.mass_requests.mass_request import MassRequest
from massweb.targets.target import Target

# Demo: two POST targets carrying form data and one plain GET target.
target_1 = Target(url=u"http://course.hyperiongray.com/vuln1",
                  data={"password": "******"}, ttype="post")
target_2 = Target(url=u"http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/",
                  data={"how": "I'm good thx"}, ttype="post")
target_3 = Target(url=u"http://www.hyperiongray.com/", ttype="get")

# Request all targets through one MassRequest; results are collected as
# (Target, Response) pairs on mr.results.
targets = [target_1, target_2, target_3]
mr = MassRequest()
mr.request_targets(targets)
# NOTE(review): per the doctest above, results do not necessarily come
# back in submission order.
for result in mr.results:
    print result
for target, response in mr.results:
    print target, response.status_code
class MassCrawl(object):
    """ Crawl a set of seed URLs, collecting in-scope links and POST
    request targets for later fuzzing. """

    def __init__(self, seeds=None, add_seeds_to_scope=True):
        """ Initialize the crawler.

        seeds               list of seed URLs. Default [].
        add_seeds_to_scope  UNUSED flag; seed domains are always added to
                            scope (NOTE(review): the name also shadows the
                            method of the same name — confirm intent).
        """
        logger.info("Insantiating MassCrawl object")
        # BUG FIX: the original default was the mutable literal [],
        # shared across every instance created without seeds.
        if seeds is None:
            seeds = []
        self.seeds = seeds
        self.domains = []
        self.posts_identified = []
        self.targets = []
        self.results = []
        self.mreq = None
        self.add_seeds_to_scope(seeds)
        self.add_seeds_to_targets(seeds)

    def add_seeds_to_scope(self, seeds):
        """ Add the domain of every seed URL to the crawl scope. """
        for seed in seeds:
            self.add_to_scope_from_url(seed)

    def add_seeds_to_targets(self, seeds):
        """ Wrap every seed URL in a CrawlTarget and queue it. """
        for seed in seeds:
            ct = CrawlTarget(seed)
            self.add_target(ct)

    def get_domain_from_url(self, url):
        """ Return the hostname portion of url, with any :port removed. """
        domain_raw = urlparse(url).netloc
        if ":" in domain_raw:
            return domain_raw.split(":")[0]
        return domain_raw

    def add_to_scope_from_url(self, url):
        """ Put the url's domain in the crawl scope. """
        domain = self.get_domain_from_url(url)
        self.add_to_scope(domain)

    def add_to_scope(self, domain):
        """ Add domain to the scope list if not already present. """
        if domain not in self.domains:
            self.domains.append(domain)

    def in_scope(self, url):
        """ True if url's domain is in the crawl scope. """
        domain = self.get_domain_from_url(url)
        return domain in self.domains

    def add_target(self, target):
        """ Queue target for crawling unless it is already queued. """
        if target not in self.targets:
            self.targets.append(target)

    def parse_response(self, response, stay_in_scope=True, max_links=10):
        """ Extract up to max_links normalized links from response. """
        links = []
        for tag in BeautifulSoup(response.text, 'html.parser',
                                 parse_only=SoupStrainer(
                                     ['a', 'img', 'script', 'link'])):
            # BUG FIX: the original tested len(links) <= max_links before
            # appending, collecting max_links + 1 links; stop at the limit
            # (and stop iterating tags at all once it is reached).
            if len(links) >= max_links:
                break
            link = self.parse_tag(tag, response, stay_in_scope)
            if link:
                links.append(link)
        return links

    def parse_tag(self, tag, response, stay_in_scope):
        """ Return the normalized link held by tag, or None.

        Prefers href over src, skips mailto: links, and honors
        stay_in_scope.
        """
        href = None
        if tag.get('href'):
            href = tag.get('href')
        elif tag.get('src'):
            href = tag.get('src')
        if href and not href.startswith("mailto:"):
            link_normed = normalize_link(href, response.url)["norm_url"]
            if stay_in_scope:
                if self.in_scope(link_normed):
                    return link_normed
            else:
                return link_normed

    def dedupe_targets(self):
        """ Remove duplicate targets (by hash), keeping first occurrences. """
        # BUG FIX: the original popped items out of self.targets while
        # iterating over it, which skips the element following each
        # removal; build the deduplicated list instead.
        seen_hashes = set()
        deduped = []
        for target in self.targets:
            target_hash = hash(target)
            if target_hash in seen_hashes:
                logger.warn("Found duplicate target: %s", target)
            else:
                seen_hashes.add(target_hash)
                deduped.append(target)
        self.targets[:] = deduped

    def filter_targets_by_scope(self):
        """ Drop queued targets whose domain is not in scope. """
        #FIXME: !in large-scale crawls, there's some out of scope posts,
        # this is a hack to stop that, real issue should be found
        # and resolved
        logger.info("Filtering targets by scope")
        # BUG FIX: the original popped items while iterating self.targets,
        # skipping the element after each removal; filter into a new list.
        kept = []
        for target in self.targets:
            if self.in_scope(target.url):
                kept.append(target)
            else:
                logger.warn("Target filtered out that was not in scope: %s",
                            target.url)
        self.targets[:] = kept

    def fetch(self, num_threads=10, time_per_url=10, request_timeout=10,
              proxy_list=None):
        """Fetch URLs and append them to the seed list"""
        # BUG FIX: mutable default argument [{}] replaced by None sentinel.
        if proxy_list is None:
            proxy_list = [{}]
        self.mreq = MassRequest(num_threads=num_threads,
                                time_per_url=time_per_url,
                                request_timeout=request_timeout,
                                proxy_list=proxy_list,
                                hadoop_reporting=True)
        unfetched_targets = [
            unfetched_target for unfetched_target in self.targets
            if unfetched_target.status == "unfetched"
        ]
        for ut in unfetched_targets:
            logger.info("Fetching %s", ut)
        # NB: this only fetches via GET, doesn't submit forms for more links
        self.mreq.get_targets(self.targets)
        self.results = self.mreq.results
        for target in self.targets:
            target.status = "fetched"

    def parse(self, stay_in_scope=True, max_links=10):
        """ Parse fetched responses, queueing discovered links and POST
        requests as new CrawlTargets. """
        for target, response in self.results:
            # skip 40X replies and strings (i.e. failed requests)
            logger.info("Attempting to parse %s", target)
            try:
                response.raise_for_status()
            except (HTTPError, AttributeError):
                # HTTPError: bad status code; AttributeError: response is
                # a failure-marker string, not a Response object.
                logger.debug("Failed request.", exc_info=True)
                continue
            if parse_worthy(response, content_type_match="text/html",
                            hadoop_reporting=True):
                logger.info("pase_worthy function tells us to parse")
            else:
                logger.info("pase_worthy function tells us not to try"
                            " parsing")
                continue
            logger.info("Finding post requests on page %s", response.url)
            #FIXME: !this doesn't stay in scope?
            post_request_targets = find_post_requests(
                target=response.url, response_text=response.text)
            for target_post in post_request_targets:
                ct_post = CrawlTarget(target_post.url)
                ct_post.__dict__ = target_post.__dict__
                ct_post.status = "unfetched"
                self.add_target(ct_post)
            links = self.parse_response(response,
                                        stay_in_scope=stay_in_scope,
                                        max_links=max_links)
            for link in links:
                ct_link = CrawlTarget(unicode(link))
                self.add_target(ct_link)
            if stay_in_scope:
                self.filter_targets_by_scope()
            logger.info("Finished attempted parsing for %s", target)

    def crawl(self, depth=3, num_threads=10, time_per_url=10,
              request_timeout=10, proxy_list=None, stay_in_scope=True,
              max_links=20, dedupe=True):
        """ Run `depth` fetch/parse cycles over the target queue. """
        for _ in range(depth):
            logger.info("Entering the fetch phase at depth %d", depth)
            self.fetch(num_threads=num_threads, time_per_url=time_per_url,
                       request_timeout=request_timeout,
                       proxy_list=proxy_list or None)
            logger.info("Entering the parse phase at depth %d", depth)
            self.parse(max_links=max_links, stay_in_scope=stay_in_scope)
            if dedupe:
                self.dedupe_targets()
            if stay_in_scope:
                self.filter_targets_by_scope()
""" >>> from massweb.mass_requests.mass_request import MassRequest >>> urls_to_fetch = [u"http://www.hyperiongray.com", u"http://course.hyperiongray.com/vuln1/", u"http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/"] >>> mr = MassRequest() >>> mr.get_urls(urls_to_fetch) >>> for r in mr.results: ... print r ... ('http://www.hyperiongray.com', <Response [200]>) ('http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/', <Response [200]>) ('http://course.hyperiongray.com/vuln1/', <Response [200]>) """ from massweb.mass_requests.mass_request import MassRequest urls_to_fetch = [ u"http://www.hyperiongray.com", u"http://course.hyperiongray.com/vuln1/", u"http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/" ] mr = MassRequest() mr.get_urls(urls_to_fetch) for target, response in mr.results: print target, response
>>> urls_file = "urls.txt"
>>> proxies = [{"http": "user:password@http://proxy.example.com:1234/some/path"}, {"http": "otheruser:otherpassword@http://proxy.example.net:6789/someother/path"}]
>>> from massweb.mass_requests.mass_request import MassRequest
>>> mr = MassRequest(num_threads=20, time_per_url=2, proxy_list=proxies)
>>> mr.get_urls_from_file(urls_file)
>>> len(mr.results)
1000
>>> for target, response in mr.results[:10]:
...     print target, response
...
('http://www.abcselfstorage.co.uk/', '__PNK_REQ_FAILED')
('http://www.abcskiphirews32.co.uk/', '__PNK_REQ_FAILED')
('http://abcskateboarding.co.uk/', <Response [404]>)
('http://www.abcsalestraining.co.uk/', <Response [200]>)
('http://www.abcservice.co.uk/', <Response [200]>)
('http://www.abcseaangling.co.uk/', <Response [200]>)
('http://www.abcselfdrive.co.uk/', <Response [404]>)
('http://www.abcselfstore.co.uk/storage-blogwp-login.php?redirect_to=http%3A%2F%2Fwww.abcselfstore.co.uk%2Fstorage-blog%2Fwp-admin%2F&reauth=1', <Response [404]>)
('http://www.abcselfstore.co.uk/abc24-hour-access.html', <Response [200]>)
"""
urls_file = "example/urls.txt"
# Proxies are requests-style dicts mapping a scheme to a proxy URL.
proxies = [{"http": "user:password@http://proxy.example.com:1234/some/path"},
           {"http": "otheruser:otherpassword@http://proxy.example.net:6789/someother/path"}]

from massweb.mass_requests.mass_request import MassRequest

# Demo: bulk-fetch every URL listed in urls_file through the proxies.
mr = MassRequest(num_threads=20, time_per_url=2, proxy_list=proxies)
mr.get_urls_from_file(urls_file)
# NOTE(review): this bare len() call has no effect outside a REPL; kept
# to mirror the doctest above.
len(mr.results)
# Failed fetches appear as the string '__PNK_REQ_FAILED' in place of a
# Response object (see doctest output above).
for target, response in mr.results[:10]:
    print target, response
class WebFuzzer(iFuzzer):
    """ Fuzz a generated list of Targets.

    Generates lists of targets with associated payloads and runs them
    against the target systems.
    """

    def __init__(self, targets=None, payloads=None, num_threads=10,
                 time_per_url=10, request_timeout=10, proxy_list=None,
                 hadoop_reporting=False, depreciated=None):
        """ Initialize this WebFuzzer object.

        targets           list of Target objects. Default [].
        payloads          list of Payload objects. Default [].
        num_threads       Number of threads/processes to launch as an int.
                          Default 10.
        time_per_url      Time in seconds to spend on each Target.
                          Default 10.
        request_timeout   Time in seconds to wait for a connection before
                          giving up. Default 10.
        proxy_list        list of proxies specified as dicts. Default empty.
        hadoop_reporting  Output info for hadoop if True. Default False.
        depreciated       UNUSED. Kept for backward compatibility.
        """
        super(WebFuzzer, self).__init__()
        # do this because we may need to create more MassRequest objects in
        # checks (like bsqli), needs to be configured the same
        self.mreq_config_dict = {"num_threads": num_threads,
                                 "time_per_url": time_per_url,
                                 "request_timeout": request_timeout,
                                 "proxy_list": proxy_list or [{}],
                                 "hadoop_reporting": hadoop_reporting}
        self.mreq = MassRequest(**self.mreq_config_dict)
        self.targets = targets or []
        self.payloads = payloads or []
        # One checker instance per supported vulnerability class.
        self.mxi_check = MXICheck()
        self.osci_check = OSCICheck()
        self.sqli_check = SQLICheck()
        self.trav_check = TravCheck()
        self.xpathi_check = XPathICheck()
        self.xss_check = XSSCheck()
        self.hadoop_reporting = hadoop_reporting
        if self.hadoop_reporting:
            logger.info("Hadoop reporting set in fuzzer")
        self.fuzzy_targets = []

    def __generate_fuzzy_target_get(self, target):
        """ Associate fuzzing data for GET requests with the target.

        target  Target object.

        returns list of FuzzyTargets, one per (query param, payload) pair.
        """
        url = target.url
        parsed_url = urlparse(url)
        parsed_url_query = parsed_url.query
        url_q_dic = parse_qs(parsed_url_query)
        fuzzy_targets = []
        for query_param, _ in url_q_dic.iteritems():
            for payload in self.payloads:
                fuzzy_url = self.replace_param_value(url, query_param,
                                                     str(payload))
                fuzzy_target = FuzzyTarget(fuzzy_url, url, query_param, GET,
                                           payload=payload)
                logger.debug("GET fuzzy_target type: %s", type(fuzzy_target))
                fuzzy_targets.append(fuzzy_target)
        return fuzzy_targets

    def __generate_fuzzy_target_post(self, target):
        """ Associate fuzzing data for POST requests with the target.

        target  Target object.

        returns list of FuzzyTargets, one per (form field, payload) pair.
        """
        url = target.url
        fuzzy_targets = []
        post_keys = target.data.keys()
        for key in post_keys:
            data_copy = target.data.copy()
            for payload in self.payloads:
                data_copy[key] = str(payload)
                # data_copy.copy() so each FuzzyTarget keeps its own
                # snapshot; data_copy itself is mutated next iteration.
                fuzzy_target = FuzzyTarget(url, url, key, POST,
                                           data=data_copy.copy(),
                                           payload=payload,
                                           unfuzzed_data=target.data)
                logger.debug("POST fuzzy_target type: %s",
                             type(fuzzy_target))
                fuzzy_targets.append(fuzzy_target)
        return fuzzy_targets

    def generate_fuzzy_targets(self):
        """ Associate fuzzing data with the targets.

        returns list of FuzzyTargets (also stored on self.fuzzy_targets).

        raises ValueError if self.targets is empty or no fuzzy targets
        could be generated.
        """
        if self.hadoop_reporting:
            logger.info("Generating fuzzy targets")
        # If no targets then raise an exception
        if len(self.targets) == 0:
            raise ValueError("Targets list must not be empty!")
        self.fuzzy_targets = []
        for target in self.targets:
            logger.debug("input target type: %s", type(target))
            if target.ttype == "get":
                fuzzy_target_list = self.__generate_fuzzy_target_get(target)
                self.fuzzy_targets += fuzzy_target_list
            if target.ttype == "post":
                fuzzy_target_list = self.__generate_fuzzy_target_post(target)
                self.fuzzy_targets += fuzzy_target_list
        if not self.fuzzy_targets:
            # BUG FIX: the original passed the joined target list as an
            # extra positional argument (logging style), which is never
            # interpolated into a ValueError message; use % formatting.
            raise ValueError("fuzzy_targets is empty. No targets generated"
                             " from: %s"
                             % ','.join([str(x) for x in self.targets]))
        return self.fuzzy_targets

    def fuzz(self):
        """ Fuzz all the targets and return the results.

        returns list of Result objects.
        """
        self.mreq.request_targets(self.fuzzy_targets)
        results = []
        for target, response in self.mreq.results:
            #FIXME: Clarify with alex: !not yet multithreaded, should it be?
            logger.debug("target type: %s", type(target))
            try:
                result = self.analyze_response(target, response)
            except (TypeError, AttributeError) as err:
                # If request failed and str is returned instead of Response
                # obj could save some cycles here not analyzing response
                if self.hadoop_reporting:
                    logger.info("Marking target as failed due to exception: ",
                                exc_info=True)
                logger.debug(err)
                try:
                    # BUG FIX: the original passed the string
                    # "__PNK_FAILED_RESPONSE" as result_dic; string item
                    # assignment inside _make_failed_result then raised
                    # TypeError, so the failed Result was never recorded.
                    result = self._make_failed_result(target)
                except TypeError as err:
                    logger.debug("Failed to make a failed result for %s.",
                                 target)
                    logger.warn(err.message, exc_info=True)
                    continue
            results.append(result)
        return results

    def _make_failed_result(self, target, result_dic=None):
        """ Macro to make a failed Result.

        Marks every check type of the target's payload False in result_dic
        (a fresh dict when none is supplied) and wraps it in a Result.
        """
        if not result_dic:
            result_dic = {}
        for check_type in target.payload.check_type_list:
            result_dic[check_type] = False
        return Result(target, result_dic)

    def analyze_response(self, ftarget, response):
        """ Analyze the results of the request and return the info gathered.

        ftarget   FuzzyTarget object.
        response  requests.Response object.

        returns Result object.

        raises TypeError or AttributeError when non requests.Response is
        given as response.
        """
        #FIXME: Clarify with alex: !function is a mess, response is of type
        # text or non-text, trying to read blah blah
        result_dic = {}
        check_type_list = ftarget.payload.check_type_list
        if self.hadoop_reporting:
            logger.info("Response is of type %s for target %s.",
                        response.__class__.__name__, ftarget)
        worthy = parse_worthy(response,
                              hadoop_reporting=self.hadoop_reporting)
        if worthy:
            logger.info("FuzzyTarget %s looks worth checking for"
                        " vulnerabilities.", ftarget)
        else:
            logger.info("Response deemed non-parse-worthy. Setting all"
                        " checks in result_dic to False for %s", ftarget)
            return self._make_failed_result(ftarget)
        result_dic = self._run_checks(response, result_dic, check_type_list)
        return Result(ftarget, result_dic)

    def _run_checks(self, response, result_dic, check_type_list):
        """ Check response output with the specified checkers.

        response         requests.Response object.
        result_dic       dict with checker names as keys.
        check_type_list  list of names of checkers to check with.

        returns result_dic with one boolean entry per requested check.
        """
        # Dispatch table replaces the original if-statement cascade.
        checkers = (("mxi", self.mxi_check),
                    ("sqli", self.sqli_check),
                    ("xpathi", self.xpathi_check),
                    ("trav", self.trav_check),
                    ("osci", self.osci_check),
                    ("xss", self.xss_check))
        for check_name, checker in checkers:
            if check_name in check_type_list:
                result_dic[check_name] = checker.check(response.text)
        return result_dic
""" >>> from massweb.mass_requests.mass_request import MassRequest >>> urls_to_fetch = [u"http://www.hyperiongray.com", u"http://course.hyperiongray.com/vuln1/", u"http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/"] >>> mr = MassRequest() >>> mr.get_urls(urls_to_fetch) >>> for r in mr.results: ... print r ... ('http://www.hyperiongray.com', <Response [200]>) ('http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/', <Response [200]>) ('http://course.hyperiongray.com/vuln1/', <Response [200]>) """ from massweb.mass_requests.mass_request import MassRequest urls_to_fetch = [u"http://www.hyperiongray.com", u"http://course.hyperiongray.com/vuln1/", u"http://course.hyperiongray.com/vuln2/898538a7335fd8e6bac310f079ba3fd1/"] mr = MassRequest() mr.get_urls(urls_to_fetch) for target, response in mr.results: print target, response
class MassCrawl(object):
    """ Crawl a set of seed URLs, collecting in-scope links and POST
    request targets for later fuzzing. """

    def __init__(self, seeds=None, add_seeds_to_scope=True):
        """ Initialize the crawler.

        seeds               list of seed URLs. Default [].
        add_seeds_to_scope  UNUSED flag; seed domains are always added to
                            scope (NOTE(review): the name also shadows the
                            method of the same name — confirm intent).
        """
        logger.info("Insantiating MassCrawl object")
        # BUG FIX: the original default was the mutable literal [],
        # shared across every instance created without seeds.
        if seeds is None:
            seeds = []
        self.seeds = seeds
        self.domains = []
        self.posts_identified = []
        self.targets = []
        self.results = []
        self.mreq = None
        self.add_seeds_to_scope(seeds)
        self.add_seeds_to_targets(seeds)

    def add_seeds_to_scope(self, seeds):
        """ Add the domain of every seed URL to the crawl scope. """
        for seed in seeds:
            self.add_to_scope_from_url(seed)

    def add_seeds_to_targets(self, seeds):
        """ Wrap every seed URL in a CrawlTarget and queue it. """
        for seed in seeds:
            ct = CrawlTarget(seed)
            self.add_target(ct)

    def get_domain_from_url(self, url):
        """ Return the hostname portion of url, with any :port removed. """
        domain_raw = urlparse(url).netloc
        if ":" in domain_raw:
            return domain_raw.split(":")[0]
        return domain_raw

    def add_to_scope_from_url(self, url):
        """ Put the url's domain in the crawl scope. """
        domain = self.get_domain_from_url(url)
        self.add_to_scope(domain)

    def add_to_scope(self, domain):
        """ Add domain to the scope list if not already present. """
        if domain not in self.domains:
            self.domains.append(domain)

    def in_scope(self, url):
        """ True if url's domain is in the crawl scope. """
        domain = self.get_domain_from_url(url)
        return domain in self.domains

    def add_target(self, target):
        """ Queue target for crawling unless it is already queued. """
        if target not in self.targets:
            self.targets.append(target)

    def parse_response(self, response, stay_in_scope=True, max_links=10):
        """ Extract up to max_links normalized links from response. """
        links = []
        for tag in BeautifulSoup(response.text, 'html.parser',
                                 parse_only=SoupStrainer(
                                     ['a', 'img', 'script', 'link'])):
            # BUG FIX: the original tested len(links) <= max_links before
            # appending, collecting max_links + 1 links; stop at the limit
            # (and stop iterating tags at all once it is reached).
            if len(links) >= max_links:
                break
            link = self.parse_tag(tag, response, stay_in_scope)
            if link:
                links.append(link)
        return links

    def parse_tag(self, tag, response, stay_in_scope):
        """ Return the normalized link held by tag, or None.

        Prefers href over src, skips mailto: links, and honors
        stay_in_scope.
        """
        href = None
        if tag.get('href'):
            href = tag.get('href')
        elif tag.get('src'):
            href = tag.get('src')
        if href and not href.startswith("mailto:"):
            link_normed = normalize_link(href, response.url)["norm_url"]
            if stay_in_scope:
                if self.in_scope(link_normed):
                    return link_normed
            else:
                return link_normed

    def dedupe_targets(self):
        """ Remove duplicate targets (by hash), keeping first occurrences. """
        # BUG FIX: the original popped items out of self.targets while
        # iterating over it, which skips the element following each
        # removal; build the deduplicated list instead.
        seen_hashes = set()
        deduped = []
        for target in self.targets:
            target_hash = hash(target)
            if target_hash in seen_hashes:
                logger.warn("Found duplicate target: %s", target)
            else:
                seen_hashes.add(target_hash)
                deduped.append(target)
        self.targets[:] = deduped

    def filter_targets_by_scope(self):
        """ Drop queued targets whose domain is not in scope. """
        #FIXME: !in large-scale crawls, there's some out of scope posts,
        # this is a hack to stop that, real issue should be found
        # and resolved
        logger.info("Filtering targets by scope")
        # BUG FIX: the original popped items while iterating self.targets,
        # skipping the element after each removal; filter into a new list.
        kept = []
        for target in self.targets:
            if self.in_scope(target.url):
                kept.append(target)
            else:
                logger.warn("Target filtered out that was not in scope: %s",
                            target.url)
        self.targets[:] = kept

    def fetch(self, num_threads=10, time_per_url=10, request_timeout=10,
              proxy_list=None):
        """Fetch URLs and append them to the seed list"""
        # BUG FIX: mutable default argument [{}] replaced by None sentinel.
        if proxy_list is None:
            proxy_list = [{}]
        self.mreq = MassRequest(num_threads=num_threads,
                                time_per_url=time_per_url,
                                request_timeout=request_timeout,
                                proxy_list=proxy_list,
                                hadoop_reporting=True)
        unfetched_targets = [
            unfetched_target for unfetched_target in self.targets
            if unfetched_target.status == "unfetched"
        ]
        for ut in unfetched_targets:
            logger.info("Fetching %s", ut)
        # NB: this only fetches via GET, doesn't submit forms for more links
        self.mreq.get_targets(self.targets)
        self.results = self.mreq.results
        for target in self.targets:
            target.status = "fetched"

    def parse(self, stay_in_scope=True, max_links=10):
        """ Parse fetched responses, queueing discovered links and POST
        requests as new CrawlTargets. """
        for target, response in self.results:
            # skip 40X replies and strings (i.e. failed requests)
            logger.info("Attempting to parse %s", target)
            try:
                response.raise_for_status()
            except (HTTPError, AttributeError):
                # HTTPError: bad status code; AttributeError: response is
                # a failure-marker string, not a Response object.
                logger.debug("Failed request.", exc_info=True)
                continue
            if parse_worthy(response, content_type_match="text/html",
                            hadoop_reporting=True):
                logger.info("pase_worthy function tells us to parse")
            else:
                logger.info("pase_worthy function tells us not to try"
                            " parsing")
                continue
            logger.info("Finding post requests on page %s", response.url)
            #FIXME: !this doesn't stay in scope?
            post_request_targets = find_post_requests(
                target=response.url, response_text=response.text)
            for target_post in post_request_targets:
                ct_post = CrawlTarget(target_post.url)
                ct_post.__dict__ = target_post.__dict__
                ct_post.status = "unfetched"
                self.add_target(ct_post)
            links = self.parse_response(response,
                                        stay_in_scope=stay_in_scope,
                                        max_links=max_links)
            for link in links:
                ct_link = CrawlTarget(unicode(link))
                self.add_target(ct_link)
            if stay_in_scope:
                self.filter_targets_by_scope()
            logger.info("Finished attempted parsing for %s", target)

    def crawl(self, depth=3, num_threads=10, time_per_url=10,
              request_timeout=10, proxy_list=None, stay_in_scope=True,
              max_links=20, dedupe=True):
        """ Run `depth` fetch/parse cycles over the target queue. """
        for _ in range(depth):
            logger.info("Entering the fetch phase at depth %d", depth)
            self.fetch(num_threads=num_threads, time_per_url=time_per_url,
                       request_timeout=request_timeout,
                       proxy_list=proxy_list or None)
            logger.info("Entering the parse phase at depth %d", depth)
            self.parse(max_links=max_links, stay_in_scope=stay_in_scope)
            if dedupe:
                self.dedupe_targets()
            if stay_in_scope:
                self.filter_targets_by_scope()