def worker(self, url): try: print "fuzz url ===>" + url r = _requests( url, headers=headers, allow_redirects=True ) if isinstance(r, bool): return code = r.status_code text = r.text return_url = r.url return_headers = r.headers if return_url in self.return_urls or not text or text in self.return_texts: return elif self.stander.has_key('title'): match = url.split('/')[-2] re_rule = self.stander['title'] flag = re.findall(re_rule, text) if flag: if match in flag[0]: self.result.append(url) self.return_urls.add(return_url) self.return_texts.add(text) elif self.stander.has_key('Content-Type'): values = return_headers['Content-Type'] rule = self.stander['Content-Type'] flag = filter( lambda x: x in values, rule ) if flag: self.result.append(url) self.return_urls.add(return_url) self.return_texts.add(text) elif 'code' in self.stander: if code == self.stander['code']: self.result.append(url) self.return_urls.add(return_url) self.return_texts.add(text) else: texts = self.stander['text'] def calc_differece(t): from difflib import SequenceMatcher if SequenceMatcher(None, text, t).quick_ratio()\ > self.threshold: return True flag = any( map( calc_differece, texts ) ) if not flag: self.result.append(url) self.return_urls.add(return_url) self.return_texts.add(text) except: traceback.print_exc()
def crawl(self, link):
    """Fetch *link* and feed every URL extracted from its body into the
    shared pending-URL cache (self.cacheurls)."""
    try:
        response = _requests(link, headers=headers)
        # A bool return from _requests means the request failed.
        if isinstance(response, bool):
            return
        page_url = response.url
        page_body = response.text
        discovered = self.parse_content(page_body, page_url)
        self.cacheurls.update(discovered)
    except:
        traceback.print_exc()
def exploit_server_path(url):
    """Probe *url* for server-specific sensitive paths.

    Chooses a wordlist based on the target's response headers and fuzzes
    the site with it. Always returns a list — empty on any failure, since
    the finally-return swallows exceptions after logging them.
    """
    result = []
    try:
        standers = get_site_stander(url)
        response = _requests(url)
        wordlist = get_dict_by_server(response.headers)
        if wordlist:
            scanner = fuzz(url, wordlist, standers)
            result = scanner.scan()
    except:
        traceback.print_exc()
    finally:
        return result
def scan(self):
    """Crawl self.url breadth-first for self.depth rounds and return the
    accumulated set of discovered URLs (self.urls).

    Always returns self.urls — the finally-return swallows any exception
    after it has been logged.
    """
    try:
        self.target_domain = urlparse.urlparse(self.url).netloc
        r = _requests(self.url, headers=headers)
        # Guard added for consistency with worker()/crawl(): _requests
        # signals failure by returning a bool, and without this check the
        # attribute access below would raise AttributeError.
        if isinstance(r, bool):
            return self.urls
        current_url = r.url
        text = r.text
        self.links = self.parse_content(text, current_url)
        self.crawl_links = list(self.links)
        self.urls.update(self.links)
        for _ in xrange(self.depth):
            self.start()
    except:
        traceback.print_exc()
    finally:
        return self.urls