def rec(target):
    """Crawl a single URL: record it, harvest its forms, and queue in-scope links."""
    processed.add(target)
    # Build a fixed 40-character label so the '\r' progress line overwrites cleanly.
    label = '/'.join(target.split('/')[3:])
    if len(label) > 40:
        printableTarget = label[-40:]
    else:
        printableTarget = label.ljust(40)
    print('%s Parsing %s' % (run, printableTarget), end='\r')
    url = getUrl(target, True)
    params = getParams(target, '', True)
    if '=' in target:  # if there's a = in the url, there should be GET parameters
        inps = [{'name': name, 'value': value} for name, value in params.items()]
        forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
    response = requester(url, params, headers, True, delay, timeout).text
    forms.append(zetanize(response))
    for link in findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response):
        link = link.split('#')[0]  # drop in-page anchor fragments
        if link[:4] == 'http':
            if link.startswith(main_url):
                storage.add(link)
        elif link[:2] == '//':
            if link.split('/')[2].startswith(host):
                storage.add(schema + link)
        elif link[:1] == '/':
            storage.add(main_url + link)
        else:
            storage.add(main_url + '/' + link)
def rec(target):
    """Parse one crawled URL: collect its forms and enqueue same-scope links."""
    processed.add(target)
    shown = '/'.join(target.split('/')[3:])
    # Keep the status label exactly 40 characters wide for the \r status line.
    shown = shown[-40:] if len(shown) > 40 else shown + ' ' * (40 - len(shown))
    print('%s Parsing %s' % (run, shown), end='\r')
    url = getUrl(target, True)
    params = getParams(target, '', True)
    if '=' in target:  # query string present -> synthesize a GET "form" entry
        fields = []
        for key, val in params.items():
            fields.append({'name': key, 'value': val})
        forms.append({0: {'action': url, 'method': 'get', 'inputs': fields}})
    response = requester(url, params, headers, True, delay, timeout).text
    forms.append(zetanize(response))
    matches = findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
    for raw in matches:
        link = raw.split('#')[0]  # strip in-page anchor fragments
        if link.startswith('http'):
            if link.startswith(main_url):
                storage.add(link)
        elif link.startswith('//'):
            if link.split('/')[2].startswith(host):
                storage.add(schema + link)
        elif link.startswith('/'):
            storage.add(main_url + link)
        else:
            storage.add(main_url + '/' + link)
def rec(target):
    """Crawl target: register GET-parameter forms, parse HTML forms, queue links."""
    processed.add(target)
    print('%s Parsing %s' % (run, target))
    url = getUrl(target, '', True)
    params = getParams(target, '', True)
    if '=' in target:  # URL carries a query string -> record it as a GET form
        inputs = [{'name': k, 'value': v} for k, v in params.items()]
        forms.append({0: {'action': url, 'method': 'get', 'inputs': inputs}})
    response = requester(url, params, headers, True, 0).text
    forms.append(zetanize(response))
    for hit in findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response):
        # everything after '#' is an in-page anchor; discard it
        link = hit.split('#')[0]
        if link[:4] == 'http':
            if link.startswith(main_url):
                storage.add(link)
        elif link[:2] == '//':
            if link.split('/')[2].startswith(host):
                storage.add(schema + link)
        elif link[:1] == '/':
            storage.add(main_url + link)
        else:
            storage.add(main_url + '/' + link)
def rec(target):
    """Crawl one URL: fingerprint its tech/JS stack, collect forms, and queue
    links that resolve to the target host."""
    processed.add(target)
    # Pad/truncate to a fixed 60-char label so the \r line overwrites cleanly.
    label = (target + ' ' * 60)[:60]
    print('%s Parsing %-40s' % (run, label), end='\r')
    url = getUrl(target, True)
    params = getParams(target, '', True)
    if '=' in target:  # if there's a = in the url, there should be GET parameters
        fields = [{'name': n, 'value': v} for n, v in params.items()]
        forms.append({0: {'action': url, 'method': 'get', 'inputs': fields}})
    raw_response = requester(url, params, True)
    response = raw_response.text
    js = js_extractor(response)
    scripts = script_extractor(response)
    all_outdated_js.extend(retirejs(url, response, checkedScripts))
    all_techs.extend(wappalyzer(raw_response, js, scripts))
    forms.append(zetanize(response))
    anchor_pattern = r'<[aA][^>]*?(?:href|HREF)=[\'"`]?([^>]*?)[\'"`]?>'
    for m in re.finditer(anchor_pattern, response):
        # strip the in-page anchor fragment before resolving against `target`
        this_url = handle_anchor(target, m.group(1).split('#')[0])
        if urlparse(this_url).netloc == host:
            storage.add(this_url)
def rec(url):
    """Crawl a single URL: record it, extract its forms, and queue in-scope links.

    Side effects: adds `url` to `processed`, appends parsed forms to `forms`,
    and adds discovered same-host links to `storage`.
    """
    processed.add(url)
    urlPrint = (url + (' ' * 60))[:60]
    print('%s Parsing %-40s' % (run, urlPrint), end='\r')
    url = getUrl(url, '', True)
    params = getParams(url, '', True)
    if '=' in url:  # a query string means GET parameters: record them as a form
        inps = [{'name': name, 'value': value} for name, value in params.items()]
        forms.append(
            {url: {0: {'action': url, 'method': 'get', 'inputs': inps}}})
    response = requester(url, params, headers, True, 0).text
    forms.append({url: zetanize(url, response)})
    matches = findall(
        r'<[aA][^>]*?(href|HREF)=["\']{0,1}(.*?)["\']', response)
    for link in matches:  # iterate over the matches
        # remove everything after a "#" to deal with in-page anchors
        link = link[1].split('#')[0].lstrip(' ')
        if link[:4] == 'http':
            if link.startswith(main_url):
                storage.add(link)
        elif link[:2] == '//':
            if link.split('/')[2].startswith(host):
                storage.add(scheme + '://' + link)
        elif link[:1] == '/':
            storage.add(remove_file(url) + link)
        else:
            usable_url = remove_file(url)
            if usable_url.endswith('/'):
                storage.add(usable_url + link)
            else:
                # BUG FIX: the old `elif link.startswith('/')` branch that sat
                # here was unreachable -- any link starting with '/' is already
                # handled by the `link[:1] == '/'` case above.
                storage.add(usable_url + '/' + link)
def rec(target):
    """Crawl one URL, scan it for vulnerable JS components and DOM sinks,
    collect forms, and queue in-scope links.

    Returns a report dict: always carries 'vulnerable_components'; gains
    'url' and 'codes' when new potentially-vulnerable DOM code is found.
    """
    found_codes = []
    report = {}
    processed.add(target)
    label = '/'.join(target.split('/')[3:])
    # Normalise the progress label to exactly 40 characters.
    if len(label) > 40:
        label = label[-40:]
    else:
        label = label + ' ' * (40 - len(label))
    logger.run('Parsing %s\r' % label)
    url = getUrl(target, True)
    params = getParams(target, '', True)
    if '=' in target:  # if there's a = in the url, there should be GET parameters
        inps = [{'name': name, 'value': value} for name, value in params.items()]
        forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
    response = requester(url, params, headers, True, delay, timeout).text
    vulnerable_components = retireJs(url, response)
    print("===== Vulnerable Components ======")
    print(vulnerable_components)
    report['vulnerable_components'] = vulnerable_components
    if not skipDOM:
        highlighted = dom(response)
        # strip leading line numbers so identical snippets deduplicate
        clean_highlighted = ''.join(
            re.sub(r'^\d+\s+', '', line) for line in highlighted)
        if highlighted and clean_highlighted not in checkedDOMs:
            checkedDOMs.append(clean_highlighted)
            logger.good('Potentially vulnerable objects found at %s' % url)
            report['url'] = url
            logger.red_line(level='good')
            for line in highlighted:
                found_codes.append(clean_colors(line))
                logger.no_format(line, level='good')
            report['codes'] = found_codes
            logger.red_line(level='good')
    forms.append(zetanize(response))
    static_exts = ('.pdf', '.png', '.jpg', '.jpeg', '.xls', '.xml',
                   '.docx', '.doc')
    for link in re.findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response):
        link = link.split('#')[0]  # drop in-page anchors
        if link.endswith(static_exts):
            continue  # skip static documents/images
        if link.startswith('http'):
            if link.startswith(main_url):
                storage.add(link)
        elif link.startswith('//'):
            if link.split('/')[2].startswith(host):
                storage.add(schema + link)
        elif link.startswith('/'):
            storage.add(main_url + link)
        else:
            storage.add(main_url + '/' + link)
    return report
def extractForms(url):
    """Fetch `url`, parse its forms, and harvest strong pre-filled input values.

    Any input value made only of word characters/hyphens whose strength()
    exceeds 10 is treated as a candidate token and appended to the global
    `simTokens` list.
    """
    response = requester(url, {}, headers, True, 0).text
    parsed = zetanize(url, response)
    # BUG FIX: removed `localTokens = set()` -- it was assigned per form and
    # never read.
    for each in parsed.values():
        for inp in each['inputs']:
            value = inp['value']
            # keep only plausible token-like values (e.g. anti-CSRF tokens)
            if value and match(r'^[\w\-_]+$', value):
                if strength(value) > 10:
                    simTokens.append(value)
def rec(target):
    """Parse one URL: record its GET parameters as a form and parse HTML forms."""
    print('%s Parsing %s' % (run, target))
    url = getUrl(target, '', True)
    params = getParams(target, '', True)
    if '=' in target:  # query string present -> synthesize a GET form entry
        inputs = [{'name': k, 'value': v} for k, v in params.items()]
        forms.append({0: {'action': url, 'method': 'get', 'inputs': inputs}})
    response = requester(url, params, headers, True, 0).text
    forms.append(zetanize(response))
def rec(target):
    """Crawl one URL: run retire.js and DOM-XSS checks, collect forms and links."""
    processed.add(target)
    shown = "/".join(target.split("/")[3:])
    # Fixed 40-char label for the logger's progress line.
    if len(shown) > 40:
        shown = shown[-40:]
    else:
        shown = shown + " " * (40 - len(shown))
    logger.run("Parsing %s\r" % shown)
    url = getUrl(target, True)
    params = getParams(target, "", True)
    if "=" in target:  # query string -> synthesize a GET form entry
        fields = [{"name": k, "value": v} for k, v in params.items()]
        forms.append({0: {"action": url, "method": "get", "inputs": fields}})
    response = requester(url, params, headers, True, delay, timeout).text
    retireJs(url, response)
    if not skipDOM:
        highlighted = dom(response)
        # strip the leading line numbers so identical snippets deduplicate
        clean = "".join(re.sub(r"^\d+\s+", "", line) for line in highlighted)
        if highlighted and clean not in checkedDOMs:
            checkedDOMs.append(clean)
            logger.good("Potentially vulnerable objects found at %s" % url)
            logger.red_line(level="good")
            for line in highlighted:
                logger.no_format(line, level="good")
            logger.red_line(level="good")
    forms.append(zetanize(response))
    for link in re.findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response):
        link = link.split("#")[0]  # drop in-page anchors
        if link.startswith("http"):
            if link.startswith(main_url):
                storage.add(link)
        elif link.startswith("//"):
            if link.split("/")[2].startswith(host):
                storage.add(schema + link)
        elif link.startswith("/"):
            storage.add(main_url + link)
        else:
            storage.add(main_url + "/" + link)
def photon(main_url, url, headers):
    """Minimal crawler: fetch the seed `url`, collect its forms and in-scope
    links, then parse every discovered link concurrently.

    Returns the list of parsed form dictionaries.
    """
    urls = set()   # in-scope urls found on the seed page
    forms = []     # web forms
    # NOTE: the old `processed` / `storage` sets were created here but never
    # read or written in this variant; they have been removed as dead locals.
    host = urlparse(url).netloc
    url = getUrl(url, '', True)
    schema = urlparse(main_url).scheme
    params = getParams(url, '', True)
    response = requester(url, params, headers, True, 0).text
    forms.append(zetanize(response))
    matches = findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
    for link in matches:  # iterate over the matches
        # remove everything after a "#" to deal with in-page anchors
        link = link.split('#')[0]
        if link[:4] == 'http':
            if link.startswith(main_url):
                urls.add(link)
        elif link[:2] == '//':
            if link.split('/')[2].startswith(host):
                urls.add(schema + link)
        elif link[:1] == '/':
            urls.add(main_url + link)
        else:
            urls.add(main_url + '/' + link)

    def rec(target):
        # Parse one discovered URL: record GET parameters and HTML forms.
        print('%s Parsing %s' % (run, target))
        url = getUrl(target, '', True)
        params = getParams(target, '', True)
        if '=' in target:
            inps = [{'name': name, 'value': value}
                    for name, value in params.items()]
            forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
        response = requester(url, params, headers, True, 0).text
        forms.append(zetanize(response))

    from core.config import threadCount
    # BUG FIX: the executor was never shut down; the context manager joins
    # the worker threads once all submitted futures have completed.
    with concurrent.futures.ThreadPoolExecutor(max_workers=threadCount) as pool:
        futures = [pool.submit(rec, each) for each in urls]
        for _ in concurrent.futures.as_completed(futures):
            pass
    return forms
print('%s Checking if the resonse is dynamic.' % run) response = requester(origUrl, origData, headers, origGET, 0) secondLength = len(response.text) if originalLength != secondLength: print('%s Response is dynamic.' % info) tolerableDifference = abs(originalLength - secondLength) else: print('%s Response isn\'t dynamic.' % info) tolerableDifference = 0 print('%s Emulating a mobile browser' % run) print('%s Making a request with mobile browser' % run) headers[ 'User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows CE; PPC; 240x320)' response = requester(origUrl, {}, headers, True, 0).text parsed = zetanize(origUrl, response) if isProtected(parsed): print('%s CSRF protection is enabled for mobile browsers as well.' % bad) else: print('%s CSRF protection isn\'t enabled for mobile browsers.' % good) print('%s Making a request without CSRF token parameter.' % run) data = tweaker(origData, 'remove') response = requester(origUrl, data, headers, origGET, 0) if response.status_code == originalCode: if str(originalCode)[0] in ['4', '5']: print('%s It didn\'t work' % bad) else: difference = abs(originalLength - len(response.text)) if difference <= tolerableDifference: