def _get_data_external_links(scripts, driver=None):
    """
    @param scripts: a list of HTML internal scripts and external script links (src)
    @returns: an ordered list containing inline scripts and the contents of the REACHABLE external script links
    """
    data = []
    if driver is None:
        # use python requests
        for item in scripts:
            script_type = item[0]
            if script_type == "external_script":
                link = item[1]
                d = RequesterModule.requester(link)
                if RequesterModule.is_http_response_valid(d):
                    d_str = str(d).strip()
                    # ignore the case where the resource is HTML, e.g., non-authenticated access via python requests
                    if (not d_str.startswith("""<!doctype html>""")) and ('doctype html' not in d_str):
                        data.append([script_type, d])
                    else:
                        # no valid content
                        if constantsModule.DEBUG_PRINTS:
                            print("+ InvalidResourceURL encountered!")
                        continue
            else:
                data.append(item)
        return data
    else:
        # use browser
        for item in scripts:
            script_type = item[0]
            if script_type == "external_script":
                link = item[1]
                current_handle = driver.current_window_handle
                driver.execute_script("""window.open('', '_blank')""")  # new tab
                time.sleep(1)
                driver.switch_to_window(driver.window_handles[1])
                driver.get(link)
                time.sleep(1)
                d = driver.page_source
                driver.close()  # closes the new tab
                driver.switch_to_window(current_handle)

                dp = BeautifulSoup(d, 'html.parser')
                d_str = dp.find('pre', recursive=True)  # js is rendered in a pre tag in chrome
                if d_str is None:
                    continue
                else:
                    d_str = d_str.text  # get the 'pre' tag content
                    # ignore the case where the resource is HTML, e.g., non-authenticated access via python requests
                    if not d_str.startswith("""<!doctype html>"""):
                        data.append([script_type, d_str])
                    else:
                        # no valid content
                        if constantsModule.DEBUG_PRINTS:
                            print("+ InvalidResourceURL encountered!")
                        continue
            else:
                data.append(item)
        return data
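# --- Usage sketch (illustrative, not part of the crawler): the `scripts` argument is assumed
# to follow the [type, payload] shape read above, where "external_script" entries carry the
# src URL and "internal_script" entries carry inline code; the URL below is a placeholder.
def _example_resolve_external_scripts():
    scripts = [
        ["internal_script", "console.log('inline');"],
        ["external_script", "https://example.com/static/app.js"],  # placeholder URL
    ]
    # without a driver handle, external links are fetched with python requests
    resolved = _get_data_external_links(scripts)
    for script_type, content in resolved:
        print(script_type, len(str(content)))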
def get_external_resource(resource_url):
    """
    @param {string} resource_url
    @return {string} http response if valid, o/w empty string
    """
    response = RequesterModule.requester(resource_url)
    if RequesterModule.is_http_response_valid(response):
        return response
    return ''
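# --- Usage sketch (illustrative): get_external_resource returns '' for unreachable or invalid
# responses, so callers can branch on truthiness; the URL is a placeholder.
def _example_fetch_resource():
    body = get_external_resource("https://example.com/static/vendor.js")
    if body:
        print("fetched %d characters" % len(str(body)))
    else:
        print("resource unreachable or invalid")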
def crawl(scheme, host, main_url, form, headers, delay, timeout):
    if form:  # 'form' is the collection of forms extracted from the returned page
        for each in form.values():
            url = each['action']
            url = main_url
            if url:
                # if url.startswith(main_url):
                #     pass
                # elif url.startswith('//') and url[2:].startswith(host):
                #     url = scheme + '://' + url[2:]
                # elif url.startswith('/'):
                #     url = scheme + '://' + host + url
                if url not in config.globalVariables['checkedForms']:
                    config.globalVariables['checkedForms'][url] = []
                method = each['method']
                GET = True if method == 'get' else False
                inputs = each['inputs']  # the collection of input tags inside this form
                Scan_area.insert(END, inputs)
                paramData = {}
                for one in inputs:
                    paramData[one['name']] = one['value']
                for paramName in paramData.keys():
                    if paramName not in config.globalVariables['checkedForms'][url]:
                        config.globalVariables['checkedForms'][url].append(paramName)
                        paramsCopy = copy.deepcopy(paramData)
                        paramsCopy[paramName] = xsschecker
                        response = requester(url, paramsCopy, headers, GET, delay, timeout)  # send the request
                        # Scan_area.insert(END, response.text)
                        occurences = htmlParser(response, False)  # context information of the reflection points in the HTML page
                        positions = occurences.keys()  # positions of the injection points
                        # fuzzing: score how much of the XSS probe survives filtering
                        efficiences = filterChecker(url, paramsCopy, headers, GET, delay, occurences, timeout, False)
                        vectors = generator(occurences, response.text)  # generate attack vectors
                        # data structure storing the attack vectors
                        payloads = []
                        if vectors:
                            for confidence, vects in vectors.items():
                                try:
                                    payload = list(vects)[0]
                                    s = "this is payload area"
                                    # Scan_area.insert(END, s)
                                    Scan_area.insert(END, payload)
                                    Scan_area.insert(END, '\n')
                                    payloads.append(payload)
                                    break
                                except IndexError:
                                    pass
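# --- Illustrative input for crawl(): the `form` argument is assumed to be a dict of form
# descriptors with 'action', 'method', and 'inputs' keys, matching the structure built by
# rec() further below; the URL and field names are placeholders.
def _example_crawl_form():
    example_form = {
        0: {
            'action': 'http://testphp.example/search.php',
            'method': 'get',
            'inputs': [
                {'name': 'searchFor', 'value': 'query'},
                {'name': 'goButton', 'value': 'go'},
            ],
        }
    }
    crawl('http', 'testphp.example', 'http://testphp.example', example_form,
          headers={}, delay=0, timeout=10)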
def main_data_collection():
    args = sys.argv
    if len(args) > 1:
        low = int(args[1])
        high = low
        if len(args) > 2:
            high = int(args[2])

        for i in range(low, high + 1):
            site_id = str(i)  # process each site id in the [low, high] range

            # 1. get saved URLs or find URLs if needed
            urls = get_site_urls(site_id)

            # 2. collect js and data of the site, for each URL found
            if CrawlerConfig.PLATFORM == "linux":
                display = Display(visible=0, size=(800, 600))
                display.start()

            driver = seleniumModule.get_new_browser(xhr_logger=True, event_logger=True, headless_mode=False)

            ## load predefined states into the browser (e.g., login)
            driver = CrawlerModule.get_logged_driver(driver, site_id)

            for navigation_url in urls:
                # crawlerUtilityModule.collect_site_data(site_id, navigation_url, driver)
                d = RequesterModule.requester(navigation_url)
                ## check if the site base address is reachable
                if RequesterModule.is_http_response_valid(d):
                    try:
                        crawlerUtilityModule.collect_site_data(site_id, navigation_url, driver)
                    except BaseException as error:
                        print('chrome ran into an error for site: %s' % site_id)
                        driver = seleniumModule.get_new_browser(xhr_logger=True, event_logger=True, headless_mode=False)
                        continue
                else:
                    continue

            if CrawlerConfig.PLATFORM == "linux":
                display.stop()
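# --- Hypothetical invocation (illustrative): the positional arguments parsed above are assumed
# to be an inclusive site-id range; the module/file name simulated below is a placeholder.
def _example_collect_range():
    # simulate `python3 crawl_sites.py 1 5`, i.e., crawl site ids 1 through 5
    sys.argv = ["crawl_sites.py", "1", "5"]
    main_data_collection()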
def check_live(proxy):
    check_ip = "http://httpbin.org/ip"
    ip = proxy[0] + ":" + proxy[1]
    try:
        response = requester(check_ip, data=None, timeout=3, GET=True, proxy=ip)
        if response is not None:
            if proxy[0] in response.text:
                return True
            return False
        return False
    except ConnectTimeoutError:
        return False
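# --- Usage sketch (illustrative): each proxy is assumed to be an (ip, port) pair of strings,
# as implied by the indexing above; the addresses are placeholders.
def _example_filter_live_proxies():
    candidate_proxies = [
        ("203.0.113.10", "8080"),
        ("198.51.100.7", "3128"),
    ]
    live = [p for p in candidate_proxies if check_live(p)]
    print("usable proxies:", live)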
def checker(url, params, headers, GET, delay, payload, positions, timeout, encoding):
    checkString = 'st4r7s' + payload + '3nd'
    if encoding:
        checkString = encoding(unquote(checkString))  # undo the urlencoding before applying the encoder
    response = requester(url, replaceValue(params, xsschecker, checkString, copy.deepcopy),
                         headers, GET, delay, timeout).text.lower()
    reflectedPositions = []
    for match in re.finditer('st4r7s', response):
        reflectedPositions.append(match.start())  # record the character offset of every match
    filledPositions = fillHoles(positions, reflectedPositions)
    # fillHoles pads the positions that were filtered out entirely: some sites drop the whole
    # string when they see a dangerous character, so the scanner would otherwise see one
    # reflection less, i.e. reflectedPositions is shorter than positions.
    # Iterating over the reflections
    num = 0
    efficiences = []
    for position in filledPositions:
        allEfficiencies = []
        try:
            reflected = response[reflectedPositions[num]:reflectedPositions[num] + len(checkString)]
            # the scoring happens here; fuzz.partial_ratio is a string-similarity function
            efficiency = fuzz.partial_ratio(reflected, checkString.lower())
            allEfficiencies.append(efficiency)
        except IndexError:
            pass
        if position:
            reflected = response[position:position + len(checkString)]
            if encoding:
                checkString = encoding(checkString.lower())
            efficiency = fuzz.partial_ratio(reflected, checkString)
            if reflected[:-2] == ('\\%s' % checkString.replace('st4r7s', '').replace('3nd', '')):
                efficiency = 90
            allEfficiencies.append(efficiency)
            efficiences.append(max(allEfficiencies))
        else:
            efficiences.append(0)
        num += 1
    return list(filter(None, efficiences))  # drop 0 or '' entries
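# --- Sketch of the fillHoles idea described in the comment above (not necessarily the
# project's actual implementation): pad the observed reflection offsets with 0 wherever an
# expected reflection was filtered out, so both lists keep the same length.
def fillHoles_sketch(expected_positions, reflected_positions):
    remaining = list(reflected_positions)
    filled = []
    for _ in expected_positions:
        # reuse a real offset while one is available, otherwise mark the hole with 0
        filled.append(remaining.pop(0) if remaining else 0)
    return filled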
def rec(target):
    processed.add(target)
    url = getUrl(target, True)
    params = getParams(target, '', True)  # extract the parameters
    if '=' in target:  # if there's a = in the url, there should be GET parameters
        inps = []
        for name, value in params.items():
            inps.append({'name': name, 'value': value})
        forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
    response = requester(url, params, headers, True, delay, timeout).text
    # retireJs(url, response)  # check whether the <script> tags contain known-vulnerable libraries
    # if not skipDOM:
    #     highlighted = dom(response)
    #     clean_highlighted = ''.join([re.sub(r'^\d+\s+', '', line) for line in highlighted])
    #     if highlighted and clean_highlighted not in checkedDOMs:
    #         checkedDOMs.append(clean_highlighted)
    #         logger.good('Potentially vulnerable objects found at %s' % url)
    #         logger.red_line(level='good')
    #         for line in highlighted:
    #             logger.no_format(line, level='good')
    #         logger.red_line(level='good')
    forms.append(get_form(response))  # extract all form elements from the response
    matches = re.findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
    for link in matches:  # iterate over the matches
        # remove everything after a "#" to deal with in-page anchors
        link = link.split('#')[0]
        if link.endswith(('.pdf', '.png', '.jpg', '.jpeg', '.xls', '.xml', '.docx', '.doc')):
            pass
        else:
            if link[:4] == 'http':
                if link.startswith(main_url):
                    storage.add(link)
            elif link[:2] == '//':
                if link.split('/')[2].startswith(host):
                    storage.add(schema + link)
            elif link[:1] == '/':
                storage.add(main_url + link)
            else:
                storage.add(main_url + '/' + link)
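# --- Alternative sketch (not used by rec() above): the branch ladder over 'http', '//' and '/'
# prefixes can also be expressed with the standard library's urljoin; the base URL below is a
# placeholder.
from urllib.parse import urljoin, urlparse

def _normalize_link_sketch(base_url, link):
    """Resolve a scraped href against the page URL and keep only same-host results."""
    absolute = urljoin(base_url, link.split('#')[0])
    if urlparse(absolute).netloc == urlparse(base_url).netloc:
        return absolute
    return None

# _normalize_link_sketch("http://example.com/shop/", "/cart?id=1") -> "http://example.com/cart?id=1"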
def change_names(contest_id, names):
    auth = get_auth()
    with requester(
        cookie_filename=os.path.join(os.path.dirname(__file__), 'cookies', auth['username'] + '.cookie'),
        caching=False,
    ) as req:
        url = f'https://contest.yandex.ru/admin/contest-participants?contestId={contest_id}&forceOldPages=true'
        page = req.get(url)

        # auth
        form = req.form(action=None, limit=1)
        if form and form['method'] == 'post' and 'login' in form['post'] and 'retpath' in form['post']:
            page = req.submit_form(
                form=form,
                data={'login': auth['username'], 'passwd': auth['password']},
                url='https://passport.yandex.ru/auth/',
            )

        form = req.form(action=None, fid='change-displayed-names-form', enctype=True)
        names = '\n'.join(f'''{r['login']} {r['name']}''' for r in names)
        page = req.submit_form(
            form=form,
            data={
                'names': names,
                'files__': {'file': {'filename': 'file', 'content': ''}},
            },
            url='https://contest.yandex.ru/admin/contest-participants/change-names',
        )
        return page
import os
import re
import sys
import signal

from requester import requester

quiz_nr = 10
url = "http://127.0.0.1/?q=r&n=%i" % quiz_nr
word_list = "words.txt"
stop = False

reqs = requester(url)


def tryIt(test_word):
    try:
        global stop, quiz_nr
        if stop:
            return
        print(test_word)
        test_word = test_word.strip()
        test_word = test_word.lower()
        data = {}
        data['no'] = str(quiz_nr)
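# --- Hypothetical driver loop (illustrative): tryIt's body is cut off above, but it is assumed
# to eventually submit `data` and set `stop` on a hit; this only shows how words.txt would be fed in.
def _example_run_wordlist():
    with open(word_list) as fh:
        for word in fh:
            if stop:
                break
            tryIt(word)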
def collect_site_data(site_id, url, driver, out_path=CrawlerConfig.OUTPUT_DATA_DIRECTORY):
    """
    @param {string} site_id
    @param {string} url
    @param {object} driver: selenium driver handle
    @param {string} out_path
    @return {bool} whether or not the operation has succeeded
    """

    # possible return values
    ERR_INVALID_URL = False
    SUCCESS = True

    # created output file names
    NAME_HTML_UNRENDERED = "html_initial.html"
    NAME_HTML_RENDERED = "html_rendered.html"
    NAME_JS_PROGRAM = "js_program.js"
    NAME_URL_FILE = "navigation_url.out"
    NAME_DOCUMENT_PROPS = 'document_props.out'
    NAME_DOCUMENT_PROPS_MACHINE = 'document_props_short.out'
    NAME_LIBRARIES_FOLDER = "libraries"
    NAME_XHR_LOGS = "request_logs.out"
    NAME_XHR_LOGS_MACHINE = "request_logs_short.out"
    NAME_COOKIE_FILE = "cookies.pkl"
    NAME_COOKIE_FILE_STR = "cookies_str.out"
    NAME_FIRED_EVENTS = "events.out"
    NAME_FIRED_EVENTS_PICKLE = "events_pickle.pkl"

    # prepare save path directories
    # site_map_name = sitesmapModule.get_site_data(site_id)[0]
    output_folder_of_this_site = os.path.join(out_path, str(site_id))
    folder_name_of_this_page = _hash(url)
    output_folder_path_name_of_this_page = os.path.join(output_folder_of_this_site, folder_name_of_this_page)
    if not os.path.exists(output_folder_path_name_of_this_page):
        os.makedirs(output_folder_path_name_of_this_page)

    # save the navigation url
    path_name_navigation_url = os.path.join(output_folder_path_name_of_this_page, NAME_URL_FILE)
    with open(path_name_navigation_url, "wb") as fp:
        fp.write(url.encode('utf-8'))

    # step 2: capture the rendered HTML page and JS
    dynamic_data = DOMCollectorModule.get_dynamic_data(site_id, url, driver, close_conn=False)
    if dynamic_data is None:
        return ERR_INVALID_URL

    time.sleep(1)

    html_content = dynamic_data[0]
    soup_content = dynamic_data[1]

    js_of_page = DOMCollectorModule.combine_js_scripts(dynamic_data)
    inline_js_of_html = DOMCollectorModule.process_inline_dom_javascript(html_content, soup_content)

    # capture xhr requests via extension for later use
    xhr_logs = seleniumModule.get_xhr_logger_extension_data(driver)

    # cookies
    cookies = driver.get_cookies()

    # DOM level 3 spec: first inline HTML events are fired, then others
    path_name_js_program = os.path.join(output_folder_path_name_of_this_page, NAME_JS_PROGRAM)
    with open(path_name_js_program, "wb") as fp:
        fp.write(inline_js_of_html.encode('utf-8'))
        fp.write(b'\n')
        fp.write(js_of_page.encode('utf-8'))
    _beautify_js(path_name_js_program)

    path_name_html_rendered = os.path.join(output_folder_path_name_of_this_page, NAME_HTML_RENDERED)
    with open(path_name_html_rendered, "wb") as fp:
        fp.write(html_content.encode('utf-8'))
        fp.write(b'\n')

    # store individual script files
    scripts_folder = os.path.join(output_folder_path_name_of_this_page, "scripts")
    if not os.path.exists(scripts_folder):
        os.makedirs(scripts_folder)

    script_files = dynamic_data[2]
    script_files_counter = 0
    mappings = {}
    writeMapping = False
    for item in script_files:
        script_files_counter += 1
        script_content = item[1]
        if len(script_content.strip()) == 0:
            continue
        if item[0] == 'internal_script':
            # remove HTML comment obfuscation at the start and end of inline script tags, i.e., <!-- and -->
            script_content = script_content.strip().lstrip('<!--').rstrip('-->')
        else:
            link = item[2]
            mappings[script_files_counter] = link
            writeMapping = True
        script_save_file_name = os.path.join(scripts_folder, str(script_files_counter) + '.js')
        with open(script_save_file_name, "w+") as fd:
            fd.write(script_content)
        _beautify_js(script_save_file_name)

    if writeMapping:
        with open(os.path.join(scripts_folder, "mappings.json"), 'w+', encoding='utf-8') as fd:
            json.dump(mappings, fd, ensure_ascii=False, indent=4)

    # step 3: save library files
    lib_links_dictionary = dynamic_data[3]
    library_output_folder_of_this_site = os.path.join(output_folder_path_name_of_this_page, NAME_LIBRARIES_FOLDER)
    _save_program_libraries(library_output_folder_of_this_site, lib_links_dictionary)

    # create timestamp for reports
    timestamp = get_current_timestamp()
    sep = get_output_header_sep()
    sep_templates = get_output_subheader_sep()

    # step 4: save document and form variables (accessible through document.form_name.input_name)
    document_form_variables = HTMLParserModule.get_document_properties_from_html(soup_content)
    path_name_document_props = os.path.join(output_folder_path_name_of_this_page, NAME_DOCUMENT_PROPS)
    with open(path_name_document_props, 'w+') as fd:
        fd.write(sep)
        fd.write('[timestamp] generated on %s\n' % timestamp)
        fd.write('[description] defined properties in HTML for \'document\' DOM API\n')
        fd.write(sep + '\n\n')
        for counter, elm in enumerate(document_form_variables, start=1):
            fd.write("(%s): %s\n" % (counter, elm))

    path_name_document_props_machine = os.path.join(output_folder_path_name_of_this_page, NAME_DOCUMENT_PROPS_MACHINE)
    with open(path_name_document_props_machine, 'w+') as fd:
        fd.write(str(document_form_variables))

    # step 5: save captured onload requests via extension
    without_data_reqs = xhr_logs['without_data']  # no formData
    with_data_reqs = xhr_logs['with_data']  # also contains formData
    succ_reqs = xhr_logs['succ']  # all successfully accepted requests with 2xx

    path_name_xhr_logs_machine = os.path.join(output_folder_path_name_of_this_page, NAME_XHR_LOGS_MACHINE)
    with open(path_name_xhr_logs_machine, "w+") as fp:
        fp.write(str(xhr_logs))

    # save also a nicer, human-readable version
    path_name_xhr_logs = os.path.join(output_folder_path_name_of_this_page, NAME_XHR_LOGS)
    with open(path_name_xhr_logs, "w+") as fp:
        for each_request in without_data_reqs:
            try:
                if isinstance(each_request, dict):
                    xhr_url = each_request['url']
                    xhr_url = _unquote_url(xhr_url)
                    xhr_status = _check_if_req_is_successful(each_request['requestId'], succ_reqs)
                    fp.write("Navigation_URL: '%s'\n" % (url))
                    fp.write("Request_URL: '%s'\n" % (xhr_url))
                    fp.write("Request_Accepted: '%s'\n" % (str(xhr_status[0])))
                    fp.write("Response_HTTP_Status: '%s'\n" % (str(xhr_status[1])))
                    fp.write(sep_templates)
                else:
                    d = json.loads(each_request)
                    xhr_url = d['url']
                    xhr_url = _unquote_url(xhr_url)
                    xhr_status = _check_if_req_is_successful(d['requestId'], succ_reqs)
                    fp.write("Navigation_URL: '%s'\n" % (url))
                    fp.write("Request_URL: '%s'\n" % (xhr_url))
                    fp.write("Request_Accepted: '%s'\n" % (str(xhr_status[0])))
                    fp.write("Response_HTTP_Status: '%s'\n" % (str(xhr_status[1])))
                    fp.write(sep_templates)
            except:
                continue

        for each_request in with_data_reqs:
            try:
                if isinstance(each_request, dict):
                    xhr_url = each_request['url']
                    xhr_url = _unquote_url(xhr_url)
                    form_data_dict = each_request['requestBody']
                    form_data_str = str(form_data_dict)
                    fp.write("Navigation_URL: '%s'\n" % (url))
                    fp.write("Request_URL: '%s'\n" % (xhr_url))
                    fp.write("Form_Data: \n%s\n" % (form_data_str))
                    xhr_status = _check_if_req_is_successful(each_request['requestId'], succ_reqs)
                    fp.write("Request_Accepted: %s\n" % (str(xhr_status[0])))
                    fp.write("Response_HTTP_Status: %s\n" % (str(xhr_status[1])))
                    fp.write(sep_templates)
                else:
                    d = json.loads(each_request)
                    xhr_url = d['url']
                    xhr_url = _unquote_url(xhr_url)
                    form_data_dict = d['requestBody']
                    form_data_str = str(form_data_dict)
                    fp.write("Navigation_URL: '%s'\n" % (url))
                    fp.write("Request_URL: '%s'\n" % (xhr_url))
                    fp.write("Form_Data: \n%s\n" % (form_data_str))
                    xhr_status = _check_if_req_is_successful(d['requestId'], succ_reqs)
                    fp.write("Request_Accepted: '%s'\n" % (str(xhr_status[0])))
                    fp.write("Response_HTTP_Status: '%s'\n" % (str(xhr_status[1])))
                    fp.write(sep_templates)
            except:
                continue

    # step 6: save cookies
    # @Thanks to: https://stackoverflow.com/questions/15058462/how-to-save-and-load-cookies-using-python-selenium-webdriver
    path_name_cookie_logs = os.path.join(output_folder_path_name_of_this_page, NAME_COOKIE_FILE)
    path_name_cookie_logs_str = os.path.join(output_folder_path_name_of_this_page, NAME_COOKIE_FILE_STR)
    with open(path_name_cookie_logs, "wb") as fp:
        pickle.dump(cookies, fp)
    with open(path_name_cookie_logs_str, "w+") as fd:
        fd.write(str(cookies))

    # step 7: save events
    logs = seleniumModule.get_chrome_console_logs(driver)
    with open(os.path.join(output_folder_path_name_of_this_page, NAME_FIRED_EVENTS_PICKLE), 'wb') as fd:
        pickle.dump(logs, fd)

    with open(os.path.join(output_folder_path_name_of_this_page, NAME_FIRED_EVENTS), 'w+') as fd:
        for log in logs:
            if log['level'] == 'INFO' and log['message'].startswith('chrome-extension://'):
                fd.write(str(log['message']) + '\n')

    d = RequesterModule.requester(url)
    if RequesterModule.is_http_response_valid(d):
        unrendered_html_page = str(d).strip()
    else:
        driver.get("view-source:" + str(url))
        unrendered_html_page = driver.page_source

    # save the initial html
    path_name_html_unrendered = os.path.join(output_folder_path_name_of_this_page, NAME_HTML_UNRENDERED)
    with open(path_name_html_unrendered, "wb") as fp:
        fp.write(unrendered_html_page.encode('utf-8'))
        fp.write(b'\n')

    return SUCCESS
from requester import requester
from lowezy import lowezy
import threading
import time

if __name__ == "__main__":
    start = time.time()
    lowes = lowezy()
    http = requester()

    storeID = lowes.storeId
    prodID = lowes.productid
    lowes.genLink(storeID, prodID)
    ListUrl = lowes.urlList
    http.privateProxies()

    threads = []
    numbThreads = 1729
    totalThreads = len(ListUrl)
    totalProducts = len(prodID)

    for i in range(totalProducts * 7):
        threadRange = ListUrl[i * int(len(storeID) / 7):(i + 1) * int(len(storeID) / 7)]
        threads = []
        if len(threadRange) != 0:
            for j in range(len(threadRange)):
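# --- Illustration of the slicing pattern above with placeholder values: the URL list is cut into
# fixed-size batches per outer iteration, mirroring ListUrl[i * chunk:(i + 1) * chunk].
def _example_batching():
    urls = ["url%d" % n for n in range(21)]       # 21 placeholder URLs
    chunk = len(urls) // 7
    batches = [urls[i * chunk:(i + 1) * chunk] for i in range(7)]
    print([len(b) for b in batches])              # [3, 3, 3, 3, 3, 3, 3]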
def check_waf(target, proxy=None):
    original_target = target
    if "=" not in original_target:
        print(f"{red}[!][{time}] Please provide a url with parameters! {end}")
        return "WAF:None"
    # folder = Path.cwd().parent
    # waf_file = str(folder / "data/waf_signature")
    waf_file = "waf_signature"
    with open(waf_file, 'r') as loader:
        waf_data = json.load(loader)
    waf_match = {0: None}
    waf_info = {'company': None, 'waf_type': None, 'bypass_known': None}
    for intruder in waf_checker:
        try:
            intruder_type = "XSS" if intruder.startswith("<") else "SQLi"
            target, payload = chambering(original_target, strike=True, payload=intruder, type=intruder_type)
            response = requester(target, payload, GET=True, timeout=5, proxy=proxy)
            print(f"{purple}[~][{time}] using {intruder} to detect WAF !{end}")
            if response is not None:
                page, code, headers = response.text, response.status_code, response.headers
                if code >= 400:
                    match = 0
                    for waf_name, waf_signature in waf_data.items():
                        if re.search(waf_signature['regex'], page, re.I):
                            match = match + 1
                        if "code" in waf_signature:
                            # status code is an int, so cast it before the regex search
                            if re.search(waf_signature['code'], str(code), re.I):
                                match = match + 1
                        if "header" in waf_signature:
                            # headers is a mapping, so match against its string form
                            if re.search(waf_signature["header"], str(headers), re.I):
                                match = match + 1
                        if match > max(waf_match, key=waf_match.get):
                            waf_info['company'] = waf_name
                            waf_info['waf_type'] = waf_signature['name']
                            if 'bypass_known' not in waf_signature:
                                waf_info['bypass_known'] = None
                            else:
                                waf_info['bypass_known'] = waf_signature['bypass_known']
                            waf_match.clear()
                            waf_match[match] = waf_info
        except Exception:
            pass
    if max(waf_match, key=waf_match.get) > 0:
        return "WAF:" + str(waf_info['waf_type'])
    else:
        return "WAF:None"
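# --- Illustrative signature-file shape inferred from the keys read above (regex and name are
# required by this code path; code, header and bypass_known are optional). The entry below is
# made up for illustration and is not taken from the real data file.
example_waf_signatures = {
    "ExampleVendor": {
        "name": "Example WAF",
        "regex": "blocked by example waf",    # matched against the response body
        "code": "403",                        # optional: matched against the HTTP status code
        "header": "X-Example-WAF",            # optional: matched against the response headers
        "bypass_known": "false"               # optional
    }
}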
def scan(target, paramData, encoding, headers, delay, timeout, path, jsonData):
    GET, POST = (False, True) if paramData else (True, False)
    # if the entry URL provided by the user does not start with http/https, handle it here
    if not target.startswith('http'):
        try:
            response = requester('https://' + target, {}, headers, GET, delay, timeout)
            target = 'https://' + target
        except:
            target = 'http://' + target
    response = requester(target, {}, headers, GET, delay, timeout, jsonData, path).text  # response of the entry target
    host = urlparse(target).netloc  # extract the host
    url = getUrl(target, GET)
    params = getParams(target, paramData, GET, jsonData, path)  # extract the parameters from the target
    # if find:
    #     params = get_forms(url, GET, headers, delay, timeout)
    for paramName in params.keys():
        paramsCopy = copy.deepcopy(params)
        if encoding:
            paramsCopy[paramName] = encoding(xsschecker)
        else:
            paramsCopy[paramName] = xsschecker
        response = requester(url, paramsCopy, headers, GET, delay, timeout, jsonData, path)
        occurences = htmlParser(response, encoding)  # get the context of each reflection point
        positions = occurences.keys()
        if not occurences:
            print('No reflection found')
            continue
        else:
            print('Reflections found:%i' % len(occurences))
        # filterChecker checks whether each reflection point filters the special characters > < " ' //
        efficiencies = filterChecker(url, paramsCopy, headers, GET, delay, occurences, timeout, encoding)  # scores for the filtered characters
        vectors = generator(occurences, response.text)  # generate payloads
        total = 0
        for v in vectors.values():
            total += len(v)  # total number of payloads generated
        if total == 0:
            print('No vectors were crafted.')
            continue
        progress = 0
        for confidence, vects in vectors.items():
            for vect in vects:
                if config.globalVariables['path']:
                    vect = vect.replace('/', '%2F')  # if the user chose to inject the payload into the URL path
                loggerVector = vect
                progress += 1
                if not GET:
                    vect = unquote(vect)
                efficiencies = checker(url, paramData, headers, GET, delay, vect, positions, timeout, encoding)
                if not efficiencies:
                    for i in range(len(occurences)):
                        efficiencies.append(0)
                bestEfficiency = max(efficiencies)
                if bestEfficiency == 100 or (vect[0] == '\\' and bestEfficiency >= 95):
                    print("Payload:%s" % loggerVector)
                    print("Efficiency:%s Confidence:%s" % (bestEfficiency, confidence))
                elif bestEfficiency > minEfficiency:
                    print("Payload:%s" % loggerVector)
                    print("Efficiency:%s Confidence:%s" % (bestEfficiency, confidence))
fimg = FileSystem_Images()
# ... other implementations
fprg = myIOFace_Programs(
    context=DictAttr(lang='eng'),
    result_as=str,
)
fmeta = myIOFace_Storage()
# print 'generators:', fprg.methods_as_generator

urls = []
urls += url4webpy(
    fprg,
    # face_subset= AFace,
    requester=lambda m: requester(
        m,
        face=fprg,
        as_generator=m.name in fprg.methods_as_generator,
        checklogin=checklogin),
)

# simple ones
for f in [fimg, fmeta]:
    urls += url4webpy(f, requester=lambda m: requester(m, result_is_tuple=False),)

# ...
'''
becomes something like this (all are user-authenticated somehow; all are GET except some browser forms)

/programs
    .channels                                # all channels
    .programs?channel=34                     # programs in channel
    .programs                                # programs in all channels
    .add_program?channel=34&program=2345     # add program to channel