def initSearchList(self):
    """Register the company search-list step on ``self.module_manager``.

    The step POSTs the captcha answer (``yzm``) and the search keyword
    (``company_key``) to the Hainan ``searchList.jspx`` endpoint, extracts
    the result links and their link texts, and exposes them zipped together
    as the ``search_list`` output.
    """
    search_url = "http://aic.hainan.gov.cn:1888/searchList.jspx"
    request_headers = {
        "Host": "aic.hainan.gov.cn:1888",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Origin": "http://aic.hainan.gov.cn:1888",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://aic.hainan.gov.cn:1888/search.jspx",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8"
    }

    # Module title reads "Step 3: start searching the company list".
    step = Module(self.visitSearchList, u"第三步_开始搜索公司列表")
    step.appendUrl(search_url)
    step.appendHeaders(request_headers)
    step.appendWebMethod("post")
    # Lambda parameter names are kept as-is; presumably the framework
    # resolves them by name against earlier outputs -- TODO confirm.
    step.appendPostData(
        lambda yzm, company_key: {"checkNo": yzm, "entName": company_key})

    # Hrefs and link texts of the result anchors, then paired up.
    step.appendOutput(
        "url_list", ".//div[@class='list']//a/@href", OutputType.LIST)
    step.appendOutput(
        "name_list", ".//div[@class='list']//a/text()", OutputType.LIST)
    step.appendOutput(
        name="search_list",
        type=OutputType.FUNCTION,
        function=lambda url_list, name_list: zip(url_list, name_list))

    # Both failure kinds are configured identically: up to 20 retries that
    # redo the "hn_yzm_pic" module (registered outside this view).
    for failure_kind in (EventType.EXCEPTION_OCCURED,
                         EventType.OUTPUT_NOT_SATISFIED):
        step.addEvent(
            Event(failure_kind, retry_times=20, redo_module="hn_yzm_pic"))

    # The assert function yields False when the seed is flagged NON_COMPANY.
    step.addEvent(
        Event(EventType.ASSERT_FAILED,
              retry_times=0,
              assert_function=lambda: not (
                  self.report.access_type == SeedAccessType.NON_COMPANY)))

    step.appendMiddleValueMonitor("search_list")
    step.addSleep(Sleep(1))
    self.module_manager.appendSubModule(step)
def initConfigHomePage(self):
    """Register the home-page step (module id ``module_home_page``).

    The step requests the ``/aiccips/`` landing page with a fixed header
    set, attaches the value named ``cookie``, and extracts the value of the
    hidden ``_csrf`` input as the ``csrf`` output.
    """
    home_url = "http://211.141.74.198:8081/aiccips/"
    request_headers = {
        'Connection': 'keep-alive',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh-TW;q=0.4',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Cache-Control': 'max-age=0',
        'Referer': 'http://211.141.74.198:8081/aiccips/',
        'Host': '211.141.74.198:8081',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0'
    }

    # Module title reads "home page".
    home_page = Module(self.visitHomePage, u"首页")
    home_page.module_id = "module_home_page"
    home_page.appendUrl(home_url)
    home_page.appendHeaders(request_headers)
    # "cookie" is presumably the output of an earlier cookie step -- verify.
    home_page.appendCookie("cookie")
    home_page.appendOutput(
        "csrf", ".//input[@name='_csrf']/@value", OutputType.LIST)
    home_page.appendMiddleValueMonitor("csrf")
    home_page.addSleep(Sleep(3))
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by appendSubModule, which is not visible here.
    self.module_manager.appendSubModule(home_page, True)
def initCookie(self):
    """Register the cookie step (module id ``module_cookie``).

    The step fetches the ``/aiccips/`` page and derives the
    ``ROBOTCOOKIEID`` cookie by evaluating the packed JavaScript challenge
    embedded in the response. The result is exposed as the ``cookie``
    output, a dict of the form ``{'ROBOTCOOKIEID': <value>}``.
    """
    # Module title reads "obtain cookie".
    module = Module(self.getWebHtml, u"获取cookie")
    module.module_id = "module_cookie"
    module.appendUrl('http://211.141.74.198:8081/aiccips/')
    module.appendHeaders(
        lambda ua: {
            'Connection': 'keep-alive',
            'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh-TW;q=0.4',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Cache-Control': 'max-age=0',
            'Referer': 'http://211.141.74.198:8081/aiccips/',
            'Host': '211.141.74.198:8081',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'User-Agent': ua
        })

    # JavaScript placed around the six challenge arguments: stub
    # `document`/`window` objects, the unpacker function that is applied to
    # the arguments, then a call to challenge() and a read of
    # document.cookie. Built from adjacent literals so that no
    # line-continuation backslash can end up inside the evaluated script.
    js_head = (
        "var document = {};"
        "var window = {};"
        "document['cookie'] = \"\";"
        "window['location'] ={}; "
        "window['location']['reload'] = function(){};"
        "eval(function (p, a, c, k, e, r) { "
        "e = function(c) { return c.toString(a) }; "
        "if (!''.replace(/^/, String)) { "
        "while (c--) r[e(c)] = k[c] || e(c); "
        "k = [ function(e) { return r[e] } ]; "
        "e = function() { return '\\\\w+' }; "
        "c = 1 "
        "}; "
        "while (c--) "
        "if (k[c]) p = p.replace(new RegExp('\\\\b' + e(c) + '\\\\b', 'g'), k[c]); "
        "return p "
        "}("
    )
    js_tail = ")); challenge();var a = document['cookie'];"

    def getCookie(html):
        """Return ``{'ROBOTCOOKIEID': value}`` computed from ``html``.

        Raises:
            Exception: if the challenge arguments cannot be located, do not
                split into exactly six comma-separated parts, or the
                evaluated script leaves no ``name=value`` cookie behind.
        """
        pattern = re.compile(r'\}\(([\s\S]*?\{\})\)\)', re.S)
        found = pattern.search(html)
        if found is None:
            # Previously surfaced as an AttributeError on None.
            raise Exception("cookie获取失败!")
        params = found.group(1).split(',')
        if len(params) != 6:
            raise Exception("cookie获取失败!")

        script = js_head + ','.join(params) + js_tail
        # NOTE(security): this executes JavaScript taken from the remote
        # response inside a PyV8 context; only use against trusted hosts.
        with PyV8.JSContext() as se:
            se.eval(script)
            a = se.locals.a
            if not a or '=' not in a:
                # Previously surfaced as an IndexError on split('=')[1].
                raise Exception("cookie获取失败!")
            # Keep the value between the first '=' and the following ';'.
            cookie = a.split('=')[1].split(';')[0]
            return {'ROBOTCOOKIEID': cookie}

    module.appendOutput(
        name="cookie", type=OutputType.FUNCTION, function=getCookie)
    module.appendMiddleValueMonitor("cookie")
    module.addSleep(Sleep(3))
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by appendSubModule, which is not visible here.
    self.module_manager.appendSubModule(module, True)
def initConfigSearchList(self):
    """Register the search-list step on ``self.module_manager``.

    The step POSTs the captcha answer (``yzm``) and the search keyword
    (``company_key``) to the ``ECPS_HB/searchList.jspx`` endpoint and
    extracts the result hrefs as the ``search_list`` output.
    """
    search_url = 'http://xyjg.egs.gov.cn/ECPS_HB/searchList.jspx'
    retry_target = "module_home_page"

    # Module title reads "search list".
    listing = Module(self.visitSearchList, u"搜索列表")
    listing.appendUrl(search_url)
    # Headers are built from `ua`; presumably the framework supplies the
    # user-agent string by parameter name -- TODO confirm.
    listing.appendHeaders(
        lambda ua: {
            "Host": "xyjg.egs.gov.cn",
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            "Accept-Encoding": "gzip, deflate",
            "Content-Type": "application/x-www-form-urlencoded",
            "Referer": "http://xyjg.egs.gov.cn/ECPS_HB/search.jspx"
        })
    listing.appendWebMethod("post")
    listing.appendPostData(
        lambda company_key, yzm: {'checkNo': yzm, 'entName': company_key})
    listing.appendOutput(
        "search_list", "//*[@class='list']/ul/li/a/@href", OutputType.LIST)

    # Both failure kinds are configured identically: up to 100 retries that
    # redo the "module_home_page" module.
    for failure_kind in (EventType.EXCEPTION_OCCURED,
                         EventType.OUTPUT_NOT_SATISFIED):
        listing.addEvent(
            Event(failure_kind, retry_times=100, redo_module=retry_target))

    # The assert function yields False when the seed is flagged NON_COMPANY.
    listing.addEvent(
        Event(EventType.ASSERT_FAILED,
              retry_times=0,
              assert_function=lambda: not (
                  self.report.access_type == SeedAccessType.NON_COMPANY)))

    listing.appendMiddleValueMonitor("search_list")
    self.module_manager.appendSubModule(listing)