def test_cls_MultiScapy_scapy__print_OnScapy():
    """Smoke test: crawl a live site with MultiSpider, printing each page.

    NOTE(review): hits the network (http://www.hao123.com/) — not suitable
    for an offline CI run; presumably intended as a manual integration test.
    """
    site = "http://www.hao123.com/"
    max_size = 100
    ptns = producePtns()
    onscapy = print_OnScapy()
    multispider = MultiSpider(site, ptns, max_size=max_size, onScapy=onscapy)
    multispider.scapy()
def createSpider(self, kwags):
    """Build a Spider or MultiSpider from a request-parameter dict.

    Recognized keys in *kwags* (all values are strings, as from a query):
      site          — start URL (required; raises if missing/empty)
      ptns          — regex pattern source compiled into the filter list
      max_size      — page limit, default "100"
      output_type   — "onScapy" | "print" | "logfile" (default "onScapy");
                      an unknown value raises KeyError, as before
      output_name   — log file name, honored only when output_type=="logfile"
      multi_thread  — "True" selects MultiSpider, anything else Spider
      threadnumber  — worker count, applied only for MultiSpider

    Returns the configured spider instance.
    Raises Exception("not site input") when no site is supplied.

    Fixes vs. the original: removed the dead `site = site` statement, and
    moved the site validation to the top so the handler objects and regex
    are not constructed on the failure path.
    """
    site = kwags.get("site", "")
    if not site:
        # Original error message preserved for callers that match on it.
        raise Exception("not site input")

    ptns = [re.compile(kwags.get("ptns", ""))]
    max_size = int(kwags.get("max_size", "100"))

    # Map the requested output type to a handler instance. An unrecognized
    # output_type deliberately raises KeyError (original behavior).
    onScapyDic = {
        "onScapy": OnScapy(),
        "print": print_OnScapy(),
        "logfile": logfile_OnScapy(),
    }
    output_type = kwags.get("output_type", "onScapy")
    onscapy = onScapyDic[output_type]

    # A named log file overrides the default logfile handler.
    logfileName = kwags.get("output_name", None)
    if logfileName and output_type == "logfile":
        onscapy = logfile_OnScapy(logfileName)

    multi = kwags.get("multi_thread", "False")
    spiderCls = MultiSpider if multi == "True" else Spider
    spider = spiderCls(site, ptns, max_size, onScapy=onscapy)

    # Thread count only makes sense for the multi-threaded spider.
    threadnumber = kwags.get("threadnumber", None)
    if multi == "True" and threadnumber:
        spider.setThreadNumber(int(threadnumber))
    return spider
def test_cls_print_OnScapy_filterHtml():
    """print_OnScapy.filterHtml must return the html argument unchanged."""
    url, html = 'url', 'html'
    expected = 'html'
    actual = print_OnScapy().filterHtml(url, html)
    assert actual == expected, (
        'print_OnScapy.filterHtml(url,html) error',
        url,
        html,
        actual,
        expected,
    )