def createSpider(self, kwags):
    # Build a Spider or MultiSpider from a kwargs-style dict (uses the re
    # module and the project's Spider/MultiSpider/OnScapy classes).
    site = kwags.get("site", "")
    str_ptn = kwags.get("ptns", "")
    ptn = re.compile(str_ptn)
    ptns = [ptn]
    onScapyDic = {"onScapy": OnScapy(),
                  "print": print_OnScapy(),
                  "logfile": logfile_OnScapy()}
    max_size = int(kwags.get("max_size", "100"))
    output_type = kwags.get("output_type", "onScapy")
    onscapy = onScapyDic[output_type]
    logfileName = kwags.get("output_name", None)
    multi = kwags.get("multi_thread", "False")
    threadnumber = kwags.get("threadnumber", None)
    if not site:
        raise Exception("no site given")
    # A custom output name overrides the default logfile handler.
    if logfileName and output_type == "logfile":
        onscapy = logfile_OnScapy(logfileName)
    spiderCls = Spider
    if multi == "True":
        spiderCls = MultiSpider
    spider = spiderCls(site, ptns, max_size, onScapy=onscapy)
    if multi == "True" and threadnumber:
        spider.setThreadNumber(int(threadnumber))
    return spider
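# Hedged usage sketch for createSpider. The dict keys below come from the
# method above; the SpiderFactory class name and the concrete values are
# illustrative assumptions, not part of the original code:
#
#     factory = SpiderFactory()
#     spider = factory.createSpider({
#         "site": "http://www.hao123.com/",
#         "ptns": r'href="(http[^"]+)"',
#         "max_size": "100",
#         "output_type": "logfile",
#         "output_name": "test_result/logfile",
#         "multi_thread": "True",
#         "threadnumber": "4",
#     })
#     spider.scapy()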
def test_cls_MultiScapy_scapy__logfile_OnScapy():
    site = "http://www.hao123.com/"
    max_size = 100
    ptns = producePtns()
    name = 'test_result/test_cls_MultiScapy_scapy__logfile_OnScapy'
    onscapy = logfile_OnScapy(name=name)
    multispider = MultiSpider(site, ptns, max_size=max_size, onScapy=onscapy)
    multispider.scapy()
def test_cls_Spider_scapy__logfile_OnScapy():
    site = "http://www.hao123.com/"
    max_size = 100
    logfile = "test_result/logfile"
    ptns = producePtns()
    onscapy = logfile_OnScapy(name=logfile)
    spider = Spider(site, ptns, max_size=max_size, onScapy=onscapy)
    spider.scapy()
def test_cls_logfile_OnScapy():
    name = "test_result/logfile"
    url, html = 'url', 'html'
    on_scapy = logfile_OnScapy(name=name)
    on_scapy.beforeScapy()
    on_scapy.filterHtml(url, html)
    on_scapy.finishScapy()
    # The log file should contain exactly the filtered url plus a newline.
    url = url + '\n'
    with open(name) as f:
        logfile = f.read()
    assert logfile == url, ('logfile_OnScapy class error', logfile, url)
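# Hedged sketch of the behaviour test_cls_logfile_OnScapy expects from
# logfile_OnScapy (the real class lives elsewhere in the project; this is
# illustrative only): each url passed to filterHtml is written to the file
# given by `name`, one url per line.
#
#     class logfile_OnScapy:
#         def __init__(self, name="logfile"):
#             self.name = name
#         def beforeScapy(self):
#             self._file = open(self.name, "w")
#         def filterHtml(self, url, html):
#             self._file.write(url + "\n")
#         def finishScapy(self):
#             self._file.close()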