コード例 #1
0
def test_cls_MultiScapy_scapy__print_OnScapy():
    """Smoke-test ``MultiSpider.scapy`` wired to a ``print_OnScapy`` handler.

    The test makes no assertions: it passes as long as building the spider
    and calling ``scapy()`` does not raise.
    """
    start_url = "http://www.hao123.com/"
    size_cap = 100
    patterns = producePtns()
    handler = print_OnScapy()
    crawler = MultiSpider(
        start_url,
        patterns,
        max_size=size_cap,
        onScapy=handler,
    )
    crawler.scapy()
コード例 #2
0
    def createSpider(self, kwags):
        """Build a ``Spider`` or ``MultiSpider`` from a dict of string options.

        Keys read from ``kwags`` (every value is expected as a string):

        * ``site`` -- required; passed as the first constructor argument.
        * ``ptns`` -- regular expression source, compiled and wrapped in a
          one-element list (default ``""``).
        * ``max_size`` -- converted with ``int`` (default ``"100"``).
        * ``output_type`` -- ``"onScapy"``, ``"print"`` or ``"logfile"``;
          selects the handler class (default ``"onScapy"``).
        * ``output_name`` -- when truthy and ``output_type`` is
          ``"logfile"``, passed to ``logfile_OnScapy`` as its argument.
        * ``multi_thread`` -- the literal string ``"True"`` selects
          ``MultiSpider``; anything else selects ``Spider``.
        * ``threadnumber`` -- when truthy and multi-threading is selected,
          converted with ``int`` and given to ``setThreadNumber``.

        Returns:
            The constructed spider instance.

        Raises:
            Exception: if ``site`` is missing or empty.
            KeyError: if ``output_type`` is not one of the known names.
            ValueError: if ``max_size`` or ``threadnumber`` is not an integer
                string.
            re.error: if ``ptns`` is not a valid regular expression.
        """
        site = kwags.get("site", "")
        # Fail fast: reject a missing site before compiling the pattern or
        # constructing any handler object.
        if not site:
            raise Exception("not site input")

        ptns = [re.compile(kwags.get("ptns", ""))]
        max_size = int(kwags.get("max_size", "100"))

        # Map names to classes rather than instances so that only the
        # selected handler is ever constructed.
        handler_factories = {
            "onScapy": OnScapy,
            "print": print_OnScapy,
            "logfile": logfile_OnScapy,
        }
        output_type = kwags.get("output_type", "onScapy")
        # Unknown names raise KeyError, as the original lookup did.
        make_handler = handler_factories[output_type]

        logfile_name = kwags.get("output_name", None)
        if logfile_name and output_type == "logfile":
            onscapy = make_handler(logfile_name)
        else:
            onscapy = make_handler()

        # Only the exact string "True" enables the multi-threaded spider.
        use_multi = kwags.get("multi_thread", "False") == "True"
        spider_cls = MultiSpider if use_multi else Spider
        spider = spider_cls(site, ptns, max_size, onScapy=onscapy)

        threadnumber = kwags.get("threadnumber", None)
        if use_multi and threadnumber:
            spider.setThreadNumber(int(threadnumber))
        return spider
コード例 #3
0
def test_cls_print_OnScapy_filterHtml():
    """Check that ``print_OnScapy.filterHtml('url', 'html')`` returns ``'html'``."""
    handler = print_OnScapy()
    page_url = 'url'
    page_html = 'html'
    expected = 'html'
    actual = handler.filterHtml(page_url, page_html)
    # On failure, report the inputs alongside the actual and expected values.
    failure_info = (
        'print_OnScapy.filterHtml(url,html) error',
        page_url,
        page_html,
        actual,
        expected,
    )
    assert actual == expected, failure_info