def createSpider(self, kwags):
        """Build a ``Spider`` or ``MultiSpider`` from a dict of string options.

        Keys read from ``kwags`` (every key except ``site`` is optional):

        * ``site`` -- passed to the spider as its first argument; must be
          non-empty.
        * ``ptns`` -- regular-expression source; compiled and passed to the
          spider as a one-element list (default ``""``).
        * ``max_size`` -- converted with ``int()`` (default ``"100"``).
        * ``output_type`` -- ``"onScapy"`` (default), ``"print"`` or
          ``"logfile"``; selects which handler is passed as ``onScapy=``.
        * ``output_name`` -- when truthy and ``output_type`` is ``"logfile"``,
          passed to ``logfile_OnScapy`` as its only positional argument.
        * ``multi_thread`` -- the string ``"True"`` selects ``MultiSpider``;
          any other value selects ``Spider`` (default ``"False"``).
        * ``threadnumber`` -- when truthy and ``multi_thread`` is ``"True"``,
          converted with ``int()`` and passed to ``setThreadNumber``.

        Returns:
            The constructed spider instance.

        Raises:
            ValueError: if ``site`` is missing or empty (message
                ``"not site input"``), or if ``max_size`` / ``threadnumber``
                cannot be converted by ``int()``.
            KeyError: if ``output_type`` is not one of the three known names.
            re.error: if ``ptns`` is not a valid regular expression.
        """
        # Validate the only mandatory option before doing any other work, so
        # a missing site is reported as such rather than masked by a failure
        # in pattern compilation or handler construction.
        site = kwags.get("site", "")
        if not site:
            raise ValueError("not site input")

        ptns = [re.compile(kwags.get("ptns", ""))]
        max_size = int(kwags.get("max_size", "100"))

        # Map names to the handler callables themselves so that only the
        # selected handler is ever instantiated.
        handler_factories = {
            "onScapy": OnScapy,
            "print": print_OnScapy,
            "logfile": logfile_OnScapy,
        }
        output_type = kwags.get("output_type", "onScapy")
        make_handler = handler_factories[output_type]

        logfile_name = kwags.get("output_name", None)
        if logfile_name and output_type == "logfile":
            onscapy = make_handler(logfile_name)
        else:
            onscapy = make_handler()

        # Options arrive as strings, so the flag is compared against "True".
        multi = kwags.get("multi_thread", "False") == "True"
        spider_cls = MultiSpider if multi else Spider
        spider = spider_cls(site, ptns, max_size, onScapy=onscapy)

        threadnumber = kwags.get("threadnumber", None)
        if multi and threadnumber:
            spider.setThreadNumber(int(threadnumber))
        return spider
Ejemplo n.º 2
0
def test_cls_MultiScapy_scapy__logfile_OnScapy():
    """Run ``MultiSpider.scapy()`` with a ``logfile_OnScapy`` handler.

    The test makes no assertions; it passes when the crawl call returns
    without raising.
    """
    patterns = producePtns()
    handler = logfile_OnScapy(
        name='test_result/test_cls_MultiScapy_scapy__logfile_OnScapy'
    )
    crawler = MultiSpider(
        "http://www.hao123.com/",
        patterns,
        max_size=100,
        onScapy=handler,
    )
    crawler.scapy()
Ejemplo n.º 3
0
def test_cls_Spider_scapy__logfile_OnScapy():
    """Run ``Spider.scapy()`` with a ``logfile_OnScapy`` handler.

    The test makes no assertions; it passes when the crawl call returns
    without raising.
    """
    patterns = producePtns()
    handler = logfile_OnScapy(name="test_result/logfile")
    crawler = Spider(
        "http://www.hao123.com/",
        patterns,
        max_size=100,
        onScapy=handler,
    )
    crawler.scapy()
Ejemplo n.º 4
0
def test_cls_logfile_OnScapy():
    """Check what ``logfile_OnScapy`` writes for a single filtered page.

    Drives the handler through ``beforeScapy`` -> ``filterHtml`` ->
    ``finishScapy`` with one (url, html) pair, then asserts that the file at
    ``name`` contains exactly the url followed by a newline.
    """
    name = "test_result/logfile"
    url, html = 'url', 'html'

    on_scapy = logfile_OnScapy(name=name)
    on_scapy.beforeScapy()
    on_scapy.filterHtml(url, html)
    on_scapy.finishScapy()

    expected = url + '\n'
    # The context manager closes the file even if read() raises.
    with open(name) as f:
        logfile = f.read()

    assert logfile == expected, ('logfile_OnScapy class error', logfile, expected)