예제 #1
0
 def get_words(self, url):
     """Fetch ``url``, extract its words, and store the analyzed result.

     The page content comes from ``GetWords.get_content``. Every item
     yielded by ``self.catch_words`` is passed to ``self.analyze`` and the
     combined results are written with ``PyMongoUtil.write`` under ``url``.

     Errors raised while extracting, analyzing, or writing are logged with
     the URL and suppressed, so one bad page does not stop the caller.
     Errors raised by the fetch itself are not caught here and propagate.

     :param url: address of the page to process.
     """
     html = GetWords.get_content(url)
     try:
         wlist = []
         for wd in self.catch_words(html):
             wlist.extend(self.analyze(wd))
         PyMongoUtil.write(url, wlist)
     except Exception as e:
         # ``as`` is valid on Python 2.6+ and 3.x; the old comma form is
         # a syntax error on Python 3. Lazy logging args avoid raising a
         # second error from string concatenation inside the handler.
         logger.error("%s %s", url, e)
예제 #2
0
파일: test.py 프로젝트: gandaruvu/SpiderZ
    def testGetUrl():
        """Scan one fixed page for URLs and store each result.

        Clears the Mongo and memcache state, fetches the page, runs
        ``UrlScan.scanpage`` on it, writes every returned entry with a
        placeholder word list, and prints how many entries were returned.
        """
        page_url = "http://www.leakedin.com/tag/emailpassword-dump/"

        PyMongoUtil.clean()
        MemcacheUtil.clean()
        # Instance is discarded; presumably this (re)initializes shared
        # bloom-filter state -- TODO confirm against SpiderBloomFilter.
        SpiderBloomFilter()

        html = GetWords.get_content(page_url)
        # Named ``found_urls`` rather than ``list`` to avoid shadowing the builtin.
        found_urls = UrlScan.scanpage(html, page_url, None)

        for found in found_urls:
            PyMongoUtil.write(found, [""])
        # Parenthesized single-argument form prints the same on Python 2 and 3.
        print(len(found_urls))
예제 #3
0
파일: test.py 프로젝트: zzmzz/SpiderZ
    def testGetUrl():
        """Exercise URL scanning end to end against one fixed page.

        Steps: clear the Mongo and memcache state, fetch the page, pass its
        content to ``UrlScan.scanpage``, write each returned entry with a
        placeholder word list, then print the number of entries.
        """
        target = "http://www.leakedin.com/tag/emailpassword-dump/"

        PyMongoUtil.clean()
        MemcacheUtil.clean()
        # Return value unused; presumably called for its initialization
        # side effect -- TODO confirm against SpiderBloomFilter.
        SpiderBloomFilter()

        html = GetWords.get_content(target)
        # Avoid the original local name ``list``, which shadowed the builtin.
        scanned = UrlScan.scanpage(html, target, None)

        for entry in scanned:
            PyMongoUtil.write(entry, [""])
        # ``print(...)`` with one argument behaves the same on Python 2 and 3.
        print(len(scanned))