Exemple #1
0
 def fetch_one_url(url):
     """Crawl a single URL with ``BaseSpider`` and log every link it collected.

     This is a generator function: it yields the result of
     ``SpiderEnv(url).gen_env()`` and expects the resolved environment to be
     sent back in, then yields ``BaseSpider.work()``.  It is presumably driven
     by a coroutine runner (the decorator is not visible in this snippet).

     Args:
         url: Address handed to ``SpiderEnv``.
     """
     spider_env = yield SpiderEnv(url).gen_env()
     seeker = BaseSpider(spider_env, app)
     yield seeker.work()

     # Log each collected link, then the total count.
     collected = seeker.urlLists
     for link in collected:
         Log4Spider.infoLog(link)
     Log4Spider.infoLog(len(collected))
 def fetch_one_url(url):
     """Run ``Jd_Home_Spider`` against ``url`` and log the links it gathered.

     Generator function: the environment produced by ``SpiderEnv.gen_env()``
     is received through the first ``yield``; the spider's ``work()`` result
     is awaited through the second.  The driver that resumes the generator is
     not visible here (presumably a coroutine decorator).

     Args:
         url: Address used to build the ``SpiderEnv``.
     """
     crawl_env = yield SpiderEnv(url).gen_env()
     home_spider = Jd_Home_Spider(crawl_env, app)
     yield home_spider.work()

     links = home_spider.urlLists
     for found_url in links:
         Log4Spider.infoLog(found_url)
     # Final line of output is the number of links found.
     Log4Spider.infoLog(len(links))
Exemple #3
0
 def downPicTest():
     """Run ``PicDownSpider`` over three hard-coded image URLs, sequentially.

     Generator function: for every URL it yields ``SpiderEnv.gen_env()`` to
     obtain an environment, then yields ``PicDownSpider.work()``.  The spider
     is constructed with ``None`` as its second argument.
     """
     sample_image_urls = [
         "http://upload-images.jianshu.io/upload_images/1679702-7e810a34f3ef8d18.jpg?imageMogr2/auto-orient/strip%7CimageView2/1/w/300/h/300",
         "https://asearch.alicdn.com/bao/uploaded/i1/146280142867863617/TB2.atGhFXXXXcoXXXXXXXXXXXX_!!15874628-0-saturn_solar.jpg_210x210.jpg",
         "http://pic18.wed114.cn/20140923/2014092312515083.jpg",
     ]
     for image_url in sample_image_urls:
         pic_env = yield SpiderEnv(image_url).gen_env()
         yield PicDownSpider(pic_env, None).work()
 def fetch_one_url(url):
     """Crawl ``url`` with ``Jd_Item_Spider``, log its links, and mark the job done.

     Generator function: it yields ``SpiderEnv.gen_env()`` (receiving the
     environment back) and ``Jd_Item_Spider.work()``.  The driver that resumes
     it is not visible here (presumably a coroutine decorator).

     The module-level counter ``num`` is decremented once per call, and the
     module-level ``event`` is set when the counter reaches zero.

     Args:
         url: Address used to build the ``SpiderEnv``.

     Raises:
         Whatever ``gen_env()``, the spider, or the logger raise; the error is
         not swallowed, but the counter is still updated first.
     """
     global num
     try:
         env = yield SpiderEnv(url).gen_env()
         url_seek = Jd_Item_Spider(env, app)
         yield url_seek.work()
         for found_url in url_seek.urlLists:
             Log4Spider.infoLog(found_url)
         Log4Spider.infoLog(len(url_seek.urlLists))
     finally:
         # Account for this job even when the crawl fails; otherwise the
         # counter never reaches zero and ``event`` is never set, leaving
         # whoever waits on it blocked forever.
         num -= 1
         if num == 0:
             event.set()
Exemple #5
0
 def fetch_one_url(url):
     """Process one URL with ``Jd_Item_Spider`` and update the completion counter.

     Generator function: the first ``yield`` hands ``SpiderEnv.gen_env()`` to
     the driver and receives the environment; the second yields the spider's
     ``work()`` result.  The driver is not visible in this snippet
     (presumably a coroutine decorator).

     Side effects: logs every entry of the spider's ``urlLists`` and their
     count, decrements the module-level ``num``, and calls ``event.set()``
     when ``num`` becomes zero.

     Args:
         url: Address passed to ``SpiderEnv``.

     Raises:
         Any exception from environment creation, the spider, or logging is
         propagated unchanged after the counter has been updated.
     """
     global num
     try:
         env = yield SpiderEnv(url).gen_env()
         item_spider = Jd_Item_Spider(env, app)
         yield item_spider.work()
         for link in item_spider.urlLists:
             Log4Spider.infoLog(link)
         Log4Spider.infoLog(len(item_spider.urlLists))
     finally:
         # The decrement must happen on the failure path too: a crawl that
         # raises would otherwise keep ``num`` above zero forever and the
         # completion ``event`` would never fire.
         num -= 1
         if num == 0:
             event.set()
Exemple #6
0
 def main():
     """Run ``UrlSeekSpider`` over two hard-coded URLs and log what it finds.

     Generator function: for each start URL it yields
     ``SpiderEnv.gen_env()`` to obtain an environment, yields the spider's
     ``work()``, then logs every entry of ``urlLists`` followed by the number
     of entries.  The spider receives ``None`` as its second argument.
     """
     start_urls = [
         "http://www.jianshu.com",
         "http://upload-images.jianshu.io/upload_images/1679702-7e810a34f3ef8d18.jpg?imageMogr2/auto-orient/strip%7CimageView2/1/w/300/h/300",
     ]
     for start_url in start_urls:
         seek_env = yield SpiderEnv(start_url).gen_env()
         seeker = UrlSeekSpider(seek_env, None)
         yield seeker.work()

         discovered = seeker.urlLists
         for link in discovered:
             Log4Spider.infoLog(link)
         Log4Spider.infoLog(len(discovered))
Exemple #7
0
    def run(self):
        """Worker loop: take URLs from ``self.queue`` forever and crawl each one.

        Generator method.  For every URL taken from the queue it:

        1. builds an environment via ``SpiderEnv(url).gen_env()``; if that
           raises, the error is logged and the URL is skipped;
        2. calls ``self._find_url_handler(url)`` (presumably this selects
           ``self.handler_class`` / ``self.handler_kwargs`` -- confirm
           against its definition);
        3. instantiates the handler, yields its ``work()``, and puts every
           entry of the handler's ``urlLists`` back on the queue.

        The loop has no exit condition of its own.
        """
        while True:
            target = yield self.queue.get()
            Log4Spider.debugLog(self, "get url:", target)

            try:
                env = yield SpiderEnv(target).gen_env()
            except Exception as err:
                # Environment setup failed: report it and move on to the
                # next queued URL instead of stopping the worker.
                Log4Spider.errLog(self, "spider env failed url:", target,
                                  "exception:", err)
                continue

            self._find_url_handler(target)
            handler_cls = self.handler_class
            Log4Spider.infoLog(self, "url: ", target, " --- class: ",
                               handler_cls)

            spider = handler_cls(env, self.application, **self.handler_kwargs)
            yield spider.work()

            # Feed everything the spider discovered back into the queue.
            for next_url in spider.urlLists:
                Log4Spider.debugLog(self, "put url:", next_url)
                yield self.queue.put(next_url)
Exemple #8
0
 def main():
     """Build a ``SpiderEnv`` for the Taobao home page and print its ``env``.

     Generator function: ``gen_env()`` is yielded to the driver and its
     result is discarded; the value printed is read from the ``env``
     attribute of the ``SpiderEnv`` instance afterwards.
     """
     spider_env = SpiderEnv("http://www.taobao.com")
     yield spider_env.gen_env()
     # The yielded result is ignored; the attribute is what gets printed.
     print(spider_env.env)
Exemple #9
0
 def main():
     """Generate the spider environment for ``http://www.taobao.com`` and print it.

     Generator function: yields ``SpiderEnv.gen_env()`` once, ignores what is
     sent back, then prints the instance's ``env`` attribute.
     """
     taobao_env = SpiderEnv("http://www.taobao.com")
     yield taobao_env.gen_env()
     print(taobao_env.env)