def run(self):
    """Worker-thread entry point: drain the shared task pool.

    Each iteration takes one item (``un``) from ``Controller.taskpool``,
    finds its position in the first list returned by
    ``get_uns_uids(config.UID_FILEPATH)``, scrapes the URL at the same
    position in ``get_urls(<second list>)`` with a ``BlogCrawler`` and hands
    the result to ``Controller.save_csv``.

    A failure while handling one task is reported (the task item on stdout,
    the traceback on stderr) and the loop moves on to the next task.
    """
    import traceback
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    crawler = BlogCrawler()
    while True:
        # Non-blocking get instead of "while not empty(): get()": with
        # several workers another thread may take the last item between the
        # emptiness check and the get, leaving this thread blocked forever.
        # NOTE(review): assumes Controller.taskpool is a stdlib-style queue
        # (it is only used through get()/qsize() here) -- confirm.
        try:
            un = Controller.taskpool.get(False)
        except Empty:
            break

        print("\n已处理 %d 个任务, 还剩 %d 个任务"
              % (Controller.finished_count, Controller.taskpool.qsize()))

        try:
            # Parse the UID file once per task rather than twice. It stays
            # inside the try so a bad file is still reported per task.
            parsed = get_uns_uids(config.UID_FILEPATH)
            urls = get_urls(parsed[1])
            uns = parsed[0]
            print("task start")

            # Raises ValueError (handled below) if ``un`` is not listed.
            position = uns.index(un)
            print("crawlering %s th bloger...." % (position + 1))
            blogs = crawler.scratch(urls[position])
            Controller.save_csv(blogs, un)
            print("task end")
        except Exception:
            # Keep the best-effort behaviour (report the failing task and
            # continue) but stop swallowing SystemExit/KeyboardInterrupt and
            # keep the traceback so failures can be diagnosed.
            print(un)
            traceback.print_exc()

        # Count the task whether it succeeded or failed.
        Controller.finished_count += 1
def run(self):
    """Worker-thread entry point: drain the shared task pool.

    Each iteration takes one item (``uid``) from ``Controller.taskpool``,
    scrapes a fixed URL with a ``BlogCrawler`` and hands the result to
    ``Controller.save_csv`` together with ``uid``.

    A failure while handling one task is reported (the task item on stdout,
    the traceback on stderr) and the loop moves on to the next task.
    """
    import traceback
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    # NOTE(review): the same URL is scraped for every uid and the result is
    # saved under that uid. This looks like a debugging leftover -- confirm
    # how the URL should be derived from ``uid`` before relying on the output.
    url = 'http://weibo.com/u/1340714021'

    crawler = BlogCrawler()
    while True:
        # Non-blocking get instead of "while not empty(): get()": with
        # several workers another thread may take the last item between the
        # emptiness check and the get, leaving this thread blocked forever.
        # NOTE(review): assumes Controller.taskpool is a stdlib-style queue
        # (it is only used through get()/qsize() here) -- confirm.
        try:
            uid = Controller.taskpool.get(False)
        except Empty:
            break

        print("\n已处理 %d 个任务, 还剩 %d 个任务"
              % (Controller.finished_count, Controller.taskpool.qsize()))

        try:
            print('task start')
            blogs = crawler.scratch(url)
            Controller.save_csv(blogs, uid)
            print('task end')
        except Exception:
            # Keep the best-effort behaviour (report the failing task and
            # continue) but stop swallowing SystemExit/KeyboardInterrupt and
            # keep the traceback so failures can be diagnosed.
            print(uid)
            traceback.print_exc()

        # Count the task whether it succeeded or failed.
        Controller.finished_count += 1
def run(self):
    """Worker-thread entry point: drain the shared task pool.

    Each iteration takes one item (``un``) from ``Controller.taskpool``,
    finds its position in the first list returned by
    ``get_uns_uids(config.UID_FILEPATH)``, scrapes the URL at the same
    position in ``get_urls(<second list>)`` with a ``BlogCrawler`` and hands
    the result to ``Controller.save_csv``.

    A failure while handling one task is reported (the task item on stdout,
    the traceback on stderr) and the loop moves on to the next task.
    """
    import traceback
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    crawler = BlogCrawler()
    while True:
        # Non-blocking get instead of "while not empty(): get()": with
        # several workers another thread may take the last item between the
        # emptiness check and the get, leaving this thread blocked forever.
        # NOTE(review): assumes Controller.taskpool is a stdlib-style queue
        # (it is only used through get()/qsize() here) -- confirm.
        try:
            un = Controller.taskpool.get(False)
        except Empty:
            break

        print("\n已处理 %d 个任务, 还剩 %d 个任务"
              % (Controller.finished_count, Controller.taskpool.qsize()))

        try:
            # Parse the UID file once per task rather than twice. It stays
            # inside the try so a bad file is still reported per task.
            parsed = get_uns_uids(config.UID_FILEPATH)
            urls = get_urls(parsed[1])
            uns = parsed[0]
            print('task start')

            # Raises ValueError (handled below) if ``un`` is not listed.
            position = uns.index(un)
            print('crawlering %s th bloger....' % (position + 1))
            blogs = crawler.scratch(urls[position])
            Controller.save_csv(blogs, un)
            print('task end')
        except Exception:
            # Keep the best-effort behaviour (report the failing task and
            # continue) but stop swallowing SystemExit/KeyboardInterrupt and
            # keep the traceback so failures can be diagnosed.
            print(un)
            traceback.print_exc()

        # Count the task whether it succeeded or failed.
        Controller.finished_count += 1