# coding=utf-8
import datetime
import blog_spider
import mood_spider
import friend_spider
import information_spider
import public_methods
from multiprocessing.dummy import Pool
import my_log

log = my_log.getLogger("SpideController.log", "spider_controller.SpideController")


class SpideController(object):
    """Drive the crawl of blogs, moods and personal information.

    Per the original author's note, the crawled data is saved to MongoDB;
    the storage code itself is not part of the section shown here.
    """

    # Key of the queue holding the QQ numbers that still have to be crawled.
    QUEUE_KEY = 'QQSpider:QQForSpide'

    # Upper bound on the number of tasks handed to one thread pool.
    BATCH_SIZE = 1000

    def __init__(self, my_messages=None):
        """Store the shared crawl state and build the account/cookie changer.

        :param my_messages: shared state object. ``beginer`` reads its
            ``rconn`` and ``thread_num_QQ`` attributes, so the default of
            ``None`` only works if the attribute is assigned before
            ``beginer`` is called.
        """
        self.my_messages = my_messages
        # Helper object used to switch QQ accounts and cookies.
        self.changer = public_methods.Changing(self.my_messages)

    def beginer(self):
        """Process the pending queue in batches until it is empty.

        Each round reads the current queue length, caps it at
        ``BATCH_SIZE`` and maps ``self.store_dairy`` over that many indices
        on a fresh thread pool.  The queue length is re-read after every
        round, so entries added while a batch runs are picked up as well.

        NOTE(review): ``store_dairy`` is not defined in the visible part of
        this class; it is presumably what removes entries from the queue.
        If it does not, this loop never terminates -- confirm.
        """
        # NOTE(review): ``rconn.llen`` looks like a Redis list-length call;
        # confirm against where ``my_messages`` is built.
        remaining = self.my_messages.rconn.llen(self.QUEUE_KEY)
        while remaining > 0:
            batch = min(remaining, self.BATCH_SIZE)
            pool = Pool(self.my_messages.thread_num_QQ)
            try:
                pool.map(self.store_dairy, range(batch))
            finally:
                # Always release the worker threads, even when a task
                # raised and ``map`` re-raised the error here.
                pool.close()
                pool.join()
            remaining = self.my_messages.rconn.llen(self.QUEUE_KEY)
# coding=utf-8
import re
import datetime
import itertools
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool
import my_log

log = my_log.getLogger()


class BlogSpider(object):
    """Crawl QQ blog posts."""

    def __init__(self, spiderMessage, changer):
        """Keep the per-crawl message object and the shared changer.

        :param spiderMessage: state object for the current crawl, stored
            as ``self.message``.
        :param changer: helper object; ``beginer`` reads
            ``changer.my_messages.thread_num_Blog`` from it.
        """
        self.message = spiderMessage
        self.changer = changer

    def beginer(self):
        """Fetch every blog post in the blog list and return the successes.

        ``get_blog_list`` is expected to return a mapping; each
        ``(key, value)`` pair of it is passed as a single tuple argument
        to ``get_blog`` on a thread pool.  ``get_blog`` signals a failed
        fetch by returning ``-1``, and those entries are dropped.

        NOTE(review): ``get_blog_list`` and ``get_blog`` are not defined in
        the visible part of this class.

        :return: list of ``get_blog`` results without the ``-1`` markers;
            an empty list when there is nothing to fetch.
        """
        blog_list = self.get_blog_list()  # mapping keyed by blog ID
        if not blog_list:
            # Hand back an empty list so callers always receive a list.
            return []

        pool = Pool(self.changer.my_messages.thread_num_Blog)
        try:
            # ``items()`` yields the same (key, value) tuples the former
            # ``izip(keys(), values())`` did, and exists on Python 2 and 3.
            fetched = pool.map(self.get_blog, list(blog_list.items()))
        finally:
            # Always release the worker threads, even if a task raised.
            pool.close()
            pool.join()

        # Drop the failure markers in a single pass.
        return [blog for blog in fetched if blog != -1]