def start_catch_info(self):
    """Dispatch ``self.catch_info`` over ``self.info_url_set``.

    Builds the task description (``'func'`` / ``'iterable'``) and passes
    it, together with the URL set itself, to ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    task = dict(func=self.catch_info, iterable=self.info_url_set)
    Control.control_center(task, self.info_url_set)
def start_download(self):
    """Dispatch ``self.download`` over ``self.container``.

    The task description and the container are both handed to
    ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    # All data waiting to be stored in the database should be a list.
    job = dict(func=self.download, iterable=self.container)
    Control.control_center(job, self.container)
def start_create_work_list(self):
    """Reset the work set, then dispatch ``self.create_work_set`` over ``self.task_set``.

    ``self.clear_work_set()`` is called first; afterwards the task
    description and ``self.task_set`` are passed to
    ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    self.clear_work_set()
    job = dict(func=self.create_work_set, iterable=self.task_set)
    Control.control_center(job, self.task_set)
def start_worker(self):
    """Run ``self.worker`` over the sorted work set, then parse the results.

    The items of ``self.work_set`` are sorted and handed to
    ``Control.control_center`` together with ``self.worker``. Once that
    call returns, every entry of ``self.content_list`` is passed to
    ``self.parse_content`` while a single-line progress counter is
    printed through ``Debug.print_in_single_line``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    ordered_work = sorted(self.work_set)
    # All data waiting to be stored in the database should be a list.
    argv = {"func": self.worker, "iterable": ordered_work}
    Control.control_center(argv, self.work_set)

    Debug.logger.info(u"所有内容抓取完毕,开始对页面进行解析")
    for index, content in enumerate(self.content_list, 1):
        # The total is re-read on every pass so the progress line stays
        # accurate even if parse_content changes the list's length.
        Debug.print_in_single_line(
            u"正在解析第{}/{}张页面".format(index, len(self.content_list)))
        self.parse_content(content)
    Debug.logger.info(u"网页内容解析完毕")
def start_worker(self):
    """Run ``self.worker`` over the sorted work set, then parse the results.

    The items of ``self.work_set`` are sorted and handed to
    ``Control.control_center`` together with ``self.worker``. Once that
    call returns, every entry of ``self.content_list`` is passed to
    ``self.parse_content`` while a single-line progress counter is
    printed through ``Debug.print_in_single_line``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    ordered_work = sorted(self.work_set)
    argv = {
        'func': self.worker,
        # All data waiting to be stored in the database should be a list.
        'iterable': ordered_work,
    }
    Control.control_center(argv, self.work_set)

    Debug.logger.info(u"所有内容抓取完毕,开始对页面进行解析")
    for page_number, content in enumerate(self.content_list, 1):
        # The total is re-read on every pass so the progress line stays
        # accurate even if parse_content changes the list's length.
        Debug.print_in_single_line(u"正在解析第{}/{}张页面".format(
            page_number, len(self.content_list)))
        self.parse_content(content)
    Debug.logger.info(u"网页内容解析完毕")
def start_worker(self):
    """Run ``self.worker`` over the sorted work set, then parse the results.

    ``work_set`` holds all the pages that need to be fetched. Its items
    are sorted and handed to ``Control.control_center`` together with
    ``self.worker``. Once that call returns, every entry of
    ``self.content_list`` is passed to ``self.parse_content`` while a
    single-line progress counter is printed through
    ``Debug.print_in_single_line``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    pages = sorted(self.work_set)
    argv = {
        'func': self.worker,
        # All data waiting to be stored in the database should be a list.
        'iterable': pages,
    }
    Control.control_center(argv, self.work_set)

    Debug.logger.info(u"所有内容抓取完毕,开始对页面进行解析")
    for position, content in enumerate(self.content_list, 1):
        # The total is re-read on every pass so the progress line stays
        # accurate even if parse_content changes the list's length.
        total = len(self.content_list)
        Debug.print_in_single_line(u"正在解析第{}/{}张页面".format(position, total))
        self.parse_content(content)
    Debug.logger.info(u"网页内容解析完毕")
def start_download(self):
    """Dispatch ``self.download`` over ``self.container``.

    Assembles the ``'func'`` / ``'iterable'`` task description and passes
    it, along with ``self.container``, to ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    task = {}
    task['func'] = self.download
    # All data waiting to be stored in the database should be a list.
    task['iterable'] = self.container
    Control.control_center(task, self.container)
def start_catch_info(self):
    """Dispatch ``self.catch_info`` over ``self.info_url_set``.

    Assembles the ``"func"`` / ``"iterable"`` task description and passes
    it, along with ``self.info_url_set``, to ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    task = {}
    task["func"] = self.catch_info
    task["iterable"] = self.info_url_set
    Control.control_center(task, self.info_url_set)
def start_create_work_list(self):
    """Reset the work set, then dispatch ``self.create_work_set`` over ``self.task_set``.

    ``self.clear_work_set()`` runs before the task description is built;
    the description and ``self.task_set`` are then passed to
    ``Control.control_center``.
    NOTE(review): control_center presumably applies ``func`` to every
    element of ``iterable`` -- confirm against the Control module.

    :return: None
    """
    self.clear_work_set()
    task = {}
    task["func"] = self.create_work_set
    task["iterable"] = self.task_set
    Control.control_center(task, self.task_set)