def worker(self, target_url):
    """Fetch one URL and pass the fetched content to ``parse_content``.

    ``target_url`` is removed from ``self.work_set`` only when the fetch
    returns a truthy result. On an empty/falsy result the method does
    nothing, so the URL stays in the set (presumably so it can be retried
    later -- confirm with the caller).

    Args:
        target_url: URL handed to ``HttpBaseClass.get_http_content``.

    Returns:
        None.
    """
    page = HttpBaseClass.get_http_content(
        target_url, timeout=SettingClass.WAITFOR_HTML)
    if page:
        # Successful fetch: mark the URL as done, then process the content.
        self.work_set.discard(target_url)
        self.parse_content(page)
def worker(self, target_url):
    """Download ``target_url`` and feed the result to ``self.parse_content``.

    Args:
        target_url: URL to request; also the entry discarded from
            ``self.work_set`` once the request has produced content.

    Returns:
        None. When the request produces no content, ``self.work_set`` is
        left unchanged and ``parse_content`` is not called.
    """
    wait = SettingClass.WAITFOR_HTML
    fetched = HttpBaseClass.get_http_content(target_url, timeout=wait)
    if not fetched:
        # Nothing came back: leave target_url in work_set as it was.
        return None

    self.work_set.discard(target_url)
    self.parse_content(fetched)
    return None
def catch_info(self, target_url):
    """Fetch ``target_url + '/top-answers'`` and record its extra info.

    On a truthy response, ``target_url`` is discarded from
    ``self.info_url_set`` and the value of
    ``TopicParser(content).get_extra_info()`` is appended to
    ``self.info_list``. On an empty/falsy response nothing is changed.

    Args:
        target_url: Base URL; ``'/top-answers'`` is appended for the request.

    Returns:
        None.
    """
    top_answers_url = target_url + '/top-answers'
    page = HttpBaseClass.get_http_content(
        top_answers_url, timeout=SettingClass.WAITFOR_HTML)
    if page:
        self.info_url_set.discard(target_url)
        self.info_list.append(TopicParser(page).get_extra_info())
def catch_info(self, target_url):
    """Collect the extra info parsed from a URL's ``/top-answers`` page.

    Args:
        target_url: Base URL. The request goes to this URL with
            ``'/top-answers'`` appended; the base URL itself is what gets
            discarded from ``self.info_url_set`` after a successful fetch.

    Returns:
        None. If the fetch produces no content, neither
        ``self.info_url_set`` nor ``self.info_list`` is touched.
    """
    wait = SettingClass.WAITFOR_HTML
    fetched = HttpBaseClass.get_http_content(
        target_url + '/top-answers', timeout=wait)
    if not fetched:
        return None

    self.info_url_set.discard(target_url)
    topic_parser = TopicParser(fetched)
    extra_info = topic_parser.get_extra_info()
    self.info_list.append(extra_info)
    return None
def download(self, image):
    """Download one image, write it under ``self.save_path``, then delete it.

    The content is fetched from ``image['href']`` with timeout
    ``SettingClass.WAITFOR_PIC`` and written in binary mode to
    ``self.save_path + '/' + image['filename']``. After the write,
    ``self.delete`` is called with the href. When the fetch returns an
    empty/falsy result, no file is written and ``self.delete`` is not called.

    Args:
        image: Object indexable by the keys ``'filename'`` and ``'href'``.

    Returns:
        None.
    """
    name, link = image['filename'], image['href']
    payload = HttpBaseClass.get_http_content(
        url=link, timeout=SettingClass.WAITFOR_PIC)
    if payload:
        destination = self.save_path + '/' + name
        # The handle gets its own name instead of rebinding ``image``.
        with open(destination, 'wb') as out_file:
            out_file.write(payload)
        self.delete(link)
def create_work_set(self, target_url):
    """Expand ``target_url`` into per-page URLs and add them to ``work_set``.

    The page at ``target_url`` is fetched and passed to
    ``self.parse_max_page``; one URL is then added to ``self.work_set`` for
    every page index in ``range(max_page)``. ``target_url`` is discarded
    from ``self.task_set`` once the fetch has returned content. On an
    empty/falsy fetch result nothing is changed.

    Args:
        target_url: URL to fetch and to use as the prefix of each page URL.

    Returns:
        None.
    """
    listing = HttpBaseClass.get_http_content(
        target_url, timeout=SettingClass.WAITFOR_HTML)
    if not listing:
        return

    self.task_set.discard(target_url)
    page_count = self.parse_max_page(listing)
    template = '{}?nr=1&sort=created&page={}'
    # NOTE(review): indices run 0 .. page_count - 1; confirm that the
    # site's paging is zero-based and that the last page is not missed.
    self.work_set.update(
        template.format(target_url, index) for index in range(page_count))
def create_work_set(self, target_url):
    """Queue every vote-ordered answers page of ``target_url``.

    Fetches ``target_url + '/answers?order_by=vote_num'``, asks
    ``self.parse_max_page`` for the page count, and adds one
    ``.../answers?order_by=vote_num&page=N`` URL to ``self.work_set`` for
    each ``N`` in ``range(max_page)``. ``target_url`` is discarded from
    ``self.task_set`` after a successful fetch; a falsy fetch result leaves
    both sets unchanged.

    Args:
        target_url: Base URL the answers path and page number are appended to.

    Returns:
        None.
    """
    first_page_url = target_url + '/answers?order_by=vote_num'
    listing = HttpBaseClass.get_http_content(
        first_page_url, timeout=SettingClass.WAITFOR_HTML)
    if listing:
        self.task_set.discard(target_url)
        page_template = '{}/answers?order_by=vote_num&page={}'
        for index in range(self.parse_max_page(listing)):
            self.work_set.add(page_template.format(target_url, index))
def create_work_set(self, target_url):
    """Turn one task URL into the set of page URLs to be worked on.

    Args:
        target_url: URL that is fetched, removed from ``self.task_set`` on
            success, and used as the prefix for the generated page URLs.

    Returns:
        None. When the fetch returns nothing, ``self.task_set`` and
        ``self.work_set`` are left as they were.
    """
    wait = SettingClass.WAITFOR_HTML
    fetched = HttpBaseClass.get_http_content(target_url, timeout=wait)
    if not fetched:
        return None

    self.task_set.discard(target_url)
    total_pages = self.parse_max_page(fetched)
    build_url = '{}?nr=1&sort=created&page={}'.format
    enqueue = self.work_set.add
    # One URL per page index, counting from 0 up to total_pages - 1.
    for number in range(total_pages):
        enqueue(build_url(target_url, number))
    return None
def create_work_set(self, target_url):
    """Populate ``self.work_set`` with the answers pages of ``target_url``.

    The request goes to ``target_url + '/answers?order_by=vote_num'``. The
    response is given to ``self.parse_max_page`` and a page URL is added to
    ``self.work_set`` for each index in ``range(max_page)``.

    Args:
        target_url: Base URL; it is discarded from ``self.task_set`` once
            the request has returned content.

    Returns:
        None. A falsy response causes an early return with no changes.
    """
    wait = SettingClass.WAITFOR_HTML
    fetched = HttpBaseClass.get_http_content(
        target_url + '/answers?order_by=vote_num', timeout=wait)
    if not fetched:
        return None

    self.task_set.discard(target_url)
    total_pages = self.parse_max_page(fetched)
    # NOTE(review): page indices start at 0 here; confirm this matches the
    # site's page numbering.
    self.work_set.update(
        '{}/answers?order_by=vote_num&page={}'.format(target_url, number)
        for number in range(total_pages))
    return None