Exemple #1
0
 def worker(self, target_url):
     """Fetch ``target_url`` and hand a non-empty response to ``parse_content``.

     When the fetch yields a falsy result nothing else happens, so the URL
     stays in ``self.work_set``. Otherwise the URL is discarded from
     ``self.work_set`` and the fetched content is passed to
     ``self.parse_content``.
     """
     page = HttpBaseClass.get_http_content(target_url, timeout=SettingClass.WAITFOR_HTML)
     if page:
         # Drop the URL from the pending set before parsing, as the original did.
         self.work_set.discard(target_url)
         self.parse_content(page)
Exemple #2
0
 def worker(self, target_url):
     """Download one URL and parse whatever comes back.

     A falsy download result ends the call early and leaves
     ``self.work_set`` unchanged. On success the URL is removed from
     ``self.work_set`` and the content is given to ``self.parse_content``.
     """
     fetch = HttpBaseClass.get_http_content
     html = fetch(target_url, timeout=SettingClass.WAITFOR_HTML)
     if html:
         self.work_set.discard(target_url)
         self.parse_content(html)
Exemple #3
0
 def catch_info(self, target_url):
     """Fetch the ``/top-answers`` page of ``target_url`` and record its extra info.

     If the fetch returns a falsy value the method does nothing further.
     Otherwise ``target_url`` is discarded from ``self.info_url_set``, the
     content is wrapped in a ``TopicParser`` and the result of its
     ``get_extra_info()`` is appended to ``self.info_list``.
     """
     page = HttpBaseClass.get_http_content(target_url + '/top-answers', timeout=SettingClass.WAITFOR_HTML)
     if page:
         self.info_url_set.discard(target_url)
         topic_parser = TopicParser(page)
         self.info_list.append(topic_parser.get_extra_info())
Exemple #4
0
 def catch_info(self, target_url):
     """Collect the extra info exposed by the ``/top-answers`` page of a URL.

     The page at ``target_url + '/top-answers'`` is requested. A falsy
     response stops processing and leaves ``self.info_url_set`` untouched.
     On success ``target_url`` is discarded from ``self.info_url_set`` and
     ``TopicParser(content).get_extra_info()`` is appended to
     ``self.info_list``.
     """
     top_answers_url = target_url + '/top-answers'
     html = HttpBaseClass.get_http_content(
         top_answers_url, timeout=SettingClass.WAITFOR_HTML)
     if html:
         self.info_url_set.discard(target_url)
         topic_parser = TopicParser(html)
         self.info_list.append(topic_parser.get_extra_info())
 def download(self, image):
     """Download one image and store it under ``self.save_path``.

     ``image`` is indexed with the keys ``'filename'`` and ``'href'``. The
     content at ``href`` is fetched; a falsy result ends the call without
     writing anything. Otherwise the content is written in binary mode to
     ``self.save_path + '/' + filename`` and ``self.delete(href)`` is
     called afterwards.
     """
     filename = image['filename']
     href = image['href']
     payload = HttpBaseClass.get_http_content(url=href, timeout=SettingClass.WAITFOR_PIC)
     if payload:
         destination = self.save_path + '/' + filename
         # Use a distinct handle name so the ``image`` argument is not rebound.
         with open(destination, 'wb') as out_file:
             out_file.write(payload)
         self.delete(href)
Exemple #6
0
 def create_work_set(self, target_url):
     """Expand ``target_url`` into one paged URL per page in ``self.work_set``.

     The page at ``target_url`` is fetched; a falsy result ends the call and
     leaves ``self.task_set`` unchanged. Otherwise ``target_url`` is
     discarded from ``self.task_set``, ``self.parse_max_page`` is applied to
     the content, and a URL is added to ``self.work_set`` for every page
     index in ``range(max_page)``.
     """
     first_page = HttpBaseClass.get_http_content(target_url, timeout=SettingClass.WAITFOR_HTML)
     if first_page:
         self.task_set.discard(target_url)
         page_count = self.parse_max_page(first_page)
         page_url_template = '{}?nr=1&sort=created&page={}'
         # NOTE(review): page indices start at 0 here; confirm the site
         # expects zero-based page numbers.
         for page_index in range(page_count):
             self.work_set.add(page_url_template.format(target_url, page_index))
Exemple #7
0
 def create_work_set(self, target_url):
     """Queue every vote-ordered answers page of ``target_url``.

     The vote-ordered answers page of ``target_url`` is fetched; a falsy
     result ends the call and leaves ``self.task_set`` unchanged. Otherwise
     ``target_url`` is discarded from ``self.task_set``,
     ``self.parse_max_page`` is applied to the content, and one URL per
     index in ``range(max_page)`` is added to ``self.work_set``.
     """
     answers_page = HttpBaseClass.get_http_content(target_url + '/answers?order_by=vote_num', timeout=SettingClass.WAITFOR_HTML)
     if answers_page:
         self.task_set.discard(target_url)
         page_count = self.parse_max_page(answers_page)
         page_url_template = '{}/answers?order_by=vote_num&page={}'
         # NOTE(review): page indices start at 0 here; confirm the site
         # expects zero-based page numbers.
         for page_index in range(page_count):
             self.work_set.add(page_url_template.format(target_url, page_index))
Exemple #8
0
 def create_work_set(self, target_url):
     """Turn one task URL into a set of per-page work URLs.

     Fetches ``target_url``. When the response is falsy the method returns
     without touching ``self.task_set`` or ``self.work_set``. Otherwise the
     URL is discarded from ``self.task_set``, the page count is obtained
     from ``self.parse_max_page``, and a paged URL is added to
     ``self.work_set`` for each index in ``range(max_page)``.
     """
     fetch = HttpBaseClass.get_http_content
     listing = fetch(target_url, timeout=SettingClass.WAITFOR_HTML)
     if listing:
         self.task_set.discard(target_url)
         total_pages = self.parse_max_page(listing)
         # NOTE(review): numbering begins at page 0; verify this matches
         # the pagination used by the remote site.
         for number in range(total_pages):
             paged_url = '{}?nr=1&sort=created&page={}'.format(
                 target_url, number)
             self.work_set.add(paged_url)
Exemple #9
0
 def create_work_set(self, target_url):
     """Turn one task URL into per-page, vote-ordered answers URLs.

     Fetches the vote-ordered answers page of ``target_url``. When the
     response is falsy the method returns without touching
     ``self.task_set`` or ``self.work_set``. Otherwise the URL is discarded
     from ``self.task_set``, the page count is obtained from
     ``self.parse_max_page``, and a paged URL is added to ``self.work_set``
     for each index in ``range(max_page)``.
     """
     fetch = HttpBaseClass.get_http_content
     listing = fetch(
         target_url + '/answers?order_by=vote_num',
         timeout=SettingClass.WAITFOR_HTML)
     if listing:
         self.task_set.discard(target_url)
         total_pages = self.parse_max_page(listing)
         # NOTE(review): numbering begins at page 0; verify this matches
         # the pagination used by the remote site.
         for number in range(total_pages):
             paged_url = '{}/answers?order_by=vote_num&page={}'.format(
                 target_url, number)
             self.work_set.add(paged_url)