Example #1
0
 def save_img(self, src, referer):
     """Download the image at ``src`` and append its bytes to a local file.

     The file name is built from fixed slices of the URL: the five
     characters that precede the last four, a dot, then the last three
     characters (a URL ending in ``abc12345.jpg`` yields ``12345.jpg``).
     The path is relative, so the file lands in the current working
     directory.

     Args:
         src: URL of the image to download.
         referer: Passed to ``request.build_headers`` as ``referer``.
     """
     headers = request.build_headers(referer=referer)
     img = request.get(src, headers, 10)
     name = src[-9:-4] + '.' + src[-3:]
     # The context manager closes the handle even if the write (or the
     # access to ``img.content``) raises, which the manual open/close did not.
     # NOTE(review): 'ab' appends to an existing file of the same name, so
     # downloading the same image twice concatenates the data -- confirm
     # whether 'wb' was intended.
     with open(name, 'ab') as out_file:
         out_file.write(img.content)
Example #2
0
 def inner_page(self, page_url, referer):
     """Walk every numbered sub-page of ``page_url`` and hand each to ``get_img``.

     The page count is read from the text of the second-to-last ``<span>``
     inside the ``<div class="pagenavi">`` element of the fetched page.

     Args:
         page_url: Base URL; sub-pages are ``page_url + '/' + <number>``.
         referer: Passed to ``request.build_headers`` for the initial fetch.

     Raises:
         AttributeError: If the ``pagenavi`` div is not found (``find``
             returns ``None``).
     """
     request_headers = request.build_headers(referer=referer)
     response = request.get(page_url, request_headers, 10)
     soup = BeautifulSoup(response.text, 'lxml')
     nav_spans = soup.find('div', class_='pagenavi').find_all('span')
     last_page = int(nav_spans[-2].get_text())
     # Pages are numbered from 1; each sub-page is fetched with the base
     # page as its referer.
     for offset in range(last_page):
         sub_page = page_url + '/' + str(offset + 1)
         self.get_img(sub_page, page_url)
Example #3
0
 def start(self, url):
     """Fetch ``url`` and dispatch ``inner_page`` over a pool of 10 threads.

     The ``<select name="sldd">`` element of the fetched page is located
     and ``inner_page`` is scheduled once per item produced by iterating
     that element, always with ``url`` as the referer.

     Args:
         url: Address of the index page; also used as the referer for
             every ``inner_page`` call.
     """
     headers = request.build_headers()
     all_html = request.get(url, headers, 10)
     options = BeautifulSoup(all_html.text, 'lxml').find('select', attrs={'name': 'sldd'})
     # NOTE(review): this guard is kept from the original; it means nothing
     # is dispatched when the module is imported rather than run as a script.
     if __name__ == '__main__':
         # The context manager waits for all submitted work and shuts the
         # pool down, even if scheduling raises.
         with ThreadPoolExecutor(10) as pool:
             # Bind ``url`` in the callable instead of passing it as a second
             # iterable: ``map`` zips its iterables, so a bare string was
             # consumed character by character and each call received a
             # single character as its referer.
             # NOTE(review): iterating ``options`` yields the child nodes of
             # the <select> element, not URL strings -- confirm what
             # ``inner_page`` should receive as ``page_url``.
             # The result iterator is not consumed, so exceptions raised in
             # workers are not re-raised here (unchanged from the original).
             pool.map(lambda page: self.inner_page(page, url), options)
Example #4
0
 def start(self, url):
     """Fetch the archive page at ``url`` and process every link in it.

     For each ``<a>`` inside the ``<ul class="archives">`` element, the
     link text (with ``?`` replaced by ``_``) is passed to ``self.mkdir``
     and the link target is passed to ``self.inner_page`` with ``url`` as
     the referer.  An ``AttributeError`` from ``inner_page`` is reported
     on stdout and the loop continues with the next link.

     Args:
         url: Address of the archive page.

     Raises:
         AttributeError: If the ``archives`` list is not found on the page.
     """
     headers = request.build_headers()
     all_html = request.get(url, headers, 10)
     # Name the parser explicitly, as the other methods do, so the result
     # does not depend on which parsers happen to be installed.
     a_list = BeautifulSoup(all_html.text, 'lxml').find(
         'ul', class_='archives').find_all("a")
     for a in a_list:
         title = a.get_text()
         # '?' is replaced before the title is used as a folder name.
         folder = str(title).replace('?', '_')
         self.mkdir(folder)
         page_url = a['href']
         try:
             self.inner_page(page_url, url)
         except AttributeError as err:
             # Report the caught exception itself; printing the
             # AttributeError class carried no detail about the failure.
             print("inner_page error:", err)
Example #5
0
 def get_img(self, page_url, referer):
     """Fetch ``page_url``, find its main image and pass it to ``save_img``.

     The image URL is the ``src`` attribute of the first ``<img>`` inside
     the ``<div class="main-image">`` element.

     Args:
         page_url: Address of the page that embeds the image.
         referer: Used for the page request and forwarded to ``save_img``.

     Raises:
         AttributeError: If the ``main-image`` div is not found (``find``
             returns ``None``).
     """
     request_headers = request.build_headers(referer=referer)
     response = request.get(page_url, request_headers, 10)
     soup = BeautifulSoup(response.text, 'lxml')
     image_tag = soup.find('div', class_='main-image').find('img')
     image_url = image_tag['src']
     self.save_img(image_url, referer)