def download(url):
    """Download a slide-style document by screenshotting every page into a PDF.

    The page at *url* is opened in Chrome, the remaining-preview button is
    clicked, and the preview iframe is loaded directly. Each slide is then
    captured as a cropped screenshot under ``./temp/<title>/`` and
    ``conpdf`` assembles ``output/<title>.pdf`` from those images.

    Args:
        url: Address of the document page to download.

    Raises:
        Exception: Any Selenium, Pillow or OS error raised after the initial
            page load propagates to the caller; the browser is always closed
            first.
    """
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(
        executable_path='.//chromedriver', chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far and
            # fall back to the default title.
            print("Timeout - start download anyway.")
        print(f'原创力: 《{title}》')
        time.sleep(5)

        driver.find_element_by_id('btn_preview_remain').click()
        time.sleep(2)
        frame = driver.find_elements_by_class_name('preview-iframe')[0]
        src = frame.get_attribute('src')
        print(src)
        # Navigate to the iframe's own document so its elements can be
        # located without switching frames.
        driver.get(src)
        time.sleep(5)

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)
        capture_path = f'{temp_dir}/capture.png'

        pageCount = int(driver.find_element_by_id(
            'PageCount').get_attribute('innerHTML'))
        for i in trange(pageCount):
            driver.save_screenshot(capture_path)
            page = driver.find_element_by_id('ppt')
            left = page.location['x']
            top = page.location['y']
            right = left + page.size['width']
            # NOTE(review): the 35px trimmed from the bottom presumably hides
            # a viewer toolbar -- confirm against the live page.
            bottom = top + page.size['height'] - 35
            # Crop the full-window screenshot down to the slide element and
            # release the file handle before the next screenshot overwrites it.
            with Image.open(capture_path) as im:
                im.crop((left, top, right, bottom)).save(f'{temp_dir}/{i}.png')
            driver.find_element_by_id('pageNext').click()
            time.sleep(1)  # give the next slide time to render
        os.remove(capture_path)
    finally:
        # Close Chrome even when scraping fails part-way through.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
def getPDF(self):
    """Resolve every page image address, download the images and build the PDF.

    Prints the title, loads the document info, walks the page chain until
    ``self.index`` equals ``self.total``, downloads the images and finally
    calls ``conpdf`` to write ``output/<title>.pdf`` from the ``.jpg`` files
    in ``temp/<title>/``.
    """
    print(self.title)
    # Fetch the information needed for the download.
    self.__getPdfInfo()

    # Collect the addresses of all images, starting from the 'Img' entry
    # of the document info (or an empty string when it is absent).
    first_img = self.pdfInfo.get('Img')
    if first_img is None:
        first_img = ""
    print('解析地址')
    while self.index != self.total:
        if self.imgList:
            cursor = self.imgList[-1]
        else:
            cursor = first_img
        self.__getNextPage(cursor)
    self.pbar.close()

    # Download the images.
    self.__getIMG()

    # Build the PDF.
    print('下载完毕,正在转码')
    conpdf(f'output/{self.title}.pdf', f'temp/{self.title}/', '.jpg')
def download(url):
    """Download a 豆丁 document as a PDF built from its per-page JPEG images.

    The document page is fetched with ``requests``; the page count is read
    from the ``allPage:`` field embedded in the page source and the document
    id is taken from the last ``-``-separated piece of the URL's file name.
    Every page image is saved as ``./temp/<title>/<n>.jpg`` (1-based) and
    ``conpdf`` then writes ``output/<title>.pdf``.

    Args:
        url: Address of the document page.

    Raises:
        ValueError: If the page source contains no ``allPage:`` count.
    """
    import re  # local import: only needed for the page-count lookup

    text = requests.get(url).text
    # Read all digits of the count. The previous fixed 4-character slice
    # silently truncated documents with 10000 pages or more.
    match = re.search(r'allPage:\s*(\d+)', text)
    if match is None:
        raise ValueError('allPage count not found in page source')
    pages = int(match.group(1))

    doc_id = url.split('.')[-2].split('-')[-1]
    html = BeautifulSoup(text, features='lxml')
    # '/' would otherwise be interpreted as a directory separator.
    title = html.title.string.replace('/', '.')
    print(f'豆丁:《{title}》')

    temp_dir = f'./temp/{title}'
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)

    for i in trange(pages):
        page_url = (
            f"http://211.147.220.164/index.jsp?file={doc_id}"
            f"&width=1600&pageno={i + 1}"
        )
        res = requests.get(page_url)
        with open(f'{temp_dir}/{i+1}.jpg', 'wb') as f:
            f.write(res.content)

    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'./temp/{title}', '.jpg', True)
def download(url):
    """Download a slide-style document by screenshotting every page into a PDF.

    The page at *url* is opened in Chrome, the ``agree_full`` button is
    clicked when present, and the ``layer_view_iframe`` document is loaded
    directly. Each slide is captured as a cropped screenshot under
    ``./temp/<title>/`` and ``conpdf`` assembles ``output/<title>.pdf``.

    Args:
        url: Address of the document page to download.

    Raises:
        Exception: Any Selenium, Pillow or OS error raised after the initial
            page load propagates to the caller; the browser is always closed
            first.
    """
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(executable_path='.//chromedriver',
                              chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far and
            # fall back to the default title.
            print("Timeout - start download anyway.")
        print(f'原创力: 《{title}》')
        time.sleep(5)

        try:
            # Expand the full document.
            elem_cont_button = driver.find_element_by_id("agree_full")
            driver.execute_script("arguments[0].scrollIntoView(true);",
                                  elem_cont_button)
            actions = ActionChains(driver)
            actions.move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            # No expand button on this page; nothing to click.
            pass

        frame = driver.find_element_by_id('layer_view_iframe')
        src = frame.get_attribute('src')
        print(src)
        # Navigate to the iframe's own document so its elements can be
        # located without switching frames.
        driver.get(src)
        time.sleep(5)

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)
        capture_path = f'{temp_dir}/capture.png'

        pageCount = int(
            driver.find_element_by_id('PageCount').get_attribute('innerHTML'))
        for i in trange(pageCount):
            driver.save_screenshot(capture_path)
            page = driver.find_element_by_id('ppt')
            left = page.location['x']
            top = page.location['y']
            right = left + page.size['width']
            # NOTE(review): the 35px trimmed from the bottom presumably hides
            # a viewer toolbar -- confirm against the live page.
            bottom = top + page.size['height'] - 35
            # Crop the full-window screenshot down to the slide element and
            # release the file handle before the next screenshot overwrites it.
            with Image.open(capture_path) as im:
                im.crop((left, top, right, bottom)).save(f'{temp_dir}/{i}.png')
            driver.find_element_by_id('pageNext').click()
            time.sleep(1)  # give the next slide time to render
        os.remove(capture_path)
    finally:
        # Close Chrome even when scraping fails part-way through.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
def download(url, callback):
    """Download a document's page images with headless Chrome and build a PDF.

    Titles containing ``'ppt'`` are delegated to ``book118_PPT.download``.
    Otherwise the preview iframe is opened, the "read more" button is clicked
    until it disappears, and each page image is saved as
    ``./temp/<title>/<n>.png`` before ``conpdf`` writes
    ``output/<title>.pdf``.

    Args:
        url: Address of the document page.
        callback: Progress hook called as ``callback(done, total, message)``.

    Returns:
        ``(True, title)`` on success or when the PDF already exists;
        ``(False, "下载失败,超时")`` when the page fails to load;
        ``(False, exception)`` when a page image cannot be downloaded; or
        whatever ``book118_PPT.download`` returns for delegated documents.

    Raises:
        Exception: Errors raised while preparing the viewer (before the
            per-page loop) propagate; the browser is closed first.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(options=option)
    try:
        driver.set_page_load_timeout(15)
        driver.get(url)
        title = driver.title
    except Exception:
        driver.quit()  # do not leave Chrome running after a failed load
        return False, "下载失败,超时"
    print(title)
    if os.path.exists(f'./output/{title}.pdf'):
        driver.quit()  # nothing to do; release the browser right away
        return True, title
    if 'ppt' in title:
        import book118_PPT
        # Close this browser before the slide downloader opens its own.
        driver.quit()
        return book118_PPT.download(url, callback)

    try:
        time.sleep(2)
        driver.find_element_by_id("agree_full").click()
        time.sleep(1)
        driver.get(driver.find_element_by_id(
            "layer_new_view_iframe").get_attribute("src"))
        time.sleep(3)

        while True:
            try:
                # Expand the full document.
                elem_cont_button = driver.find_element_by_id("btn_read")
                driver.execute_script(
                    "arguments[0].scrollIntoView(true);", elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                time.sleep(2)
                elem_cont_button.click()
            except NoSuchElementException:
                # The button is gone once every page has been revealed.
                break
            except Exception:
                # NOTE(review): any other failure is retried indefinitely.
                continue

        # Read the page count (last space-separated token of the counter).
        num_of_pages = driver.find_element_by_class_name(
            'page-counts').get_attribute('innerHTML')
        num_of_pages = int(num_of_pages.split(' ')[-1])

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)

        elems = driver.find_elements_by_class_name("webpreview-item")
        for pages in trange(num_of_pages):
            try:
                callback(pages, num_of_pages, "正在下载:%s" % title)
                elem = elems[pages]
                time.sleep(0.5)
                # Move to the page so its image gets loaded.
                actions = ActionChains(driver)
                actions.move_to_element(elem).perform()
                img = elem.find_element_by_tag_name('img')
                img_url = img.get_attribute('src')
                if img_url is None or 'http' not in img_url:
                    # Fall back to the scheme-less lazy-load address.
                    img_url = "http:" + img.get_attribute('data-src')
                res = requests.get(img_url)
                with open(f"{temp_dir}/{pages}.png", "wb") as fh:
                    fh.write(res.content)
            except Exception as e:
                print("下载失败!\n%r" % e)
                return False, e
    finally:
        # Runs on success, on the failure return above and on any
        # propagated error, so Chrome is never left behind.
        driver.quit()

    callback(99, 100, "正在转码")
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
    return True, title
def download(url):
    """Download a canvas-rendered document page by page and build a PDF.

    The page at *url* is opened in Chrome, the "continue" button is clicked
    when present and the viewer is zoomed in five times. Each ``page_<n>``
    canvas is then exported through ``toDataURL()`` into
    ``./temp/<title>/<n-1>.png`` and ``conpdf`` writes
    ``output/<title>.pdf``.

    Args:
        url: Address of the document page.

    Raises:
        Exception: Any Selenium or OS error raised after the initial page
            load propagates to the caller; the browser is always closed
            first.
    """
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver',
                              chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far.
            print("Timeout - start download anyway.")
        print('道客巴巴: <' + str(title) + '>')
        time.sleep(5)

        try:
            # Expand the full document.
            elem_cont_button = driver.find_element_by_id("continueButton")
            driver.execute_script("arguments[0].scrollIntoView(true);",
                                  elem_cont_button)
            actions = ActionChains(driver)
            actions.move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            # No continue button on this page; nothing to click.
            pass

        # Read the page count (last space-separated token of the label).
        num_of_pages = driver.find_element_by_id(
            'readshop'
        ).find_element_by_class_name('mainpart').find_element_by_class_name(
            'shop3').find_element_by_class_name('text').get_attribute(
                'innerHTML')
        num_of_pages = int(num_of_pages.split(' ')[-1])

        for _ in range(5):
            # Zoom in before the canvases are exported.
            driver.find_element_by_id('zoomInButton').click()
            time.sleep(0.5)

        tempdir = './temp/' + str(title)
        if os.path.exists(tempdir):
            shutil.rmtree(tempdir)
        os.makedirs(tempdir)

        for pages in trange(num_of_pages):
            time.sleep(0.5)
            canvas_id = "page_" + str(pages + 1)
            pagepb_id = "pagepb_" + str(pages + 1)

            element = driver.find_element_by_id(canvas_id)
            driver.execute_script("arguments[0].scrollIntoView(true);",
                                  element)
            actions = ActionChains(driver)
            actions.move_to_element(element).perform()
            time.sleep(0.5)

            # Wait until the page's progress element is empty, i.e. loading
            # has finished.
            # NOTE(review): there is no upper bound on this wait.
            while (len(
                    driver.find_element_by_id(pagepb_id).get_attribute(
                        'innerHTML')) != 0):
                time.sleep(1)

            js_cmd = "var canvas = document.getElementById('{}');".format(canvas_id) + \
                "return canvas.toDataURL();"
            img_data = driver.execute_script(js_cmd)
            # Keep only the base64 payload that follows the
            # "data:image/png;base64," prefix.
            img_data = img_data.split(',', 1)[1].encode()

            tempPage = tempdir + '/' + str(pages) + '.png'
            with open(tempPage, "wb") as fh:
                fh.write(base64.decodebytes(img_data))
    finally:
        # Close Chrome even when scraping fails part-way through.
        driver.quit()
    print('下载完毕,正在转码')
    outputfile = 'output/' + str(title) + '.pdf'
    conpdf(outputfile, tempdir, '.png')
def download(url):
    """Download a document whose pages are served as images and build a PDF.

    The page at *url* is opened in Chrome and the "down-arrow" control is
    clicked until that fails. Each ``outer_page_<n>`` element's image is
    then fetched into ``./temp/<title>/<n-1>.gif`` and ``conpdf`` writes
    ``output/<title>.pdf``. If any page image fails, the error is printed
    and the function returns without producing a PDF.

    Args:
        url: Address of the document page.

    Raises:
        Exception: Errors raised while reading the page count or preparing
            the temp directory propagate; the browser is closed first.
    """
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(executable_path='.//chromedriver',
                              chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far.
            print("Timeout - start download anyway.")
        print(title)
        time.sleep(5)

        while True:
            try:
                # Expand the full document.
                elem_cont_button = driver.find_element_by_class_name(
                    "banner-download")
                driver.execute_script("arguments[0].scrollIntoView(true);",
                                      elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                time.sleep(0.5)
                driver.find_element_by_class_name("down-arrow").click()
            except Exception:
                # Any failure (missing element, not clickable, ...) is taken
                # to mean there is nothing left to expand.
                break

        # Read the page count (text after the last ';' of the last label).
        num_of_pages = driver.find_element_by_id(
            'readshop').find_element_by_class_name(
                'mainpart').find_element_by_class_name(
                    'shop3').find_elements_by_class_name('text')[-1].get_attribute(
                        'innerHTML')
        num_of_pages = int(num_of_pages.split(';')[-1])

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)

        for pages in trange(num_of_pages):
            try:
                time.sleep(0.5)
                elem = driver.find_element_by_id(f'outer_page_{pages+1}')
                # Move to the page so its image gets loaded.
                actions = ActionChains(driver)
                actions.move_to_element(elem).perform()
                img = elem.find_element_by_tag_name('img')
                img_url = img.get_attribute('src')
                res = requests.get(img_url)
                with open(f"{temp_dir}/{pages}.gif", "wb") as fh:
                    fh.write(res.content)
            except Exception as e:
                print("下载失败!\n%r" % e)
                return
    finally:
        # Runs on success, on the early return above and on any propagated
        # error, so Chrome is never left behind.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.gif')
def download(url, callback):
    """Download a 淘豆网 document's page images with headless Chrome into a PDF.

    The page is scrolled and its "more" button clicked until it disappears,
    then the image of every ``page<n>`` element is saved as
    ``./temp/<title>/<n-1>.jpg`` and ``conpdf`` writes
    ``output/<title>.pdf``. A failing page stops the loop, and the pages
    fetched so far are still converted.

    Args:
        url: Address of the document page.
        callback: Progress hook called as ``callback(done, total, message)``.

    Returns:
        ``(True, title)`` on success or when the PDF already exists;
        ``(False, "下载失败,超时")`` when the page fails to load;
        ``(False, exception)`` when reading the page count, preparing the
        temp directory or converting to PDF fails.

    Raises:
        Exception: Unexpected errors while expanding the document propagate;
            the browser is closed first.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(chrome_options=option)
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            # NOTE(review): drops the last 8 characters of the page title,
            # presumably a fixed site suffix -- confirm.
            title = driver.title[:-8]
        except Exception:
            return False, "下载失败,超时"
        print(f'淘豆网: 《{title}》')
        if os.path.exists(f'./output/{title}.pdf'):
            return True, title
        time.sleep(5)

        while True:
            try:
                driver.execute_script("window.scrollBy(0,10000)")
                time.sleep(1)
                # Expand the full document.
                elem_cont_button = driver.find_element_by_class_name(
                    "banner-more-btn")
                elem_cont_button = elem_cont_button.find_element_by_tag_name(
                    'span')
                driver.execute_script(
                    "arguments[0].scrollIntoView(true);", elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                time.sleep(0.5)
                driver.execute_script("arguments[0].click();",
                                      elem_cont_button)
            except (NoSuchElementException, StaleElementReferenceException):
                # The button is gone (or was replaced): fully expanded.
                break
            except JavascriptException:
                continue

        try:
            # Read the page count.
            num_of_pages = driver.find_element_by_id(
                'docPage').get_attribute('innerHTML')
            num_of_pages = int(num_of_pages)

            temp_dir = f'./temp/{title}'
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
            os.makedirs(temp_dir)

            for pages in trange(num_of_pages):
                callback(pages, num_of_pages, "正在下载:%s" % title)
                try:
                    time.sleep(0.5)
                    element = driver.find_element_by_id(f"page{pages + 1}")
                    driver.execute_script(
                        "arguments[0].scrollIntoView(true);", element)
                    actions = ActionChains(driver)
                    actions.move_to_element(element).perform()
                    time.sleep(0.5)
                    imgElement = element.find_element_by_tag_name('img')
                    imgUrl = imgElement.get_attribute('src')
                    # Close the HTTP response as soon as the bytes are read.
                    with urllib.request.urlopen(imgUrl) as response:
                        html = response.read()
                    with open(f'{temp_dir}/{pages}.jpg', 'wb') as f:
                        f.write(html)
                except Exception as e:
                    # Stop here but still convert what was downloaded.
                    print('下载中断,信息:\n%r' % e)
                    break
        except Exception as e:
            return False, e
    finally:
        # Runs on every return path above and on propagated errors, so
        # Chrome is never left behind.
        driver.quit()

    try:
        print('下载完毕,正在转码')
        callback(99, 100, "正在转码")
        conpdf(f'output/{title}.pdf', f'temp/{title}', '.jpg')
    except Exception as e:
        return False, e
    return True, title
def download(url, callback):
    """Download a 新浪爱问 document with headless Chrome and build a PDF.

    The "continue reading" link is clicked until its text reports the end.
    If the text stops changing between two rounds, the document is treated
    as paid and the download is abandoned. Each page is then fetched either
    as an SVG (converted to PNG with the external ``rsvg`` tool) or as a
    plain image saved as JPEG, and ``conpdf`` writes ``output/<title>.pdf``.

    Args:
        url: Address of the document page.
        callback: Progress hook called as ``callback(done, total, message)``.

    Returns:
        ``(True, title)`` on success or when the PDF already exists;
        ``(False, "下载失败,超时")`` when the page fails to load;
        ``(False, '收费文档!无法下载')`` for paid documents;
        ``(False, exception)`` when downloading or converting fails.

    Raises:
        Exception: Unexpected errors while expanding the document propagate;
            the browser is closed first.
    """
    import subprocess  # local import: only needed for the SVG conversion

    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(chrome_options=option)
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            return False, "下载失败,超时"
        print(f'新浪爱问: 《{title}》')
        if os.path.exists(f'./output/{title}.pdf'):
            return True, title
        time.sleep(5)

        lastPageNum = None
        while True:
            try:
                driver.execute_script("window.scrollBy(0,10000)")
                time.sleep(1)
                # Expand the full document.
                elem_cont_button = driver.find_element_by_class_name(
                    "state-bottom")
                elem_cont_button = elem_cont_button.find_element_by_tag_name(
                    'a')
                text = elem_cont_button.find_element_by_tag_name(
                    'p').get_attribute('innerHTML')
                if text == lastPageNum:
                    # Clicking did not reveal more pages.
                    print('收费文档!无法下载')
                    # Return the same 2-tuple shape as every other path so
                    # callers that unpack the result do not crash.
                    return False, '收费文档!无法下载'
                lastPageNum = text
                if '结束' in text or '>0</em>' in text:
                    break
                driver.execute_script(
                    "arguments[0].scrollIntoView(true);", elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                time.sleep(0.5)
                driver.execute_script("arguments[0].click();",
                                      elem_cont_button)
            except JavascriptException:
                continue

        try:
            # Read the page count.
            num_of_pages = driver.find_element_by_class_name(
                'page-input-con').find_element_by_tag_name(
                    'span').get_attribute('innerHTML')
            num_of_pages = int(num_of_pages)
            driver.execute_script("window.scrollBy(0,10000)")

            temp_dir = f'./temp/{title}'
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
            os.makedirs(temp_dir)

            # Resolve the converter once; fall back to the bare name so a
            # missing tool surfaces as a clear FileNotFoundError.
            rsvg = shutil.which('rsvg') or 'rsvg'

            imgs = driver.find_elements_by_class_name('data-detail')
            for pages in trange(num_of_pages):
                callback(pages, num_of_pages, "正在下载:%s" % title)
                try:
                    # SVG page.
                    imgUrl = imgs[pages].find_element_by_tag_name(
                        'embed').get_attribute('src')
                    html = requests.get(imgUrl).content
                    svg_path = f'./temp/{title}/{pages}.svg'
                    png_path = f'./temp/{title}/{pages}.png'
                    with open(svg_path, 'wb') as svgFile:
                        svgFile.write(html)
                    print(f'rsvg "{svg_path}" "{png_path}" -w 1500 -b white -f png')
                    # The title comes from the remote page, so pass the
                    # paths as an argument list instead of a shell string.
                    subprocess.run([rsvg, svg_path, png_path,
                                    '-w', '1500', '-b', 'white', '-f', 'png'])
                    os.remove(svg_path)
                    extension = '.png'
                except NoSuchElementException:
                    # Plain image page.
                    # NOTE(review): retries without limit until it succeeds.
                    while True:
                        try:
                            imgUrl = imgs[pages].find_element_by_tag_name(
                                'img').get_attribute('src')
                            html = requests.get(imgUrl).content
                            with open(f'./temp/{title}/{pages}.jpg',
                                      'wb') as f:
                                f.write(html)
                            break
                        except Exception:
                            # Bring the page into view so its image gets
                            # loaded, then try again.
                            actions = ActionChains(driver)
                            actions.move_to_element(imgs[pages]).perform()
                            time.sleep(1)
                            driver.execute_script("window.scrollBy(0,10000)")
                    extension = '.jpg'
        except Exception as e:
            return False, e
    finally:
        # Runs on every return path above and on propagated errors, so
        # Chrome is never left behind.
        driver.quit()

    try:
        print('下载完毕,正在转码')
        callback(99, 100, "正在转码")
        # `extension` is whatever the last page produced; with zero pages it
        # is unset and the resulting error is reported as (False, exception).
        conpdf(f'output/{title}.pdf', f'temp/{title}', extension, True)
    except Exception as e:
        return False, e
    return True, title
def download(url):
    """Download a document's page images with Chrome and build a PDF.

    Titles containing ``'ppt'`` are delegated to ``book118_PPT.download``.
    Otherwise the viewer is opened, the "preview remaining" button is
    clicked until it disappears, and each page image is saved as
    ``./temp/<title>/<n>.png`` before ``conpdf`` writes
    ``output/<title>.pdf``. If a page image fails, the error is printed and
    the function returns without producing a PDF.

    Args:
        url: Address of the document page.

    Raises:
        Exception: Errors raised while reading the page count or preparing
            the temp directory propagate; the browser is closed first.
    """
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(
        executable_path='.//chromedriver', options=option)
    title = "output"
    try:
        driver.set_page_load_timeout(15)
        driver.get(url)
        title = driver.title
    except Exception:
        # Best effort: keep going with whatever has loaded so far.
        print("Timeout - start download anyway.")
    print(title)
    if 'ppt' in title:
        import book118_PPT
        # Close this browser before the slide downloader opens its own.
        driver.quit()
        book118_PPT.download(url)
        return

    try:
        time.sleep(2)
        try:
            driver.find_element_by_id("agree_full").click()
        except Exception:
            # Fall back to the first "big" element; ignore it if that is
            # missing or not clickable either.
            try:
                driver.find_elements_by_class_name('big')[0].click()
            except Exception:
                pass
        finally:
            time.sleep(1)

        while True:
            try:
                # Expand the full document.
                elem_cont_button = driver.find_element_by_id(
                    "btn_preview_remain")
                driver.execute_script(
                    "arguments[0].scrollIntoView(true);", elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                # NOTE(review): the small upward scroll presumably uncovers
                # the button from an overlay -- confirm.
                driver.execute_script("window.scrollBy(0, -100)")
                time.sleep(2)
                elem_cont_button.click()
            except NoSuchElementException:
                # The button is gone once every page has been revealed.
                break
            except Exception:
                # Report and retry.
                # NOTE(review): a persistent failure retries indefinitely.
                import traceback
                traceback.print_exc()
            finally:
                time.sleep(1)

        # Read the page count (last space-separated token of the counter).
        num_of_pages = driver.find_element_by_class_name(
            'counts').get_attribute('innerHTML')
        num_of_pages = int(num_of_pages.split(' ')[-1])

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)

        elems = driver.find_elements_by_class_name("webpreview-item")
        for pages in trange(num_of_pages):
            try:
                elem = elems[pages]
                time.sleep(0.5)
                # Move to the page so its image gets loaded.
                actions = ActionChains(driver)
                actions.move_to_element(elem).perform()
                img = elem.find_element_by_tag_name('img')

                # Wait up to ten seconds for either address attribute.
                count = 0
                while (count < 10
                       and img.get_attribute('data-src') is None
                       and img.get_attribute('src') is None):
                    count += 1
                    time.sleep(1)

                img_url = img.get_attribute('src')
                if img_url is None or 'http' not in img_url:
                    # Fall back to the scheme-less lazy-load address.
                    img_url = "http:" + img.get_attribute('data-src')
                res = requests.get(img_url)
                with open(f"{temp_dir}/{pages}.png", "wb") as fh:
                    fh.write(res.content)
            except Exception as e:
                print("下载失败!\n%r" % e)
                return
    finally:
        # Runs on success, on the early return above and on any propagated
        # error, so Chrome is never left behind.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
def download(url):
    """Download a 新浪爱问 document with headless Chrome and build a PDF.

    The "continue reading" link is clicked until its text reports the end.
    If the text stops changing between two rounds, the document is treated
    as paid and the function returns without downloading. Each page is
    fetched either as an SVG (rendered with the external ``svg2png`` tool
    and flattened onto a white background) or as a plain image, always
    ending up as ``./temp/<title>/<n>.jpg``. ``conpdf`` then writes
    ``output/<title>.pdf``.

    Args:
        url: Address of the document page.

    Raises:
        Exception: Any Selenium, Pillow, subprocess or OS error raised after
            the initial page load propagates; the browser is closed first.
    """
    import subprocess  # local import: only needed for the SVG conversion

    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(executable_path='.//chromedriver',
                              chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far.
            print("Timeout - start download anyway.")
        print(f'新浪爱问: 《{title}》')
        time.sleep(5)

        lastPageNum = None
        while True:
            try:
                driver.execute_script("window.scrollBy(0,10000)")
                time.sleep(1)
                # Expand the full document.
                elem_cont_button = driver.find_element_by_class_name(
                    "state-bottom")
                elem_cont_button = elem_cont_button.find_element_by_tag_name(
                    'a')
                text = elem_cont_button.find_element_by_tag_name(
                    'p').get_attribute('innerHTML')
                if text == lastPageNum:
                    # Clicking did not reveal more pages.
                    print('收费文档!无法下载')
                    return
                lastPageNum = text
                if '结束' in text or '>0</em>' in text:
                    break
                driver.execute_script("arguments[0].scrollIntoView(true);",
                                      elem_cont_button)
                actions = ActionChains(driver)
                actions.move_to_element(elem_cont_button).perform()
                time.sleep(0.5)
                driver.execute_script("arguments[0].click();",
                                      elem_cont_button)
            except JavascriptException:
                continue

        # Read the page count.
        num_of_pages = driver.find_element_by_class_name(
            'page-input-con').find_element_by_tag_name('span').get_attribute(
                'innerHTML')
        num_of_pages = int(num_of_pages)
        driver.execute_script("window.scrollBy(0,10000)")

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)

        # Scratch files reused for every SVG page.
        svg_path = f'./temp/{title}/{title}.svg'
        png_path = f'./temp/{title}/{title}.png'
        # Resolve the converter once; fall back to the bare name so a
        # missing tool surfaces as a clear FileNotFoundError.
        svg2png = shutil.which('svg2png') or 'svg2png'

        imgs = driver.find_elements_by_class_name('data-detail')
        for pages in trange(num_of_pages):
            try:
                # SVG page.
                imgUrl = imgs[pages].find_element_by_tag_name(
                    'embed').get_attribute('src')
                html = requests.get(imgUrl).content
                with open(svg_path, 'wb') as svgFile:
                    svgFile.write(html)
                # The title comes from the remote page, so pass the paths
                # as an argument list instead of a shell string.
                subprocess.run(
                    [svg2png, svg_path, '-o', png_path, '-w', '1500'])
                # Flatten the transparent PNG onto white, using the image
                # itself as the paste mask, and close the file before it is
                # removed.
                with Image.open(png_path) as im:
                    bg = Image.new('RGB', im.size, (255, 255, 255))
                    bg.paste(im, im)
                    bg.save(f'./temp/{title}/{pages}.jpg')
                os.remove(png_path)
                os.remove(svg_path)
            except NoSuchElementException:
                # Plain image page.
                # NOTE(review): retries without limit until it succeeds.
                while True:
                    try:
                        imgUrl = imgs[pages].find_element_by_tag_name(
                            'img').get_attribute('src')
                        html = requests.get(imgUrl).content
                        with open(f'./temp/{title}/{pages}.jpg', 'wb') as f:
                            f.write(html)
                        break
                    except Exception:
                        # Bring the page into view so its image gets
                        # loaded, then try again.
                        actions = ActionChains(driver)
                        actions.move_to_element(imgs[pages]).perform()
                        time.sleep(1)
                        driver.execute_script("window.scrollBy(0,10000)")
    finally:
        # Runs on success, on the paid-document return and on any
        # propagated error, so Chrome is never left behind.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.jpg', True)
def download(url):
    """Download a canvas-rendered 道客巴巴 document with headless Chrome.

    The page at *url* is opened in a 1920x1080 headless window, the
    "continue" button is clicked when present and the viewer is zoomed in
    five times. Each ``page_<n>`` canvas is exported through
    ``toDataURL()`` into ``./temp/<title>/<n-1>.png`` and ``conpdf`` writes
    ``output/<title>.pdf``.

    Args:
        url: Address of the document page.

    Raises:
        Exception: Any Selenium or OS error raised after the initial page
            load propagates to the caller; the browser is always closed
            first.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    option.add_argument("--window-size=1920,1080")
    driver = webdriver.Chrome(executable_path='.//chromedriver',
                              chrome_options=option)
    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: keep going with whatever has loaded so far.
            print("Timeout - start download anyway.")
        print(f'道客巴巴: 《{title}》')
        time.sleep(5)

        try:
            # Expand the full document.
            elem_cont_button = driver.find_element_by_id("continueButton")
            driver.execute_script("arguments[0].scrollIntoView(true);",
                                  elem_cont_button)
            actions = ActionChains(driver)
            actions.move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            # No continue button on this page; nothing to click.
            pass

        # Read the page count (last space-separated token of the label).
        num_of_pages = driver.find_element_by_id('toolbar').find_element_by_id(
            'item-page-panel').find_element_by_class_name('text').text
        num_of_pages = int(num_of_pages.split(' ')[-1])

        for _ in range(5):
            # Zoom in before the canvases are exported.
            driver.find_element_by_id('zoomInButton').click()
            time.sleep(0.5)

        temp_dir = f'./temp/{title}'
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        os.makedirs(temp_dir)

        for pages in trange(num_of_pages):
            time.sleep(0.5)
            # The outer element is scrolled into view; the canvas inside it
            # is what gets exported.
            container_id = "outer_page_" + str(pages + 1)
            canvas_id = "page_" + str(pages + 1)
            try:
                element = driver.find_element_by_id(container_id)
            except Exception:
                # Give the page one more second to appear, then let a
                # second failure propagate.
                time.sleep(1)
                element = driver.find_element_by_id(container_id)
            driver.execute_script("arguments[0].scrollIntoView(true);",
                                  element)
            actions = ActionChains(driver)
            actions.move_to_element(element).perform()
            time.sleep(0.5)

            # Export the canvas contents through JavaScript.
            js_cmd = "var canvas = document.getElementById('{}');".format(canvas_id) + \
                "return canvas.toDataURL();"
            img_data = driver.execute_script(js_cmd)
            # Keep only the base64 payload that follows the
            # "data:image/png;base64," prefix.
            img_data = img_data.split(',', 1)[1].encode()
            with open(f"{temp_dir}/{pages}.png", "wb") as fh:
                fh.write(base64.decodebytes(img_data))
    finally:
        # Close Chrome even when scraping fails part-way through.
        driver.quit()
    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
def download(url, callback):
    """Download a canvas-rendered 道客巴巴 document with headless Chrome.

    The "continue" button is clicked when present and the viewer is zoomed
    in five times. Each ``page_<n>`` canvas is exported through
    ``toDataURL()`` into ``./temp/<title>/<n-1>.png`` once its
    ``pagepb_<n>`` progress element is empty, and ``conpdf`` writes
    ``output/<title>.pdf``.

    Args:
        url: Address of the document page.
        callback: Progress hook called as ``callback(done, total, message)``.

    Returns:
        ``(True, title)`` on success or when the PDF already exists;
        ``(False, "下载失败")`` when the page fails to load;
        ``(False, exception)`` when downloading or converting fails.

    Raises:
        Exception: Unexpected errors while clicking the continue button
            propagate; the browser is closed first.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    option.add_argument('log-level=3')
    driver = webdriver.Chrome(chrome_options=option)
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            return False, "下载失败"
        print(f'道客巴巴: 《{title}》')
        if os.path.exists(f'./output/{title}.pdf'):
            return True, title
        time.sleep(5)

        try:
            # Expand the full document.
            elem_cont_button = driver.find_element_by_id("continueButton")
            driver.execute_script(
                "arguments[0].scrollIntoView(true);", elem_cont_button)
            actions = ActionChains(driver)
            actions.move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            # No continue button on this page; nothing to click.
            pass

        try:
            # Read the page count (last space-separated token of the label).
            num_of_pages = driver.find_element_by_id(
                'readshop').find_element_by_class_name(
                    'mainpart').find_element_by_class_name(
                        'shop3').find_element_by_class_name(
                            'text').get_attribute('innerHTML')
            num_of_pages = int(num_of_pages.split(' ')[-1])

            for _ in range(5):
                # Zoom in before the canvases are exported.
                driver.find_element_by_id('zoomInButton').click()
                time.sleep(0.5)

            temp_dir = f'./temp/{title}'
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
            os.makedirs(temp_dir)

            for pages in trange(num_of_pages):
                callback(pages, num_of_pages, "正在下载:%s" % title)
                time.sleep(0.5)
                canvas_id = "page_" + str(pages + 1)
                pagepb_id = "pagepb_" + str(pages + 1)

                element = driver.find_element_by_id(canvas_id)
                driver.execute_script(
                    "arguments[0].scrollIntoView(true);", element)
                actions = ActionChains(driver)
                actions.move_to_element(element).perform()
                time.sleep(0.5)

                # Wait until the page's progress element is empty, i.e.
                # loading has finished.
                # NOTE(review): there is no upper bound on this wait.
                while len(driver.find_element_by_id(
                        pagepb_id).get_attribute('innerHTML')) != 0:
                    time.sleep(1)

                js_cmd = "var canvas = document.getElementById('{}');".format(canvas_id) + \
                    "return canvas.toDataURL();"
                img_data = driver.execute_script(js_cmd)
                # Keep only the base64 payload that follows the
                # "data:image/png;base64," prefix.
                img_data = img_data.split(',', 1)[1].encode()
                with open(f"{temp_dir}/{pages}.png", "wb") as fh:
                    fh.write(base64.decodebytes(img_data))
        except Exception as e:
            return False, e
    finally:
        # Runs on every return path above and on propagated errors, so
        # Chrome is never left behind.
        driver.quit()

    try:
        print('下载完毕,正在转码')
        callback(99, 100, "正在转码")
        conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')
    except Exception as e:
        return False, e
    return True, title