def as_image(source, output):
    """Render the file ``source`` to a PNG through GrabzIt and save it to ``output``.

    The GrabzIt key and secret are read from
    ``self.credentials['credentials']``. ``self`` is not a parameter here, so
    it has to be provided by the scope this function is defined in.

    :param source: handed to ``GrabzItClient.FileToImage`` as the file to render.
    :param output: handed to ``GrabzItClient.SaveTo`` as the destination.
    """
    creds = self.credentials['credentials']
    client = GrabzItClient.GrabzItClient(
        creds['grabzit_key'], creds['grabzit_secret'])

    image_options = GrabzItImageOptions.GrabzItImageOptions()
    image_options.format = 'png'
    image_options.quality = 100
    # -1 for the browser height and both output dimensions; presumably this
    # asks GrabzIt for a full-length, unscaled capture -- confirm in its docs.
    image_options.browserHeight = -1
    image_options.width = -1
    image_options.height = -1

    client.FileToImage(source, image_options)
    # (!) synchronous call to the GrabzIt API
    client.SaveTo(output)
def convert_url_img(url):
    """Capture ``url`` as a PNG through GrabzIt and return the saved file's path.

    The file name is the current local time formatted as ``YYYYmmdd-HHMMSS``
    with a ``.png`` suffix. The path is relative and joins ``tmp``, ``img``
    and the file name with backslashes.

    :param url: address passed to ``GrabzItClient.URLToImage``.
    :return: the relative path that was passed to ``GrabzItClient.SaveTo``.
    :raises: whatever the GrabzIt calls raise; exceptions caught by the
        handler below are printed and then re-raised unchanged.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # Raw string: "\i" and "\{" are not valid escape sequences, so a plain
    # literal only works by accident and warns on current Python versions.
    # The resulting text is identical.
    path = r'tmp\img\{}.png'.format(stamp)
    try:
        # NOTE(review): the application key and secret are hard-coded; they
        # should come from configuration or the environment instead.
        grabzIt = GrabzItClient.GrabzItClient(
            "YmI5YzRiY2U1NzI3NDNkMTk5OGJjNDRkNjNkNmUxNGE=",
            "RT8/Pz8/egY/Pz8/Vz8/OD8/Cz8RRj8/Pz8/PyEOMz8=")
        options = GrabzItImageOptions.GrabzItImageOptions()
        options.format = "png"
        # -1 everywhere; presumably a full-length, unscaled capture -- confirm
        # against the GrabzIt documentation.
        options.browserHeight = -1
        options.height = -1
        options.width = -1
        grabzIt.URLToImage(url, options)
        grabzIt.SaveTo(path)
    except Error as why:
        # NOTE(review): `Error` is not defined in this function; confirm that
        # the module imports it and that it is what the GrabzIt client raises.
        print('grabzIt Error: {}\n'.format(why))
        raise
    return path
def convert_url_pdf(url):
    """Capture ``url`` as a PDF through GrabzIt and return the saved file's path.

    The file name is the current local time formatted as ``YYYYmmdd-HHMMSS``
    with a ``.pdf`` suffix. The path is relative and joins ``tmp``, ``pdf``
    and the file name with backslashes.

    :param url: address passed to ``GrabzItClient.URLToPDF``.
    :return: the relative path that was passed to ``GrabzItClient.SaveTo``.
    :raises: whatever the GrabzIt calls raise; the error is written to
        ``sys.stderr`` and then re-raised unchanged.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # Raw string: "\p" and "\{" are not valid escape sequences, so a plain
    # literal only works by accident and warns on current Python versions.
    # The resulting text is identical.
    path = r"tmp\pdf\{}.pdf".format(stamp)
    try:
        # NOTE(review): the application key and secret are hard-coded; they
        # should come from configuration or the environment instead.
        grabzIt = GrabzItClient.GrabzItClient(
            "YmI5YzRiY2U1NzI3NDNkMTk5OGJjNDRkNjNkNmUxNGE=",
            "RT8/Pz8/egY/Pz8/Vz8/OD8/Cz8RRj8/Pz8/PyEOMz8=")
        options = GrabzItPDFOptions.GrabzItPDFOptions()
        grabzIt.URLToPDF(url, options)
        # Save once; a second identical SaveTo call adds nothing.
        grabzIt.SaveTo(path)
    except Exception as why:
        # Only GrabzIt is called in the try body, so the handler reports a
        # GrabzIt failure. Catching broadly is safe here because the
        # exception is always re-raised and nothing is swallowed.
        sys.stderr.write('GrabzIt Error: {}\n'.format(why))
        raise
    return path
def get_problem_by_id(self, subject, id, img=None, path_to_img=None, path_to_tmp_html=''):
    """
    Fetch a problem by its identifier and return its parsed contents.

    :param subject: Subject name; used as a key into ``self._SUBJECT_BASE_URL``.
    :type subject: str

    :param id: Problem identifier.
    :type id: str

    :param img: Backend used to render an image of the problem: ``'pyppeteer'``,
        ``'grabzit'`` or ``'html2img'``. When omitted, no image is generated.
        The grabzit backend reads its credentials from ``self.grabzit_auth``
        (keys ``"AppKey"`` and ``"AppSecret"``); the html2img backend reads
        ``self.html2img_chrome_path``.
    :type img: str

    :param path_to_img: Path the rendered image is saved to.
    :type path_to_img: str

    :param path_to_tmp_html: Prefix prepended to ``<id>.html`` for the temporary
        HTML file written by the pyppeteer backend (e.g. a directory path
        ending with a separator).
    :type path_to_tmp_html: str

    :return: ``None`` when the page has no ``prob_maindiv`` block, otherwise a
        dict with the keys ``id``, ``topic``, ``condition``, ``solution``,
        ``answer``, ``analogs`` and ``url``.
    """
    base_url = self._SUBJECT_BASE_URL[subject]
    problem_url = f'{base_url}/problem?id={id}'

    response = requests.get(problem_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    prob_block = soup.find('div', {'class': 'prob_maindiv'})
    if prob_block is None:
        return None

    # Prefix the subject's base URL to every image source that does not
    # already mention the site's domain.
    for image in prob_block.find_all('img'):
        if 'sdamgia.ru' not in image['src']:
            image['src'] = base_url + image['src']

    # The topic is the text of the "prob_nums" span without its first word
    # and its last two words.
    topic_id = ' '.join(
        prob_block.find('span', {'class': 'prob_nums'}).text.split()[1:-2])

    def _text_and_images(body):
        # Text of one "pbody" block plus the sources of the images inside it.
        return {'text': body.text,
                'images': [tag['src'] for tag in body.find_all('img')]}

    condition, solution, answer, analogs = {}, {}, '', []

    # First "pbody" block is the condition, second one the solution; either
    # may be missing, in which case the empty default is kept.
    bodies = prob_block.find_all('div', {'class': 'pbody'})
    try:
        condition = _text_and_images(bodies[0])
    except IndexError:
        pass
    try:
        solution = _text_and_images(bodies[1])
    except (IndexError, AttributeError):
        pass

    # A missing "answer" / "minor" block makes find() return None, hence the
    # AttributeError handling.
    try:
        answer = prob_block.find(
            'div', {'class': 'answer'}).text.replace('Ответ: ', '')
    except (IndexError, AttributeError):
        pass

    try:
        analogs = [link.text for link in prob_block.find(
            'div', {'class': 'minor'}).find_all('a')]
        if 'Все' in analogs:
            analogs.remove('Все')
    except (IndexError, AttributeError):
        pass

    if img is not None:
        # Delete the informational parts of the problem before rendering.
        for minor in prob_block.find_all('div', {'class': 'minor'}):
            minor.decompose()
        prob_block.find_all('div')[-1].decompose()

        # Pyppeteer
        if img == 'pyppeteer':
            import asyncio
            from pyppeteer import launch

            tmp_html = f'{path_to_tmp_html}{id}.html'
            # Context manager so the handle is flushed and closed before the
            # browser opens the file.
            with open(tmp_html, 'w', encoding='utf-8') as tmp_file:
                tmp_file.write(str(prob_block))

            async def main():
                browser = await launch()
                page = await browser.newPage()
                await page.goto('file:' + path.abspath(tmp_html))
                # NOTE(review): 'fullPage' is given as the non-empty string
                # 'true'; confirm whether a real bool is intended.
                await page.screenshot({'path': path_to_img, 'fullPage': 'true'})
                await browser.close()

            try:
                asyncio.get_event_loop().run_until_complete(main())
            finally:
                # Remove the temporary file even when rendering fails.
                remove(path.abspath(tmp_html))

        # Grabz.it
        elif img == 'grabzit':
            from GrabzIt import GrabzItClient, GrabzItImageOptions

            grabzIt = GrabzItClient.GrabzItClient(
                self.grabzit_auth['AppKey'], self.grabzit_auth['AppSecret'])
            options = GrabzItImageOptions.GrabzItImageOptions()
            options.browserWidth = 800
            options.browserHeight = -1
            grabzIt.HTMLToImage(str(prob_block), options=options)
            grabzIt.SaveTo(path_to_img)

        # HTML2Image
        elif img == 'html2img':
            from html2image import Html2Image

            if self.html2img_chrome_path == 'chrome':
                hti = Html2Image()
            else:
                hti = Html2Image(chrome_path=self.html2img_chrome_path,
                                 custom_flags=['--no-sandbox'])
            hti.screenshot(html_str=str(prob_block), save_as=path_to_img)

    return {'id': id, 'topic': topic_id, 'condition': condition,
            'solution': solution, 'answer': answer, 'analogs': analogs,
            'url': problem_url}
# Script fragment: collects several fields from an already-parsed page (`soup`)
# and appends them to lists defined outside this fragment.
#   * Number of Employees: takes the first <a> with the long class string,
#     splits its string form on '>' and appends `l[1][:-3]` to `ne`
#     (presumably the anchor text without the start of the closing tag --
#     verify). `e[0]` raises IndexError when no such anchor exists.
#   * Founders: passes every <a class="link-accent ng-star-inserted"> to
#     `Founder(...)` and appends the result to `founder`.
#   * Links: creates a GrabzIt client (not used in the visible part) and
#     iterates over all anchors with an href, keeping the string form of the
#     href in `txt2`.
#   * Website / Facebook: `web()` and `FLT('facebook')` each return something
#     indexable; items 0 and 1 go to `wo`/`wo1` and `fbo`/`fbo1`.
# NOTE(review): the statement layout was collapsed onto one line, so it cannot
# be told from here whether the Website/Facebook statements belong to the
# `for link` loop body -- confirm against the original file.
# NOTE(review): the GrabzIt application key and secret are hard-coded; move
# them to configuration.
#Number of Employees e = soup.find_all( 'a', class_= 'component--field-formatter field-type-enum link-accent ng-star-inserted' ) l = str(e[0]).strip().split('>') ne.append(l[1][:-3]) #Founders fn = soup.find_all('a', class_='link-accent ng-star-inserted') founder.append(Founder(fn)) #Links grabzIt = GrabzItClient.GrabzItClient( "ZDg2NmM1MDVjYTI3NGFkM2FmOTZjMzc4MGE5YTI3MzU=", "bj9UTR8VED8/PxsFUT81PyB4Pz8/d29tSz8UVj8ccU0=") for link in soup.find_all('a', href=True): lk = link.get('href') txt2 = str(lk) #Website wa = web() wo.append(wa[0]) wo1.append(wa[1]) #Facebook fa = FLT('facebook') fbo.append(fa[0]) fbo1.append(fa[1])
# Script fragment: load the Kiva search page for `n` in Chrome, dump the
# rendered HTML to a local file, have GrabzIt turn that file into an image,
# then crop a fixed region out of the image and save it as "<n>_tile.jpg".
# `n` is defined outside this fragment.
chromedriver = "/Users/arjunsoin/Desktop/chromedriver"
URL = "https://www.kiva.org/lend?queryString={!s}&status=all".format(n)

browser = webdriver.Chrome(chromedriver)
browser.get(URL)
html = browser.page_source
soup = BeautifulSoup(html, "lxml")

s = 91929  ## placeholder html that is rewritten on each iteration
fl = '{!s}.html'.format(s)  ## replace with url
di = os.getcwd()
final = "/".join((di, fl))
with open(final, "w") as file:
    file.write(str(soup))

# -1 for both output dimensions and the browser height; presumably a
# full-length, unscaled capture -- confirm against the GrabzIt documentation.
options = GrabzItImageOptions.GrabzItImageOptions()
options.browserHeight = -1
options.height = -1
options.width = -1

# NOTE(review): the GrabzIt application key and secret are hard-coded.
grabzIt = GrabzItClient.GrabzItClient(
    "NmViOGJhNjk0YjRiNGU4OTlhNDFiNjk2MDliOTY2MDM=",
    "KzA/ID8LVz8/P3F+X2M/Pz8/RT9VPz8mPz8/Pz8/Pyc=")
grabzIt.FileToImage(final, options)
filepath = "/".join((di, 'result.jpg'))
grabzIt.SaveTo(filepath)

# Crop box is (left, upper, right, lower) in pixels of the captured image.
im = Image.open(filepath)
outfile = "{!s}_tile.jpg".format(n)
region = im.crop((40, 350, 330, 651))
region.save(outfile, "JPEG")
except ImportError: from ConfigParser import SafeConfigParser message = "" if os.environ['REQUEST_METHOD'] == 'POST': form = cgi.FieldStorage() if form.getvalue("delete") == "1": r = glob.glob('results/*') for i in r: os.remove(i) else: try: parser = SafeConfigParser() parser.read("config.ini") grabzIt = GrabzItClient.GrabzItClient( parser.get("GrabzIt", "applicationKey"), parser.get("GrabzIt", "applicationSecret")) isHtml = form.getvalue("convert") == "html" if form.getvalue("format") == "pdf": if (isHtml == True): grabzIt.HTMLToPDF(form.getvalue("html")) else: grabzIt.URLToPDF(form.getvalue("url")) elif form.getvalue("format") == "gif": grabzIt.URLToAnimation(form.getvalue("url")) else: if (isHtml == True): grabzIt.HTMLToImage(form.getvalue("html")) else: grabzIt.URLToImage(form.getvalue("url"))
# CGI callback handler: reads the parameters of the request, fetches the
# finished capture identified by `id` from GrabzIt and stores it in the
# "results" directory next to this script.
form = cgi.FieldStorage()

message = form.getvalue("message")
customId = form.getvalue("customid")
id = form.getvalue("id")
filename = form.getvalue("filename")
format = form.getvalue("format")
targeterror = form.getvalue("targeterror")

parser = SafeConfigParser()
parser.read('config.ini')

# Custom id can be used to store user ids or whatever is needed for the later
# processing of the resulting screenshot.
grabzIt = GrabzItClient.GrabzItClient(
    parser.get('GrabzIt', 'applicationKey'),
    parser.get('GrabzIt', 'applicationSecret'))
result = grabzIt.GetResult(id)

if result is not None:
    # Ensure that the application has the correct rights for this directory.
    # `filename` comes straight from the request, so only its final path
    # component is used; this keeps the write inside "results".
    target = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "results",
        os.path.basename(filename))
    # Context manager so the file is closed even if the write fails.
    with open(target, "wb") as fo:
        fo.write(result)

# Emit the CGI response header.
print("Content-type: text/html\n\n")