def update(a):
    """Refresh the bill-specific data and the PDFs for ``a``.

    Calls ``specific.get_html``, ``specific.html2json`` and ``pdf.get_pdf``
    in that order. Each call is restricted to the ``(bill_s, bill_e)``
    range, which is read from module-level globals.

    :param a: identifier forwarded unchanged to every helper
        (presumably the assembly number -- confirm against callers).
    """
    # A parenthesised single-argument print parses both as a Python 2
    # print statement and as a Python 3 function call, with the same output.
    print('## Get specific data')
    bill_range = (bill_s, bill_e)
    specific.get_html(a, range=bill_range)
    specific.html2json(a, range=bill_range)

    print('## Get pdfs')
    pdf.get_pdf(a, range=bill_range)
def get_new(a):
    """Fetch the new bills for ``a`` and download their details and PDFs.

    The ids returned by ``fetch_new_bill_ids(a)`` are pushed to the
    ``'insert_bills_db'`` queue and then handed, via ``bill_ids=``, to
    ``specific.get_html``, ``specific.html2json`` and ``pdf.get_pdf``.

    :param a: identifier forwarded unchanged to every helper
        (presumably the assembly number -- confirm against callers).
    """
    # A parenthesised single-argument print parses both as a Python 2
    # print statement and as a Python 3 function call, with the same output.
    print('## Get meta data')
    new_bill_ids = fetch_new_bill_ids(a)
    push_to_queue('insert_bills_db', new_bill_ids)

    print('## Get specific data')
    specific.get_html(a, bill_ids=new_bill_ids)
    specific.html2json(a, bill_ids=new_bill_ids)

    print('## Get pdfs')
    pdf.get_pdf(a, bill_ids=new_bill_ids)
def get_new(a):
    """Fetch the new bills for ``a`` and download their details and PDFs.

    The ids returned by ``fetch_new_bill_ids(a)`` are pushed to every queue
    named in ``QUEUE_NAMES`` and then handed, via ``bill_ids=``, to
    ``specific.get_html``, ``specific.html2json`` and ``pdf.get_pdf``.

    :param a: identifier forwarded unchanged to every helper
        (presumably the assembly number -- confirm against callers).
    """
    # A parenthesised single-argument print parses both as a Python 2
    # print statement and as a Python 3 function call, with the same output.
    print('## Get meta data')
    new_bill_ids = fetch_new_bill_ids(a)
    # values() exists on Python 2 and 3 alike; itervalues() is Python 2 only.
    for queue_name in QUEUE_NAMES.values():
        push_to_queue(queue_name, new_bill_ids)

    print('## Get specific data')
    specific.get_html(a, bill_ids=new_bill_ids)
    specific.html2json(a, bill_ids=new_bill_ids)

    print('## Get pdfs')
    pdf.get_pdf(a, bill_ids=new_bill_ids)
async def pdf(request):
    """Handle a PDF-generation request and answer with a JSON document.

    The request body must be a JSON object containing ``url``. Two optional
    keys are consumed here and are not forwarded: ``timeout`` (converted
    with ``int``, default 120, used as the ``asyncio.wait_for`` timeout) and
    ``compress`` (default ``False``). Every remaining key is passed to
    ``get_pdf`` as a keyword argument, together with a fresh ``trace`` id.

    When ``compress`` is truthy the result of ``get_pdf`` is base64-decoded,
    zlib-compressed and base64-encoded again before being returned.

    Returns:
        ``bad_request`` for an unparsable body, a body that is not a JSON
        object, a missing ``url`` or an unusable ``timeout``;
        ``gateway_timeout`` when ``get_pdf`` does not finish in time;
        ``payload_too_large`` on ``PayloadTooBig``;
        ``failed_dependency`` on ``NavigationError``;
        otherwise ``web.json_response`` holding ``pdf`` plus the remaining
        request keys.

    Raises:
        Any other exception from ``get_pdf`` is logged and re-raised.
    """
    trace = str(uuid4())

    try:
        data = await request.json()
    except json.decoder.JSONDecodeError:
        return bad_request("Must provide valid JSON")

    # Valid JSON can still be a list, string or number; the key handling
    # below needs a mapping, so reject anything else up front.
    if not isinstance(data, dict):
        return bad_request("Must provide a JSON object")
    if "url" not in data:
        return bad_request("Must provide 'url'", data)

    try:
        timeout = int(data.pop("timeout", 120))
    except (TypeError, ValueError, OverflowError):
        return bad_request("'timeout' must be an integer", data)
    compress = data.pop("compress", False)

    LOG.info(f"{trace} Generating PDF for url {data['url']}")
    try:
        pdf_data = await asyncio.wait_for(
            get_pdf(CDP_HOST, **data, trace=trace), timeout
        )
    except (asyncio.TimeoutError, TimeoutError) as e:
        # Before Python 3.11 asyncio.TimeoutError is not the builtin
        # TimeoutError, so both are listed to catch wait_for's timeout.
        return gateway_timeout(str(e), data)
    except PayloadTooBig as e:
        return payload_too_large(str(e), data)
    except NavigationError as e:
        url = e.url or data["url"]
        return failed_dependency(str(e), url, e.code)
    except Exception:
        # Record the traceback with the trace id before propagating.
        LOG.exception(f"{trace} PDF generation failed")
        raise

    if compress:
        pdf_data = b64encode(zlib.compress(b64decode(pdf_data))).decode("utf8")

    LOG.info(f"{trace} PDF returned successfully")
    return web.json_response(dict(pdf=pdf_data, **data))
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-
"""Run the meta, specific and pdf steps for a fixed range of assemblies."""

import meta
import specific
import pdf

# Inclusive bounds of the values of ``a`` iterated below.
assembly_s, assembly_e = 17, 19
# Forwarded as ``range=(bill_s, bill_e)`` to the specific/pdf helpers
# (None presumably means "no bound" -- confirm in those modules).
bill_s, bill_e = None, None

for a in range(assembly_s, assembly_e + 1):
    # A parenthesised single-argument print parses both as a Python 2
    # print statement and as a Python 3 function call, with the same output.
    print('\n# Assembly %d' % a)

    print('## Get meta data')
    npages = meta.get_npages(a)
    meta.get_html(a, npages)
    meta.html2csv(a, npages)

    print('## Get specific data')
    specific.get_html(a, range=(bill_s, bill_e))
    specific.html2json(a, range=(bill_s, bill_e))

    print('## Get pdfs')
    pdf.get_pdf(a, range=(bill_s, bill_e))
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-
"""Run the meta, specific and pdf steps for a fixed range of assemblies."""

import meta
import specific
import pdf

assembly_s, assembly_e = 17, 19  # start, end id of assembly
bill_s, bill_e = None, None  # start, end number of bill

for a in range(assembly_s, assembly_e + 1):
    # A parenthesised single-argument print parses both as a Python 2
    # print statement and as a Python 3 function call, with the same output.
    print('\n# Assembly %d' % a)

    print('## Get meta data')
    npages = meta.get_npages(a)
    meta.get_html(a, npages)
    meta.html2csv(a, npages)

    print('## Get specific data')
    specific.get_html(a, range=(bill_s, bill_e))
    specific.html2json(a, range=(bill_s, bill_e))

    print('## Get pdfs')
    pdf.get_pdf(a, range=(bill_s, bill_e))