def get_page(assembly_id, bill_id, link_id, field):
    """Fetch one page and save it under ``DIR[field]``.

    The URL is ``BASEURL[field]`` followed by ``link_id``; the response text
    is written to ``<DIR[field]>/<assembly_id>/<bill_id>.html``.

    A response that contains the hidden ``MSG`` textarea marker is fetched
    again, up to 10 successful fetches in total.  If the marker is still
    present after that, the last response is written anyway.
    NOTE(review): the marker presumably identifies a server-side
    notice/error page -- confirm against the site.

    Failed requests are retried, but no more than 10 failures are tolerated
    so that a persistent network problem cannot make this function spin
    forever.

    Raises:
        requests.exceptions.RequestException: if the failure limit is
            reached before any response text was obtained.
    """
    marker = 'TEXTAREA ID="MSG" STYLE="display:none"'
    max_fetches = 10
    max_failures = 10

    url = '%s%s' % (BASEURL[field], link_id)
    outp = '%s/%s/%s.html' % (DIR[field], assembly_id, bill_id)

    doc = None
    fetches = 0
    failures = 0
    while fetches < max_fetches:
        try:
            doc = utils.get_webpage_text(url)
        except requests.exceptions.RequestException:
            failures += 1
            if failures < max_failures:
                continue
            if doc is None:
                # Nothing was ever downloaded: surface the last error
                # instead of looping forever.
                raise
            # Keep the most recent response, even though it carries the
            # marker, rather than dropping it.
            break
        fetches += 1
        if marker not in doc:
            break

    with open(outp, 'w') as f:
        f.write(doc)
def get_page(assembly_id, bill_id, link_id, field):
    """Fetch one page and save it under ``DIR[field]``.

    The URL is ``BASEURL[field]`` followed by ``link_id``; the response text
    is written to ``<DIR[field]>/<assembly_id>/<bill_id>.html``.

    A response that contains the hidden ``MSG`` textarea marker is fetched
    again, up to 10 successful fetches in total.  If the marker is still
    present after that, the last response is written anyway.
    NOTE(review): the marker presumably identifies a server-side
    notice/error page -- confirm against the site.

    Failed requests are retried, but no more than 10 failures are tolerated
    so that a persistent network problem cannot make this function spin
    forever.

    Raises:
        requests.exceptions.RequestException: if the failure limit is
            reached before any response text was obtained.
    """
    marker = 'TEXTAREA ID="MSG" STYLE="display:none"'
    max_fetches = 10
    max_failures = 10

    url = '%s%s' % (BASEURL[field], link_id)
    outp = '%s/%s/%s.html' % (DIR[field], assembly_id, bill_id)

    doc = None
    fetches = 0
    failures = 0
    while fetches < max_fetches:
        try:
            doc = utils.get_webpage_text(url)
        except requests.exceptions.RequestException:
            failures += 1
            if failures < max_failures:
                continue
            if doc is None:
                # Nothing was ever downloaded: surface the last error
                # instead of looping forever.
                raise
            # Keep the most recent response, even though it carries the
            # marker, rather than dropping it.
            break
        fetches += 1
        if marker not in doc:
            break

    with open(outp, 'w') as f:
        f.write(doc)
def get_page(baseurl, page, directory, npages):
    """Fetch one listing page and save it as ``<directory>/<pn>.html``.

    The request URL is ``baseurl`` with ``&PAGE=<page>&PAGE_SIZE=<PAGE_SIZE>``
    appended.  The output file number is ``pn = npages - page + 1``, so the
    numbering runs in the opposite direction to ``page``.

    The page is re-fetched for as long as the response contains the hidden
    ``MSG`` textarea marker.  On success ``pn`` followed by a tab is written
    to stdout as a progress indicator.  Request and I/O errors are reported
    on stdout and swallowed; nothing is raised for them.
    """
    marker = 'TEXTAREA ID="MSG" STYLE="display:none"'

    url = baseurl + '&PAGE=%d&PAGE_SIZE=%d' % (page, PAGE_SIZE)
    pn = npages - page + 1
    fn = '%s/%d.html' % (directory, pn)

    try:
        # NOTE(review): this retry has no upper bound; if the server keeps
        # returning the marker page the loop never ends.  Consider capping
        # the number of attempts.
        while True:
            doc = utils.get_webpage_text(url)
            if marker not in doc:
                break

        with open(fn, 'w') as f:
            f.write(doc)

        sys.stdout.write('%s\t' % pn)
        sys.stdout.flush()
    except (requests.exceptions.RequestException, IOError) as e:
        # repr(e) rather than e.__repr__: the latter formats the bound
        # method object, not the exception itself.
        sys.stdout.write('\nFailed to get %s due to %s\n' % (fn, repr(e)))