def bill_text(request, congress, type_slug, number, version=None):
    """Build the template context for a bill's text page.

    The bill is looked up from ``congress``, ``type_slug`` and ``number``;
    ``Http404`` is raised for an unknown type slug or a missing bill.  An
    empty-string ``version`` is treated the same as ``None``.

    Returns a dict with the keys ``bill``, ``congressdates``, ``textdata``,
    ``version``, ``is_latest``, ``alternates``, ``related_bills``,
    ``days_old`` and ``is_on_bill_text_page``.  ``textdata`` is ``None`` when
    loading the text raised ``IOError``; ``alternates`` is ``None`` whenever
    no text data was loaded.
    """
    version = None if version == "" else version

    try:
        bill_type = BillType.by_slug(type_slug)
    except BillType.NotFound:
        raise Http404("Invalid bill type: " + type_slug)
    bill = get_object_or_404(
        Bill, congress=congress, bill_type=bill_type, number=number)

    from billtext import load_bill_text, get_bill_text_versions
    try:
        textdata = load_bill_text(bill, version)
    except IOError:
        textdata = None

    # Collect metadata for every version of this bill, oldest first.
    alternates = None
    is_latest = True
    if textdata:
        loaded = []
        for version_code in get_bill_text_versions(bill):
            try:
                meta = load_bill_text(bill, version_code, mods_only=True)
            except IOError:
                continue
            loaded.append(meta)
        alternates = sorted(loaded, key=lambda meta: meta["docdate"])
        # With no alternates we assume the displayed text is the latest;
        # otherwise it is the latest only if it matches the newest one.
        is_latest = (
            not alternates
            or textdata["doc_version"] == alternates[-1]["doc_version"])

    # Collect (bill, version) pairs of related bills, without duplicates.
    from billtext import get_current_version
    related_bills = []

    def remember(pair):
        # Preserve first-seen order while skipping repeats.
        if pair not in related_bills:
            related_bills.append(pair)

    candidates = list(bill.find_reintroductions())
    candidates.extend(
        [link.related_bill for link in bill.get_related_bills()])
    for other in candidates:
        try:
            other_version = get_current_version(other)
        except IOError:
            continue  # text not available
        remember((other, other_version))

    # Bills that already have a stored text comparison against this one,
    # in either orientation.
    as_left = BillTextComparison.objects.filter(bill1=bill).exclude(bill2=bill)
    for comparison in as_left:
        remember((comparison.bill2, comparison.ver2))
    as_right = BillTextComparison.objects.filter(bill2=bill).exclude(bill1=bill)
    for comparison in as_right:
        remember((comparison.bill1, comparison.ver1))

    return {
        'bill': bill,
        "congressdates": get_congress_dates(bill.congress),
        "textdata": textdata,
        "version": version,
        "is_latest": is_latest,
        "alternates": alternates,
        "related_bills": related_bills,
        "days_old": (datetime.datetime.now().date() - bill.current_status_date).days,
        "is_on_bill_text_page": True,  # for the header tabs
    }
def bill_text(request, congress, type_slug, number, version=None):
    """Build the template context for a bill's text page.

    The bill is looked up from ``congress``, ``type_slug`` and ``number``;
    ``Http404`` is raised for an unknown type slug or a missing bill.  An
    empty-string ``version`` is treated the same as ``None``.

    Returns a dict with the keys ``bill``, ``congressdates``, ``textdata``,
    ``version``, ``alternates`` and ``related_bills``.  ``textdata`` is
    ``None`` when loading the text raised ``IOError``; ``alternates`` is
    ``None`` whenever no text data was loaded.
    """
    if version == "":
        version = None

    try:
        bill_type = BillType.by_slug(type_slug)
    except BillType.NotFound:
        raise Http404("Invalid bill type: " + type_slug)
    bill = get_object_or_404(
        Bill, congress=congress, bill_type=bill_type, number=number)

    from billtext import load_bill_text, bill_gpo_status_codes
    try:
        textdata = load_bill_text(bill, version)
    except IOError:
        textdata = None

    # Get a list of the alternate versions of this bill.
    alternates = None
    if textdata:
        # The type code and the path prefix do not depend on the status
        # code, so compute them once instead of on every iteration.
        xml_code = BillType.by_value(bill.bill_type).xml_code
        mods_prefix = "data/us/bills.text/%s/%s/%s%d" % (
            bill.congress, xml_code, xml_code, bill.number)

        alternates = []
        for v in bill_gpo_status_codes:
            fn = "%s%s.mods.xml" % (mods_prefix, v)
            if not os.path.exists(fn):
                continue
            try:
                alternates.append(load_bill_text(bill, v, mods_only=True))
            except IOError:
                # The file can disappear or be unreadable between the check
                # and the load; skip that version rather than failing the
                # whole page, as is done for the main text above.
                continue
        alternates.sort(key=lambda mods: mods["docdate"])

    # Get a list of related bills as (bill, version) pairs, no duplicates.
    from billtext import get_current_version
    related_bills = []
    candidates = list(bill.find_reintroductions()) + [
        r.related_bill for r in bill.get_related_bills()]
    for rb in candidates:
        try:
            rbv = get_current_version(rb)
        except IOError:
            continue  # text not available
        if (rb, rbv) not in related_bills:
            related_bills.append((rb, rbv))

    # Also include bills that already have a stored text comparison against
    # this one, in either orientation.
    for btc in BillTextComparison.objects.filter(bill1=bill).exclude(bill2=bill):
        if (btc.bill2, btc.ver2) not in related_bills:
            related_bills.append((btc.bill2, btc.ver2))
    for btc in BillTextComparison.objects.filter(bill2=bill).exclude(bill1=bill):
        if (btc.bill1, btc.ver1) not in related_bills:
            related_bills.append((btc.bill1, btc.ver1))

    return {
        'bill': bill,
        "congressdates": get_congress_dates(bill.congress),
        "textdata": textdata,
        "version": version,
        "alternates": alternates,
        "related_bills": related_bills,
    }
def load_comparison(left_bill, left_version, right_bill, right_version, timelimit=10, force=False):
    """Return side-by-side comparison data for two bill text versions.

    ``left_bill`` and ``right_bill`` are ``Bill`` primary keys.  An
    empty-string version means "use the bill's current version".  The result
    is a dict with the keys ``left_meta``, ``right_meta``, ``left_text`` and
    ``right_text``.

    Results are stored in ``BillTextComparison``.  Unless ``force`` is true,
    a stored record (in either left/right orientation) is returned without
    recomputing.  With ``force`` the comparison is always recomputed and the
    record in the requested orientation is created or overwritten.

    ``timelimit`` is passed through to ``compare_xml_text``.
    """
    from billtext import load_bill_text, compare_xml_text, get_current_version
    # A bare ``import lxml`` does not load the ``etree`` submodule used
    # below, so import the submodule explicitly.
    import lxml.etree

    left_bill = Bill.objects.get(id=left_bill)
    right_bill = Bill.objects.get(id=right_bill)
    if left_version == "":
        left_version = get_current_version(left_bill)
    if right_version == "":
        right_version = get_current_version(right_bill)

    btc = None
    try:
        btc = BillTextComparison.objects.get(
            bill1=left_bill, ver1=left_version,
            bill2=right_bill, ver2=right_version)
        btc.decompress()
        if not force:
            return btc.data
    except BillTextComparison.DoesNotExist:
        pass

    # Try with the bills swapped.  Skipped when forcing, otherwise a stale
    # swapped record would be returned and nothing would be recomputed.
    # NOTE(review): a forced refresh leaves any swapped record untouched.
    if not force:
        try:
            btc2 = BillTextComparison.objects.get(
                bill2=left_bill, ver2=left_version,
                bill1=right_bill, ver1=right_version)
            btc2.decompress()
            data = btc2.data
            # Un-swap so the caller sees its own left/right orientation.
            return {
                "left_meta": data["right_meta"],
                "right_meta": data["left_meta"],
                "left_text": data["right_text"],
                "right_text": data["left_text"],
            }
        except BillTextComparison.DoesNotExist:
            pass

    left = load_bill_text(left_bill, left_version, mods_only=True)
    right = load_bill_text(right_bill, right_version, mods_only=True)

    doc1 = lxml.etree.parse(left["basename"] + ".html")
    doc2 = lxml.etree.parse(right["basename"] + ".html")
    compare_xml_text(doc1, doc2, timelimit=timelimit)  # revises DOMs in-place

    # dates aren't JSON serializable
    left["docdate"] = left["docdate"].strftime("%x")
    right["docdate"] = right["docdate"].strftime("%x")

    ret = {
        "left_meta": left,
        "right_meta": right,
        "left_text": lxml.etree.tostring(doc1),
        "right_text": lxml.etree.tostring(doc2),
    }

    if btc is None:
        btc = BillTextComparison(
            bill1=left_bill, ver1=left_version,
            bill2=right_bill, ver2=right_version,
            data=dict(ret))  # clone before compress()
    else:
        # Forced refresh of an existing record.
        btc.data = dict(ret)  # clone before compress()
    btc.compress()
    btc.save()

    return ret
def load_comparison(left_bill, left_version, right_bill, right_version, timelimit=10, force=False):
    """Return side-by-side comparison data for two bill text versions.

    ``left_bill`` and ``right_bill`` are ``Bill`` primary keys.  An
    empty-string version means "use the bill's current version".  The result
    is a dict with the keys ``left_meta``, ``right_meta``, ``left_text`` and
    ``right_text``.

    Results are stored in ``BillTextComparison``.  Unless ``force`` is true,
    a stored record (in either left/right orientation) is returned without
    recomputing.  With ``force`` the comparison is always recomputed and the
    record in the requested orientation is created or overwritten.

    ``timelimit`` is passed through to ``compare_xml_text``.

    Raises ``IOError`` when either bill's metadata has no ``html_file``.
    """
    from billtext import load_bill_text, compare_xml_text, get_current_version
    # A bare ``import lxml`` does not load the ``etree`` submodule used
    # below, so import the submodule explicitly.
    import lxml.etree

    left_bill = Bill.objects.get(id=left_bill)
    right_bill = Bill.objects.get(id=right_bill)
    if left_version == "":
        left_version = get_current_version(left_bill)
    if right_version == "":
        right_version = get_current_version(right_bill)

    btc = None
    try:
        btc = BillTextComparison.objects.get(
            bill1=left_bill, ver1=left_version,
            bill2=right_bill, ver2=right_version)
        btc.decompress()
        if not force:
            return btc.data
    except BillTextComparison.DoesNotExist:
        pass

    # Try with the bills swapped.  Skipped when forcing, otherwise a stale
    # swapped record would be returned and nothing would be recomputed.
    # NOTE(review): a forced refresh leaves any swapped record untouched.
    if not force:
        try:
            btc2 = BillTextComparison.objects.get(
                bill2=left_bill, ver2=left_version,
                bill1=right_bill, ver1=right_version)
            btc2.decompress()
            data = btc2.data
            # Un-swap so the caller sees its own left/right orientation.
            return {
                "left_meta": data["right_meta"],
                "right_meta": data["left_meta"],
                "left_text": data["right_text"],
                "right_text": data["left_text"],
            }
        except BillTextComparison.DoesNotExist:
            pass

    left = load_bill_text(left_bill, left_version, mods_only=True)
    right = load_bill_text(right_bill, right_version, mods_only=True)

    try:
        doc1 = lxml.etree.parse(left["html_file"])
        doc2 = lxml.etree.parse(right["html_file"])
    except KeyError:
        raise IOError(
            "The HTML bill text format is not available for one of the bills.")
    compare_xml_text(doc1, doc2, timelimit=timelimit)  # revises DOMs in-place

    # dates aren't JSON serializable
    left["docdate"] = left["docdate"].strftime("%x")
    right["docdate"] = right["docdate"].strftime("%x")

    ret = {
        "left_meta": left,
        "right_meta": right,
        "left_text": lxml.etree.tostring(doc1),
        "right_text": lxml.etree.tostring(doc2),
    }

    if btc is None:
        btc = BillTextComparison(
            bill1=left_bill, ver1=left_version,
            bill2=right_bill, ver2=right_version,
            data=dict(ret))  # clone before compress()
    else:
        # Forced refresh of an existing record.
        btc.data = dict(ret)  # clone before compress()
    btc.compress()
    btc.save()

    return ret
def load_comparison(left_bill, left_version, right_bill, right_version, timelimit=10):
    """Return side-by-side comparison data for two bill text versions.

    ``left_bill`` and ``right_bill`` are ``Bill`` primary keys.  An
    empty-string version means "use the bill's current version".  The result
    is a dict with the keys ``left_meta``, ``right_meta``, ``left_text`` and
    ``right_text``.

    A stored ``BillTextComparison`` record (in either left/right
    orientation) is returned when one exists; otherwise the comparison is
    computed and stored.

    ``timelimit`` is accepted for interface compatibility but is not used
    by this implementation.

    Raises ``IOError`` when neither an XML nor an HTML file is listed in a
    bill's text metadata.
    """
    from billtext import load_bill_text, get_current_version
    from xml_diff import compare
    # A bare ``import lxml`` does not load the ``etree`` submodule used
    # below, so import the submodule explicitly.
    import lxml.etree

    left_bill = Bill.objects.get(id=left_bill)
    right_bill = Bill.objects.get(id=right_bill)
    if left_version == "":
        left_version = get_current_version(left_bill)
    if right_version == "":
        right_version = get_current_version(right_bill)

    use_cache = True

    if use_cache:
        # Load from cache.
        try:
            btc = BillTextComparison.objects.get(
                bill1=left_bill, ver1=left_version,
                bill2=right_bill, ver2=right_version)
            btc.decompress()
            return btc.data
        except BillTextComparison.DoesNotExist:
            pass

        # Load from cache - Try with the bills swapped.
        try:
            btc2 = BillTextComparison.objects.get(
                bill2=left_bill, ver2=left_version,
                bill1=right_bill, ver1=right_version)
            btc2.decompress()
            data = btc2.data
            # un-swap
            return {
                "left_meta": data["right_meta"],
                "right_meta": data["left_meta"],
                "left_text": data["right_text"],
                "right_text": data["left_text"],
            }
        except BillTextComparison.DoesNotExist:
            pass

    # Load bill text metadata.
    left = load_bill_text(left_bill, left_version, mods_only=True)
    right = load_bill_text(right_bill, right_version, mods_only=True)

    # Load XML DOMs for each document and perform the comparison.
    def load_bill_text_xml(docinfo):
        # If XML text is available, use it, but pre-render it
        # into HTML. Otherwise use the legacy HTML that we
        # scraped from THOMAS.
        if "xml_file" in docinfo:
            import congressxml
            return congressxml.convert_xml(docinfo["xml_file"])
        elif "html_file" in docinfo:
            return lxml.etree.parse(docinfo["html_file"])
        else:
            raise IOError("Bill text is not available for one of the bills.")

    doc1 = load_bill_text_xml(left)
    doc2 = load_bill_text_xml(right)

    def make_tag_func(ins_del):
        # The same wrapper element is used whatever ``ins_del`` is.
        return lxml.etree.Element("comparison-change")

    compare(doc1.getroot(), doc2.getroot(), make_tag_func=make_tag_func)

    # Prepare JSON response data.
    # dates aren't JSON serializable
    left["docdate"] = left["docdate"].strftime("%x")
    right["docdate"] = right["docdate"].strftime("%x")
    ret = {
        "left_meta": left,
        "right_meta": right,
        "left_text": lxml.etree.tostring(doc1),
        "right_text": lxml.etree.tostring(doc2),
    }

    if use_cache:
        # Cache in database so we don't have to re-do the comparison
        # computation again.
        btc = BillTextComparison(
            bill1=left_bill, ver1=left_version,
            bill2=right_bill, ver2=right_version,
            data=dict(ret))  # clone before compress()
        btc.compress()
        btc.save()

    # Return JSON comparison data.
    return ret
def load_comparison(left_bill, left_version, right_bill, right_version, timelimit=10, use_cache=True, force_update=False):
    """Return side-by-side comparison data for two bill text versions.

    ``left_bill`` and ``right_bill`` are ``Bill`` primary keys.  An
    empty-string version means "use the bill's current version".  The result
    is a dict with the keys ``left_meta``, ``right_meta``, ``left_text`` and
    ``right_text``.

    With ``use_cache`` (and without ``force_update``) a stored
    ``BillTextComparison`` record, in either left/right orientation, is
    returned when one exists.  Otherwise the comparison is computed.  When
    ``use_cache`` or ``force_update`` is true the fresh result replaces any
    stored record for the requested orientation.

    ``timelimit`` is passed to ``diff_match_patch.diff_unicode``.

    Raises ``IOError`` when neither an XML nor an HTML file is listed in a
    bill's text metadata.
    """
    from billtext import load_bill_text, get_current_version
    from xml_diff import compare
    # A bare ``import lxml`` does not load the ``etree`` submodule used
    # below, so import the submodule explicitly.
    import lxml.etree

    left_bill = Bill.objects.get(id=left_bill)
    right_bill = Bill.objects.get(id=right_bill)
    if left_version == "":
        left_version = get_current_version(left_bill)
    if right_version == "":
        right_version = get_current_version(right_bill)

    # A forced update must not be answered from the cache, otherwise the
    # stored record would never be refreshed.
    if use_cache and not force_update:
        # Load from cache.
        try:
            btc = BillTextComparison.objects.get(
                bill1=left_bill, ver1=left_version,
                bill2=right_bill, ver2=right_version)
            btc.decompress()
            return btc.data
        except BillTextComparison.DoesNotExist:
            pass

        # Load from cache - Try with the bills swapped.
        try:
            btc2 = BillTextComparison.objects.get(
                bill2=left_bill, ver2=left_version,
                bill1=right_bill, ver1=right_version)
            btc2.decompress()
            data = btc2.data
            # un-swap
            return {
                "left_meta": data["right_meta"],
                "right_meta": data["left_meta"],
                "left_text": data["right_text"],
                "right_text": data["left_text"],
            }
        except BillTextComparison.DoesNotExist:
            pass

    # Load bill text metadata.
    left = load_bill_text(left_bill, left_version, mods_only=True)
    right = load_bill_text(right_bill, right_version, mods_only=True)

    # Load XML DOMs for each document and perform the comparison.
    def load_bill_text_xml(docinfo):
        # If XML text is available, use it, but pre-render it
        # into HTML. Otherwise use the legacy HTML that we
        # scraped from THOMAS.
        if "xml_file" in docinfo:
            import congressxml
            return congressxml.convert_xml(docinfo["xml_file"])
        elif "html_file" in docinfo:
            return lxml.etree.parse(docinfo["html_file"])
        else:
            raise IOError("Bill text is not available for one of the bills.")

    doc1 = load_bill_text_xml(left)
    doc2 = load_bill_text_xml(right)

    def make_tag_func(ins_del):
        # The same wrapper element is used whatever ``ins_del`` is.
        return lxml.etree.Element("comparison-change")

    def differ(text1, text2):
        # ensure we use the C++ Google DMP and can specify the time limit
        import diff_match_patch
        for x in diff_match_patch.diff_unicode(text1, text2, timelimit=timelimit):
            yield x

    compare(doc1.getroot(), doc2.getroot(),
            make_tag_func=make_tag_func, differ=differ)

    # Prepare JSON response data.
    # dates aren't JSON serializable
    left["docdate"] = left["docdate"].strftime("%x")
    right["docdate"] = right["docdate"].strftime("%x")
    ret = {
        "left_meta": left,
        "right_meta": right,
        "left_text": lxml.etree.tostring(doc1),
        "right_text": lxml.etree.tostring(doc2),
    }

    if use_cache or force_update:
        # For force_update, or race conditions, delete any existing record.
        # NOTE(review): a record stored in the swapped orientation is not
        # removed here.
        fltr = {
            "bill1": left_bill, "ver1": left_version,
            "bill2": right_bill, "ver2": right_version,
        }
        BillTextComparison.objects.filter(**fltr).delete()

        # Cache in database so we don't have to re-do the comparison
        # computation again.
        btc = BillTextComparison(
            data=dict(ret),  # clone before compress()
            **fltr)
        btc.compress()
        btc.save()

    # Return JSON comparison data.
    return ret
def load_comparison(left_bill, left_version, right_bill, right_version, timelimit=10, use_cache=True, force_update=False):
    """Return side-by-side comparison data for two bill text versions.

    ``left_bill`` and ``right_bill`` are ``Bill`` primary keys.  An
    empty-string version means "use the bill's current version".  The result
    is a dict with the keys ``left_meta``, ``right_meta``, ``left_text`` and
    ``right_text``.

    With ``use_cache`` (and without ``force_update``) a stored
    ``BillTextComparison`` record, in either left/right orientation, is
    returned when one exists.  Otherwise the comparison is computed.  When
    ``use_cache`` or ``force_update`` is true the fresh result replaces any
    stored record for the requested orientation.

    ``timelimit`` is passed to ``diff_match_patch.diff_unicode``.

    Raises ``IOError`` when neither an XML nor an HTML file is listed in a
    bill's text metadata.
    """
    from billtext import load_bill_text, get_current_version
    from xml_diff import compare
    # A bare ``import lxml`` does not load the ``etree`` submodule used
    # below, so import the submodule explicitly.
    import lxml.etree

    left_bill = Bill.objects.get(id = left_bill)
    right_bill = Bill.objects.get(id = right_bill)
    if left_version == "":
        left_version = get_current_version(left_bill)
    if right_version == "":
        right_version = get_current_version(right_bill)

    # A forced update must not be answered from the cache, otherwise the
    # stored record would never be refreshed.
    if use_cache and not force_update:
        # Load from cache.
        try:
            btc = BillTextComparison.objects.get(
                bill1 = left_bill,
                ver1 = left_version,
                bill2 = right_bill,
                ver2 = right_version)
            btc.decompress()
            return btc.data
        except BillTextComparison.DoesNotExist:
            pass

        # Load from cache - Try with the bills swapped.
        try:
            btc2 = BillTextComparison.objects.get(
                bill2 = left_bill,
                ver2 = left_version,
                bill1 = right_bill,
                ver1 = right_version)
            btc2.decompress()
            data = btc2.data
            # un-swap
            return {
                "left_meta": data["right_meta"],
                "right_meta": data["left_meta"],
                "left_text": data["right_text"],
                "right_text": data["left_text"],
            }
        except BillTextComparison.DoesNotExist:
            pass

    # Load bill text metadata.
    left = load_bill_text(left_bill, left_version, mods_only=True)
    right = load_bill_text(right_bill, right_version, mods_only=True)

    # Load XML DOMs for each document and perform the comparison.
    def load_bill_text_xml(docinfo):
        # If XML text is available, use it, but pre-render it
        # into HTML. Otherwise use the legacy HTML that we
        # scraped from THOMAS.
        if "xml_file" in docinfo:
            import congressxml
            return congressxml.convert_xml(docinfo["xml_file"])
        elif "html_file" in docinfo:
            return lxml.etree.parse(docinfo["html_file"])
        else:
            raise IOError("Bill text is not available for one of the bills.")

    doc1 = load_bill_text_xml(left)
    doc2 = load_bill_text_xml(right)

    def make_tag_func(ins_del):
        # The same wrapper element is used whatever ``ins_del`` is.
        return lxml.etree.Element("comparison-change")

    def differ(text1, text2):
        # ensure we use the C++ Google DMP and can specify the time limit
        import diff_match_patch
        for x in diff_match_patch.diff_unicode(text1, text2, timelimit=timelimit):
            yield x

    compare(doc1.getroot(), doc2.getroot(),
            make_tag_func=make_tag_func, differ=differ)

    # Prepare JSON response data.
    # dates aren't JSON serializable
    left["docdate"] = left["docdate"].strftime("%x")
    right["docdate"] = right["docdate"].strftime("%x")
    ret = {
        "left_meta": left,
        "right_meta": right,
        "left_text": lxml.etree.tostring(doc1),
        "right_text": lxml.etree.tostring(doc2),
    }

    if use_cache or force_update:
        # For force_update, or race conditions, delete any existing record.
        # NOTE(review): a record stored in the swapped orientation is not
        # removed here.
        fltr = {
            "bill1": left_bill,
            "ver1": left_version,
            "bill2": right_bill,
            "ver2": right_version,
        }
        BillTextComparison.objects.filter(**fltr).delete()

        # Cache in database so we don't have to re-do the comparison
        # computation again.
        btc = BillTextComparison(
            data = dict(ret),  # clone before compress()
            **fltr)
        btc.compress()
        btc.save()

    # Return JSON comparison data.
    return ret