def merge_docs(first_pk, second_pk):
    """Fold the document ``second_pk`` into ``first_pk``, then delete it.

    The surviving document (``first_pk``) gets its ``source`` set to 'LR' and
    takes over ``judges`` and ``html_lawbox`` from the second document. Its
    citation takes over ``case_name`` and ``slug`` from the second document's
    citation. Afterwards the second document, its docket and its citation are
    deleted, and the survivor is handed to ``update_document_by_id``.

    Args:
        first_pk: Primary key of the document to keep.
        second_pk: Primary key of the document to merge in and delete.

    Raises:
        ValueError: If both keys refer to the same document. Merging a
            document into itself would end with its docket, citation and the
            document itself being deleted.
    """
    # Cheap guard before touching the database.
    if first_pk == second_pk:
        raise ValueError(
            "Cannot merge a document into itself (pk=%r)." % (first_pk,)
        )

    first = Document.objects.get(pk=first_pk)
    second = Document.objects.get(pk=second_pk)

    # Second guard on the loaded rows: catches keys that differ only by type
    # (e.g. '5' vs 5) and therefore slipped past the check above.
    if first.pk == second.pk:
        raise ValueError(
            "Cannot merge a document into itself (pk=%r)." % (first.pk,)
        )

    # Copy the fields that the second document contributes.
    first.source = 'LR'
    first.judges = second.judges
    first.html_lawbox = second.html_lawbox
    first.save()

    first.citation.case_name = second.citation.case_name
    first.citation.slug = second.citation.slug
    first.citation.save()

    # Remove the now-redundant second document and its related rows.
    second.docket.delete()
    second.citation.delete()
    second.delete()

    # Imported inside the function as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from alert.citations.tasks import update_document_by_id
    update_document_by_id(first_pk)
def import_resource_org_item(case_location):
    """Using the path to a case, import it, gathering all needed meta data.

    ``case_location`` is either a local path starting with '/' (read from
    disk) or any URI that the requests library can handle. The volume index
    page is expected next to the case file as ``index.html``.

    A Citation, a Docket and a Document are created and saved from the data
    found in the case page and the index page, and the new document is then
    passed to ``update_document_by_id``.

    Args:
        case_location: Path or URL of the case file.

    Returns:
        The saved Document.

    Raises:
        requests.exceptions.HTTPError: If a remote fetch answers with an
            HTTP error status.
        IndexError: If ``case_location`` contains no '/'.
    """
    def get_file(location):
        """Return (parsed tree, cleaned body text) for a path or URL."""
        if location.startswith('/'):
            # Local file: read it directly.
            with open(location) as f:
                content = f.read()
        else:
            response = requests.get(location)
            # Fail loudly rather than importing an HTTP error page as a case.
            response.raise_for_status()
            content = response.content
        return fromstring(content), get_clean_body_content(content)

    # Get trees and text for the opinion itself and for the index page
    # that links to it. Each has useful data.
    case_tree, case_text = get_file(case_location)
    location_parts = case_location.rsplit('/', 1)
    vol_location = location_parts[-2] + '/index.html'
    vol_tree = get_file(vol_location)[0]

    html, blocked = anonymize(get_case_body(case_tree))

    case_location_relative = location_parts[1]
    case_name, status = get_case_name_and_status(
        vol_tree, case_location_relative)
    cite = Citation(
        case_name=case_name,
        docket_number=get_docket_number(case_location),
        federal_cite_one=get_west_cite(vol_tree, case_location_relative),
    )
    docket = Docket(
        court=Court.objects.get(pk=get_court_id(case_tree)),
        case_name=case_name,
    )
    doc = Document(
        date_filed=get_date_filed(vol_tree, case_location_relative),
        source='R',
        sha1=hashlib.sha1(case_text).hexdigest(),
        citation=cite,
        docket=docket,
        download_url=case_location,
        html=html,
        precedential_status=status,
    )
    if blocked:
        # Mark both the document and its docket as blocked as of today.
        today = datetime.date.today()
        doc.blocked = True
        docket.blocked = True
        doc.date_blocked = today
        docket.date_blocked = today

    cite.save()
    docket.save()
    # Re-assign after saving; presumably so the document picks up the
    # primary keys the related objects only get on save -- TODO confirm.
    doc.docket = docket
    doc.citation = cite
    doc.save()

    # Update the citation graph
    from alert.citations.tasks import update_document_by_id
    update_document_by_id(doc.pk)

    return doc
def import_resource_org_item(case_location):
    """Using the path to a case, import it, gathering all needed meta data.

    ``case_location`` is either a local path starting with '/' (read from
    disk) or any URI that the requests library can handle. The volume index
    page is expected next to the case file as ``index.html``.

    A Citation, a Docket and a Document are created and saved from the data
    found in the case page and the index page, and the new document is then
    passed to ``update_document_by_id``.

    Args:
        case_location: Path or URL of the case file.

    Returns:
        The saved Document.

    Raises:
        requests.exceptions.HTTPError: If a remote fetch answers with an
            HTTP error status.
        IndexError: If ``case_location`` contains no '/'.
    """
    def get_file(location):
        """Return (parsed tree, cleaned body text) for a path or URL."""
        if location.startswith('/'):
            # Local file: read it directly.
            with open(location) as f:
                content = f.read()
        else:
            response = requests.get(location)
            # Fail loudly rather than importing an HTTP error page as a case.
            response.raise_for_status()
            content = response.content
        return fromstring(content), get_clean_body_content(content)

    # Get trees and text for the opinion itself and for the index page
    # that links to it. Each has useful data.
    case_tree, case_text = get_file(case_location)
    location_parts = case_location.rsplit('/', 1)
    vol_location = location_parts[-2] + '/index.html'
    vol_tree = get_file(vol_location)[0]

    html, blocked = anonymize(get_case_body(case_tree))

    case_location_relative = location_parts[1]
    case_name, status = get_case_name_and_status(vol_tree,
                                                 case_location_relative)
    cite = Citation(
        case_name=case_name,
        docket_number=get_docket_number(case_location),
        federal_cite_one=get_west_cite(vol_tree, case_location_relative),
    )
    docket = Docket(
        court=Court.objects.get(pk=get_court_id(case_tree)),
        case_name=case_name,
    )
    doc = Document(
        date_filed=get_date_filed(vol_tree, case_location_relative),
        source='R',
        sha1=hashlib.sha1(case_text).hexdigest(),
        citation=cite,
        docket=docket,
        download_url=case_location,
        html=html,
        precedential_status=status,
    )
    if blocked:
        # Mark both the document and its docket as blocked as of today.
        today = datetime.date.today()
        doc.blocked = True
        docket.blocked = True
        doc.date_blocked = today
        docket.date_blocked = today

    cite.save()
    docket.save()
    # Re-assign after saving; presumably so the document picks up the
    # primary keys the related objects only get on save -- TODO confirm.
    doc.docket = docket
    doc.citation = cite
    doc.save()

    # Update the citation graph
    from alert.citations.tasks import update_document_by_id
    update_document_by_id(doc.pk)

    return doc