def __init__(self, item):
    """Populate this object's search-index fields from ``item``.

    Copies the identifying, court, citation and status fields of ``item``
    onto ``self``, builds the combined ``caseNumber`` string, and renders
    the document text through the ``search/indexes/opinion_text.txt``
    template.

    :param item: The document being indexed. It is read through
        ``item.docket``, ``item.citation`` and the attributes used below.
    :raises InvalidDocumentError: If reading ``item.citation.case_name`` or
        calling ``item.get_absolute_url()`` raises ``AttributeError`` or
        ``NoReverseMatch``.
    """
    # Standard fields
    self.id = item.pk
    if item.date_filed is not None:
        # Midnight, PST
        self.dateFiled = datetime.combine(item.date_filed, time())
    self.citeCount = item.citation_count
    self.court = item.docket.court.full_name
    self.court_id = item.docket.court.pk
    self.court_citation_string = item.docket.court.citation_string
    try:
        self.caseName = item.citation.case_name
        self.absolute_url = item.get_absolute_url()
    except AttributeError:
        raise InvalidDocumentError(
            "Unable to save to index due to missing Citation object.")
    except NoReverseMatch:
        # Report the court id through self.court_id, which was assigned
        # above. This object has no ``docket`` attribute, so reading
        # ``self.docket`` here would raise AttributeError and hide the
        # real problem.
        raise InvalidDocumentError(
            "Unable to save to index due to missing absolute_url (court_id: %s, item.pk: %s). "
            "Might the court have in_use set to False?" % (self.court_id, item.pk))
    self.judge = item.judges
    self.suitNature = item.nature_of_suit
    self.docketNumber = item.citation.docket_number
    self.lexisCite = item.citation.lexis_cite
    self.neutralCite = item.citation.neutral_cite
    self.status = item.get_precedential_status_display()
    self.source = item.source
    self.download_url = item.download_url
    self.local_path = unicode(item.local_path)
    self.citation = make_citation_string(item)

    # Assign the docket number and/or the citation to the caseNumber field
    if item.citation and item.citation.docket_number:
        self.caseNumber = '%s, %s' % (self.citation,
                                      item.citation.docket_number)
    elif item.citation:
        self.caseNumber = self.citation
    elif item.citation.docket_number:
        # self.citation is the string built by make_citation_string, so the
        # docket number has to be read from item.citation.
        # NOTE(review): this branch is only reached when item.citation is
        # falsy -- confirm whether it is still needed.
        self.caseNumber = item.citation.docket_number

    # Load the document text using a template for cleanup and concatenation
    text_template = loader.get_template('search/indexes/opinion_text.txt')
    c = Context({'object': item})
    self.text = '%s %s' % (text_template.render(c).translate(null_map),
                           self.caseNumber)

    # Faceting fields
    self.status_exact = item.get_precedential_status_display()
    self.court_exact = item.docket.court.pk
def __init__(self, item):
    """Populate this object's search-index fields from ``item``.

    Copies the identifying, court, citation and status fields of ``item``
    onto ``self``, builds the combined ``caseNumber`` string from the
    citation string and the docket's docket number, and renders the
    document text through the ``search/indexes/opinion_text.txt`` template.

    :param item: The document being indexed. It is read through
        ``item.docket``, ``item.citation`` and the attributes used below.
    :raises InvalidDocumentError: If reading ``item.citation.case_name`` or
        calling ``item.get_absolute_url()`` raises ``AttributeError`` or
        ``NoReverseMatch``.
    """
    # Standard fields
    self.id = item.pk
    if item.date_filed is not None:
        # Midnight, PST
        self.dateFiled = datetime.combine(item.date_filed, time())
    self.citeCount = item.citation_count
    self.court = item.docket.court.full_name
    self.court_id = item.docket.court.pk
    self.court_citation_string = item.docket.court.citation_string
    try:
        self.caseName = item.citation.case_name
        self.absolute_url = item.get_absolute_url()
    except AttributeError:
        raise InvalidDocumentError(
            "Unable to save to index due to missing Citation object.")
    except NoReverseMatch:
        # Report the court id through self.court_id, which was assigned
        # above. This object has no ``docket`` attribute, so reading
        # ``self.docket`` here would raise AttributeError and hide the
        # real problem.
        raise InvalidDocumentError(
            "Unable to save to index due to missing absolute_url (court_id: %s, item.pk: %s). "
            "Might the court have in_use set to False?" % (self.court_id, item.pk))
    self.judge = item.judges
    self.suitNature = item.nature_of_suit
    self.docketNumber = item.docket.docket_number
    self.lexisCite = item.citation.lexis_cite
    self.neutralCite = item.citation.neutral_cite
    self.status = item.get_precedential_status_display()
    self.source = item.source
    self.download_url = item.download_url
    self.local_path = unicode(item.local_path)
    self.citation = make_citation_string(item)

    # Assign the docket number and/or the citation to the caseNumber field
    if item.citation and item.docket.docket_number:
        self.caseNumber = '%s, %s' % (self.citation,
                                      item.docket.docket_number)
    elif item.citation:
        self.caseNumber = self.citation
    elif item.docket.docket_number:
        # The docket lives on item, not on this object; self.docket is
        # never assigned.
        self.caseNumber = item.docket.docket_number

    # Load the document text using a template for cleanup and concatenation
    text_template = loader.get_template('search/indexes/opinion_text.txt')
    c = Context({'object': item})
    self.text = '%s %s' % (text_template.render(c).translate(null_map),
                           self.caseNumber)

    # Faceting fields
    self.status_exact = item.get_precedential_status_display()
    self.court_exact = item.docket.court.pk
def update_document(document, index=True, commit=True):
    """Resolve the citations found in ``document`` and save the result.

    Each citation returned by ``get_document_citations`` is looked up with
    ``match_citations.match_citation``. When exactly one match comes back,
    the matched document's ``citation_count`` is incremented (unless its
    citation is already in ``document.cases_cited``), the citation is added
    to ``document.cases_cited``, and ``match_url`` / ``match_id`` are set
    on the citation object. If any citations were found,
    ``document.html_with_citations`` is regenerated. The document is always
    saved at the end.

    :param document: The citing document to process.
    :param index: Forwarded to ``save()`` on both the matched documents and
        ``document``.
    :param commit: Forwarded to the final ``document.save()`` only.
    """
    DEBUG = 0

    if DEBUG >= 1:
        print("%s at https://www.courtlistener.com/admin/search/citation/%s/" %
              (document.citation.case_name, document.citation.pk))

    citations = get_document_citations(document)

    # One entry per uniquely matched citation; used to report citation
    # matches vs. name matches.
    match_flags = []
    for citation in citations:
        # Resource.org docs contain their own citation in the html text,
        # which we don't want to include
        base = citation.base_citation()
        if base in make_citation_string(document):
            continue

        candidates, is_citation_match = match_citations.match_citation(
            citation, document)

        # TODO: Figure out what to do if there's more than one
        if len(candidates) != 1:
            #create_stub([citation])
            if DEBUG >= 2:
                # TODO: Don't print 1 line per citation. Save them in a
                # list and print in a single line at the end.
                print("No match found for citation %s" %
                      citation.base_citation())
            continue

        match_flags.append(is_citation_match)
        matched_pk = candidates[0]['id']
        try:
            cited_doc = Document.objects.get(pk=matched_pk)

            # Increase citation count for matched document if it hasn't
            # already been cited by this document.
            if cited_doc.citation not in document.cases_cited.all():
                cited_doc.citation_count += 1
                # NOTE(review): ``commit`` is not forwarded here, only to
                # the final document.save() -- confirm that is intended.
                cited_doc.save(index=index)

            # Add citation match to the citing document's list of cases it
            # cites. cases_cited is a set so duplicates aren't an issue
            document.cases_cited.add(cited_doc.citation)

            # URL field will be used for generating inline citation html
            citation.match_url = cited_doc.get_absolute_url()
            citation.match_id = cited_doc.pk
        except Document.DoesNotExist:
            if DEBUG >= 2:
                print("No database matches found for document id %s" %
                      matched_pk)
            continue
        except Document.MultipleObjectsReturned:
            if DEBUG >= 2:
                print("Multiple database matches found for document id %s" %
                      matched_pk)
            continue

    # Only create new HTML if we found citations
    if citations:
        document.html_with_citations = create_cited_html(document, citations)
        if DEBUG >= 3:
            print(document.html_with_citations)

    # Update Solr if requested. In some cases we do it at the end for
    # performance reasons.
    document.save(index=index, commit=commit)

    if DEBUG >= 1:
        citation_matches = sum(match_flags)
        name_matches = len(match_flags) - citation_matches
        print("  %d citations" % len(citations))
        print("  %d exact matches" % citation_matches)
        print("  %d name matches" % name_matches)
def update_document(document, index=True):
    """Find the citations in ``document``, record the matches and save it.

    Each citation returned by ``get_document_citations`` is looked up with
    ``match_citations.match_citation``. When exactly one match comes back,
    the matched document's ``citation_count`` is incremented (unless its
    citation is already in ``document.cases_cited``), the citation is added
    to ``document.cases_cited``, and ``match_url`` / ``match_id`` are set
    on the citation object. If any citations were found,
    ``document.html_with_citations`` is regenerated. The document is always
    saved at the end.

    :param document: The citing document to process.
    :param index: Forwarded to ``save()`` on both the matched documents and
        ``document``.
    """
    DEBUG = 0

    if DEBUG >= 1:
        print("%s at https://www.courtlistener.com/admin/search/citation/%s/" %
              (document.citation.case_name, document.citation.pk))

    citations = get_document_citations(document)

    # One entry per uniquely matched citation; used to report citation
    # matches vs. name matches.
    match_flags = []
    for citation in citations:
        # Resource.org docs contain their own citation in the html text,
        # which we don't want to include
        base = citation.base_citation()
        if base in make_citation_string(document):
            continue

        candidates, is_citation_match = match_citations.match_citation(
            citation, document)

        # TODO: Figure out what to do if there's more than one
        if len(candidates) != 1:
            #create_stub([citation])
            if DEBUG >= 2:
                # TODO: Don't print 1 line per citation. Save them in a
                # list and print in a single line at the end.
                print("No match found for citation %s" %
                      citation.base_citation())
            continue

        match_flags.append(is_citation_match)
        matched_pk = candidates[0]['id']
        try:
            cited_doc = Document.objects.get(pk=matched_pk)

            # Increase citation count for matched document if it hasn't
            # already been cited by this document.
            if cited_doc.citation not in document.cases_cited.all():
                cited_doc.citation_count += 1
                cited_doc.save(index=index)

            # Add citation match to the citing document's list of cases it
            # cites. cases_cited is a set so duplicates aren't an issue
            document.cases_cited.add(cited_doc.citation)

            # URL field will be used for generating inline citation html
            citation.match_url = cited_doc.get_absolute_url()
            citation.match_id = cited_doc.pk
        except Document.DoesNotExist:
            if DEBUG >= 2:
                print("No database matches found for document id %s" %
                      matched_pk)
            continue
        except Document.MultipleObjectsReturned:
            if DEBUG >= 2:
                print("Multiple database matches found for document id %s" %
                      matched_pk)
            continue

    # Only create new HTML if we found citations
    if citations:
        document.html_with_citations = create_cited_html(document, citations)
        if DEBUG >= 3:
            print(document.html_with_citations)

    # Update Solr if requested. In some cases we do it at the end for
    # performance reasons.
    document.save(index=index)

    if DEBUG >= 1:
        citation_matches = sum(match_flags)
        name_matches = len(match_flags) - citation_matches
        print("  %d citations" % len(citations))
        print("  %d exact matches" % citation_matches)
        print("  %d name matches" % name_matches)