def __init__(self, item):
        """Copy the fields of ``item`` needed by the search index onto self.

        Values are read from ``item`` itself, from ``item.docket.court`` and
        from ``item.citation``. The searchable text is rendered from the
        ``search/indexes/opinion_text.txt`` template with the case number
        appended.

        :param item: The document being indexed.
        :raises InvalidDocumentError: If reading the case name or the
            absolute URL raises AttributeError (reported as a missing
            Citation object) or NoReverseMatch (reported as a missing
            absolute_url).
        """
        # Standard fields
        self.id = item.pk
        if item.date_filed is not None:
            # dateFiled is only set when a filing date exists.
            self.dateFiled = datetime.combine(item.date_filed,
                                              time())  # Midnight, PST
        self.citeCount = item.citation_count
        self.court = item.docket.court.full_name
        self.court_id = item.docket.court.pk
        self.court_citation_string = item.docket.court.citation_string
        try:
            self.caseName = item.citation.case_name
            self.absolute_url = item.get_absolute_url()
        except AttributeError:
            raise InvalidDocumentError(
                "Unable to save to index due to missing Citation object.")
        except NoReverseMatch:
            # Report the court through self.court_id, which was set above.
            # This object has no ``docket`` attribute, so looking it up on
            # self raised AttributeError here and masked the real error.
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url (court_id: %s, item.pk: %s). "
                "Might the court have in_use set to False?" %
                (self.court_id, item.pk))
        self.judge = item.judges
        self.suitNature = item.nature_of_suit
        self.docketNumber = item.citation.docket_number
        self.lexisCite = item.citation.lexis_cite
        self.neutralCite = item.citation.neutral_cite
        self.status = item.get_precedential_status_display()
        self.source = item.source
        self.download_url = item.download_url
        self.local_path = unicode(item.local_path)
        self.citation = make_citation_string(item)
        # Assign the docket number and/or the citation to the caseNumber
        # field. The tests use the citation *string*: item.citation is the
        # related object and is always present at this point (its absence
        # is rejected above), so testing it can never reach the
        # docket-number-only case.
        docket_number = item.citation.docket_number
        if self.citation and docket_number:
            self.caseNumber = '%s, %s' % (self.citation, docket_number)
        elif self.citation:
            self.caseNumber = self.citation
        else:
            # Docket number alone, or an empty string when neither exists,
            # so the text below never contains a stray ", " or "None".
            self.caseNumber = docket_number or ''

        # Load the document text using a template for cleanup and concatenation
        text_template = loader.get_template('search/indexes/opinion_text.txt')
        c = Context({'object': item})
        self.text = '%s %s' % (text_template.render(c).translate(null_map),
                               self.caseNumber)

        # Faceting fields
        self.status_exact = item.get_precedential_status_display()
        self.court_exact = item.docket.court.pk
    def __init__(self, item):
        """Copy the fields of ``item`` needed by the search index onto self.

        Values are read from ``item`` itself, from ``item.docket`` (court and
        docket number) and from ``item.citation``. The searchable text is
        rendered from the ``search/indexes/opinion_text.txt`` template with
        the case number appended.

        :param item: The document being indexed.
        :raises InvalidDocumentError: If reading the case name or the
            absolute URL raises AttributeError (reported as a missing
            Citation object) or NoReverseMatch (reported as a missing
            absolute_url).
        """
        # Standard fields
        self.id = item.pk
        if item.date_filed is not None:
            # dateFiled is only set when a filing date exists.
            self.dateFiled = datetime.combine(item.date_filed, time())  # Midnight, PST
        self.citeCount = item.citation_count
        self.court = item.docket.court.full_name
        self.court_id = item.docket.court.pk
        self.court_citation_string = item.docket.court.citation_string
        try:
            self.caseName = item.citation.case_name
            self.absolute_url = item.get_absolute_url()
        except AttributeError:
            raise InvalidDocumentError("Unable to save to index due to missing Citation object.")
        except NoReverseMatch:
            # Report the court through self.court_id, which was set above.
            # This object has no ``docket`` attribute, so looking it up on
            # self raised AttributeError here and masked the real error.
            raise InvalidDocumentError("Unable to save to index due to missing absolute_url (court_id: %s, item.pk: %s). "
                                       "Might the court have in_use set to False?"
                                       % (self.court_id, item.pk))
        self.judge = item.judges
        self.suitNature = item.nature_of_suit
        self.docketNumber = item.docket.docket_number
        self.lexisCite = item.citation.lexis_cite
        self.neutralCite = item.citation.neutral_cite
        self.status = item.get_precedential_status_display()
        self.source = item.source
        self.download_url = item.download_url
        self.local_path = unicode(item.local_path)
        self.citation = make_citation_string(item)
        # Assign the docket number and/or the citation to the caseNumber
        # field. The tests use the citation *string*: item.citation is the
        # related object and is always present at this point (its absence
        # is rejected above), so testing it can never reach the
        # docket-number-only case.
        docket_number = item.docket.docket_number
        if self.citation and docket_number:
            self.caseNumber = '%s, %s' % (self.citation, docket_number)
        elif self.citation:
            self.caseNumber = self.citation
        else:
            # Docket number alone, or an empty string when neither exists,
            # so the text below never contains a stray ", " or "None".
            self.caseNumber = docket_number or ''

        # Load the document text using a template for cleanup and concatenation
        text_template = loader.get_template('search/indexes/opinion_text.txt')
        c = Context({'object': item})
        self.text = '%s %s' % (text_template.render(c).translate(null_map), self.caseNumber)

        # Faceting fields
        self.status_exact = item.get_precedential_status_display()
        self.court_exact = item.docket.court.pk
Example #3
0
def update_document(document, index=True, commit=True):
    """Resolve the citations found in ``document`` and save the document.

    Each citation that resolves to exactly one match is linked to the matched
    document: the match's citation count is bumped the first time this
    document cites it, the match is added to ``document.cases_cited``, and the
    citation is annotated with the match's URL and primary key. If any
    citations were found, ``document.html_with_citations`` is regenerated.

    ``index`` is forwarded to every ``save()`` call made here; ``commit`` is
    forwarded only to the final save of ``document``.
    """
    # Local verbosity switch; raise it by hand to get progressively more
    # output while debugging.
    DEBUG = 0
    if DEBUG >= 1:
        print("%s at https://www.courtlistener.com/admin/search/citation/%s/" %
              (document.citation.case_name, document.citation.pk))

    found_citations = get_document_citations(document)
    # One flag per resolved citation: truthy flags are reported as exact
    # (citation) matches below, the rest as name matches.
    match_flags = []
    for cite in found_citations:
        # Resource.org docs contain their own citation in the html text,
        # which we don't want to include
        if cite.base_citation() in make_citation_string(document):
            continue
        candidates, by_citation = match_citations.match_citation(cite, document)

        # TODO: Figure out what to do if there's more than one
        if len(candidates) != 1:
            #create_stub([citation])
            if DEBUG >= 2:
                # TODO: Don't print 1 line per citation.  Save them in a
                # list and print in a single line at the end.
                print("No match found for citation %s" % cite.base_citation())
            continue

        match_flags.append(by_citation)
        target_id = candidates[0]['id']
        try:
            target = Document.objects.get(pk=target_id)
            # Only count the citation if this document has not already
            # cited the matched document.
            already_cited = target.citation in document.cases_cited.all()
            if not already_cited:
                target.citation_count += 1
                target.save(index=index)

            # Record the match on the citing document.
            # cases_cited is a set so duplicates aren't an issue
            document.cases_cited.add(target.citation)
            # URL field will be used for generating inline citation html
            cite.match_url = target.get_absolute_url()
            cite.match_id = target.pk
        except Document.DoesNotExist:
            if DEBUG >= 2:
                print("No database matches found for document id %s" % target_id)
        except Document.MultipleObjectsReturned:
            if DEBUG >= 2:
                print("Multiple database matches found for document id %s" % target_id)

    # Only create new HTML if we found citations
    if found_citations:
        document.html_with_citations = create_cited_html(document, found_citations)
        if DEBUG >= 3:
            print(document.html_with_citations)

    # Update Solr if requested. In some cases we do it at the end for performance reasons.
    document.save(index=index, commit=commit)
    if DEBUG >= 1:
        exact_total = sum(match_flags)
        name_total = len(match_flags) - exact_total
        print("  %d citations" % len(found_citations))
        print("  %d exact matches" % exact_total)
        print("  %d name matches" % name_total)
Example #4
0
def update_document(document, index=True):
    """Get the citations for an item and save it and add it to the index if
    requested.

    Each citation that resolves to exactly one match is linked to the matched
    document: the match's citation count is bumped the first time this
    document cites it, the match is added to ``document.cases_cited``, and the
    citation is annotated with the match's URL and primary key. If any
    citations were found, ``document.html_with_citations`` is regenerated.
    ``index`` is forwarded to every ``save()`` call made here.
    """
    # Local verbosity switch; raise it by hand to get progressively more
    # output while debugging.
    DEBUG = 0
    if DEBUG >= 1:
        print("%s at https://www.courtlistener.com/admin/search/citation/%s/" %
              (document.citation.case_name, document.citation.pk))

    doc_citations = get_document_citations(document)
    # One flag per resolved citation: truthy flags are reported as exact
    # (citation) matches below, the rest as name matches.
    exact_flags = []
    for found in doc_citations:
        # Resource.org docs contain their own citation in the html text,
        # which we don't want to include
        if found.base_citation() in make_citation_string(document):
            continue
        hits, was_citation_match = match_citations.match_citation(found, document)

        # TODO: Figure out what to do if there's more than one
        if len(hits) != 1:
            #create_stub([citation])
            if DEBUG >= 2:
                # TODO: Don't print 1 line per citation.  Save them in a
                # list and print in a single line at the end.
                print("No match found for citation %s" % found.base_citation())
            continue

        exact_flags.append(was_citation_match)
        hit_id = hits[0]['id']
        try:
            cited_doc = Document.objects.get(pk=hit_id)
            # Only count the citation if this document has not already
            # cited the matched document.
            if cited_doc.citation not in document.cases_cited.all():
                cited_doc.citation_count += 1
                cited_doc.save(index=index)

            # Record the match on the citing document.
            # cases_cited is a set so duplicates aren't an issue
            document.cases_cited.add(cited_doc.citation)
            # URL field will be used for generating inline citation html
            found.match_url = cited_doc.get_absolute_url()
            found.match_id = cited_doc.pk
        except Document.DoesNotExist:
            if DEBUG >= 2:
                print("No database matches found for document id %s" % hit_id)
        except Document.MultipleObjectsReturned:
            if DEBUG >= 2:
                print("Multiple database matches found for document id %s" % hit_id)

    # Only create new HTML if we found citations
    if doc_citations:
        document.html_with_citations = create_cited_html(document, doc_citations)
        if DEBUG >= 3:
            print(document.html_with_citations)

    # Update Solr if requested. In some cases we do it at the end for
    # performance reasons.
    document.save(index=index)
    if DEBUG >= 1:
        exact_count = sum(exact_flags)
        name_count = len(exact_flags) - exact_count
        print("  %d citations" % len(doc_citations))
        print("  %d exact matches" % exact_count)
        print("  %d name matches" % name_count)