def associate_meta_data_to_objects(self, site, i, court, sha1_hash):
    """Associate the scraped metadata for item *i* with model objects.

    Returns a (cite, docket, doc) tuple of unsaved model instances.
    """
    cite = Citation(case_name=site.case_names[i])
    # Optional citation fields: only set when the scraper provided a
    # non-empty list for them.
    optional_cite_fields = (
        ('docket_number', site.docket_numbers),
        ('neutral_cite', site.neutral_citations),
        ('federal_cite_one', site.west_citations),
        ('west_state_cite', site.west_state_citations),
    )
    for attr, values in optional_cite_fields:
        if values:
            setattr(cite, attr, values[i])

    docket = Docket(
        case_name=site.case_names[i],
        court=court,
    )

    doc = Document(
        source='C',
        sha1=sha1_hash,
        date_filed=site.case_dates[i],
        download_url=site.download_urls[i],
        precedential_status=site.precedential_statuses[i],
    )
    if site.judges:
        doc.judges = site.judges[i]
    if site.nature_of_suit:
        doc.nature_of_suit = site.nature_of_suit[i]

    return cite, docket, doc
def test_save_old_opinion(self):
    """Can we save opinions older than 1900?"""
    court = Court.objects.get(pk='test')
    cite = Citation(case_name=u"Blah")
    cite.save(index=True)
    docket = Docket(case_name=u"Blah", court=court)
    docket.save()
    doc = Document(
        citation=cite,
        docket=docket,
        date_filed=datetime.date(1899, 1, 1),
    )
    try:
        content_file = ContentFile(StringIO.StringIO('blah').read())
        doc.local_path.save('file_name.pdf', content_file, save=False)
        doc.save(index=True)
    except ValueError:
        # Dates before 1900 historically broke strftime-based code paths.
        raise ValueError("Unable to save a case older than 1900. Did you "
                         "try to use `strftime`...again?")
def setUp(self):
    # Handy variables used throughout the tests.
    self.court = Court.objects.get(pk='test')
    self.client = Client()

    # Add a single scraped document to the index.
    site = test_opinion_scraper.Site().parse()
    cite = Citation(
        docket_number=site.docket_numbers[0],
        neutral_cite=site.neutral_citations[0],
        federal_cite_one=site.west_citations[0],
    )
    cite.save(index=False)
    docket = Docket(case_name=site.case_names[0], court=self.court)
    docket.save()
    self.doc = Document(
        date_filed=site.case_dates[0],
        citation=cite,
        docket=docket,
        precedential_status=site.precedential_statuses[0],
    )
    self.doc.save(index=False)
def test_solr_ingestion_and_deletion(self):
    """Do items get added to the Solr index when they are ingested?"""
    site = test_opinion_scraper.Site().parse()
    # The first download URL points at a simple PDF fixture on disk.
    path = os.path.join(settings.INSTALL_ROOT, 'alert',
                        site.download_urls[0])
    with open(path) as f:
        content = f.read()
    cf = ContentFile(content)
    extension = get_extension(content)

    # Build the minimal object graph: citation + docket -> document.
    cite = Citation()
    cite.save(index=False)
    docket = Docket(court=self.court, case_name=site.case_names[0])
    docket.save()
    doc = Document(
        date_filed=site.case_dates[0],
        docket=docket,
        citation=cite,
    )
    file_name = trunc(site.case_names[0].lower(), 75) + extension
    doc.local_path.save(file_name, cf, save=False)
    doc.save(index=False)

    # Content extraction is what pushes the item into Solr.
    extract_doc_content(doc.pk, callback=subtask(extract_by_ocr))
    response = self.si.raw_query(
        q='supreme',
        caller='scraper_test',
    ).execute()
    count = response.result.numFound
    self.assertEqual(
        count,
        1,
        "There were %s items found when there should have been 1" % count)
def setUp(self):
    self.court = Court.objects.get(pk='test')

    # Create three documents, each with its own citation and docket.
    docs = []
    for name in (u"c1", u"c2", u"c3"):
        cite = Citation(case_name=name)
        cite.save(index=False)
        docket = Docket(case_name=name, court=self.court)
        docket.save()
        doc = Document(date_filed=date.today())
        doc.citation = cite
        doc.docket = docket
        doc.citation.save(index=False)
        doc.save(index=False)
        docs.append(doc)
    d1, d2, d3 = docs

    # Simple citing relation: 1 cites 2 and 3; 2 cites 3; 3 cites 1.
    d1.cases_cited.add(d2.citation)
    d2.citation_count += 1
    d1.cases_cited.add(d3.citation)
    d3.citation_count += 1
    d2.cases_cited.add(d3.citation)
    d3.citation_count += 1
    d3.cases_cited.add(d1.citation)
    d1.citation_count += 1

    for doc in (d1, d2, d3):
        doc.save(index=False)
def import_law_box_case(case_path):
    """Open the file, get its contents, convert to XML and extract the meta
    data.

    Return a document object for saving in the database
    """
    # NOTE(review): the file handle is never closed — presumably acceptable
    # for a one-shot import script, but a `with` block would be safer.
    raw_text = open(case_path).read()
    clean_html_tree, complete_html_tree, clean_html_str, body_text = get_html_from_raw_text(
        raw_text)

    # Fingerprint the cleaned HTML for later duplicate detection.
    sha1 = hashlib.sha1(clean_html_str).hexdigest()
    citations = get_citations_from_tree(complete_html_tree, case_path)
    judges = get_judge(clean_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path, judges)

    doc = Document(
        source='L',
        sha1=sha1,
        # we clear this field later, putting the value into html_lawbox.
        html= clean_html_str,
        date_filed=get_date_filed(clean_html_tree, citations=citations,
                                  case_path=case_path, court=court),
        precedential_status=get_precedential_status(),
        judges=judges,
        download_url=case_path,
    )
    cite = Citation(docket_number=get_docket_number(
        clean_html_tree, case_path=case_path, court=court))
    docket = Docket(
        case_name=get_case_name(complete_html_tree, case_path),
        court=court,
    )

    # Necessary for dup_finder.
    path = '//p/text()'
    doc.body_text = ' '.join(clean_html_tree.xpath(path))

    # Add the dict of citations to the object as its attributes.
    citations_as_dict = map_citations_to_models(citations)
    for k, v in citations_as_dict.iteritems():
        setattr(cite, k, v)

    # Wire up the relations; nothing is saved here — the caller persists.
    doc.citation = cite
    doc.docket = docket

    return doc
def setUp(self):
    cite = Citation(case_name=u"foo")
    cite.save(index=False)
    docket = Docket(
        case_name=u'foo',
        court=Court.objects.get(pk='test'),
    )
    docket.save()
    # The document must be more than a year old for all tests to be
    # runnable.
    filed = now().date() - timedelta(days=400)
    self.doc = Document(citation=cite, docket=docket, date_filed=filed)
    self.doc.save(index=False)

    # Scrape the audio "site" and ingest its contents.
    OralArgumentCommand().scrape_court(
        test_oral_arg_scraper.Site().parse(), full_crawl=True)
def test_updating_the_docket_when_the_citation_case_name_changes(self):
    """Makes sure that the docket changes when the citation does."""
    court = Court.objects.get(pk='test')
    old_name = u'original case name'
    new_name = u'new case name'

    cite = Citation(case_name=old_name)
    cite.save(index=False)
    docket = Docket(case_name=old_name, court=court)
    docket.save()
    doc = Document(citation=cite, docket=docket)
    doc.save(index=False)

    # Renaming the citation should cascade to its docket.
    cite.case_name = new_name
    cite.save(index=False)

    refreshed_docket = Docket.objects.get(pk=docket.pk)
    self.assertEqual(refreshed_docket.case_name, new_name)
def test_content_extraction(self):
    """Do all of the supported mimetypes get extracted to text
    successfully, including OCR?"""
    site = test_opinion_scraper.Site().parse()
    # One expected substring per scraped fixture, in site order.
    test_strings = [
        'supreme', 'intelligence', 'indiana', 'reagan', 'indiana',
        'fidelity'
    ]
    for i in range(len(site.case_names)):
        path = os.path.join(settings.INSTALL_ROOT, 'alert',
                            site.download_urls[i])
        with open(path) as f:
            content = f.read()
        cf = ContentFile(content)
        extension = get_extension(content)

        cite = Citation()
        cite.save(index=False)
        docket = Docket(case_name=site.case_names[i], court=self.court)
        docket.save()
        doc = Document(
            date_filed=site.case_dates[i],
            citation=cite,
            docket=docket,
        )
        file_name = trunc(site.case_names[i].lower(), 75) + extension
        doc.local_path.save(file_name, cf, save=False)
        doc.save(index=False)

        doc = extract_doc_content(doc.pk, callback=subtask(extract_by_ocr))
        # HTML and WordPerfect extractions land in .html; everything else
        # (PDF/OCR, etc.) lands in .plain_text.
        extracted = (doc.html if extension in ['.html', '.wpd']
                     else doc.plain_text)
        self.assertIn(test_strings[i], extracted.lower())
        doc.delete()
def setUp(self):
    """Spin up throwaway Solr cores and load opinion and audio fixtures."""
    # Set up some handy variables
    self.court = Court.objects.get(pk='test')

    # Set up testing cores in Solr and swap them in.  The core names carry
    # a timestamp so parallel test runs cannot collide.
    self.core_name_opinion = '%s.opinion-test-%s' % \
                             (self.__module__, time.time())
    self.core_name_audio = '%s.audio-test-%s' % \
                           (self.__module__, time.time())
    create_solr_core(self.core_name_opinion)
    create_solr_core(
        self.core_name_audio,
        schema=os.path.join(settings.INSTALL_ROOT, 'Solr', 'conf',
                            'audio_schema.xml'),
        instance_dir='/usr/local/solr/example/solr/audio',
    )
    swap_solr_core('collection1', self.core_name_opinion)
    swap_solr_core('audio', self.core_name_audio)
    self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                             mode='rw')
    self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                           mode='rw')

    # Add three documents and three audio files to the index, but don't
    # extract their contents
    self.site_opinion = test_opinion_scraper.Site().parse()
    self.site_audio = test_oral_arg_scraper.Site().parse()
    cite_counts = (4, 6, 8)
    self.docs = {}
    for i in range(0, 3):
        cite = Citation(
            case_name=self.site_opinion.case_names[i],
            docket_number=self.site_opinion.docket_numbers[i],
            neutral_cite=self.site_opinion.neutral_citations[i],
            federal_cite_one=self.site_opinion.west_citations[i],
        )
        cite.save(index=False)
        docket = Docket(
            case_name=self.site_opinion.case_names[i],
            court=self.court,
        )
        docket.save()
        self.docs[i] = Document(
            date_filed=self.site_opinion.case_dates[i],
            citation=cite,
            docket=docket,
            precedential_status=self.site_opinion.precedential_statuses[i],
            citation_count=cite_counts[i],
            nature_of_suit=self.site_opinion.nature_of_suit[i],
            judges=self.site_opinion.judges[i],
        )
        # Indexed save (no index=False): these should reach Solr.
        self.docs[i].save()

    # Create citations between the documents
    # 0 ---cites--> 1, 2
    # 1 ---cites--> 2
    # 2 ---cites--> 0
    self.docs[0].cases_cited.add(self.docs[1].citation)
    self.docs[0].cases_cited.add(self.docs[2].citation)
    self.docs[1].cases_cited.add(self.docs[2].citation)
    self.docs[2].cases_cited.add(self.docs[0].citation)
    # Re-save so the citation relations make it into the index too.
    for doc in self.docs.itervalues():
        doc.save()

    # Scrape the audio "site" and add its contents
    site = test_oral_arg_scraper.Site().parse()
    Command().scrape_court(site, full_crawl=True)

    self.expected_num_results_opinion = 3
    self.expected_num_results_audio = 2
    self.si_opinion.commit()
    self.si_audio.commit()
def import_resource_org_item(case_location):
    """Using the path to a case, import it, gathering all needed meta data.

    Path is any valid URI that the requests library can handle.
    """
    def get_file(location):
        # Return (lxml tree, cleaned body text) for a local path or a URL.
        if location.startswith('/'):
            with open(location) as f:
                # Fake out a requests response so both branches expose a
                # .content attribute.
                r = requests.Session()
                r.content = f.read()
        else:
            r = requests.get(location)
        return fromstring(r.content), get_clean_body_content(r.content)

    # Get trees and text for the opinion itself and for the index page
    # that links to it. Each has useful data.
    case_tree, case_text = get_file(case_location)
    # The volume index lives alongside the case file.
    vol_location = case_location.rsplit('/', 1)[-2] + '/index.html'
    vol_tree, vol_text = get_file(vol_location)

    html, blocked = anonymize(get_case_body(case_tree))

    case_location_relative = case_location.rsplit('/', 1)[1]
    case_name, status = get_case_name_and_status(
        vol_tree, case_location_relative)
    cite = Citation(
        case_name=case_name,
        docket_number=get_docket_number(case_location),
        federal_cite_one=get_west_cite(vol_tree, case_location_relative),
    )
    docket = Docket(
        court=Court.objects.get(pk=get_court_id(case_tree)),
        case_name=case_name,
    )
    doc = Document(
        date_filed=get_date_filed(vol_tree, case_location_relative),
        source='R',
        sha1=hashlib.sha1(case_text).hexdigest(),
        citation=cite,
        docket=docket,
        download_url=case_location,
        html=html,
        precedential_status=status,
    )
    if blocked:
        # Anonymization found sensitive content; block both the document
        # and its docket from crawlers as of today.
        doc.blocked = True
        docket.blocked = True
        doc.date_blocked = datetime.date.today()
        docket.date_blocked = datetime.date.today()

    # Persist, then re-attach so the FK ids are set before doc.save().
    cite.save()
    docket.save()
    doc.docket = docket
    doc.citation = cite
    doc.save()

    # Update the citation graph
    from alert.citations.tasks import update_document_by_id
    update_document_by_id(doc.pk)

    return doc
def test_pagerank_calculation(self):
    """Create a few Documents and fake citation relation among them, then
    run the pagerank algorithm. Check whether this simple case can get the
    correct result.
    """
    # Set up some handy variables
    self.court = Court.objects.get(pk='test')

    # Create 3 documents with their citations and dockets.
    c1, c2, c3 = Citation(case_name=u"c1"), Citation(
        case_name=u"c2"), Citation(case_name=u"c3")
    c1.save(index=False)
    c2.save(index=False)
    c3.save(index=False)
    docket1 = Docket(
        case_name=u"c1",
        court=self.court,
    )
    docket2 = Docket(
        case_name=u"c2",
        court=self.court,
    )
    docket3 = Docket(
        case_name=u"c3",
        court=self.court,
    )
    # BUG FIX: the dockets were never saved before being attached to the
    # documents (the otherwise-identical setUp elsewhere in this file does
    # save them), leaving the documents' docket FKs unset.
    docket1.save()
    docket2.save()
    docket3.save()

    d1, d2, d3 = Document(date_filed=date.today()), Document(
        date_filed=date.today()), Document(date_filed=date.today())
    d1.citation, d2.citation, d3.citation = c1, c2, c3
    d1.docket, d2.docket, d3.docket = docket1, docket2, docket3
    doc_list = [d1, d2, d3]
    for d in doc_list:
        d.citation.save(index=False)
        d.save(index=False)

    # Create simple citing relation: 1 cites 2 and 3; 2 cites 3;
    # 3 cites 1.
    d1.cases_cited.add(d2.citation)
    d2.citation_count += 1
    d2.cases_cited.add(d3.citation)
    d3.citation_count += 1
    d3.cases_cited.add(d1.citation)
    d1.citation_count += 1
    d1.cases_cited.add(d3.citation)
    d3.citation_count += 1
    d1.save(index=False)
    d2.save(index=False)
    d3.save(index=False)

    # Calculate the pagerank of these 3 documents.
    comm = Command()
    self.verbosity = 1
    comm.do_pagerank(chown=False)

    # Read in the pagerank file, converting to a dict.
    pr_values_from_file = {}
    with open(get_data_dir_location() + "external_pagerank") as f:
        for line in f:
            pk, value = line.split('=')
            pr_values_from_file[pk] = float(value.strip())

    # Verify whether the answer is correct, based on calculations in Gephi.
    answers = {
        '1': 0.387790,
        '2': 0.214811,
        '3': 0.397400,
    }
    for key, value in answers.iteritems():
        # BUG FIX: the original asserted ``abs(x) - value < 0.0001``,
        # which also passes whenever x is merely *smaller* than the
        # expected value. The tolerance must wrap the whole difference.
        self.assertTrue(
            abs(pr_values_from_file[key] - value) < 0.0001,
            msg="The answer for item %s was %s when it should have been "
                "%s" % (key, pr_values_from_file[key], answers[key],))