def import_law_box_case(case_path):
    """Parse the case file at ``case_path`` and flag it when no date is found.

    The file is read, converted to HTML trees via ``get_html_from_raw_text``,
    and then its citations, court and filing date(s) are extracted with the
    corresponding helpers.

    When ``get_date_filed`` returns a falsy value, the ``DEBUG`` flags decide
    what happens:

     - ``'review_issues'``: open the file in Firefox, wait for the browser
       process to finish, then prompt the operator on stdin.
     - ``'log_bad_values'``: append ``case_path`` to
       ``missing_dates_post_focus.txt`` in the current working directory.

    :param case_path: Filesystem path of the case file to import.
    :return: None, as far as the code visible here shows.
    """
    # Read through a context manager so the handle is closed right away
    # instead of lingering until garbage collection.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    clean_html_tree, complete_html_tree, clean_html_str, body_text = get_html_from_raw_text(raw_text)
    citations = get_citations_from_tree(complete_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path)
    dates = get_date_filed(clean_html_tree, citations, case_path=case_path, court=court)

    # Each DEBUG flag is checked on its own below, so no combined pre-check
    # is needed.
    if not dates:
        if 'review_issues' in DEBUG:
            # communicate() blocks until the browser process exits.
            subprocess.Popen(['firefox', 'file://%s' % case_path], shell=False).communicate()
            raw_input("No date identified! Can we fix this and restart, or just press enter to log it? ")
        if 'log_bad_values' in DEBUG:
            # Write the failed case out to file.
            with open('missing_dates_post_focus.txt', 'a') as out:
                out.write('%s\n' % case_path)
def import_law_box_case(case_path):
    """Parse the case file at ``case_path`` and flag it when no date is found.

    Steps performed:

     1. Read the raw text of the file.
     2. Build the HTML trees/strings with ``get_html_from_raw_text``.
     3. Extract citations, the court object and the filing date(s).
     4. If no date came back, act on the ``DEBUG`` flags: with
        ``'review_issues'`` the file is opened in Firefox and the operator is
        prompted; with ``'log_bad_values'`` the path is appended to
        ``missing_dates_post_focus.txt``.

    :param case_path: Filesystem path of the case file to import.
    :return: None, as far as the code visible here shows.
    """
    # A ``with`` block guarantees the file handle is closed as soon as the
    # contents have been read.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    clean_html_tree, complete_html_tree, clean_html_str, body_text = get_html_from_raw_text(
        raw_text)
    citations = get_citations_from_tree(complete_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path)
    dates = get_date_filed(clean_html_tree,
                           citations,
                           case_path=case_path,
                           court=court)

    # The individual flag checks below already cover the "neither flag set"
    # case, so only the missing-date test is needed here.
    if not dates:
        if 'review_issues' in DEBUG:
            # Blocks until the browser process exits, then asks the operator.
            subprocess.Popen(['firefox', 'file://%s' % case_path],
                             shell=False).communicate()
            raw_input(
                "No date identified! Can we fix this and restart, or just press enter to log it? "
            )
        if 'log_bad_values' in DEBUG:
            # Write the failed case out to file.
            with open('missing_dates_post_focus.txt', 'a') as out:
                out.write('%s\n' % case_path)
Exemple #3
0
def cleaner(simulate=False, verbose=False):
    """Find items that:

     - Contain the word "argued"
     - Occur between 2002-01-01 and 2031-12-31
     - Are precedential
     - Have a source == L.
     - Match a regex for the funky date pattern

    For every such item the filing date is recomputed from the stored Lawbox
    HTML with ``get_date_filed``; when it differs from the saved value the
    document is updated. A single Solr commit is issued after the loop.

    :param simulate: If True, report what would change but do not save any
        document.
    :param verbose: If True, print progress information for each item.
    """
    # Compiled once up front rather than looked up on every iteration. The
    # raw string keeps the ``\d`` escapes intact for the regex engine.
    bad_date_re = re.compile(r"Argued.{1,12}\d{1,2}-\d{1,2}, \d{4}")

    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode="rw")
    q = {
        "q": "argued",
        "fl": "id,text,source",
        "fq": [
            "dateFiled:[2002-01-01T00:00:00Z TO 2031-12-31T00:00:00Z]",
            'status_exact:("Precedential")',
        ],
        "sort": "dateFiled asc",
        "caller": "cleanup_script",
    }
    results = conn.raw_query(**q)
    for r in results:
        if verbose:
            print("Running tests on item %s" % r["id"])
        # We iterate over the search results. For each one, we run tests on it to see if it needs a fix.
        # If so, we get the record from the database and update it. If not, we continue.
        if r["source"] != "L":
            # Only affects pure Lawbox cases. Merged cases did not have their date updated.
            if verbose:
                print("  - Source is %s. Punting." % r["source"])
            continue

        if not bad_date_re.search(r["text"]):
            # Lacks the affronting line. Onwards.
            if verbose:
                print("  - Lacks the bad date string. Punting.")
            continue

        if verbose:
            print("  - All tests pass. This item may be modified. (Simulate is: %s)" % simulate)

        doc = Document.objects.get(pk=r["id"])
        clean_html_tree = html.fromstring(doc.html_lawbox)

        # NOTE(review): .date() assumes get_date_filed returns a datetime-like
        # object. A None/empty result would raise AttributeError here and
        # abort the run before the final commit -- confirm this cannot happen
        # for the documents selected above.
        new_date = get_date_filed(clean_html_tree, citations=[]).date()

        if verbose:
            print("  - https://www.courtlistener.com%s" % doc.get_absolute_url())
            print("  - Old date was: %s" % doc.date_filed)
            print("  - New date is:  %s" % new_date)

        if new_date == doc.date_filed:
            # No change needed, simply move on.
            if verbose:
                print("  - Dates are equal: Proceeding.")
            continue

        if verbose:
            print("  - Updating with new date.")
        if not simulate:
            doc.date_filed = new_date
            # Presumably force_commit=False defers the index commit to the
            # single conn.commit() below -- verify against Document.save.
            doc.save(index=True, force_commit=False)

    # Do one big commit at the end
    conn.commit()
def cleaner(simulate=False, verbose=False):
    """Find items that:

     - Contain the word "argued"
     - Occur between 2002-01-01 and 2031-12-31
     - Are precedential
     - Have a source == L.
     - Match a regex for the funky date pattern

    Each matching item has its filing date re-derived from the stored Lawbox
    HTML via ``get_date_filed``. If the new date differs from the saved one,
    the document is saved with the new date. One Solr commit is issued once
    all results have been processed.

    :param simulate: If True, only report the changes; nothing is saved.
    :param verbose: If True, print what is happening for every item.
    """
    # Build the pattern a single time before looping. Written as a raw string
    # so the backslash escapes are passed to the regex engine verbatim.
    bad_date_re = re.compile(r'Argued.{1,12}\d{1,2}-\d{1,2}, \d{4}')

    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')
    q = {
        'q': 'argued',
        'fl': 'id,text,source',
        'fq': [
            'dateFiled:[2002-01-01T00:00:00Z TO 2031-12-31T00:00:00Z]',
            'status_exact:("Precedential")',
        ],
        'sort': 'dateFiled asc',
        'caller': 'cleanup_script',
    }
    results = conn.raw_query(**q)
    for r in results:
        if verbose:
            print("Running tests on item %s" % r['id'])
        # We iterate over the search results. For each one, we run tests on it to see if it needs a fix.
        # If so, we get the record from the database and update it. If not, we continue.
        if r['source'] != 'L':
            # Only affects pure Lawbox cases. Merged cases did not have their date updated.
            if verbose:
                print("  - Source is %s. Punting." % r['source'])
            continue

        if not bad_date_re.search(r['text']):
            # Lacks the affronting line. Onwards.
            if verbose:
                print("  - Lacks the bad date string. Punting.")
            continue

        if verbose:
            print("  - All tests pass. This item may be modified. (Simulate is: %s)" % simulate)

        doc = Document.objects.get(pk=r['id'])
        clean_html_tree = html.fromstring(doc.html_lawbox)

        # NOTE(review): calling .date() assumes get_date_filed hands back a
        # datetime-like object. If it ever returns None or an empty value,
        # this raises AttributeError and the loop stops before the final
        # commit -- confirm that cannot occur for these documents.
        new_date = get_date_filed(clean_html_tree, citations=[]).date()

        if verbose:
            print("  - https://www.courtlistener.com%s" % doc.get_absolute_url())
            print("  - Old date was: %s" % doc.date_filed)
            print("  - New date is:  %s" % new_date)

        if new_date == doc.date_filed:
            # No change needed, simply move on.
            if verbose:
                print("  - Dates are equal: Proceeding.")
            continue

        if verbose:
            print("  - Updating with new date.")
        if not simulate:
            doc.date_filed = new_date
            # Presumably force_commit=False leaves the index commit to the
            # single conn.commit() after the loop -- verify in Document.save.
            doc.save(index=True, force_commit=False)

    # Do one big commit at the end
    conn.commit()