Example #1
def handle_adddocmeta(docid, court, casenum, de_seq_num, dm_id, docnum, subdocnum):

    docid = ParsePacer.coerce_docid(docid)

    query = Document.objects.filter(court=court, casenum=casenum,
                                    docnum=docnum, subdocnum=subdocnum)

    try:
        doc = query[0]
    except IndexError:
        doc = Document(
            docid=docid,
            court=court,
            casenum=casenum,
            de_seq_num=de_seq_num,
            dm_id=dm_id,
            docnum=docnum,
            subdocnum=subdocnum,
        )
    else:
        doc.de_seq_num = de_seq_num
        doc.dm_id = dm_id
        doc.docnum = docnum
        doc.docid = docid

    try:
        doc.save()
    except IntegrityError:
        logging.error("handle_adddocmeta: could not save docid %s" % (docid))
Example #2
def docid_from_url_name(url):
    """ Extract the docid from a PACER URL name. """
    match = doc_re.search(url)
    if match:
        return ParsePacer.coerce_docid(match.group(1))
    match = ca_doc_re.search(url)
    if match:
        return match.group(1) or match.group(2)
    raise ValueError('docid_from_url_name: no docid in %r' % url)
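The function relies on two module-level regexes, doc_re and ca_doc_re, that are not part of this fragment. The patterns below are hypothetical stand-ins for illustration only, shaped after district /doc1/ links and the appellate dls_id URL quoted in Example #6's docstring:

import re

# Hypothetical stand-ins -- the module's real patterns may differ.
doc_re = re.compile(r'/doc1/(\d+)')                   # district doc1 links (assumed)
ca_doc_re = re.compile(r'dls_id=(\d+)|/docs1/(\d+)')  # appellate variants (assumed)

print docid_from_url_name('https://ecf.cand.uscourts.gov/doc1/03112345678')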
Example #3
def create_docket_pickles(path, court):
    # Gather the opinion report files; their dockets will be pickled below.
    opinion_reports = glob.glob(os.path.join(path, "*.opinions.*"))

    try:
        os.mkdir(os.path.join(path, 'docket_pickles'))
    except OSError:
        # The docket_pickles directory already exists; reuse it as-is.
        pass

    for report in opinion_reports:
        filebits = open(report).read()
        dockets = PP.parse_opinions(filebits, court)

        if dockets:
            print "Found %s dockets in %s " % (len(dockets), report)
            for docket in dockets:
                if len(docket.documents) != 1:
                    raise ValueError("Docket does not have exactly one "
                                     "document! docket text: %s" % docket)
                doc = docket.documents.values()[0]
                filename = _get_docket_pickle_filename(
                    court, doc['casenum'], doc['doc_num'],
                    doc['attachment_num'])
                success, msg = IA.pickle_object(docket, filename, os.path.join(path, "docket_pickles"))

                if not success:
                    print "Error pickling file %s: %s " % (filename, msg)
Example #4
def handle_adddocmeta(docid, court, casenum, de_seq_num, dm_id, docnum,
                      subdocnum):
    docid = ParsePacer.coerce_docid(docid)

    query = Document.objects.filter(court=court,
                                    casenum=casenum,
                                    docnum=docnum,
                                    subdocnum=subdocnum)

    try:
        doc = query[0]
    except IndexError:
        doc = Document(docid=docid,
                       court=court,
                       casenum=casenum,
                       de_seq_num=de_seq_num,
                       dm_id=dm_id,
                       docnum=docnum,
                       subdocnum=subdocnum)
    else:
        doc.de_seq_num = de_seq_num
        doc.dm_id = dm_id
        doc.docnum = docnum
        doc.docid = docid

    try:
        doc.save()
    except IntegrityError:
        logging.error("handle_adddocmeta: could not save docid %s" % (docid))
Example #5
    def get_opinions(self, court, start_date, end_date):
        html = self.pacer_client.get_opinions_html(court, start_date, end_date)

        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        #if len(dockets) == 0:
        #    logger.debug(' 0 dockets downloaded. HTML response: %s', html)
        return dockets
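A sketch of calling the method above, assuming a scraper object already constructed with a pacer_client; the court id and date range are invented:

# Hypothetical usage of get_opinions on an assumed 'scraper' instance.
dockets = scraper.get_opinions('ca9', '2011-01-01', '2011-01-31')
for docket in dockets:
    print docket.casemeta    # case metadata, as used in Example #14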
Example #6
def docid_from_url_name(url):
    """ Extract the docid from a PACER URL name.

    CA courts sometimes have: /cmecf/servlet/TransportRoom?servlet=ShowDoc&dls_id=00404800657&caseId=124912&dktType=dktPublic
    """
    match = doc_re.search(url)
    if match:
        return ParsePacer.coerce_docid(match.group(1))
    match = ca_doc_re.search(url)
    if match:
        return match.group(1) or match.group(2)
    raise ValueError('docid_from_url_name: no docid in %r' % url)
Example #7
def handle_doc1(filebits, court, filename, team_name):
    """ Write HTML (doc1) file metadata into the database. """

    logging.debug('handle_doc1 %s %s', court, filename)

    docid = docid_from_url_name(filename)

    query = Document.objects.filter(docid=docid)

    try:
        main_doc = query[0]
    except IndexError:
        logging.info("handle_doc1: unknown docid %s" % (docid))
        return "upload: doc1 ignored."
    else:
        casenum = main_doc.casenum
        main_docnum = main_doc.docnum

        # Sanity check
        if court != main_doc.court:
            logging.error("handle_doc1: court mismatch (%s, %s) %s" %
                          (court, main_doc.court, docid))
            return "upload: doc1 metadata mismatch."

    if ParsePacer.is_appellate(court):
        docket = ParsePacer.parse_ca_doc1(filebits, court, casenum,
                                          main_docnum)
    else:
        docket = ParsePacer.parse_doc1(filebits, court, casenum, main_docnum)

    if docket:
        # Merge the docket with IA
        do_me_up(docket)
        # Update the local DB
        DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "doc1 successfully parsed."}
    message = simplejson.dumps(response)
    return message
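Each upload handler in this module returns the same three-key JSON envelope. A sketch of decoding it on the client side; the shapes behind "cases" and "documents" come from _get_cases_dict and _get_documents_dict and are not spelled out in this fragment:

# Sketch only: decode the JSON reply from the handler above.
reply = simplejson.loads(handle_doc1(filebits, court, filename, team_name))
print reply["message"]          # e.g. "doc1 successfully parsed."
cases = reply["cases"]          # per-case metadata (_get_cases_dict)
documents = reply["documents"]  # per-document metadata (_get_documents_dict)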
Example #8
    def get_opinions(self, court, start_date, end_date):
        html = self.pacer_client.get_opinions_html(court,
                                                   start_date,
                                                   end_date)

        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        #if len(dockets) == 0:
        #    logger.debug(' 0 dockets downloaded. HTML response: %s', html)
        return dockets
Example #9
def handle_cadkt(filebits, court, casenum, is_full=False):
    docket = ParsePacer.parse_cadkt(filebits, court, casenum, is_full)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB
    DocumentManager.update_local_db(docket)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message":"DktRpt successfully parsed."}
    message = simplejson.dumps(response)

    return message
Example #10
def handle_cadkt(filebits, court, casenum, team_name, is_full=False):
    docket = ParsePacer.parse_cadkt(filebits, court, casenum, is_full)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB
    DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "DktRpt successfully parsed."}
    message = simplejson.dumps(response)

    return message
Example #11
def handle_histdocqry(filebits, court, casenum, team_name):
    docket = ParsePacer.parse_histdocqry(filebits, court, casenum)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB
    DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "HistDocQry successfully parsed."}

    message = simplejson.dumps(response)

    return message
Example #12
def handle_dktrpt(filebits, court, casenum):

    if config['DUMP_DOCKETS'] and re.search(config['DUMP_DOCKETS_COURT_REGEX'],
                                            court):
        logging.info("handle_dktrpt: Dumping docket %s.%s for debugging" %
                     (court, casenum))
        _dump_docket_for_debugging(filebits, court, casenum)

    docket = ParsePacer.parse_dktrpt(filebits, court, casenum)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB
    DocumentManager.update_local_db(docket)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message":"DktRpt successfully parsed."}
    message = simplejson.dumps(response)

    return message
Example #13
def handle_dktrpt(filebits, court, casenum, team_name):
    if config.DUMP_DOCKETS and re.search(config.DUMP_DOCKETS_COURT_REGEX,
                                         court):
        logging.info("handle_dktrpt: Dumping docket %s.%s for debugging" % (
            court, casenum))
        _dump_docket_for_debugging(filebits, court, casenum)

    docket = ParsePacer.parse_dktrpt(filebits, court, casenum)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB
    DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "DktRpt successfully parsed."}
    message = simplejson.dumps(response)

    return message
Example #14
def process_case(casenum):

    # Setup: Grab the lock.
    got_lock, nonce_or_message = lock(court, casenum)

    if got_lock:
        print "got the lock: %s" % (nonce_or_message)
        nonce = nonce_or_message
    else:
        print "could not get lock: %s" % (nonce_or_message)
        add_to_retry(casenum)
        return False

    casedir = "%s/%s" % (dirarg, casenum)

    # Step 1: Parse the docket.html file.
    try:
        docketpath = "%s/docket.html" % casedir
        docketfile = open(docketpath)
        docketbits = docketfile.read()
        docketfile.close()
    except IOError:
        reason = "could not open local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False
    else:
        docket = ParsePacer.parse_histdocqry(docketbits, court, casenum)

    if not docket:
        reason = "could not parse local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False

    # Step 1a: Try to fetch the existing IA docket.
    ia_docket = None
    ia_docket_orig_string = ""
    ia_casemeta_orig_hash = ""

    ia_docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if ia_docketstring:

        # Got the existing docket-- parse it.
        ia_docket, parseerror = DocketXML.parse_xml_string(ia_docketstring)
        if not ia_docket:
            reason = "could not parse IA docket: %s" % (parseerror)
            print "***Skipping %s.%s: %s... " % (court, casenum, reason),
            print_unlock_message(unlock(court, casenum, False))
            del_from_retry(casenum)
            add_to_failed(casenum, reason)
            return False
        else:
            # Save the original docket hashes
            ia_docket_orig_string = ia_docketstring
            ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))

    elif fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- ok.
        pass

    elif fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- either make_bucket failed or not yet ready.

        if casenum not in bucket_made:
            # If make_bucket failed, try make_bucket again.
            print "  make bucket...",
            make_bucket(casenum)

    elif fetcherror is IADirect.FETCH_TIMEOUT:
        # Couldn't contact IA, skip.
        print "***Skipping %s.%s: IA is down... " % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    elif not ia_docketstring:
        # Unknown fetch error, skip.
        print "***Skipping %s.%s: unknown docket fetch error: %s..." % \
            (court, casenum, fetcherror),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    # Step 1b: If necessary, merge the two dockets.
    if ia_docket:
        ia_docket.merge_docket(docket)
    else:
        ia_docket = docket

    casedir_ls = os.listdir(casedir)

    index_ls = []
    pdf_ls = []
    for casedocname in casedir_ls:
        if casedocname.endswith("index.html"):
            index_ls.append(casedocname)
        elif casedocname.endswith(".pdf"):
            pdf_ls.append(casedocname)

    # Step 2: Parse each index file
    for indexname in index_ls:

        try:
            indexpath = "%s/%s" % (casedir, indexname)
            indexfile = open(indexpath)
            indexbits = indexfile.read()
            indexfile.close()
        except IOError:
            print "***Could not open file '%s'" % indexpath
            continue

        # Chop the "-index.html" suffix; str.strip() removes a character
        # set, not a suffix, and only works here by accident.
        docnum = indexname[:-len("-index.html")]
        index_docket = ParsePacer.parse_doc1(indexbits, court, casenum, docnum)
        # Merge this docket into the IA docket
        ia_docket.merge_docket(index_docket)

    # Set initial flag for retrying this case.
    need_to_retry = 0

    # Step 3: Wait for the bucket to be ready
    bucketready = False
    for checkcount in xrange(20):
        bucketready, code = IADirect.check_bucket_ready(court, casenum)
        if bucketready:
            break
        else:
            # Wait 5 seconds and try again.
            time.sleep(5)

    if not bucketready:
        print "***Skipping %s.%s: bucket is not ready... " \
            % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    # Step 4: Upload each pdf file.
    doccount = 0
    for pdfname in pdf_ls:
        doccount += 1

        print "  uploading document %d/%d..." % (doccount, len(pdf_ls)),

        try:
            pdfpath = "%s/%s" % (casedir, pdfname)
            pdffile = open(pdfpath)
            pdfbits = pdffile.read()
            pdffile.close()
        except IOError:
            print "***Could not open file '%s'" % pdfpath
            continue

        # Chop the ".pdf" suffix (str.strip() strips characters, not a suffix).
        pdfname = pdfname[:-len(".pdf")]
        split = pdfname.split("-")
        try:
            docnum = unicode(int(split[0]))
        except ValueError:
            # Not an integer.
            print "***Docnum not an integer '%s'" % pdfpath
            continue

        try:
            # converting v3->v4 subdocnums
            subdocnum = unicode(int(split[1]) - 1)
        except IndexError:
            subdocnum = "0"

        doc_docket = DocketXML.make_docket_for_pdf(pdfbits, court, casenum,
                                                   docnum, subdocnum)
        doc_meta = doc_docket.get_document_metadict(docnum, subdocnum)

        # Only upload the PDF if the hash doesn't match the one in IA.
        ia_pdfhash = ia_docket.get_document_sha1(docnum, subdocnum)
        pdfhash = doc_docket.get_document_sha1(docnum, subdocnum)

        if ia_pdfhash != pdfhash:
            pdfstatus, pdferror = \
                IADirect.put_pdf(pdfbits, court, casenum,
                                 docnum, subdocnum, doc_meta)

            if not pdfstatus:
                # PUT failed, mark document as unavailable
                doc_docket.set_document_available(docnum, subdocnum, "0")
                print " fail: %s" % pdferror
                need_to_retry = True
                continue
            else:
                print "done."

            # Add this document's metadata into the ia_docket
            ia_docket.merge_docket(doc_docket)

        else:
            print "same."

    # Step 5: Push the docket to IA, if things have changed.
    print "  docket upload...",

    docket_modified = 0
    ignore_nonce = 0
    ia_docket_merged_string = ia_docket.to_xml()

    if ia_docket_orig_string != ia_docket_merged_string:

        # Assign the docket the new nonce from the lock
        ia_docket.nonce = nonce

        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash

        putstatus, puterror = \
            IADirect.put_docket(ia_docket, court, casenum,
                                casemeta_diff=casemeta_diff)

        if putstatus:
            docket_modified = 1
            print "done."
        else:
            need_to_retry = 1
            print "fail: %s" % puterror
    else:
        ignore_nonce = 1
        print "same."

    if ignore_nonce:
        print_unlock_message(unlock(court, casenum, ignore_nonce=1))
    else:
        print_unlock_message(unlock(court, casenum, modified=docket_modified))

    if need_to_retry:
        add_to_retry(casenum)
        return False
    else:
        return True
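process_case leans on module globals (court, dirarg) and retry bookkeeping helpers (add_to_retry, del_from_retry, add_to_failed) defined elsewhere. A hedged sketch of the driver loop they imply; retry_queue is an invented stand-in, not the project's actual container:

# Hypothetical outer loop over a directory of per-case dumps.
for casenum in sorted(os.listdir(dirarg)):
    process_case(casenum)

# Re-run whatever add_to_retry collected; 'retry_queue' is assumed.
for casenum in list(retry_queue):
    del_from_retry(casenum)
    process_case(casenum)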
Example #15
def process_case(casenum):

    # Setup: Grab the lock.
    got_lock, nonce_or_message = lock(court, casenum)

    if got_lock:
        print "got the lock: %s" % (nonce_or_message)
        nonce = nonce_or_message
    else:
        print "could not get lock: %s" % (nonce_or_message)
        add_to_retry(casenum)
        return False

    casedir = "%s/%s" % (dirarg, casenum)

    # Step 1: Parse the docket.html file.
    try:
        docketpath = "%s/docket.html" % casedir
        docketfile = open(docketpath)
        docketbits = docketfile.read()
        docketfile.close()
    except IOError:
        reason = "could not open local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False
    else:
        docket = ParsePacer.parse_histdocqry(docketbits, court, casenum)

    if not docket:
        reason = "could not parse local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False

    # Step 1a: Try to fetch the existing IA docket.
    ia_docket = None
    ia_docket_orig_string = ""
    ia_casemeta_orig_hash = ""

    ia_docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if ia_docketstring:

        # Got the existing docket-- parse it.
        ia_docket, parseerror = DocketXML.parse_xml_string(ia_docketstring)
        if not ia_docket:
            reason = "could not parse IA docket: %s" % (parseerror)
            print "***Skipping %s.%s: %s... " % (court, casenum, reason),
            print_unlock_message(unlock(court, casenum, False))
            del_from_retry(casenum)
            add_to_failed(casenum, reason)
            return False
        else:
            # Save the original docket hashes
            ia_docket_orig_string = ia_docketstring
            ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))

    elif fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- ok.
        pass

    elif fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- either make_bucket failed or not yet ready.

        if casenum not in bucket_made:
            # If make_bucket failed, try make_bucket again.
            print "  make bucket...",
            make_bucket(casenum)

    elif fetcherror is IADirect.FETCH_TIMEOUT:
        # Couldn't contact IA, skip.
        print "***Skipping %s.%s: IA is down... " % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    elif not ia_docketstring:
        # Unknown fetch error, skip.
        print "***Skipping %s.%s: unknown docket fetch error: %s..." % \
            (court, casenum, fetcherror),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    # Step 1b: If necessary, merge the two dockets.
    if ia_docket:
        ia_docket.merge_docket(docket)
    else:
        ia_docket = docket

    casedir_ls = os.listdir(casedir)

    index_ls = []
    pdf_ls = []
    for casedocname in casedir_ls:
        if casedocname.endswith("index.html"):
            index_ls.append(casedocname)
        elif casedocname.endswith(".pdf"):
            pdf_ls.append(casedocname)

    # Step 2: Parse each index file
    for indexname in index_ls:

        try:
            indexpath = "%s/%s" % (casedir, indexname)
            indexfile = open(indexpath)
            indexbits = indexfile.read()
            indexfile.close()
        except IOError:
            print "***Could not open file '%s'" % indexpath
            continue

        # Chop the "-index.html" suffix; str.strip() removes a character
        # set, not a suffix, and only works here by accident.
        docnum = indexname[:-len("-index.html")]
        index_docket = ParsePacer.parse_doc1(indexbits, court,
                                             casenum, docnum)
        # Merge this docket into the IA docket
        ia_docket.merge_docket(index_docket)

    # Set initial flag for retrying this case.
    need_to_retry = 0

    # Step 3: Wait for the bucket to be ready
    bucketready = False
    for checkcount in xrange(20):
        bucketready, code = IADirect.check_bucket_ready(court, casenum)
        if bucketready:
            break
        else:
            # Wait 5 seconds and try again.
            time.sleep(5)

    if not bucketready:
        print "***Skipping %s.%s: bucket is not ready... " \
            % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False

    # Step 4: Upload each pdf file.
    doccount = 0
    for pdfname in pdf_ls:
        doccount += 1

        print "  uploading document %d/%d..." % (doccount, len(pdf_ls)),

        try:
            pdfpath = "%s/%s" % (casedir, pdfname)
            pdffile = open(pdfpath)
            pdfbits = pdffile.read()
            pdffile.close()
        except IOError:
            print "***Could not open file '%s'" % pdfpath
            continue

        # Chop the ".pdf" suffix (str.strip() strips characters, not a suffix).
        pdfname = pdfname[:-len(".pdf")]
        split = pdfname.split("-")
        try:
            docnum = unicode(int(split[0]))
        except ValueError:
            # Not an integer.
            print "***Docnum not an integer '%s'" % pdfpath
            continue

        try:
            # converting v3->v4 subdocnums
            subdocnum = unicode(int(split[1]) - 1)
        except IndexError:
            subdocnum = "0"

        doc_docket = DocketXML.make_docket_for_pdf(pdfbits, court,
                                                   casenum, docnum,
                                                   subdocnum)
        doc_meta = doc_docket.get_document_metadict(docnum, subdocnum)

        # Only upload the PDF if the hash doesn't match the one in IA.
        ia_pdfhash = ia_docket.get_document_sha1(docnum, subdocnum)
        pdfhash = doc_docket.get_document_sha1(docnum, subdocnum)

        if ia_pdfhash != pdfhash:
            pdfstatus, pdferror = \
                IADirect.put_pdf(pdfbits, court, casenum,
                                 docnum, subdocnum, doc_meta)

            if not pdfstatus:
                # PUT failed, mark document as unavailable
                doc_docket.set_document_available(docnum, subdocnum, "0")
                print " fail: %s" % pdferror
                need_to_retry = True
                continue
            else:
                print "done."

            # Add this document's metadata into the ia_docket
            ia_docket.merge_docket(doc_docket)

        else:
            print "same."

    # Step 5: Push the docket to IA, if things have changed.
    print "  docket upload...",

    docket_modified = 0
    ignore_nonce = 0
    ia_docket_merged_string = ia_docket.to_xml()

    if ia_docket_orig_string != ia_docket_merged_string:

        # Assign the docket the new nonce from the lock
        ia_docket.nonce = nonce

        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash

        putstatus, puterror = \
            IADirect.put_docket(ia_docket, court, casenum,
                                casemeta_diff=casemeta_diff)

        if putstatus:
            docket_modified = 1
            print "done."
        else:
            need_to_retry = 1
            print "fail: %s" % puterror
    else:
        ignore_nonce = 1
        print "same."

    if ignore_nonce:
        print_unlock_message(unlock(court, casenum, ignore_nonce=1))
    else:
        print_unlock_message(unlock(court, casenum, modified=docket_modified))

    if need_to_retry:
        add_to_retry(casenum)
        return False
    else:
        return True
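Step 5 re-uploads only when the serialized docket changed, and flags a case-metadata change by hashing the pickled casemeta dict. A minimal, self-contained sketch of that change test with toy data:

import pickle

# Toy stand-ins for the case metadata before and after a merge.
orig_meta = {'court': 'cand', 'casenum': '12345'}
merged_meta = {'court': 'cand', 'casenum': '12345', 'title': 'Doe v. Roe'}

# Same technique as Step 5: compare hashes of the pickled dicts.
casemeta_diff = hash(pickle.dumps(orig_meta)) != hash(pickle.dumps(merged_meta))
print casemeta_diff    # True -- the merge added a field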