def _cron_process_PDF(obj, ppentry): filename = ppentry.filename meta = IACommon.get_meta_from_filename(filename) court = meta["court"] casenum = meta["casenum"] docnum = meta["docnum"] subdocnum = meta["subdocnum"] invalid_PDF = _is_invalid_pdf(obj, filename) # We only want to check for ssns on valid PDFs # PyPdf doesn't deal well with bad input if not invalid_PDF: # SSN privacy check has_ssn = _has_ssn(obj, filename) else: has_ssn = False # Blacklist file check in_blacklist = _in_blacklist(filename) if invalid_PDF or has_ssn or in_blacklist: docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=0) UploadHandler.do_me_up(docket) # Delete the entry from the DB ppentry.delete() # Quarantine the pickle file for analysis _quarantine_pickle(filename, ssn=has_ssn, blacklist_file=in_blacklist, invalid_PDF=invalid_PDF) return put_result, put_msg = _dispatch_put(obj, ppentry) if put_result: # Put success-- mark this document as available in the DB DocumentManager.mark_as_available(filename) docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=1) UploadHandler.do_me_up(docket) print " %s %s" % (filename, put_msg)
def _cron_process_PDF(obj, ppentry): filename = ppentry.filename meta = IACommon.get_meta_from_filename(filename) court = meta["court"] casenum = meta["casenum"] docnum = meta["docnum"] subdocnum = meta["subdocnum"] invalid_PDF = _is_invalid_pdf(obj, filename) # We only want to check for ssns on valid PDFs # PyPdf doesn't deal well with bad input if not invalid_PDF: # SSN privacy check has_ssn = _has_ssn(obj, filename) else: has_ssn = False # Blacklist file check in_blacklist = _in_blacklist(filename) if invalid_PDF or has_ssn or in_blacklist: docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=0) UploadHandler.do_me_up(docket) # Delete the entry from the DB ppentry.delete() # Quarantine the pickle file for analysis _quarantine_pickle(filename, ssn=has_ssn, blacklist_file= in_blacklist, invalid_PDF= invalid_PDF) return put_result, put_msg = _dispatch_put(obj, ppentry) if put_result: # Put success-- mark this document as available in the DB DocumentManager.mark_as_available(filename) docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=1) UploadHandler.do_me_up(docket) print " %s %s" % (filename, put_msg)
def _cron_process_docketXML(docket, ppentry):
    ''' Merge a queued docket into IA, creating the bucket/file as needed.

    Caller is required to hold the bucket lock for this case.
    On transient fetch errors, nothing is done so a later cron run retries.
    '''
    court = docket.casemeta["court"]
    casenum = docket.casemeta["pacer_case_num"]
    # Force '0' in the XML on docs that failed to upload.
    _update_docs_availability(docket)
    # The docket filename (used only for log messages below).
    docketname = IACommon.get_docketxml_name(court, casenum)
    # Step 1: Try to fetch the existing docket from IA
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if docketstring:
        # Got the existing docket-- put merged docket file.
        ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
        if ia_docket:
            put_result, put_msg = _cron_me_up(ia_docket, docket, ppentry)
            print " %s %s" % (docketname, put_msg)
        else:
            print " %s docket parsing error: %s" % (docketname, parse_msg)
    elif fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- put a new docket file.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry)
        print " %s put into existing bucket: %s" % (docketname, put_msg)
    elif fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- make the bucket and put a new docket file.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry,
                                         newbucket=1)
        print " %s put into new bucket: %s" % (docketname, put_msg)
    elif fetcherror is IADirect.FETCH_URLERROR:
        # Couldn't get the IA docket.
        # Unset the processing flag for later
        # ppentry.processing = 0
        # ppentry.save()
        # Leave the pickle file for later
        # Drop Lock Here?
        print " %s timed out. wait for next cron." % (docketname)
    else:
        # Unknown fetch error.
        # Unset the processing flag for later
        # ppentry.processing = 0
        # ppentry.save()
        # Drop Lock Here?
        # Leave the pickle file for later
        print " %s unknown fetch error. wait for next cron." % (docketname)
def get_lock(court, casenum, uploaderid, one_per_uploader=0):
    """Try to acquire the bucket lock for (court, casenum).

    Returns (nonce, "") on success, or (None, reason) on failure.
    Uniqueness is enforced by the DB: the initial save() raises
    IntegrityError if a lock row already exists for this case.
    With one_per_uploader set, an uploader is refused a lock it
    already holds (prevents two cron jobs working the same case).
    """
    nonce = DocketXML.generate_new_nonce()
    lock = BucketLock(court=court, casenum=casenum, uploaderid=uploaderid,
                      nonce=nonce)
    try:
        lock.save()
    except IntegrityError:
        # Fail, lock already exists.
        lockquery = BucketLock.objects.filter(court=court).filter(casenum=casenum)
        try:
            lock = lockquery[0]
        except IndexError:
            # No lock exists anymore-- must have just missed it.
            # NOTE(review): returns "Locked." even though the lock vanished;
            # the caller is expected to simply retry later.
            return None, "Locked."
        else:
            # Lock already exists.
            # This prevents two cron jobs from requesting the same lock.
            if lock.uploaderid == uploaderid and one_per_uploader:
                return None, "You already own this lock (Another cron job?)"
            if lock.uploaderid == uploaderid and not lock.ready:
                # Same uploader, lock not yet marked ready: hand back its nonce.
                return lock.nonce, ""
            if lock.uploaderid == uploaderid and lock.ready and not lock.processing:
                # If we're not currently processing the case, let the same
                # uploader modify it
                lock.ready = 0
                lock.save()
                return lock.nonce, ""
            else:
                return None, "Locked by another user."
    else:
        # Success.
        return nonce, ""
def delete_documents_from_docket(court, casenum, documents):
    """Remove *documents* from the IA docket XML and re-upload it.

    Exits the process outright if the docket cannot be fetched or parsed
    (this appears to be a maintenance-script helper, not server code).
    """
    # Step 1: Get docket and convert into DocketXML
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        print "Could not find docket on IA, exiting...."
        exit()
    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        print "Docket parsing error: %s.%s, exiting...." % (court, casenum)
        exit()
    # Step 2: Remove documents from DocketXML object
    for document in documents:
        ia_docket.remove_document(document.docnum, document.subdocnum)
    # Step 3: upload modified xml
    docketbits = ia_docket.to_xml()
    request = IACommon.make_docketxml_request(docketbits, court, casenum,
                                              ia_docket.casemeta)
    success_status = False
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError, e:
        # NOTE(review): IA's PUT endpoint apparently signals success via an
        # HTTPError carrying 200/201; a non-exception response leaves
        # success_status False -- presumably that path never occurs. Confirm.
        if e.code == 201 or e.code == 200:
            # 201 Created: Success!
            print "Updated %s %s docket.xml" % (court, casenum)
            success_status = True
def _cron_fetch_update(lock):
    """Fetch the IA docket for a locked case and sync the local DB.

    The lock is released (deleted) once the fetched docket's nonce matches
    the lock's nonce (i.e. IA has caught up) or the lock carries no nonce;
    otherwise the lock is re-queued for a later attempt.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        # Couldn't get the docket. Try again later.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s couldn't fetch the docket: %d" % (court, casenum,
                                                        fetcherror)
        return
    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        # Docket parsing error.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s docket parsing error: %s" % (court, casenum, message)
        return
    elif ia_docket.nonce == nonce or not nonce:
        # Got the docket and it is either:
        #  1. up-to-date (nonce match), or
        #  2. expired (ignore nonce)
        # In both scenarios, update the local DB.
        DocumentManager.update_local_db(ia_docket, ignore_available=0)
        print " %s.%s fetched and DB updated." % (court, casenum)
        # Hash of the pickled docket serves as a cheap change detector.
        ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
        local_docket = DocumentManager.create_docket_from_local_documents(
            court, casenum)
        if local_docket:
            ia_docket.merge_docket(local_docket)
        ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))
        if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
            # Local DB had info IA lacked; schedule an upload to add it.
            print " After fetch, some locally stored information was " \
                  "missing from %s.%s. Local info addition scheduled." % (
                court, casenum)
            UploadHandler.do_me_up(ia_docket)
        # Remove the lock.
        lock.delete()
    else:
        # Got the docket but it is not up to date. Try again later.
        BucketLockManager.try_lock_later(lock)
        print " %s.%s fetched, wait more." % (court, casenum)
def _cron_me_up(ia_docket, docket, ppentry):
    """Merge *docket* (and local DB info) into *ia_docket*; upload if changed.

    Returns (put_result, message).  When the merge produces no difference,
    the queue entry and its pickle are discarded and (False, ...) is
    returned to reflect "no update".
    """
    ia_court = ia_docket.casemeta["court"]
    ia_casenum = ia_docket.casemeta["pacer_case_num"]
    # Save the original hash to diff with later
    ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
    ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))
    # Merge ia_docket with our local database information to fill in blank
    # fields that may exist in ia
    local_docket = DocumentManager.create_docket_from_local_documents(
        ia_court, ia_casenum, docket)
    if local_docket:
        ia_docket.merge_docket(local_docket)
    ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))
    if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
        print " Some locally stored information was missing from %s.%s. Local info added." % (
            ia_court, ia_casenum)
    # Step 2: Merge new docket into the existing IA docket
    ia_docket.merge_docket(docket)
    # Step 3: If diff, then upload the merged docket
    ia_docket_merged_hash = hash(pickle.dumps(ia_docket))
    if ia_docket_orig_hash != ia_docket_merged_hash:
        # Generate a new nonce for the docket
        ia_docket.nonce = DocketXML.generate_new_nonce()
        # Flag whether case metadata (not just documents) changed.
        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash
        # Put the docket to IA
        put_result, put_msg = put_docket(ia_docket, ia_court, ia_casenum,
                                         ppentry, casemeta_diff=casemeta_diff)
        return put_result, "merged: %s" % put_msg
    else:
        # No difference between IA docket and this docket, no need to upload.
        filename = ppentry.filename
        # Delete the entry from the DB
        ppentry.delete()
        # Delete the pickle file
        delete_pickle(filename)
        # Return False to reflect "no update"
        return False, "not merged: no diff."
def _cron_fetch_update(lock):
    """Fetch a locked case's docket from IA and update the local DB.

    Deletes the lock when IA is up to date (nonce match, or no nonce);
    otherwise schedules another attempt via BucketLockManager.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        # Couldn't get the docket. Try again later.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s couldn't fetch the docket: %d" % (court, casenum,
                                                        fetcherror)
        return
    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        # Docket parsing error.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s docket parsing error: %s" % (court, casenum, message)
        return
    elif ia_docket.nonce == nonce or not nonce:
        # Got the docket and it is either:
        #  1. up-to-date (nonce match), or
        #  2. expired (ignore nonce)
        # In both scenarios, update the local DB.
        DocumentManager.update_local_db(ia_docket, ignore_available=0)
        print " %s.%s fetched and DB updated." % (court, casenum)
        # Pickle-hash the docket as a cheap before/after change detector.
        ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
        local_docket = DocumentManager.create_docket_from_local_documents(court,
                                                                          casenum)
        if local_docket:
            ia_docket.merge_docket(local_docket)
        ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))
        if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
            # Local DB had info IA lacked; schedule an upload to add it.
            print " After fetch, some locally stored information was missing from %s.%s. Local info addition scheduled." % (court, casenum)
            UploadHandler.do_me_up(ia_docket)
        # Remove the lock.
        lock.delete()
    else:
        # Got the docket but it is not up to date. Try again later.
        BucketLockManager.try_lock_later(lock)
        print " %s.%s fetched, wait more." % (court, casenum)
def _cron_me_up(ia_docket, docket, ppentry):
    """Merge and update docket.

    Folds local-DB info and *docket* into *ia_docket*, uploading the
    result only when the merge actually changed something.  Returns
    (put_result, message); on "no diff" the queue entry and pickle are
    removed and (False, ...) is returned.
    """
    ia_court = ia_docket.casemeta["court"]
    ia_casenum = ia_docket.casemeta["pacer_case_num"]
    # Save the original hash to diff with later
    ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
    ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))
    # Merge ia_docket with our local database information to fill in blank
    # fields that may exist in ia
    local_docket = DocumentManager.create_docket_from_local_documents(ia_court,
                                                                      ia_casenum,
                                                                      docket)
    if local_docket:
        ia_docket.merge_docket(local_docket)
    ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))
    if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
        print " Some locally stored information was missing from %s.%s. Local info added." % (
            ia_court, ia_casenum)
    # Step 2: Merge new docket into the existing IA docket
    ia_docket.merge_docket(docket)
    # Step 3: If diff, then upload the merged docket
    ia_docket_merged_hash = hash(pickle.dumps(ia_docket))
    if ia_docket_orig_hash != ia_docket_merged_hash:
        # Generate a new nonce for the docket
        ia_docket.nonce = DocketXML.generate_new_nonce()
        # Did case metadata itself change, or only documents?
        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash
        # Put the docket to IA
        put_result, put_msg = put_docket(ia_docket, ia_court, ia_casenum,
                                         ppentry, casemeta_diff=casemeta_diff)
        return put_result, "merged: %s" % put_msg
    else:
        # No difference between IA docket and this docket, no need to upload.
        filename = ppentry.filename
        # Delete the entry from the DB
        ppentry.delete()
        # Delete the pickle file
        delete_pickle(filename)
        # Return False to reflect "no update"
        return False, "not merged: no diff."
def _get_docket_from_IA(docket): docketstring, fetcherror = IADirect.get_docket_string(docket.get_court(), docket.get_casenum()) if docketstring: # Got the existing docket-- put merged docket file. ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring) if ia_docket: return ia_docket, fetcherror else: print " %s docket parsing error: %s" % (docketname, parse_msg) return None, parse_msg return None, fetcherror
def mark_document_as_unavailable(document):
    """Record locally that *document* is no longer on IA and sync the docket."""
    # if not document.available:
    #     print "Exiting: This document isn't currently available on IA"
    #     print usage()
    #     exit()
    document.available = 0
    # Touching lastdate ensures archive.recapthelaw will pick up the update.
    document.lastdate = datetime.datetime.now()
    document.save()
    unavailable_docket = DocketXML.make_docket_for_pdf(
        "", document.court, document.casenum,
        document.docnum, document.subdocnum, available=0)
    UploadHandler.do_me_up(unavailable_docket)
def mark_document_as_unavailable(document):
    """Flag *document* unavailable in the local DB and push the change to IA."""
    # if not document.available:
    #     print "Exiting: This document isn't currently available on IA"
    #     print usage()
    #     exit()
    document.available = 0
    # Bump lastdate so the archive.recapthelaw mirror notices the change.
    document.lastdate = datetime.datetime.now()
    document.save()
    UploadHandler.do_me_up(
        DocketXML.make_docket_for_pdf("",
                                      document.court,
                                      document.casenum,
                                      document.docnum,
                                      document.subdocnum,
                                      available=0))
def _upload_documents(docket, docmap):
    """Upload every PDF in *docmap* (dockey -> pickle filename) for *docket*.

    Each document's metadata is merged into *docket* before upload; on
    success the document is marked available.  Stops at the first failed
    upload and returns (False, error); otherwise (True, message).
    """
    court = docket.get_court()
    casenum = docket.get_casenum()

    for dockey, filename in docmap.items():
        # TK: abstract this split into a separate function
        docnum, subdocnum = dockey.split('-')
        pdfbits = _unpickle_object(filename)

        # Build a docket fragment carrying this PDF's metadata (sha1, etc.)
        # and fold it into the case docket.
        meta_docket = DocketXML.make_docket_for_pdf(pdfbits, court, casenum,
                                                    docnum, subdocnum,
                                                    available=0, free_import=1)
        docket.merge_docket(meta_docket)

        doc_success, doc_msg = upload_document(pdfbits, court, casenum,
                                               docnum, subdocnum)
        if not doc_success:
            # TK: I don't think we unlock correctly here
            return False, doc_msg
        docket.set_document_available(docnum, subdocnum, "1")

    return True, "All documents uploaded"
def process_case(casenum):
    """Import one locally scraped case directory into IA.

    Reads ``<dirarg>/<casenum>/docket.html``, merges it with any existing
    IA docket, parses the per-document index files, uploads changed PDFs,
    and finally pushes the merged docket if anything changed.  Returns True
    on success; on failure returns False after scheduling a retry or
    recording the case as failed.

    NOTE(review): relies on module-level names `court`, `dirarg`,
    `bucket_made`, `lock`/`unlock`, and the retry/failed bookkeeping
    helpers -- confirm these against the enclosing script.
    """
    # Setup: Grab the lock.
    got_lock, nonce_or_message = lock(court, casenum)
    if got_lock:
        print "got the lock: %s" % (nonce_or_message)
        nonce = nonce_or_message
    else:
        print "could not get lock: %s" % (nonce_or_message)
        add_to_retry(casenum)
        return False
    casedir = "%s/%s" % (dirarg, casenum)
    # Step 1: Parse the docket.html file.
    try:
        docketpath = "%s/docket.html" % casedir
        docketfile = open(docketpath)
        docketbits = docketfile.read()
        docketfile.close()
    except IOError:
        reason = "could not open local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False
    else:
        docket = ParsePacer.parse_histdocqry(docketbits, court, casenum)
    if not docket:
        reason = "could not parse local docket"
        print "***Skipping %s.%s: %s... " % (court, casenum, reason),
        print_unlock_message(unlock(court, casenum, False))
        del_from_retry(casenum)
        add_to_failed(casenum, reason)
        return False
    # Step 1a: Try to fetch the the existing IA docket.
    ia_docket = None
    ia_docket_orig_string = ""
    ia_casemeta_orig_hash = ""
    ia_docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if ia_docketstring:
        # Got the existing docket-- parse it.
        ia_docket, parseerror = DocketXML.parse_xml_string(ia_docketstring)
        if not ia_docket:
            reason = "could not parse IA docket: %s" % (parseerror)
            print "***Skipping %s.%s: %s... " % (court, casenum, reason),
            print_unlock_message(unlock(court, casenum, False))
            del_from_retry(casenum)
            add_to_failed(casenum, reason)
            return False
        else:
            # Save the original docket hashes
            ia_docket_orig_string = ia_docketstring
            ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))
    elif fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- ok.
        pass
    elif fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- either make_bucket failed or not yet ready.
        if casenum not in bucket_made:
            # If make_bucket failed, try make_bucket again.
            print " make bucket...",
            make_bucket(casenum)
    elif fetcherror is IADirect.FETCH_TIMEOUT:
        # Couldn't contact IA, skip.
        print "***Skipping %s.%s: IA is down... " % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False
    elif not ia_docketstring:
        # Unknown fetch error, skip.
        # NOTE(review): this condition is always true when reached (the
        # first `if` already failed), so this is effectively the chain's
        # catch-all `else`.
        print "***Skipping %s.%s: unknown docket fetch error: %s..." % \
            (court, casenum, fetcherror),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False
    # Step 1b: If necessary, merge the two dockets.
    if ia_docket:
        ia_docket.merge_docket(docket)
    else:
        ia_docket = docket
    # Partition the case directory into index pages and PDFs.
    casedir_ls = os.listdir(casedir)
    index_ls = []
    pdf_ls = []
    for casedocname in casedir_ls:
        if casedocname.endswith("index.html"):
            index_ls.append(casedocname)
        elif casedocname.endswith(".pdf"):
            pdf_ls.append(casedocname)
    # Step 2: Parse each index file
    for indexname in index_ls:
        try:
            indexpath = "%s/%s" % (casedir, indexname)
            indexfile = open(indexpath)
            indexbits = indexfile.read()
            indexfile.close()
        except IOError:
            print "***Could not open file '%s'" % indexpath
            continue
        # NOTE(review): str.strip removes a *character set*, not a suffix;
        # "-index.html" works only while docnums avoid those characters.
        docnum = indexname.strip("-index.html")
        index_docket = ParsePacer.parse_doc1(indexbits, court, casenum,
                                             docnum)
        # Merge this docket into the IA docket
        ia_docket.merge_docket(index_docket)
    # Set initial flag for retrying this case.
    need_to_retry = 0
    # Step 3: Wait for the bucket to be ready (up to 20 x 5s = 100s).
    bucketready = False
    for checkcount in xrange(20):
        bucketready, code = IADirect.check_bucket_ready(court, casenum)
        if bucketready:
            break
        else:
            # Wait 5 seconds and try again.
            time.sleep(5)
    if not bucketready:
        print "***Skipping %s.%s: bucket is not ready... " \
            % (court, casenum),
        print_unlock_message(unlock(court, casenum, False))
        add_to_retry(casenum)
        return False
    # Step 4: Upload each pdf file.
    doccount = 0
    for pdfname in pdf_ls:
        doccount += 1
        print " uploading document %d/%d..." % (doccount, len(pdf_ls)),
        try:
            pdfpath = "%s/%s" % (casedir, pdfname)
            pdffile = open(pdfpath)
            pdfbits = pdffile.read()
            pdffile.close()
        except IOError:
            print "***Could not open file '%s'" % pdfpath
            continue
        # NOTE(review): same char-set strip caveat as above.
        pdfname = pdfname.strip(".pdf")
        split = pdfname.split("-")
        try:
            docnum = unicode(int(split[0]))
        except ValueError:
            # Not an integer.
            print "***Docnum not an integer '%s'" % pdfpath
            continue
        try:
            # converting v3->v4 subdocnums
            subdocnum = unicode(int(split[1]) - 1)
        except IndexError:
            subdocnum = "0"
        doc_docket = DocketXML.make_docket_for_pdf(pdfbits, court, casenum,
                                                   docnum, subdocnum)
        doc_meta = doc_docket.get_document_metadict(docnum, subdocnum)
        # Only upload the PDF if the hash doesn't match the one in IA.
        ia_pdfhash = ia_docket.get_document_sha1(docnum, subdocnum)
        pdfhash = doc_docket.get_document_sha1(docnum, subdocnum)
        if ia_pdfhash != pdfhash:
            pdfstatus, pdferror = \
                IADirect.put_pdf(pdfbits, court, casenum,
                                 docnum, subdocnum, doc_meta)
            if not pdfstatus:
                # PUT failed, mark document as unavailable
                doc_docket.set_document_available(docnum, subdocnum, "0")
                print " fail: %s" % pdferror
                need_to_retry = True
                continue
            else:
                print "done."
            # Add this document's metadata into the ia_docket
            ia_docket.merge_docket(doc_docket)
        else:
            print "same."
    # Step 5: Push the docket to IA, if things have changed.
    print " docket upload...",
    docket_modified = 0
    ignore_nonce = 0
    ia_docket_merged_string = ia_docket.to_xml()
    if ia_docket_orig_string != ia_docket_merged_string:
        # Assign the docket the new nonce from the lock
        ia_docket.nonce = nonce
        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash
        putstatus, puterror = \
            IADirect.put_docket(ia_docket, court, casenum,
                                casemeta_diff=casemeta_diff)
        if putstatus:
            docket_modified = 1
            print "done."
        else:
            need_to_retry = 1
            print "fail: %s" % puterror
    else:
        ignore_nonce = 1
        print "same."
    # Release the lock, telling it whether the docket actually changed.
    if ignore_nonce:
        print_unlock_message(unlock(court, casenum, ignore_nonce=1))
    else:
        print_unlock_message(unlock(court, casenum, modified=docket_modified))
    if need_to_retry:
        add_to_retry(casenum)
        return False
    else:
        return True
def handle_pdf(filebits, court, url): """ Write PDF file metadata into the database. """ # Parse coerced docid out of url try: docid = docid_from_url_name(url) except ValueError: logging.warning("handle_pdf: no url available to get docid") return "upload: pdf failed. no url supplied." # Lookup based on docid b/c it's the only metadata we have # Document exists if we've previously parsed the case's docket query = Document.objects.filter(docid=docid) try: doc = query[0] except IndexError: logging.info("handle_pdf: haven't yet seen docket %s" % (docid)) return "upload: pdf ignored." else: # Sanity check if doc.court != court: logging.error("handle_pdf: court mismatch (%s, %s) %s" % (court, doc.court, url)) return "upload: pdf metadata mismatch." casenum = doc.casenum docnum = doc.docnum subdocnum = doc.subdocnum sha1 = doc.sha1 # Docket with updated sha1, available, and upload_date docket = DocketXML.make_docket_for_pdf(filebits, court, casenum, docnum, subdocnum, available=0) DocumentManager.update_local_db(docket) if docket.get_document_sha1(docnum ,subdocnum) != sha1: # Upload the file -- either doesn't exist on IA or has different sha1 # Gather all the additional metadata we have # - from the docket we just made doc_meta = docket.get_document_metadict(docnum, subdocnum) # - from the database, if available if doc.docid: doc_meta["pacer_doc_id"] = doc.docid if doc.de_seq_num: doc_meta["pacer_de_seq_num"] = doc.de_seq_num if doc.dm_id: doc_meta["pacer_dm_id"] = doc.dm_id # Push the file to IA IA.put_file(filebits, court, casenum, docnum, subdocnum, doc_meta) # Whether we uploaded the file, push the docket update to IA. do_me_up(docket) logging.info("handle_pdf: uploaded %s.%s.%s.%s.pdf" % (court, casenum, docnum, subdocnum)) message = "pdf uploaded." response = {} response["message"] = message jsonout = simplejson.dumps(response) return jsonout
def _cron_process_docketXML(docket, ppentry):
    """ Required to have the lock.

    Merges the queued *docket* into the existing IA docket, creating the
    bucket and/or docket file when IA has none.  On transient fetch errors
    nothing is done, so the next cron run retries.
    """
    court = docket.casemeta["court"]
    casenum = docket.casemeta["pacer_case_num"]
    # Force '0' in the XML on docs that failed to upload.
    _update_docs_availability(docket)
    # The docket filename (used only in log output below).
    docketname = IACommon.get_docketxml_name(court, casenum)
    # Step 1: Try to fetch the existing docket from IA
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if docketstring:
        # Got the existing docket-- put merged docket file.
        ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
        if ia_docket:
            put_result, put_msg = _cron_me_up(ia_docket, docket, ppentry)
            print " %s %s" % (docketname, put_msg)
        else:
            print " %s docket parsing error: %s" % (docketname, parse_msg)
    elif fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- put a new docket file.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry)
        print " %s put into existing bucket: %s" % (docketname, put_msg)
    elif fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- make the bucket and put a new docket file.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry,
                                         newbucket=1)
        print " %s put into new bucket: %s" % (docketname, put_msg)
    elif fetcherror is IADirect.FETCH_URLERROR:
        # Couldn't get the IA docket.
        # Unset the processing flag for later
        # ppentry.processing = 0
        # ppentry.save()
        # Leave the pickle file for later
        # Drop Lock Here?
        print " %s timed out. wait for next cron." % (docketname)
    else:
        # Unknown fetch error.
        # Unset the processing flag for later
        # ppentry.processing = 0
        # ppentry.save()
        # Drop Lock Here?
        # Leave the pickle file for later
        print " %s unknown fetch error. wait for next cron." % (docketname)
def handle_pdf(filebits, court, url, team_name): """ Write PDF file metadata into the database. """ # Parse coerced docid out of url try: docid = docid_from_url_name(url) except ValueError: logging.warning("handle_pdf: no url available to get docid") return "upload: pdf failed. no url supplied." # Lookup based on docid b/c it's the only metadata we have # Document exists if we've previously parsed the case's docket query = Document.objects.filter(docid=docid) try: doc = query[0] except IndexError: logging.info("handle_pdf: haven't yet seen docket %s" % docid) return "upload: pdf ignored because we don't have docket %s" % docid else: # Sanity check if doc.court != court: logging.error("handle_pdf: court mismatch (%s, %s) %s" % (court, doc.court, url)) return "upload: pdf metadata mismatch." casenum = doc.casenum docnum = doc.docnum subdocnum = doc.subdocnum sha1 = doc.sha1 # Docket with updated sha1, available, and upload_date docket = DocketXML.make_docket_for_pdf(filebits, court, casenum, docnum, subdocnum, available=0) DocumentManager.update_local_db(docket, team_name=team_name) if docket.get_document_sha1(docnum, subdocnum) != sha1: # Upload the file -- either doesn't exist on IA or has different sha1 # Gather all the additional metadata we have # - from the docket we just made doc_meta = docket.get_document_metadict(docnum, subdocnum) # - from the database, if available if doc.docid: doc_meta["pacer_doc_id"] = doc.docid if doc.de_seq_num: doc_meta["pacer_de_seq_num"] = doc.de_seq_num if doc.dm_id: doc_meta["pacer_dm_id"] = doc.dm_id # Push the file to IA IA.put_file(filebits, court, casenum, docnum, subdocnum, doc_meta) # Whether we uploaded the file, push the docket update to IA. do_me_up(docket) logging.info("handle_pdf: uploaded %s.%s.%s.%s.pdf" % (court, casenum, docnum, subdocnum)) message = "pdf uploaded." response = {"message": message} return simplejson.dumps(response)