def _cron_fetch_update(lock):
    """Fetch the IA docket that *lock* refers to and sync the local DB.

    Outcomes, driven by the fetch/parse results and the lock's nonce:

    * fetch or parse failed -- the lock is rescheduled through
      ``BucketLockManager.try_lock_later`` when it carries a nonce,
      otherwise it is deleted;
    * the docket's nonce equals the lock's nonce, or the lock has no
      nonce -- the local DB is updated, the locally stored documents are
      merged in (an upload is scheduled through ``UploadHandler.do_me_up``
      when that merge changed the docket), and the lock is deleted;
    * the docket's nonce differs -- the lock is rescheduled.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)
    case_id = (court, casenum)

    def _retry_or_drop():
        # A lock with a nonce gets another attempt later; one without a
        # nonce is simply removed.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()

    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        # Nothing came back from IA.
        _retry_or_drop()
        print(" %s.%s couldn't fetch the docket: %d"
              % (court, casenum, fetcherror))
        return

    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        # The fetched XML could not be parsed.
        _retry_or_drop()
        print(" %s.%s docket parsing error: %s" % (court, casenum, message))
        return

    if not (ia_docket.nonce == nonce or not nonce):
        # The docket on IA is not the version this lock is waiting for.
        BucketLockManager.try_lock_later(lock)
        print(" %s.%s fetched, wait more." % case_id)
        return

    # Either the nonce matches (up to date) or there is no nonce to match
    # (expired); both cases refresh the local DB.
    DocumentManager.update_local_db(ia_docket, ignore_available=0)
    print(" %s.%s fetched and DB updated." % case_id)

    # Hash the docket before and after merging the local documents to see
    # whether the local side knew something IA did not.
    hash_before_merge = hash(pickle.dumps(ia_docket))
    local_docket = DocumentManager.create_docket_from_local_documents(
        court, casenum)
    if local_docket:
        ia_docket.merge_docket(local_docket)
    hash_after_merge = hash(pickle.dumps(ia_docket))

    if hash_before_merge != hash_after_merge:
        print(" After fetch, some locally stored information was "
              "missing from %s.%s. Local info addition scheduled." % case_id)
        UploadHandler.do_me_up(ia_docket)

    # Done with this lock.
    lock.delete()
def _cron_process_docketXML(docket, ppentry):
    """Push *docket* to IA, merging with the docket already there if any.

    Required to have the lock.

    The existing IA docket is fetched first. If it is present and parses,
    the two are merged and uploaded through ``_cron_me_up``; if IA has no
    docket file or no bucket, *docket* is uploaded as new through
    ``put_docket``. Any other fetch error only prints a message.
    """
    court = docket.casemeta["court"]
    casenum = docket.casemeta["pacer_case_num"]

    # Force '0' in the XML on docs that failed to upload.
    _update_docs_availability(docket)

    # Name of the docket file, used to prefix every status line below.
    docketname = IACommon.get_docketxml_name(court, casenum)

    # Step 1: look for a docket that is already on IA.
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if docketstring:
        ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
        if not ia_docket:
            print(" %s docket parsing error: %s" % (docketname, parse_msg))
            return
        # Existing docket parsed fine -- upload the merged result.
        put_result, put_msg = _cron_me_up(ia_docket, docket, ppentry)
        print(" %s %s" % (docketname, put_msg))
        return

    if fetcherror is IADirect.FETCH_NO_FILE:
        # The bucket is there but holds no docket yet.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry)
        print(" %s put into existing bucket: %s" % (docketname, put_msg))
        return

    if fetcherror is IADirect.FETCH_NO_BUCKET:
        # No bucket either -- ask put_docket to create one.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry,
                                         newbucket=1)
        print(" %s put into new bucket: %s" % (docketname, put_msg))
        return

    # NOTE: for the two remaining cases nothing is changed on ppentry and
    # the lock is not released here; only a status line is printed.
    if fetcherror is IADirect.FETCH_URLERROR:
        print(" %s timed out. wait for next cron." % docketname)
    else:
        print(" %s unknown fetch error. wait for next cron." % docketname)
def delete_documents_from_docket(court, casenum, documents): # Step 1: Get docket and convert into DocketXML docketstring, fetcherror = IADirect.get_docket_string(court, casenum) if not docketstring: print "Could not find docket on IA, exiting...." exit() ia_docket, message = DocketXML.parse_xml_string(docketstring) if not ia_docket: print "Docket parsing error: %s.%s, exiting...." % (court, casenum) exit() # Step 2: Remove documents from DocketXML object for document in documents: ia_docket.remove_document(document.docnum, document.subdocnum) # Step 3: upload modified xml docketbits = ia_docket.to_xml() request = IACommon.make_docketxml_request(docketbits, court, casenum, ia_docket.casemeta) success_status = False try: response = urllib2.urlopen(request) except urllib2.HTTPError, e: if e.code == 201 or e.code == 200: # 201 Created: Success! print "Updated %s %s docket.xml" % (court, casenum) success_status = True
def _cron_fetch_update(lock):
    """Fetch the IA docket that *lock* refers to and sync the local DB.

    Outcomes, driven by the fetch/parse results and the lock's nonce:

    * fetch or parse failed -- the lock is rescheduled through
      ``BucketLockManager.try_lock_later`` when it carries a nonce,
      otherwise it is deleted;
    * the docket's nonce equals the lock's nonce, or the lock has no
      nonce -- the local DB is updated, the locally stored documents are
      merged in (an upload is scheduled through ``UploadHandler.do_me_up``
      when that merge changed the docket), and the lock is deleted;
    * the docket's nonce differs -- the lock is rescheduled.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)
    case_id = (court, casenum)

    def _retry_or_drop():
        # A lock with a nonce gets another attempt later; one without a
        # nonce is simply removed.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()

    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        # Nothing came back from IA.
        _retry_or_drop()
        print(" %s.%s couldn't fetch the docket: %d"
              % (court, casenum, fetcherror))
        return

    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        # The fetched XML could not be parsed.
        _retry_or_drop()
        print(" %s.%s docket parsing error: %s" % (court, casenum, message))
        return

    if not (ia_docket.nonce == nonce or not nonce):
        # The docket on IA is not the version this lock is waiting for.
        BucketLockManager.try_lock_later(lock)
        print(" %s.%s fetched, wait more." % case_id)
        return

    # Either the nonce matches (up to date) or there is no nonce to match
    # (expired); both cases refresh the local DB.
    DocumentManager.update_local_db(ia_docket, ignore_available=0)
    print(" %s.%s fetched and DB updated." % case_id)

    # Hash the docket before and after merging the local documents to see
    # whether the local side knew something IA did not.
    hash_before_merge = hash(pickle.dumps(ia_docket))
    local_docket = DocumentManager.create_docket_from_local_documents(
        court, casenum)
    if local_docket:
        ia_docket.merge_docket(local_docket)
    hash_after_merge = hash(pickle.dumps(ia_docket))

    if hash_before_merge != hash_after_merge:
        print(" After fetch, some locally stored information was "
              "missing from %s.%s. Local info addition scheduled." % case_id)
        UploadHandler.do_me_up(ia_docket)

    # Done with this lock.
    lock.delete()
def _get_docket_from_IA(docket):
    """Fetch and parse the IA copy of the case that *docket* belongs to.

    The court and case number are read from *docket* via ``get_court()``
    and ``get_casenum()``.

    Returns a 2-tuple:

    * ``(ia_docket, fetcherror)`` when the docket was fetched and parsed;
    * ``(None, parse_msg)`` when it was fetched but could not be parsed;
    * ``(None, fetcherror)`` when nothing could be fetched.
    """
    court = docket.get_court()
    casenum = docket.get_casenum()

    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        return None, fetcherror

    ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
    if ia_docket:
        return ia_docket, fetcherror

    # The message used to interpolate ``docketname``, which is not defined
    # in this function, so this branch raised NameError instead of
    # reporting the parse failure. Identify the case by court and case
    # number instead.
    print(" %s.%s docket parsing error: %s" % (court, casenum, parse_msg))
    return None, parse_msg
def process_case(casenum): # Setup: Grab the lock. got_lock, nonce_or_message = lock(court, casenum) if got_lock: print "got the lock: %s" % (nonce_or_message) nonce = nonce_or_message else: print "could not get lock: %s" % (nonce_or_message) add_to_retry(casenum) return False casedir = "%s/%s" % (dirarg, casenum) # Step 1: Parse the docket.html file. try: docketpath = "%s/docket.html" % casedir docketfile = open(docketpath) docketbits = docketfile.read() docketfile.close() except IOError: reason = "could not open local docket" print "***Skipping %s.%s: %s... " % (court, casenum, reason), print_unlock_message(unlock(court, casenum, False)) del_from_retry(casenum) add_to_failed(casenum, reason) return False else: docket = ParsePacer.parse_histdocqry(docketbits, court, casenum) if not docket: reason = "could not parse local docket" print "***Skipping %s.%s: %s... " % (court, casenum, reason), print_unlock_message(unlock(court, casenum, False)) del_from_retry(casenum) add_to_failed(casenum, reason) return False # Step 1a: Try to fetch the the existing IA docket. ia_docket = None ia_docket_orig_string = "" ia_casemeta_orig_hash = "" ia_docketstring, fetcherror = IADirect.get_docket_string(court, casenum) if ia_docketstring: # Got the existing docket-- parse it. ia_docket, parseerror = DocketXML.parse_xml_string(ia_docketstring) if not ia_docket: reason = "could not parse IA docket: %s" % (parseerror) print "***Skipping %s.%s: %s... " % (court, casenum, reason), print_unlock_message(unlock(court, casenum, False)) del_from_retry(casenum) add_to_failed(casenum, reason) return False else: # Save the original docket hashes ia_docket_orig_string = ia_docketstring ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta)) elif fetcherror is IADirect.FETCH_NO_FILE: # Bucket exists but no docket-- ok. pass elif fetcherror is IADirect.FETCH_NO_BUCKET: # Bucket doesn't exist-- either make_bucket failed or not yet ready. 
if casenum not in bucket_made: # If make_bucket failed, try make_bucket again. print " make bucket...", make_bucket(casenum) elif fetcherror is IADirect.FETCH_TIMEOUT: # Couldn't contact IA, skip. print "***Skipping %s.%s: IA is down... " % (court, casenum), print_unlock_message(unlock(court, casenum, False)) add_to_retry(casenum) return False elif not ia_docketstring: # Unknown fetch error, skip. print "***Skipping %s.%s: unknown docket fetch error: %s..." % \ (court, casenum, fetcherror), print_unlock_message(unlock(court, casenum, False)) add_to_retry(casenum) return False # Step 1b: If necessary, merge the two dockets. if ia_docket: ia_docket.merge_docket(docket) else: ia_docket = docket casedir_ls = os.listdir(casedir) index_ls = [] pdf_ls = [] for casedocname in casedir_ls: if casedocname.endswith("index.html"): index_ls.append(casedocname) elif casedocname.endswith(".pdf"): pdf_ls.append(casedocname) # Step 2: Parse each index file for indexname in index_ls: try: indexpath = "%s/%s" % (casedir, indexname) indexfile = open(indexpath) indexbits = indexfile.read() indexfile.close() except IOError: print "***Could not open file '%s'" % indexpath continue docnum = indexname.strip("-index.html") index_docket = ParsePacer.parse_doc1(indexbits, court, casenum, docnum) # Merge this docket into the IA docket ia_docket.merge_docket(index_docket) # Set initial flag for retrying this case. need_to_retry = 0 # Step 3: Wait for the bucket to be ready bucketready = False for checkcount in xrange(20): bucketready, code = IADirect.check_bucket_ready(court, casenum) if bucketready: break else: # Wait 5 seconds and try again. time.sleep(5) if not bucketready: print "***Skipping %s.%s: bucket is not ready... " \ % (court, casenum), print_unlock_message(unlock(court, casenum, False)) add_to_retry(casenum) return False # Step 4: Upload each pdf file. doccount = 0 for pdfname in pdf_ls: doccount += 1 print " uploading document %d/%d..." 
% (doccount, len(pdf_ls)), try: pdfpath = "%s/%s" % (casedir, pdfname) pdffile = open(pdfpath) pdfbits = pdffile.read() pdffile.close() except IOError: print "***Could not open file '%s'" % pdfpath continue pdfname = pdfname.strip(".pdf") split = pdfname.split("-") try: docnum = unicode(int(split[0])) except ValueError: # Not an integer. print "***Docnum not an integer '%s'" % pdfpath continue try: # converting v3->v4 subdocnums subdocnum = unicode(int(split[1]) - 1) except IndexError: subdocnum = "0" doc_docket = DocketXML.make_docket_for_pdf(pdfbits, court, casenum, docnum, subdocnum) doc_meta = doc_docket.get_document_metadict(docnum, subdocnum) # Only upload the PDF if the hash doesn't match the one in IA. ia_pdfhash = ia_docket.get_document_sha1(docnum, subdocnum) pdfhash = doc_docket.get_document_sha1(docnum, subdocnum) if ia_pdfhash != pdfhash: pdfstatus, pdferror = \ IADirect.put_pdf(pdfbits, court, casenum, docnum, subdocnum, doc_meta) if not pdfstatus: # PUT failed, mark document as unavailable doc_docket.set_document_available(docnum, subdocnum, "0") print " fail: %s" % pdferror need_to_retry = True continue else: print "done." # Add this document's metadata into the ia_docket ia_docket.merge_docket(doc_docket) else: print "same." # Step 5: Push the docket to IA, if things have changed. print " docket upload...", docket_modified = 0 ignore_nonce = 0 ia_docket_merged_string = ia_docket.to_xml() if ia_docket_orig_string != ia_docket_merged_string: # Assign the docket the new nonce from the lock ia_docket.nonce = nonce ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta)) casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash putstatus, puterror = \ IADirect.put_docket(ia_docket, court, casenum, casemeta_diff=casemeta_diff) if putstatus: docket_modified = 1 print "done." else: need_to_retry = 1 print "fail: %s" % puterror else: ignore_nonce = 1 print "same." 
if ignore_nonce: print_unlock_message(unlock(court, casenum, ignore_nonce=1)) else: print_unlock_message(unlock(court, casenum, modified=docket_modified)) if need_to_retry: add_to_retry(casenum) return False else: return True
def _cron_process_docketXML(docket, ppentry):
    """Push *docket* to IA, merging with the docket already there if any.

    Required to have the lock.

    The existing IA docket is fetched first. If it is present and parses,
    the two are merged and uploaded through ``_cron_me_up``; if IA has no
    docket file or no bucket, *docket* is uploaded as new through
    ``put_docket``. Any other fetch error only prints a message.
    """
    court = docket.casemeta["court"]
    casenum = docket.casemeta["pacer_case_num"]

    # Force '0' in the XML on docs that failed to upload.
    _update_docs_availability(docket)

    # Name of the docket file, used to prefix every status line below.
    docketname = IACommon.get_docketxml_name(court, casenum)

    # Step 1: look for a docket that is already on IA.
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if docketstring:
        ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
        if not ia_docket:
            print(" %s docket parsing error: %s" % (docketname, parse_msg))
            return
        # Existing docket parsed fine -- upload the merged result.
        put_result, put_msg = _cron_me_up(ia_docket, docket, ppentry)
        print(" %s %s" % (docketname, put_msg))
        return

    if fetcherror is IADirect.FETCH_NO_FILE:
        # The bucket is there but holds no docket yet.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry)
        print(" %s put into existing bucket: %s" % (docketname, put_msg))
        return

    if fetcherror is IADirect.FETCH_NO_BUCKET:
        # No bucket either -- ask put_docket to create one.
        put_result, put_msg = put_docket(docket, court, casenum, ppentry,
                                         newbucket=1)
        print(" %s put into new bucket: %s" % (docketname, put_msg))
        return

    # NOTE: for the two remaining cases nothing is changed on ppentry and
    # the lock is not released here; only a status line is printed.
    if fetcherror is IADirect.FETCH_URLERROR:
        print(" %s timed out. wait for next cron." % docketname)
    else:
        print(" %s unknown fetch error. wait for next cron." % docketname)
def _upload_document(path, court, document): filename = _get_docket_pickle_filename(court, document['casenum'], document['doc_num'], document['attachment_num']) docket, msg = IA.unpickle_object(filename, os.path.join(path, 'docket_pickles')) if not docket: return False, 'Could not unpickle: %s' % msg casenum = docket.get_casenum() got_lock, nonce_or_message = UM.lock(court, casenum) # We need to: grab a lock if got_lock: print "got the lock: %s" % (nonce_or_message) nonce = nonce_or_message else: return False, "could not get lock: %s" % (nonce_or_message) # Get the existing ia docket, if it exists ia_docket = None ia_docket_orig_string = "" ia_casemeta_orig_hash = "" ia_docketstring, fetcherror = IADirect.get_docket_string(court, casenum) if ia_docketstring: # Got the existing docket-- parse it. ia_docket, parseerror = DocketXML.parse_xml_string(ia_docketstring) if not ia_docket: reason = "could not parse IA docket: %s" % (parseerror) UM.print_unlock_message(UM.unlock(court, casenum, False)) return False, "***Skipping %s.%s: %s... " % (court, casenum, reason), else: # Save the original docket hashes ia_docket_orig_string = ia_docketstring ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta)) print "There is a docket for %s, %s! " % (court, casenum) elif fetcherror is IADirect.FETCH_NO_FILE: # Bucket exists but no docket-- ok. pass elif fetcherror is IADirect.FETCH_NO_BUCKET: # Bucket doesn't exist-- either make_bucket failed or not yet ready. # That's okay, we'll make the bucket with the first upload #if casenum not in bucket_made: # If make_bucket failed, try make_bucket again. # print " make bucket...", # make_bucket(casenum) elif fetcherror is IADirect.FETCH_TIMEOUT: # Couldn't contact IA, skip. UM.print_unlock_message(UM.unlock(court, casenum, False)) #TK: Handle retry logic here? return False, "***Skipping %s.%s: IA is down... " % (court, casenum), elif not ia_docketstring: # Unknown fetch error, skip. 
UM.print_unlock_message(UM.unlock(court, casenum, False)) return False, "***Skipping %s.%s: unknown docket fetch error: %s..." % \ (court, casenum, fetcherror), # Step 1b: If necessary, merge the two dockets. if ia_docket: ia_docket.merge_docket(docket) else: ia_docket = docket # Upload the pdf #TK: add some better status updates here, maybe uploading doc 123 of 1234 print " uploading document %s.%s.%s..." % (court, casenum, document['doc_num']), try: doc_filename = os.path.join(path, document['docid'], ".pdf" pdfbits = open(doc_filename)).read() except IOError: UM.print_unlock_message(UM.unlock(court, casenum, False)) return False, " ***Could not open file %s " % doc_filename #TK: probably need to make the bucket before doing this doc_docket = DocketXML.make_docket_for_pdf(pdfbits, court, casenum, docnum, subdocnum) doc_meta = doc_docket.get_document_metadict(docnum, subdocnum) # Only upload the PDF if the hash doesn't match the one in IA. ia_pdfhash = ia_docket.get_document_sha1(docnum, subdocnum) pdfhash = doc_docket.get_document_sha1(docnum, subdocnum) if ia_pdfhash != pdfhash: pdfstatus, pdferror = \ IADirect.put_pdf(pdfbits, court, casenum, docnum, subdocnum, doc_meta) if not pdfstatus: # PUT failed, mark document as unavailable doc_docket.set_document_available(docnum, subdocnum, "0") # TK: handle retry here UM.print_unlock_message(UM.unlock(court, casenum, False)) return False, " fail: %s" % pdferror else: print "done." # Add this document's metadata into the ia_docket ia_docket.merge_docket(doc_docket) # Step 5: Push the docket to IA, if things have changed. 
print " docket upload...", docket_modified = 0 ignore_nonce = 0 ia_docket_merged_string = ia_docket.to_xml() if ia_docket_orig_string != ia_docket_merged_string: # Assign the docket the new nonce from the lock ia_docket.nonce = nonce ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta)) casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash putstatus, puterror = \ IADirect.put_docket(ia_docket, court, casenum, casemeta_diff=casemeta_diff) UM.print_unlock_message(UM.unlock(court, casenum, modified = False)) return True, "Document uploaded"