def _cron_process_PDF(obj, ppentry):
    """Vet an unpickled PDF and PUT it to the Internet Archive.

    obj     -- the unpickled PDF request object (checked by _is_invalid_pdf
               and _has_ssn).
    ppentry -- the PickledPut DB row for this file; its filename encodes
               court/casenum/docnum/subdocnum metadata.

    A PDF that is invalid, contains an SSN, or is blacklisted is marked
    unavailable, removed from the DB, and quarantined.  Otherwise the PUT
    is dispatched and, on success, the document is marked available.
    """
    filename = ppentry.filename
    meta = IACommon.get_meta_from_filename(filename)
    court = meta["court"]
    casenum = meta["casenum"]
    docnum = meta["docnum"]
    subdocnum = meta["subdocnum"]

    invalid_PDF = _is_invalid_pdf(obj, filename)

    # We only want to check for ssns on valid PDFs
    # PyPdf doesn't deal well with bad input
    if not invalid_PDF:
        # SSN privacy check
        has_ssn = _has_ssn(obj, filename)
    else:
        has_ssn = False

    # Blacklist file check
    in_blacklist = _in_blacklist(filename)

    if invalid_PDF or has_ssn or in_blacklist:
        # Publish a docket entry recording the document as unavailable.
        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=0)
        UploadHandler.do_me_up(docket)

        # Delete the entry from the DB
        ppentry.delete()
        # Quarantine the pickle file for analysis
        _quarantine_pickle(filename, ssn=has_ssn,
                           blacklist_file=in_blacklist,
                           invalid_PDF=invalid_PDF)
        return

    put_result, put_msg = _dispatch_put(obj, ppentry)

    if put_result:
        # Put success-- mark this document as available in the DB
        DocumentManager.mark_as_available(filename)

        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=1)
        UploadHandler.do_me_up(docket)

    # Log the outcome message whether or not the PUT succeeded.
    print " %s %s" % (filename, put_msg)
def _cron_process_PDF(obj, ppentry): filename = ppentry.filename meta = IACommon.get_meta_from_filename(filename) court = meta["court"] casenum = meta["casenum"] docnum = meta["docnum"] subdocnum = meta["subdocnum"] invalid_PDF = _is_invalid_pdf(obj, filename) # We only want to check for ssns on valid PDFs # PyPdf doesn't deal well with bad input if not invalid_PDF: # SSN privacy check has_ssn = _has_ssn(obj, filename) else: has_ssn = False # Blacklist file check in_blacklist = _in_blacklist(filename) if invalid_PDF or has_ssn or in_blacklist: docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=0) UploadHandler.do_me_up(docket) # Delete the entry from the DB ppentry.delete() # Quarantine the pickle file for analysis _quarantine_pickle(filename, ssn=has_ssn, blacklist_file= in_blacklist, invalid_PDF= invalid_PDF) return put_result, put_msg = _dispatch_put(obj, ppentry) if put_result: # Put success-- mark this document as available in the DB DocumentManager.mark_as_available(filename) docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum, subdocnum, available=1) UploadHandler.do_me_up(docket) print " %s %s" % (filename, put_msg)
def mark_as_available(filename):
    """Set available=1 on the Document row matching *filename*.

    The court/casenum/docnum/subdocnum key is parsed out of the filename
    by IACommon.get_meta_from_filename.  Missing rows and save failures
    are logged, not raised.
    """
    docmeta = IACommon.get_meta_from_filename(filename)

    docquery = Document.objects.filter(
        court=docmeta["court"],
        casenum=docmeta["casenum"],
        docnum=docmeta["docnum"],
        subdocnum=docmeta["subdocnum"]
    )

    try:
        docentry = docquery[0]
    except IndexError:
        # Unexpected case.  No Document entry
        logging.error("mark_as_available: no entry for %s." % (filename))
    else:
        docentry.available = 1
        try:
            docentry.save()
        except IntegrityError:
            logging.error("mark_as_available: could not save %s." % (filename))
def mark_as_available(filename):
    """Flag the Document row matching *filename* as available (available=1).

    The lookup key (court, casenum, docnum, subdocnum) is decoded from the
    filename.  A missing row or a failed save is logged rather than raised.
    """
    key = IACommon.get_meta_from_filename(filename)
    matches = Document.objects.filter(court=key["court"],
                                      casenum=key["casenum"],
                                      docnum=key["docnum"],
                                      subdocnum=key["subdocnum"])
    try:
        entry = matches[0]
    except IndexError:
        # Unexpected: no Document row exists for this filename.
        logging.error("mark_as_available: no entry for %s." % (filename))
        return

    entry.available = 1
    try:
        entry.save()
    except IntegrityError:
        logging.error("mark_as_available: could not save %s." % (filename))
def _cron_put_pickles():
    """Process all ready PickledPut rows, one case-level lock at a time.

    Pickles are ordered by filename (descending) so entries for the same
    case cluster together; the bucket lock is acquired once per case and
    dropped when the loop moves to a different case.
    """
    # Get uploader credentials.
    uploader_query = Uploader.objects.filter(key=AUTH_KEY)
    try:
        RECAP_UPLOADER_ID = uploader_query[0].id
    except IndexError:
        print " could not find uploader with key=%s" % AUTH_KEY
        return

    # Get all ready pickles
    query = PickledPut.objects.filter(ready=1, processing=0) \
        .order_by('-filename')

    # Set all ready pickles to the processing state
    #for ppentry in query:
    #    ppentry.processing = 1
    #    ppentry.save()

    # Keep track of court, casenum. Only lock and unlock once for each case.
    curr_court = None
    curr_casenum = None
    lock_nonce = None

    # Process pickles one at a time.
    for ppentry in query:
        filename = ppentry.filename
        ppmeta = IACommon.get_meta_from_filename(filename)
        court = ppmeta["court"]
        casenum = ppmeta["casenum"]

        # Make sure we have the lock for this case.
        if curr_court == court and curr_casenum == casenum:
            # Same case as the previous ppentry.
            if not lock_nonce:
                # Skip if we don't have the lock already.
                # ppentry.processing = 0
                # ppentry.save()
                continue
            # Otherwise, we already have the lock, so continue.
        else:
            # Switching to a new case.
            # Drop the current lock (from previous case), if necessary.
            if curr_court and curr_casenum:
                dropped, errmsg = BucketLockManager.drop_lock(curr_court,
                                                              curr_casenum,
                                                              RECAP_UPLOADER_ID,
                                                              nolocaldb=1)
                if not dropped:
                    # NOTE(review): this prints the NEW case's court/casenum,
                    # not the case whose lock failed to drop — confirm intent.
                    print " %s.%s someone stole my lock?" % \
                        (court, unicode(casenum))

            # Grab new lock
            curr_court = court
            curr_casenum = casenum
            lock_nonce, errmsg = BucketLockManager.get_lock(court, casenum,
                                                            RECAP_UPLOADER_ID,
                                                            one_per_uploader=1)
            if not lock_nonce:
                print " Passing on %s.%s: %s" % (court, casenum, errmsg)
                # We don't have a lock, so don't drop the lock in the next loop
                curr_court = None
                curr_casenum = None
                continue
        # We'll always have the lock here.

        # Unpickle the object
        obj, unpickle_msg = unpickle_object(filename)

        # Two cases for the unpickled object: Request or DocketXML
        if obj and ppentry.docket:
            _cron_process_docketXML(obj, ppentry)
        elif obj:
            # Dispatch the PUT request
            _cron_process_PDF(obj, ppentry)
        else:
            # Unpickling failed
            # If unpickling fails, it could mean that another cron job
            # has already finished this PP - not sure how to distinguish this
            print " %s %s (Another cron job completed?)" % (filename,
                                                            unpickle_msg)

            # Delete the entry from the DB
            ppentry.delete()
            # Delete the pickle file
            delete_pickle(filename)

    # Drop last lock
    if curr_court and curr_casenum:
        dropped, errmsg = BucketLockManager.drop_lock(curr_court, curr_casenum,
                                                      RECAP_UPLOADER_ID,
                                                      nolocaldb=1)
        if not dropped:
            print " %s.%s someone stole my lock??" % (court, unicode(casenum))
def _cron_put_pickles():
    """Process all ready PickledPut rows, one case-level lock at a time.

    Pickles are ordered by filename (descending) so entries for the same
    case cluster together; the bucket lock is acquired once per case and
    dropped when the loop moves on.  A lock nonce of 'bigdoc' is treated
    like a lock failure and the case is skipped.
    """
    # Get uploader credentials.
    uploader_query = Uploader.objects.filter(key=AUTH_KEY)
    try:
        RECAP_UPLOADER_ID = uploader_query[0].id
    except IndexError:
        print " could not find uploader with key=%s" % AUTH_KEY
        return

    # Get all ready pickles
    query = PickledPut.objects.filter(ready=1, processing=0) \
        .order_by('-filename')

    # Set all ready pickles to the processing state
    # for ppentry in query:
    #     ppentry.processing = 1
    #     ppentry.save()

    # Keep track of court, casenum. Only lock and unlock once for each case.
    curr_court = None
    curr_casenum = None
    lock_nonce = None

    # Process pickles one at a time.
    for ppentry in query:
        filename = ppentry.filename
        ppmeta = IACommon.get_meta_from_filename(filename)
        court = ppmeta["court"]
        casenum = ppmeta["casenum"]

        # Make sure we have the lock for this case.
        if curr_court == court and curr_casenum == casenum:
            # Same case as the previous ppentry.
            if not lock_nonce:
                # Skip if we don't have the lock already.
                # ppentry.processing = 0
                # ppentry.save()
                continue
            # Otherwise, we already have the lock, so continue.
        else:
            # Switching to a new case.
            # Drop the current lock (from previous case), if necessary.
            if curr_court and curr_casenum:
                dropped, errmsg = BucketLockManager.drop_lock(
                    curr_court, curr_casenum, RECAP_UPLOADER_ID, nolocaldb=1)
                if not dropped:
                    # NOTE(review): this prints the NEW case's court/casenum,
                    # not the case whose lock failed to drop — confirm intent.
                    print " %s.%s someone stole my lock?" % \
                        (court, unicode(casenum))

            # Grab new lock
            curr_court = court
            curr_casenum = casenum
            lock_nonce, errmsg = BucketLockManager.get_lock(court, casenum,
                                                            RECAP_UPLOADER_ID,
                                                            one_per_uploader=1)
            if not lock_nonce:
                print " Passing on %s.%s: %s" % (court, casenum, errmsg)
            # A 'bigdoc' nonce means the case is skipped the same way as a
            # failed lock acquisition.
            if not lock_nonce or lock_nonce == 'bigdoc':
                # We don't have a lock, so don't drop the lock in the next loop
                curr_court = None
                curr_casenum = None
                continue
        # We'll always have the lock here.

        # Unpickle the object
        obj, unpickle_msg = unpickle_object(filename)

        # Two cases for the unpickled object: Request or DocketXML
        if obj and ppentry.docket:
            print "Processing docket: %s" % filename
            _cron_process_docketXML(obj, ppentry)
        elif obj:
            # Dispatch the PUT request
            _cron_process_PDF(obj, ppentry)
        else:
            # Unpickling failed
            # If unpickling fails, it could mean that another cron job
            # has already finished this PP - not sure how to distinguish this
            print " %s %s (Another cron job completed?)" % (filename,
                                                            unpickle_msg)

            # Delete the entry from the DB
            ppentry.delete()
            # Delete the pickle file
            delete_pickle(filename)

    # Drop last lock
    if curr_court and curr_casenum:
        dropped, errmsg = BucketLockManager.drop_lock(curr_court, curr_casenum,
                                                      RECAP_UPLOADER_ID,
                                                      nolocaldb=1)
        if not dropped:
            print " %s.%s someone stole my lock??" % (court, unicode(casenum))