def _cron_fetch_update(lock):
    """Cron worker: fetch a case's docket from the Internet Archive and
    sync the local database.

    Reads court/casenum/nonce from the BucketLock row, downloads the
    docket XML via IADirect, and then either (a) updates the local DB and
    schedules an upload of any locally-held info IA is missing, deleting
    the lock, or (b) reschedules the lock for a later retry when the
    fetch/parse fails or the remote nonce does not yet match.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)

    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if not docketstring:
        # Couldn't get the docket. Try again later (only while we still
        # have a nonce to wait on; otherwise drop the lock).
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s couldn't fetch the docket: %d" % (court, casenum,
                                                        fetcherror)
        return

    ia_docket, message = DocketXML.parse_xml_string(docketstring)

    if not ia_docket:
        # Docket parsing error.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s docket parsing error: %s" % (court, casenum,
                                                   message)
        return
    elif ia_docket.nonce == nonce or not nonce:
        # Got the docket and it is either:
        #   1. up-to-date (nonce match), or
        #   2. expired (ignore nonce)
        # In both scenarios, update the local DB.
        DocumentManager.update_local_db(ia_docket, ignore_available=0)

        print " %s.%s fetched and DB updated." % (court, casenum)

        # Hash the pickled docket before and after merging local data so
        # we can detect whether local documents add anything IA lacks.
        ia_docket_orig_hash = hash(pickle.dumps(ia_docket))

        local_docket = DocumentManager.create_docket_from_local_documents(
            court, casenum)

        if local_docket:
            ia_docket.merge_docket(local_docket)

        ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))

        if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
            print " After fetch, some locally stored information was " \
                  "missing from %s.%s. Local info addition scheduled." % (
                      court, casenum)
            UploadHandler.do_me_up(ia_docket)

        # Remove the lock.
        lock.delete()
    else:
        # Got the docket but it is not up to date yet. Try again later.
        BucketLockManager.try_lock_later(lock)
        print " %s.%s fetched, wait more." % (court, casenum)
def _cron_fetch_update(lock):
    """Cron worker: fetch a case's docket from the Internet Archive and
    sync the local database.

    Reads court/casenum/nonce from the BucketLock row, downloads the
    docket XML via IADirect, and then either (a) updates the local DB and
    schedules an upload of any locally-held info IA is missing, deleting
    the lock, or (b) reschedules the lock for a later retry when the
    fetch/parse fails or the remote nonce does not yet match.
    """
    court = unicode(lock.court)
    casenum = unicode(lock.casenum)
    nonce = unicode(lock.nonce)

    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if not docketstring:
        # Couldn't get the docket. Try again later (only while we still
        # have a nonce to wait on; otherwise drop the lock).
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s couldn't fetch the docket: %d" % (court, casenum,
                                                        fetcherror)
        return

    ia_docket, message = DocketXML.parse_xml_string(docketstring)

    if not ia_docket:
        # Docket parsing error.
        if nonce:
            BucketLockManager.try_lock_later(lock)
        else:
            lock.delete()
        print " %s.%s docket parsing error: %s" % (court, casenum,
                                                   message)
        return
    elif ia_docket.nonce == nonce or not nonce:
        # Got the docket and it is either:
        #   1. up-to-date (nonce match), or
        #   2. expired (ignore nonce)
        # In both scenarios, update the local DB.
        DocumentManager.update_local_db(ia_docket, ignore_available=0)

        print " %s.%s fetched and DB updated." % (court, casenum)

        # Hash the pickled docket before and after merging local data so
        # we can detect whether local documents add anything IA lacks.
        ia_docket_orig_hash = hash(pickle.dumps(ia_docket))

        local_docket = DocumentManager.create_docket_from_local_documents(court,
                                                                          casenum)

        if local_docket:
            ia_docket.merge_docket(local_docket)

        ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))

        if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
            print " After fetch, some locally stored information was missing from %s.%s. Local info addition scheduled." % (court, casenum)
            UploadHandler.do_me_up(ia_docket)

        # Remove the lock.
        lock.delete()
    else:
        # Got the docket but it is not up to date yet. Try again later.
        BucketLockManager.try_lock_later(lock)
        print " %s.%s fetched, wait more." % (court, casenum)
def _cron_process_PDF(obj, ppentry):
    """Cron worker: vet a queued PDF and, if clean, push it to IA.

    `ppentry` is the queued pickle entry naming the file; `obj` is passed
    through to the low-level helpers (_is_invalid_pdf, _has_ssn,
    _dispatch_put).  A PDF that is invalid, contains an SSN, or is
    blacklisted is quarantined and marked unavailable instead of being
    uploaded.
    """
    filename = ppentry.filename
    meta = IACommon.get_meta_from_filename(filename)
    court = meta["court"]
    casenum = meta["casenum"]
    docnum = meta["docnum"]
    subdocnum = meta["subdocnum"]

    invalid_PDF = _is_invalid_pdf(obj, filename)

    # We only want to check for ssns on valid PDFs
    # PyPdf doesn't deal well with bad input
    if not invalid_PDF:
        # SSN privacy check
        has_ssn = _has_ssn(obj, filename)
    else:
        has_ssn = False

    # Blacklist file check
    in_blacklist = _in_blacklist(filename)

    if invalid_PDF or has_ssn or in_blacklist:
        # Problem file: record it as unavailable (empty filebits) and
        # push that status docket upstream instead of the PDF itself.
        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=0)
        UploadHandler.do_me_up(docket)

        # Delete the entry from the DB
        ppentry.delete()
        # Quarantine the pickle file for analysis
        _quarantine_pickle(filename, ssn=has_ssn,
                           blacklist_file=in_blacklist,
                           invalid_PDF=invalid_PDF)
        return

    put_result, put_msg = _dispatch_put(obj, ppentry)

    if put_result:
        # Put success-- mark this document as available in the DB
        DocumentManager.mark_as_available(filename)

        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=1)
        UploadHandler.do_me_up(docket)

    print " %s %s" % (filename, put_msg)
def _cron_process_PDF(obj, ppentry):
    """Cron worker: vet a queued PDF and, if clean, push it to IA.

    `ppentry` is the queued pickle entry naming the file; `obj` is passed
    through to the low-level helpers (_is_invalid_pdf, _has_ssn,
    _dispatch_put).  A PDF that is invalid, contains an SSN, or is
    blacklisted is quarantined and marked unavailable instead of being
    uploaded.
    """
    filename = ppentry.filename
    meta = IACommon.get_meta_from_filename(filename)
    court = meta["court"]
    casenum = meta["casenum"]
    docnum = meta["docnum"]
    subdocnum = meta["subdocnum"]

    invalid_PDF = _is_invalid_pdf(obj, filename)

    # We only want to check for ssns on valid PDFs
    # PyPdf doesn't deal well with bad input
    if not invalid_PDF:
        # SSN privacy check
        has_ssn = _has_ssn(obj, filename)
    else:
        has_ssn = False

    # Blacklist file check
    in_blacklist = _in_blacklist(filename)

    if invalid_PDF or has_ssn or in_blacklist:
        # Problem file: record it as unavailable (empty filebits) and
        # push that status docket upstream instead of the PDF itself.
        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=0)
        UploadHandler.do_me_up(docket)

        # Delete the entry from the DB
        ppentry.delete()
        # Quarantine the pickle file for analysis
        _quarantine_pickle(filename, ssn=has_ssn,
                           blacklist_file=in_blacklist,
                           invalid_PDF=invalid_PDF)
        return

    put_result, put_msg = _dispatch_put(obj, ppentry)

    if put_result:
        # Put success-- mark this document as available in the DB
        DocumentManager.mark_as_available(filename)

        docket = DocketXML.make_docket_for_pdf("", court, casenum, docnum,
                                               subdocnum, available=1)
        UploadHandler.do_me_up(docket)

    print " %s %s" % (filename, put_msg)
def _cron_me_up(ia_docket, docket, ppentry):
    """Merge `docket` (and any local-DB info) into `ia_docket` and upload
    the result to IA if anything changed.

    Returns a (put_result, message) pair.  When the merge produces no
    difference, the queued pickle entry and its file are deleted and
    (False, "not merged: no diff.") is returned.
    """
    ia_court = ia_docket.casemeta["court"]
    ia_casenum = ia_docket.casemeta["pacer_case_num"]

    # Save the original hash to diff with later
    ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
    ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))

    # Merge ia_docket with our local database information to fill in blank
    # fields that may exist in ia
    local_docket = DocumentManager.create_docket_from_local_documents(
        ia_court, ia_casenum, docket)

    if local_docket:
        ia_docket.merge_docket(local_docket)

    ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))

    if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
        print " Some locally stored information was missing from %s.%s. Local info added." % (
            ia_court, ia_casenum)

    # Step 2: Merge new docket into the existing IA docket
    ia_docket.merge_docket(docket)

    # Step 3: If diff, then upload the merged docket
    ia_docket_merged_hash = hash(pickle.dumps(ia_docket))

    if ia_docket_orig_hash != ia_docket_merged_hash:
        # Generate a new nonce for the docket
        ia_docket.nonce = DocketXML.generate_new_nonce()

        # Only flag a casemeta diff when the case metadata itself changed.
        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash

        # Put the docket to IA
        put_result, put_msg = put_docket(ia_docket, ia_court, ia_casenum,
                                         ppentry,
                                         casemeta_diff=casemeta_diff)

        return put_result, "merged: %s" % put_msg
    else:
        # No difference between IA docket and this docket, no need to upload.
        filename = ppentry.filename
        # Delete the entry from the DB
        ppentry.delete()
        # Delete the pickle file
        delete_pickle(filename)

        # Return False to reflect "no update"
        return False, "not merged: no diff."
def handle_cadkt(filebits, court, casenum, team_name, is_full=False):
    """Parse an appellate (CA) docket upload, merge it with IA, and
    mirror it into the local DB, crediting `team_name`.

    Returns a JSON payload describing the affected cases and documents,
    or a plain error string when the upload cannot be parsed.
    """
    parsed = ParsePacer.parse_cadkt(filebits, court, casenum, is_full)
    if not parsed:
        return "upload: could not parse docket."

    # Push the parsed docket to IA first, then sync the local database.
    do_me_up(parsed)
    DocumentManager.update_local_db(parsed, team_name=team_name)

    return simplejson.dumps({
        "cases": _get_cases_dict(casenum, parsed),
        "documents": _get_documents_dict(court, casenum),
        "message": "DktRpt successfully parsed.",
    })
def handle_cadkt(filebits, court, casenum, team_name=None, is_full=False):
    """Parse an appellate (CA) docket upload, merge it with IA, and
    sync the local DB.

    `team_name`, when given, is forwarded to the local-DB update so the
    contribution can be credited, matching the sibling upload handlers;
    it defaults to None so existing callers remain unaffected.  Returns
    a JSON payload on success or a plain error string when parsing
    fails.
    """
    docket = ParsePacer.parse_cadkt(filebits, court, casenum, is_full)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB, crediting the uploading team if known.
    # NOTE(review): assumes update_local_db's own team_name default is
    # None, as the other handlers imply -- confirm.
    DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "DktRpt successfully parsed."}

    message = simplejson.dumps(response)
    return message
def _cron_me_up(ia_docket, docket, ppentry):
    """Merge `docket` (and any local-DB info) into `ia_docket` and upload
    the result to IA if anything changed.

    Returns a (put_result, message) pair.  When the merge produces no
    difference, the queued pickle entry and its file are deleted and
    (False, "not merged: no diff.") is returned.
    """
    ia_court = ia_docket.casemeta["court"]
    ia_casenum = ia_docket.casemeta["pacer_case_num"]

    # Save the original hash to diff with later
    ia_docket_orig_hash = hash(pickle.dumps(ia_docket))
    ia_casemeta_orig_hash = hash(pickle.dumps(ia_docket.casemeta))

    # Merge ia_docket with our local database information to fill in blank
    # fields that may exist in ia
    local_docket = DocumentManager.create_docket_from_local_documents(ia_court,
                                                                      ia_casenum,
                                                                      docket)

    if local_docket:
        ia_docket.merge_docket(local_docket)

    ia_docket_after_local_merge_hash = hash(pickle.dumps(ia_docket))

    if ia_docket_orig_hash != ia_docket_after_local_merge_hash:
        print " Some locally stored information was missing from %s.%s. Local info added." % (
            ia_court, ia_casenum)

    # Step 2: Merge new docket into the existing IA docket
    ia_docket.merge_docket(docket)

    # Step 3: If diff, then upload the merged docket
    ia_docket_merged_hash = hash(pickle.dumps(ia_docket))

    if ia_docket_orig_hash != ia_docket_merged_hash:
        # Generate a new nonce for the docket
        ia_docket.nonce = DocketXML.generate_new_nonce()

        # Only flag a casemeta diff when the case metadata itself changed.
        ia_casemeta_merged_hash = hash(pickle.dumps(ia_docket.casemeta))
        casemeta_diff = ia_casemeta_orig_hash != ia_casemeta_merged_hash

        # Put the docket to IA
        put_result, put_msg = put_docket(ia_docket, ia_court, ia_casenum,
                                         ppentry,
                                         casemeta_diff=casemeta_diff)

        return put_result, "merged: %s" % put_msg
    else:
        # No difference between IA docket and this docket, no need to upload.
        filename = ppentry.filename
        # Delete the entry from the DB
        ppentry.delete()
        # Delete the pickle file
        delete_pickle(filename)

        # Return False to reflect "no update"
        return False, "not merged: no diff."
def handle_histdocqry(filebits, court, casenum, team_name):
    """Parse a PACER HistDocQry upload, push it to IA, and mirror it
    into the local DB, crediting `team_name`.

    Returns a JSON payload describing the case, or a plain error string
    when the page cannot be parsed.
    """
    parsed = ParsePacer.parse_histdocqry(filebits, court, casenum)
    if not parsed:
        return "upload: could not parse docket."

    # IA merge first, then the local mirror.
    do_me_up(parsed)
    DocumentManager.update_local_db(parsed, team_name=team_name)

    return simplejson.dumps({
        "cases": _get_cases_dict(casenum, parsed),
        "documents": _get_documents_dict(court, casenum),
        "message": "HistDocQry successfully parsed.",
    })
def put_docket(docket, court, casenum, ppentry, newbucket=0, casemeta_diff=1): # Put the docket to IA docketbits = docket.to_xml() request = IACommon.make_docketxml_request(docketbits, court, casenum, docket.casemeta, newbucket) put_result, put_msg = _dispatch_put(request, ppentry) if put_result: html_put_msg = IADirect.cleanup_docket_put(court, casenum, docket, metadiff=casemeta_diff) print " gov.uscourts.%s.%s.docket.html upload: %s" % (court, unicode( casenum), html_put_msg) DocumentManager.update_local_db(docket) return put_result, put_msg
def put_docket(docket, court, casenum, ppentry, newbucket=0, casemeta_diff=1):
    """Upload `docket` as XML to the IA bucket for (court, casenum).

    On a successful put this also regenerates the docket's HTML view on
    IA and refreshes the local DB.  Returns the (result, message) pair
    from the dispatch.
    """
    # Put the docket to IA
    docketbits = docket.to_xml()
    request = IACommon.make_docketxml_request(docketbits, court, casenum,
                                              docket.casemeta, newbucket)

    put_result, put_msg = _dispatch_put(request, ppentry)

    if put_result:
        # XML upload succeeded: rebuild the HTML rendering and mirror
        # the docket into the local database.
        html_put_msg = IADirect.cleanup_docket_put(court, casenum, docket,
                                                   metadiff=casemeta_diff)
        print " gov.uscourts.%s.%s.docket.html upload: %s" % (
            court, unicode(casenum), html_put_msg)
        DocumentManager.update_local_db(docket)

    return put_result, put_msg
def handle_doc1(filebits, court, filename, team_name):
    """ Write HTML (doc1) file metadata into the database.

    Looks up the document by the docid coerced from `filename`; unknown
    docids are ignored.  Parses the doc1 page (appellate courts use a
    different layout), merges any resulting docket with IA, and updates
    the local DB crediting `team_name`.  Returns a JSON payload or a
    plain error string.
    """
    logging.debug('handle_doc1 %s %s', court, filename)

    docid = docid_from_url_name(filename)

    query = Document.objects.filter(docid=docid)
    try:
        main_doc = query[0]
    except IndexError:
        # We have never seen this document's docket; nothing to attach to.
        logging.info("handle_doc1: unknown docid %s" % (docid))
        return "upload: doc1 ignored."
    else:
        casenum = main_doc.casenum
        main_docnum = main_doc.docnum

        # Sanity check
        if court != main_doc.court:
            logging.error("handle_doc1: court mismatch (%s, %s) %s" %
                          (court, main_doc.court, docid))
            return "upload: doc1 metadata mismatch."

    # Appellate courts serve a different doc1 page layout.
    if ParsePacer.is_appellate(court):
        docket = ParsePacer.parse_ca_doc1(filebits, court, casenum,
                                          main_docnum)
    else:
        docket = ParsePacer.parse_doc1(filebits, court, casenum,
                                       main_docnum)

    if docket:
        # Merge the docket with IA
        do_me_up(docket)
        # Update the local DB
        DocumentManager.update_local_db(docket, team_name=team_name)

    # NOTE(review): response is built even when parsing yielded no docket
    # -- reconstructed from mangled source; confirm against the original.
    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "doc1 successfully parsed."}
    message = simplejson.dumps(response)
    return message
def handle_dktrpt(filebits, court, casenum, team_name=None):
    """Parse a PACER DktRpt (docket report) upload, merge it with IA,
    and sync the local DB.

    `team_name`, when given, is forwarded to the local-DB update so the
    upload can be credited, matching the sibling upload handlers; it
    defaults to None so existing callers remain unaffected.  Returns a
    JSON payload on success or a plain error string when the page
    cannot be parsed.
    """
    # Optionally dump the raw docket to disk for debugging, gated on config.
    if config['DUMP_DOCKETS'] and re.search(config['DUMP_DOCKETS_COURT_REGEX'],
                                            court):
        logging.info("handle_dktrpt: Dumping docket %s.%s for debugging" %
                     (court, casenum))
        _dump_docket_for_debugging(filebits, court, casenum)

    docket = ParsePacer.parse_dktrpt(filebits, court, casenum)

    if not docket:
        return "upload: could not parse docket."

    # Merge the docket with IA
    do_me_up(docket)

    # Update the local DB, crediting the uploading team if known.
    # NOTE(review): assumes update_local_db's own team_name default is
    # None, as the other handlers imply -- confirm.
    DocumentManager.update_local_db(docket, team_name=team_name)

    response = {"cases": _get_cases_dict(casenum, docket),
                "documents": _get_documents_dict(court, casenum),
                "message": "DktRpt successfully parsed."}

    message = simplejson.dumps(response)
    return message
def handle_dktrpt(filebits, court, casenum, team_name):
    """Parse a PACER DktRpt upload, merge it with IA, and mirror it into
    the local DB, crediting `team_name`.

    Returns a JSON payload describing the case, or a plain error string
    when the page cannot be parsed.
    """
    if config.DUMP_DOCKETS and re.search(config.DUMP_DOCKETS_COURT_REGEX, court):
        # Debug aid: persist the raw upload for later inspection.
        logging.info("handle_dktrpt: Dumping docket %s.%s for debugging" % (
            court, casenum))
        _dump_docket_for_debugging(filebits, court, casenum)

    parsed = ParsePacer.parse_dktrpt(filebits, court, casenum)
    if not parsed:
        return "upload: could not parse docket."

    # IA merge first, then the local mirror.
    do_me_up(parsed)
    DocumentManager.update_local_db(parsed, team_name=team_name)

    return simplejson.dumps({
        "cases": _get_cases_dict(casenum, parsed),
        "documents": _get_documents_dict(court, casenum),
        "message": "DktRpt successfully parsed.",
    })
def __init__(self):
    """Build the main window from the Glade UI file and wire up signals."""
    self.builder = gtk.Builder()
    self.builder.add_from_file("ui.glade")
    # Document manager obtained from the module-level factory.
    self.documentManager = DocumentManager.getDocumentManager()

    # Map Glade signal names onto their handler callables.
    signals = {
        "on_MainWindow_destroy": gtk.main_quit,
        "on_NewDiagramMenuBar_activate": self.newDiagram,
        "on_QuitMenuItem_activate": gtk.main_quit,
        "on_PreferencesMenuItem_activate": self.showPreferences,
        "on_OpenMenuItem_activate": self.openDiagram,
        "on_SaveMenuItem_activate": self.saveDiagram,
        "on_SaveAsMenuItem_activate": self.saveDiagramAs
    }
    self.builder.connect_signals(signals)

    self.window = self.builder.get_object("mainWindow")
    self.tabsPanel = self.builder.get_object("tabsPanel")
    self.window.show_all()
''' if len(sys.argv) != 2: sys.stderr.write("Usage: %s <filename_containing_cases_to_repair>\n " % sys.argv[0]) sys.stderr.write(" The contents of filename should have a single case per line, each identified by 'court casenum'\n " ) sys.exit(1) cases_to_repair = read_in_cases_to_repair(sys.argv[1]) for case in cases_to_repair: court = case[0] casenum = case[1] print "Repairing case %s.%s...." % (court, casenum) docket = DocumentManager.create_docket_from_local_documents(court, casenum) if docket: # this will merge our docket with existing one on IA UploadHandler.do_me_up(docket) else: print " Could not create docket from local documents for %s %s" % (court, casenum) # for each case, create docket fromlocal # call do_me_up(docket) # download ia_docket # # merge ia docket and local docket
logging.error(message) return HttpResponse(message) except ValueError, err: message = "adddocmeta: %s." % unicode(err) logging.error(message) return HttpResponse(message) # Necessary to preserve backwards compatibility with 0.6 # This param prevents tons of garbage from being printed to # the error console after an Adddocmeta request try: add_case_info = request.POST["add_case_info"] except KeyError: add_case_info = None DocumentManager.handle_adddocmeta(docid, court, casenum, de_seq_num, dm_id, docnum, subdocnum) if add_case_info: response = { "documents": UploadHandler._get_documents_dict(court, casenum), "message": "adddocmeta: DB updated for docid=%s" % (docid) } message = simplejson.dumps(response) else: message = "adddocmeta: DB updated for docid=%s" % (docid) return HttpResponse(message) def lock(request): try: key = request.GET["key"].strip()
return HttpResponse(message) except ValueError, err: message = "adddocmeta: %s." % unicode(err) logging.error(message) return HttpResponse(message) # Necessary to preserve backwards compatibility with 0.6 # This param prevents tons of garbage from being printed to # the error console after an Adddocmeta request try: add_case_info = request.POST["add_case_info"] except KeyError: add_case_info = None DocumentManager.handle_adddocmeta(docid, court, casenum, de_seq_num, dm_id, docnum, subdocnum) if add_case_info: response = {"documents": UploadHandler._get_documents_dict(court, casenum), "message": "adddocmeta: DB updated for docid=%s" % (docid) } message = simplejson.dumps(response) else: message = "adddocmeta: DB updated for docid=%s" % (docid) return HttpResponse(message) def lock(request): try: key = request.GET["key"].strip() court = request.GET["court"].strip() casenum = request.GET["casenum"].strip()
sys.stderr.write("Usage: %s <filename_containing_cases_to_repair>\n " % sys.argv[0]) sys.stderr.write( " The contents of filename should have a single case per line, each identified by 'court casenum'\n " ) sys.exit(1) cases_to_repair = read_in_cases_to_repair(sys.argv[1]) for case in cases_to_repair: court = case[0] casenum = case[1] print "Repairing case %s.%s...." % (court, casenum) docket = DocumentManager.create_docket_from_local_documents( court, casenum) if docket: # this will merge our docket with existing one on IA UploadHandler.do_me_up(docket) else: print " Could not create docket from local documents for %s %s" % ( court, casenum) # for each case, create docket fromlocal # call do_me_up(docket) # download ia_docket # # merge ia docket and local docket #
def handle_pdf(filebits, court, url, team_name=None):
    """ Write PDF file metadata into the database and upload to IA.

    Looks the document up by the docid coerced from `url`; unknown
    docids are ignored.  The PDF bytes are pushed to IA only when their
    sha1 differs from the one on record; the docket update is pushed
    either way.  `team_name`, when given, is forwarded to the local-DB
    update for crediting, matching the sibling handlers; it defaults to
    None so existing callers remain unaffected.
    """
    # Parse coerced docid out of url
    try:
        docid = docid_from_url_name(url)
    except ValueError:
        logging.warning("handle_pdf: no url available to get docid")
        return "upload: pdf failed. no url supplied."

    # Lookup based on docid b/c it's the only metadata we have
    # Document exists if we've previously parsed the case's docket
    query = Document.objects.filter(docid=docid)
    try:
        doc = query[0]
    except IndexError:
        logging.info("handle_pdf: haven't yet seen docket %s" % (docid))
        return "upload: pdf ignored."
    else:
        # Sanity check
        if doc.court != court:
            logging.error("handle_pdf: court mismatch (%s, %s) %s" %
                          (court, doc.court, url))
            return "upload: pdf metadata mismatch."

        casenum = doc.casenum
        docnum = doc.docnum
        subdocnum = doc.subdocnum
        sha1 = doc.sha1

    # Docket with updated sha1, available, and upload_date
    docket = DocketXML.make_docket_for_pdf(filebits, court, casenum,
                                           docnum, subdocnum, available=0)
    # Credit the uploading team if known.
    # NOTE(review): assumes update_local_db's own team_name default is
    # None, as the other handlers imply -- confirm.
    DocumentManager.update_local_db(docket, team_name=team_name)

    if docket.get_document_sha1(docnum, subdocnum) != sha1:
        # Upload the file -- either doesn't exist on IA or has different sha1

        # Gather all the additional metadata we have
        #  - from the docket we just made
        doc_meta = docket.get_document_metadict(docnum, subdocnum)
        #  - from the database, if available
        if doc.docid:
            doc_meta["pacer_doc_id"] = doc.docid
        if doc.de_seq_num:
            doc_meta["pacer_de_seq_num"] = doc.de_seq_num
        if doc.dm_id:
            doc_meta["pacer_dm_id"] = doc.dm_id

        # Push the file to IA
        IA.put_file(filebits, court, casenum, docnum, subdocnum, doc_meta)

    # Whether we uploaded the file, push the docket update to IA.
    do_me_up(docket)

    logging.info("handle_pdf: uploaded %s.%s.%s.%s.pdf" %
                 (court, casenum, docnum, subdocnum))

    message = "pdf uploaded."
    response = {}
    response["message"] = message
    jsonout = simplejson.dumps(response)
    return jsonout
def handle_pdf(filebits, court, url, team_name): """ Write PDF file metadata into the database. """ # Parse coerced docid out of url try: docid = docid_from_url_name(url) except ValueError: logging.warning("handle_pdf: no url available to get docid") return "upload: pdf failed. no url supplied." # Lookup based on docid b/c it's the only metadata we have # Document exists if we've previously parsed the case's docket query = Document.objects.filter(docid=docid) try: doc = query[0] except IndexError: logging.info("handle_pdf: haven't yet seen docket %s" % docid) return "upload: pdf ignored because we don't have docket %s" % docid else: # Sanity check if doc.court != court: logging.error("handle_pdf: court mismatch (%s, %s) %s" % (court, doc.court, url)) return "upload: pdf metadata mismatch." casenum = doc.casenum docnum = doc.docnum subdocnum = doc.subdocnum sha1 = doc.sha1 # Docket with updated sha1, available, and upload_date docket = DocketXML.make_docket_for_pdf(filebits, court, casenum, docnum, subdocnum, available=0) DocumentManager.update_local_db(docket, team_name=team_name) if docket.get_document_sha1(docnum, subdocnum) != sha1: # Upload the file -- either doesn't exist on IA or has different sha1 # Gather all the additional metadata we have # - from the docket we just made doc_meta = docket.get_document_metadict(docnum, subdocnum) # - from the database, if available if doc.docid: doc_meta["pacer_doc_id"] = doc.docid if doc.de_seq_num: doc_meta["pacer_de_seq_num"] = doc.de_seq_num if doc.dm_id: doc_meta["pacer_dm_id"] = doc.dm_id # Push the file to IA IA.put_file(filebits, court, casenum, docnum, subdocnum, doc_meta) # Whether we uploaded the file, push the docket update to IA. do_me_up(docket) logging.info("handle_pdf: uploaded %s.%s.%s.%s.pdf" % (court, casenum, docnum, subdocnum)) message = "pdf uploaded." response = {"message": message} return simplejson.dumps(response)