class OpinionsDownloader:
    """Download opinion dockets and their documents, then queue them for upload.

    Each docket returned by the PACER client is pickled to a file together
    with its documents, and a message describing those files is published
    through a pika channel.

    The instance can be used as a context manager so that the broker
    connection is closed when the work is done.
    """

    # TK: Add a param to set where to download files to?
    def __init__(self, username, password, host='localhost', queue='dockets'):
        """Create the PACER client and open the channel used for publishing.

        Args:
            username: Credential forwarded to ``PacerClient``.
            password: Credential forwarded to ``PacerClient``.
            host: Host passed to ``pika.ConnectionParameters``.
            queue: Name of the queue that is declared here and later used
                as the routing key for upload messages.
        """
        self.pacer_client = PacerClient(username, password)
        self.queue_name = queue
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host))
        self.channel = self.connection.channel()
        # set up the queue for later
        self.channel.queue_declare(queue=self.queue_name)

    def __enter__(self):
        """Return the downloader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the broker connection; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the broker connection if it is still open."""
        if self.connection.is_open:
            self.connection.close()

    def reinit_pacer_client(self, username, password):
        """Replace the PACER client with a newly constructed one."""
        # we have to log into each new court separately
        self.pacer_client = PacerClient(username, password)

    def get_opinions(self, court, start_date, end_date):
        """Fetch and parse the opinions listing for one court and date range.

        Returns:
            Whatever ``PP.parse_opinions`` produces for the fetched HTML; the
            rest of this class iterates over it and takes its ``len()``.
        """
        html = self.pacer_client.get_opinions_html(court, start_date, end_date)
        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        return dockets

    def get_document(self, court, casenum, de_seq_num, dm_id, doc_num):
        """Return the result of the PACER client's PDF download for one document."""
        return self.pacer_client.get_pdf_show_doc(court, casenum, de_seq_num,
                                                  dm_id, doc_num)

    def enqueue_opinions(self, court, start_date, end_date):
        """Download every docket in the date range and publish an upload message.

        For each docket, every document is downloaded and handed to
        ``_pickle_object``, then the docket itself is, and one pickled
        message referencing the results is published to the queue.
        """
        for docket in self.get_opinions(court, start_date, end_date):
            # Look the case number up once per docket instead of per log line.
            casenum = docket.get_casenum()
            docmap = {}
            for key, doc in docket.documents.items():
                doc_num = doc['doc_num']
                logger.info(' Downloading document %s.%s.%s.0',
                            court, casenum, doc_num)
                pdfbits = self.get_document(court, casenum,
                                            doc['pacer_de_seq_num'],
                                            doc['pacer_dm_id'],
                                            doc_num)
                logger.info(' Downloaded document %s.%s.%s.0',
                            court, casenum, doc_num)
                # pickle the document into a file
                # map the docnum-subdocnum to the filename
                docmap[key] = _pickle_object(pdfbits)

            # pickle file
            filename = _pickle_object(docket)

            # create message
            upload_message = {
                'docket_filename': filename,
                'docnums_to_filename': docmap,
                'court': docket.get_court(),
                'casenum': casenum,
            }

            # energize!
            # With the default exchange ('') the routing key selects the queue.
            self.channel.basic_publish(exchange='',
                                       routing_key=self.queue_name,
                                       body=pickle.dumps(upload_message))
            logger.info(' Sent upload message for %s.%s', court, casenum)
def __init__(self, username, password):
    """Create the PACER client and open the channel used for publishing.

    A blocking pika connection to 'localhost' is opened and the 'dockets'
    queue is declared on its channel so it exists before anything is
    published.
    """
    self.pacer_client = PacerClient(username, password)

    broker_params = pika.ConnectionParameters('localhost')
    self.connection = pika.BlockingConnection(broker_params)
    self.channel = self.connection.channel()

    # Declare the queue now; it is used later when messages are sent.
    self.channel.queue_declare(queue='dockets')
def __init__(self, username, password):
    """Build the PACER client, then connect to the broker on 'localhost'.

    The connection and its channel are kept on the instance, and the
    'dockets' queue is declared immediately so it is ready for later use.
    """
    self.pacer_client = PacerClient(username, password)

    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    self.connection = connection

    channel = connection.channel()
    self.channel = channel

    # The queue is declared up front for later publishing.
    channel.queue_declare(queue='dockets')
def reinit_pacer_client(self, username, password):
    """Swap in a newly constructed PACER client.

    Every court needs its own login, so a fresh client is built with the
    given credentials and replaces the previous one.
    """
    fresh_client = PacerClient(username, password)
    self.pacer_client = fresh_client
class OpinionsDownloader:
    """Download opinion dockets and their documents, then queue them for upload.

    Each docket returned by the PACER client is pickled to a file together
    with its documents, and a message describing those files is published
    through a pika channel.

    The instance can be used as a context manager so that the broker
    connection is closed when the work is done.
    """

    # TK: Add a param to set where to download files to?
    def __init__(self, username, password, host='localhost', queue='dockets'):
        """Create the PACER client and open the channel used for publishing.

        Args:
            username: Credential forwarded to ``PacerClient``.
            password: Credential forwarded to ``PacerClient``.
            host: Host passed to ``pika.ConnectionParameters``.
            queue: Name of the queue that is declared here and later used
                as the routing key for upload messages.
        """
        self.pacer_client = PacerClient(username, password)
        self.queue_name = queue
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host))
        self.channel = self.connection.channel()
        # set up the queue for later
        self.channel.queue_declare(queue=self.queue_name)

    def __enter__(self):
        """Return the downloader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the broker connection; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the broker connection if it is still open."""
        if self.connection.is_open:
            self.connection.close()

    def reinit_pacer_client(self, username, password):
        """Replace the PACER client with a newly constructed one."""
        # we have to log into each new court separately
        self.pacer_client = PacerClient(username, password)

    def get_opinions(self, court, start_date, end_date):
        """Fetch and parse the opinions listing for one court and date range.

        Returns:
            Whatever ``PP.parse_opinions`` produces for the fetched HTML; the
            rest of this class iterates over it and takes its ``len()``.
        """
        html = self.pacer_client.get_opinions_html(court, start_date, end_date)
        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        return dockets

    def get_document(self, court, casenum, de_seq_num, dm_id, doc_num):
        """Return the result of the PACER client's PDF download for one document."""
        return self.pacer_client.get_pdf_show_doc(court, casenum, de_seq_num,
                                                  dm_id, doc_num)

    def enqueue_opinions(self, court, start_date, end_date):
        """Download every docket in the date range and publish an upload message.

        For each docket, every document is downloaded and handed to
        ``_pickle_object``, then the docket itself is, and one pickled
        message referencing the results is published to the queue.
        """
        for docket in self.get_opinions(court, start_date, end_date):
            # Look the case number up once per docket instead of per log line.
            casenum = docket.get_casenum()
            docmap = {}
            for key, doc in docket.documents.items():
                doc_num = doc['doc_num']
                logger.info(' Downloading document %s.%s.%s.0',
                            court, casenum, doc_num)
                pdfbits = self.get_document(court, casenum,
                                            doc['pacer_de_seq_num'],
                                            doc['pacer_dm_id'],
                                            doc_num)
                logger.info(' Downloaded document %s.%s.%s.0',
                            court, casenum, doc_num)
                # pickle the document into a file
                # map the docnum-subdocnum to the filename
                docmap[key] = _pickle_object(pdfbits)

            # pickle file
            filename = _pickle_object(docket)

            # create message
            upload_message = {
                'docket_filename': filename,
                'docnums_to_filename': docmap,
                'court': docket.get_court(),
                'casenum': casenum,
            }

            # energize!
            # With the default exchange ('') the routing key selects the queue.
            self.channel.basic_publish(exchange='',
                                       routing_key=self.queue_name,
                                       body=pickle.dumps(upload_message))
            logger.info(' Sent upload message for %s.%s', court, casenum)