Esempio n. 1
0
class OpinionsDownloader():
    """Download PACER opinion dockets and publish upload messages for them.

    The downloader holds a ``PacerClient`` session and a blocking ``pika``
    channel. For each docket found in a court's opinions report it downloads
    every attached document, hands the results to ``_pickle_object`` and
    publishes one pickled message describing the docket on the queue named
    by ``QUEUE_NAME``.
    """

    # Single source of truth for the queue name. Messages are published to
    # the default exchange (''), where the routing key has to equal the
    # declared queue name, so both call sites must use the same value.
    QUEUE_NAME = 'dockets'

    # TK: Add a param to set where to download files to?
    def __init__(self, username, password, host='localhost'):
        """Log into PACER and open the message-queue channel.

        Args:
            username: PACER login passed to ``PacerClient``.
            password: PACER password passed to ``PacerClient``.
            host: Broker host handed to ``pika.ConnectionParameters``.
                Defaults to ``'localhost'``, the previously hard-coded value.
        """
        self.pacer_client = PacerClient(username, password)
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host))
        self.channel = self.connection.channel()

        # set up the queue for later
        self.channel.queue_declare(queue=self.QUEUE_NAME)

    def reinit_pacer_client(self, username, password):
        """Replace the PACER client with a newly constructed one."""
        # we have to log into each new court separately
        self.pacer_client = PacerClient(username, password)

    def get_opinions(self, court, start_date, end_date):
        """Fetch and parse the opinions report for one court and date range.

        Args:
            court: Court identifier forwarded to the PACER client and parser.
            start_date: Start of the range, forwarded to the PACER client.
            end_date: End of the range, forwarded to the PACER client.

        Returns:
            Whatever ``PP.parse_opinions`` produces for the report HTML; it
            must support ``len()`` and iteration.
        """
        html = self.pacer_client.get_opinions_html(court, start_date, end_date)

        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        #if len(dockets) == 0:
        #    logger.debug(' 0 dockets downloaded. HTML response: %s', html)
        return dockets

    def get_document(self, court, casenum, de_seq_num, dm_id, doc_num):
        """Return the result of the client's ``get_pdf_show_doc`` call.

        All five arguments are forwarded unchanged, in the same order.
        """
        return self.pacer_client.get_pdf_show_doc(court, casenum, de_seq_num,
                                                  dm_id, doc_num)

    def enqueue_opinions(self, court, start_date, end_date):
        """Download every opinion document and publish one message per docket.

        For each docket returned by ``get_opinions`` the attached documents
        are downloaded and passed to ``_pickle_object``; the docket itself is
        passed to ``_pickle_object`` as well. A dict holding those results
        plus the court and case number is then pickled and published.

        Args:
            court: Court identifier to query.
            start_date: Start of the date range.
            end_date: End of the date range.
        """
        for docket in self.get_opinions(court, start_date, end_date):
            # The case number is needed for every log line and download
            # below, so look it up once per docket.
            casenum = docket.get_casenum()
            docmap = {}
            for key, doc in docket.documents.items():
                doc_num = doc['doc_num']
                logger.info('    Downloading document %s.%s.%s.0', court,
                            casenum, doc_num)
                pdfbits = self.get_document(court, casenum,
                                            doc['pacer_de_seq_num'],
                                            doc['pacer_dm_id'], doc_num)
                logger.info('    Downloaded document %s.%s.%s.0', court,
                            casenum, doc_num)
                # pickle the document into a file
                # map the docnum-subdocnum to the filename
                docmap[key] = _pickle_object(pdfbits)

            # pickle file
            filename = _pickle_object(docket)
            # create message
            upload_message = {
                'docket_filename': filename,
                'docnums_to_filename': docmap,
                'court': docket.get_court(),
                'casenum': casenum,
            }
            # energize!
            # NOTE(review): the body is a pickle, so whoever consumes this
            # queue must only unpickle messages from a trusted broker.
            self.channel.basic_publish(exchange='',
                                       routing_key=self.QUEUE_NAME,
                                       body=pickle.dumps(upload_message))
            logger.info('  Sent upload message for %s.%s', court, casenum)
Esempio n. 2
0
    def __init__(self, username, password):
        """Create the PACER client and open a channel on the local broker.

        Args:
            username: PACER login passed to ``PacerClient``.
            password: PACER password passed to ``PacerClient``.
        """
        self.pacer_client = PacerClient(username, password)

        broker_params = pika.ConnectionParameters('localhost')
        self.connection = pika.BlockingConnection(broker_params)
        self.channel = self.connection.channel()

        # Declare the queue now so it exists before anything is published.
        self.channel.queue_declare(queue='dockets')
    def __init__(self, username, password):
        """Set up the PACER session and the message-queue channel.

        Args:
            username: PACER login handed to ``PacerClient``.
            password: PACER password handed to ``PacerClient``.
        """
        self.pacer_client = PacerClient(username, password)
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters('localhost'),
        )
        opened_channel = self.connection.channel()
        self.channel = opened_channel

        # The 'dockets' queue is declared here, ahead of any publishing.
        opened_channel.queue_declare(queue='dockets')
 def reinit_pacer_client(self, username, password):
     """Swap in a newly constructed ``PacerClient``.

     Per the original note, each new court needs its own login, so the
     existing client is replaced rather than reused.
     """
     fresh_client = PacerClient(username, password)
     self.pacer_client = fresh_client
class OpinionsDownloader():
    """Fetch PACER opinion dockets and queue them for a later upload step.

    An instance owns a ``PacerClient`` session plus a blocking ``pika``
    connection and channel. ``enqueue_opinions`` is the main entry point: it
    downloads each docket's documents, passes them to ``_pickle_object`` and
    publishes a pickled summary message on the queue named by ``QUEUE_NAME``.
    """

    # Queue name shared by queue_declare and basic_publish. Publishing goes
    # through the default exchange (''), which routes on the queue name, so
    # the two call sites must never disagree.
    QUEUE_NAME = 'dockets'

    # TK: Add a param to set where to download files to?
    def __init__(self, username, password, host='localhost'):
        """Log into PACER and connect to the message broker.

        Args:
            username: PACER login passed to ``PacerClient``.
            password: PACER password passed to ``PacerClient``.
            host: Broker host given to ``pika.ConnectionParameters``; the
                default keeps the previously hard-coded ``'localhost'``.
        """
        self.pacer_client = PacerClient(username, password)
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host))
        self.channel = self.connection.channel()

        # set up the queue for later
        self.channel.queue_declare(queue=self.QUEUE_NAME)

    def reinit_pacer_client(self, username, password):
        """Build a new ``PacerClient`` and store it on the instance."""
        # we have to log into each new court separately
        self.pacer_client = PacerClient(username, password)

    def get_opinions(self, court, start_date, end_date):
        """Download the opinions report for a court and parse it.

        Args:
            court: Court identifier passed to the client and to the parser.
            start_date: Range start, passed through to the client.
            end_date: Range end, passed through to the client.

        Returns:
            The value returned by ``PP.parse_opinions``; ``len()`` is taken
            on it for logging and callers iterate over it.
        """
        html = self.pacer_client.get_opinions_html(court, start_date, end_date)

        dockets = PP.parse_opinions(html, court)
        logger.info(' Downloaded %d dockets for court %s between %s and %s',
                    len(dockets), court, start_date, end_date)
        #if len(dockets) == 0:
        #    logger.debug(' 0 dockets downloaded. HTML response: %s', html)
        return dockets

    def get_document(self, court, casenum, de_seq_num, dm_id, doc_num):
        """Forward the arguments to ``get_pdf_show_doc`` and return its result."""
        return self.pacer_client.get_pdf_show_doc(court, casenum, de_seq_num,
                                                  dm_id, doc_num)

    def enqueue_opinions(self, court, start_date, end_date):
        """Download all documents in the date range and publish upload messages.

        One message is published per docket. Its body is a pickled dict with
        the keys ``docket_filename``, ``docnums_to_filename``, ``court`` and
        ``casenum``.

        Args:
            court: Court identifier to query.
            start_date: Start of the date range.
            end_date: End of the date range.
        """
        for docket in self.get_opinions(court, start_date, end_date):
            # Resolve the case number once instead of on every log line and
            # download call inside the document loop.
            casenum = docket.get_casenum()
            docmap = {}
            for key, doc in docket.documents.items():
                doc_num = doc['doc_num']
                logger.info('    Downloading document %s.%s.%s.0', court,
                            casenum, doc_num)
                pdfbits = self.get_document(court, casenum,
                                            doc['pacer_de_seq_num'],
                                            doc['pacer_dm_id'], doc_num)
                logger.info('    Downloaded document %s.%s.%s.0', court,
                            casenum, doc_num)
                # pickle the document into a file
                # map the docnum-subdocnum to the filename
                docmap[key] = _pickle_object(pdfbits)

            # pickle file
            filename = _pickle_object(docket)
            # create message
            upload_message = {
                'docket_filename': filename,
                'docnums_to_filename': docmap,
                'court': docket.get_court(),
                'casenum': casenum,
            }
            # energize!
            # NOTE(review): consumers unpickle this body, which is only safe
            # when the broker and its publishers are trusted.
            self.channel.basic_publish(exchange='',
                                       routing_key=self.QUEUE_NAME,
                                       body=pickle.dumps(upload_message))
            logger.info('  Sent upload message for %s.%s', court, casenum)
Esempio n. 6
0
 def reinit_pacer_client(self, username, password):
     """Discard the current PACER client and construct a new one.

     The original note says every new court requires a separate login, which
     is why a fresh ``PacerClient`` is built from the credentials.
     """
     replacement = PacerClient(username, password)
     self.pacer_client = replacement