Example #1
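Both excerpts are methods of the same scraper class and rely on these module-level imports, which the originals do not show. Treating Pool as a thread pool is an assumption, inferred from the shared instance state mutated inside get_case (a process pool would not share it); LogLevel, CourtCase, and FaultEntity are project classes assumed to be defined elsewhere.

import base64
import re
import time
import xml.etree.ElementTree as ET
from multiprocessing.pool import ThreadPool as Pool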
    def get_case(self, thread_info):
        # Unpack the parameters
        queue_pos = thread_info["index"]
        row = thread_info["row"]
        start_date = thread_info["start_date"]
        end_date = thread_info["end_date"]
        page_number = thread_info["page_number"]

        case_id = CourtCase.extract_case_id(row)

        # Use queue_pos to space out the worker threads. Assuming each thread takes roughly
        # the same time, stagger the first round of workers with incremental sleep delays.
        # queue_pos is 0-based, so the first thread is not delayed
        if queue_pos < self.threads:
            time.sleep(queue_pos / 10.0)  # Space out threads by 100ms

        # Fetch the document text from the server and create a CourtCase on success,
        # or return the case ID so the caller can record the failure
        try:
            doc_filename = CourtCase.extract_filename(row)
            doc_text = CourtCase.get_document_text(doc_filename.replace(".doc", ".txt"), self.timeout)
            extended_info = self.get_verdict_extended_info(case_id, doc_filename)
        except Exception as e:
            self.log_message(LogLevel.ERROR, f"Error fetching verdict for case {case_id}: {e}")
            return case_id

        # Progress counter shared across worker threads; adequate for a rough
        # progress indicator, though += is not guaranteed to be atomic
        self.pool_progress += 1
        return CourtCase(row, extended_info, doc_text)
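
For illustration, here is a minimal, self-contained sketch of the staggered-start technique used in get_case. The pool size and job count are made up; the real code derives the spacing from queue_pos / 10.0 and self.threads.

import time
from multiprocessing.pool import ThreadPool

THREADS = 4  # stand-in for self.threads

def staggered_job(queue_pos):
    # Only the first round of workers sleeps; queue_pos is 0-based,
    # so the very first job starts immediately
    if queue_pos < THREADS:
        time.sleep(queue_pos / 10.0)  # 100 ms between thread start times
    return "job %d started at %.2f" % (queue_pos, time.monotonic())

with ThreadPool(THREADS) as pool:
    for line in pool.map(staggered_job, range(8)):
        print(line)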
Example #2
    def handle_result_page(self, soup, start_date, end_date, page_number,
                           specific_verdicts=None) -> "tuple[list, FaultEntity | None]":
        """Decodes the ASP.NET __VIEWSTATE hidden field, extracts the embedded XML
        search results, and builds a CourtCase instance for each verdict found"""
        return_list = []

        # Extract and decode the XML search results from the VIEWSTATE
        view_state = base64.b64decode(
            soup.find(id="__VIEWSTATE")["value"]
        ).decode("utf-8", "ignore")
        results_match = re.search(r"<Results>[\s\S]+?</Results>", view_state)
        if results_match is None:
            raise ValueError("No <Results> block found in the decoded VIEWSTATE")
        data_xml = results_match.group(0)

        # Parse the XML; each child Element (one per search result) is later
        # passed to CourtCase's constructor
        data_tree = ET.fromstring(data_xml)

        # If specific_verdicts was passed, filter data_tree only to the requested verdicts
        if specific_verdicts is not None:
            data_tree = [d for d in data_tree if CourtCase.extract_case_id(d) in specific_verdicts]

        # Number each result row; get_case uses the index to stagger the first
        # round of worker threads instead of firing all requests at once
        data_tree_numbered = []
        for i, row in enumerate(data_tree):
            data_tree_numbered.append({
                "index": i,
                "row": row,
                "start_date": start_date.strftime("%d/%m/%Y"),
                "end_date": end_date.strftime("%d/%m/%Y"),
                "page_number": page_number
            })

        # Initialize a thread pool and execute the jobs concurrently
        thread_pool = Pool(self.threads)
        tasks = [thread_pool.apply_async(self.get_case, (x,), callback=self.print_status_line)
                 for x in data_tree_numbered]
        tasks_results = [task.get() for task in tasks]
        thread_pool.close()
        thread_pool.join()

        failed_verdicts = [v for v in tasks_results if isinstance(v, str)]
        success_verdicts = [v for v in tasks_results if isinstance(v, CourtCase)]

        if failed_verdicts:
            return success_verdicts, FaultEntity((start_date, end_date), page_number, failed_verdicts)
        return success_verdicts, None
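
A self-contained sketch of the VIEWSTATE extraction pipeline from handle_result_page, runnable without the scraper class. The fake_viewstate payload is fabricated for illustration; a real page embeds the base64 blob in the __VIEWSTATE hidden input, with the XML surrounded by binary serializer data.

import base64
import re
import xml.etree.ElementTree as ET

# Fabricated payload standing in for a real __VIEWSTATE value
fake_viewstate = base64.b64encode(
    b"...serializer noise...<Results><Case id='123'/><Case id='456'/></Results>..."
).decode("ascii")

view_state = base64.b64decode(fake_viewstate).decode("utf-8", "ignore")
match = re.search(r"<Results>[\s\S]+?</Results>", view_state)
if match is None:
    raise ValueError("No <Results> block found in the decoded VIEWSTATE")

data_tree = ET.fromstring(match.group(0))
print([case.get("id") for case in data_tree])  # ['123', '456']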