from juriscraper.lib.date_utils import json_date_handler
from juriscraper.lib.log_tools import make_default_logger
from juriscraper.lib.string_utils import (harmonize, clean_string, trunc,
                                          CaseNameTweaker)
from juriscraper.lib.test_utils import MockRequest
from lxml import html
import re
import requests
from requests.adapters import HTTPAdapter

try:
    # Use cchardet for performance to detect the character encoding.
    import cchardet as chardet
except ImportError:
    # cchardet is optional; fall back to the chardet package under the same
    # name so the rest of the module can use `chardet` either way.
    import chardet

logger = make_default_logger()


class InsanityException(Exception):
    """Exception that must be constructed with a single message.

    NOTE(review): presumably raised when scraped data fails a sanity check;
    confirm against the callers, which are not visible here.
    """

    def __init__(self, message):
        """Store `message` as the exception's only argument.

        :param message: Description of the failure; required.
        """
        super(InsanityException, self).__init__(message)


class AbstractSite(object):
    """Contains generic methods for scraping data. Should be extended by all
    scrapers.

    Should not contain lists that can't be sorted by the _date_sort function.
    """

    def __init__(self, cnt=None):
        """Initialize the base object.

        :param cnt: Optional value, defaults to None. It is not used by the
            part of the initializer visible here.
            NOTE(review): confirm its meaning against subclasses/callers.
        """
        super(AbstractSite, self).__init__()
from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.html_utils import (set_response_encoding, clean_html, fix_links_in_lxml_tree, get_html_parsed_text) from juriscraper.lib.log_tools import make_default_logger from juriscraper.lib.string_utils import convert_date_string from juriscraper.pacer.utils import (get_pacer_case_id_from_docket_url, make_doc1_url, get_pacer_doc_id_from_doc1_url, get_court_id_from_url, reverse_goDLS_function, is_pdf) logger = make_default_logger() class FreeOpinionReport(object): """TODO: document here.""" EXCLUDED_COURT_IDS = [ 'casb', 'ganb', 'innb', 'mieb', 'miwb', 'nmib', 'nvb', 'ohsb', 'tnwb', 'vib' ] VALID_SORT_PARAMS = ('date_filed', 'case_number') def __init__(self, court_id, pacer_session): self.court_id = court_id self.session = pacer_session super(FreeOpinionReport, self).__init__() @property def url(self):