def test_parsing_results(self):
    """Can we do a simple query and parse?

    Walks the attachment-page fixtures, parses each one with
    AttachmentPage, and compares against the sibling ``.json`` file,
    creating that file on first run so a human can vet it.
    """
    paths = []
    path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                             "attachment_pages")
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.html'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        # Fixture names lead with the court id, e.g. "cand_….html".
        court = filename_sans_ext.split('_')[0]
        report = AttachmentPage(court)
        # Open in binary mode: the bytes are explicitly decoded from
        # UTF-8 below, so text mode would decode twice (and fail
        # outright on Python 3). Matches the sibling parse_files helper.
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        if not os.path.exists(json_path):
            # First run for this fixture: record the parsed output and
            # move on without asserting anything.
            with open(json_path, 'w') as f:
                print("Creating new file at %s" % json_path)
                json.dump(data, f, indent=2, sort_keys=True)
            continue
        with open(json_path) as f:
            j = json.load(f)
            self.assertEqual(j, data)
        sys.stdout.write("✓\n")
def test_parsing_results(self):
    """Can we do a simple query and parse?

    Parses every attachment-page HTML fixture and asserts the result
    matches the pre-recorded sibling ``.json`` file.
    """
    paths = []
    path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                             "attachment_pages")
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.html'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        # Fixture names lead with the court id, e.g. "cand_….html".
        court = filename_sans_ext.split('_')[0]
        report = AttachmentPage(court)
        # Open in binary mode: the payload is decoded from UTF-8 by
        # hand, so text mode would decode twice (and fail outright on
        # Python 3).
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        with open(json_path) as f:
            j = json.load(f)
            self.assertEqual(j, data)
        sys.stdout.write("✓\n")
def test_parsing_results(self):
    """Can we do a simple query and parse?

    Parses every possible-case-number XML fixture; when a sibling
    ``.json`` file exists the parse must match it, otherwise the parse
    must return None.
    """
    paths = []
    path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                             "possible_case_numbers")
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.xml'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        report = PossibleCaseNumberApi('anything')
        # Open in binary mode: the bytes are decoded from UTF-8 by hand
        # below, so text mode would decode twice (and fail outright on
        # Python 3).
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data(case_name=filename_sans_ext)
        if os.path.exists(json_path):
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)
        else:
            # If no json file, data should be None.
            self.assertIsNone(
                data,
                msg="No json file detected and response is not None. "
                    "Either create a json file for this test or make sure "
                    "you get back valid results."
            )
        sys.stdout.write("✓\n")
def parse_files(self, path_root, file_ext):
    """Run check_if_logged_in_page over every fixture under path_root.

    Each fixture's result is compared against a sibling ``.json`` file,
    which is created on first run instead of asserted against.
    """
    matches = sorted(
        os.path.join(base, name)
        for base, _, names in os.walk(path_root)
        for name in fnmatch.filter(names, file_ext)
    )
    width = max(len(p) for p in matches) + 2
    for idx, fixture in enumerate(matches):
        started = time.time()
        sys.stdout.write("%s. Doing %s" % (idx, fixture.ljust(width)))
        folder, name = os.path.split(fixture)
        stem = name.split('.')[0]
        json_path = os.path.join(folder, '%s.json' % stem)
        with open(fixture, 'rb') as fh:
            result = check_if_logged_in_page(fh.read())
        if not os.path.exists(json_path):
            # No recorded expectation yet -- write one and skip asserting.
            with open(json_path, 'w') as fh:
                print("Creating new file at %s" % json_path)
                json.dump(result, fh, indent=2, sort_keys=True)
            continue
        with open(json_path) as fh:
            self.assertEqual(json.load(fh), result)
        warn_or_crash_slow_parser(time.time() - started, max_duration=0.5)
        sys.stdout.write("✓\n")
def test_parsing_results(self):
    """Can we do a simple query and parse?

    Parses every possible-case-number XML fixture; when a sibling
    ``.json`` file exists the parse must match it, otherwise the parse
    must return None.
    """
    paths = []
    path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                             "possible_case_numbers")
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.xml'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        report = PossibleCaseNumberApi('anything')
        # Open in binary mode: the bytes are decoded from UTF-8 by hand
        # below, so text mode would decode twice (and fail outright on
        # Python 3).
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data(case_name=filename_sans_ext)
        if os.path.exists(json_path):
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)
        else:
            # If no json file, data should be None.
            self.assertIsNone(
                data,
                msg="No json file detected and response is not None. "
                    "Either create a json file for this test or make sure "
                    "you get back valid results.")
        sys.stdout.write("✓\n")
def parse_files(self, path_root, file_ext):
    """Check every fixture under path_root against its recorded JSON.

    Each file is fed to check_if_logged_in_page; the outcome must match
    the sibling ``.json`` file (created on the first run).
    """
    found = []
    for base, _, filenames in os.walk(path_root):
        found.extend(os.path.join(base, fn)
                     for fn in fnmatch.filter(filenames, file_ext))
    found.sort()
    pad = 2 + max(len(p) for p in found)
    for n, fixture in enumerate(found):
        began = time.time()
        sys.stdout.write("%s. Doing %s" % (n, fixture.ljust(pad)))
        head, tail = os.path.split(fixture)
        stem = tail.split('.')[0]
        expected_path = os.path.join(head, '%s.json' % stem)
        with open(fixture, 'rb') as fh:
            raw = fh.read()
        outcome = check_if_logged_in_page(raw)
        if os.path.exists(expected_path):
            with open(expected_path) as fh:
                self.assertEqual(json.load(fh), outcome)
            warn_or_crash_slow_parser(time.time() - began,
                                      max_duration=0.5)
            sys.stdout.write("✓\n")
        else:
            # First run: record the outcome rather than asserting.
            with open(expected_path, 'w') as fh:
                print("Creating new file at %s" % expected_path)
                json.dump(outcome, fh, indent=2, sort_keys=True)
def test_json_output(self, diff_res, expected):
    """Serialize the first diff result and compare it to the golden file.

    Setting REGENERATE=true in the environment rewrites the golden file
    instead of asserting against it.
    """
    serialized = diff_res[0].dump_json(indent=4)
    regenerating = os.environ.get('REGENERATE', 'false') == 'true'
    if regenerating:
        expected.write(serialized)
    else:
        assert jsondate.loads(serialized) == jsondate.load(expected)
def init_data():
    """Return cached data, falling back to the JSON file, then to ``{}``.

    Lookup order:
      1. ``Data.data`` -- the in-memory cache, if it has been populated.
      2. The JSON file at ``fname``.
      3. An empty dict when neither source is available.
    """
    try:
        return Data.data
    except AttributeError:
        # Cache attribute not populated yet; fall through to disk.
        pass
    try:
        # ``with`` closes the handle (the old ``file(fname)`` call leaked
        # it), and ``open`` works on both Python 2 and 3.
        with open(fname) as f:
            return json.load(f)
    except (IOError, OSError, ValueError):
        # Missing/unreadable file or invalid JSON: start fresh.
        return {}
def setUpClass(cls):
    """Load court metadata and date fixtures, then build one report per court."""
    pacer_session = PacerSession()
    if PACER_USERNAME and PACER_PASSWORD:
        # CAND chosen at random
        pacer_session = PacerSession(username=PACER_USERNAME,
                                     password=PACER_PASSWORD)
    courts_file = os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')
    with open(courts_file) as fh:
        cls.courts = get_courts_from_json(json.load(fh))
    dates_file = os.path.join(TESTS_ROOT,
                              'fixtures/valid_free_opinion_dates.json')
    with open(dates_file) as fh:
        cls.valid_dates = json.load(fh)
    cls.reports = {}
    for entry in cls.courts:
        cid = get_court_id_from_url(entry['court_link'])
        cls.reports[cid] = FreeOpinionReport(cid, pacer_session)
def run_parsers_on_path(self, path):
    """Test all the parsers on a given local path

    :param path: The path where you can find the files
    """
    fixtures = sorted(glob.glob(path))
    pad = max(len(p) for p in fixtures) + 2
    for n, fixture in enumerate(fixtures):
        sys.stdout.write("%s. Doing %s" % (n, fixture.ljust(pad)))
        began = time.time()
        folder, fname = os.path.split(fixture)
        stem = fname.split(".")[0]
        result_path = os.path.join(folder, "%s_result.json" % stem)
        searcher = LASCSearch(session=None)
        with open(fixture, "rb") as f:
            raw = json.load(f)
        cleaned = searcher._parse_case_data(raw)
        if not os.path.isfile(result_path):
            # First time testing this docket
            rule = "*" * 50
            print(
                "\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                "\n\n  %s\n\n"
                "Please test the data in this file before assuming "
                "everything worked.\n%s\n" % (rule, result_path, rule)
            )
            with open(result_path, "w") as f:
                json.dump(cleaned, f, indent=2, sort_keys=True)
            continue
        with open(result_path) as f:
            self.assertEqual(json.load(f), cleaned)
        finished = time.time()
        warn_or_crash_slow_parser(finished - began, max_duration=1)
        sys.stdout.write("✓ - %0.1fs\n" % (finished - began))
def setUp(self):
    """Build a (possibly logged-in) PACER session and per-court reports."""
    pacer_session = PacerSession()
    if pacer_credentials_are_defined():
        # CAND chosen at random
        pacer_session = get_pacer_session()
        pacer_session.login()
    courts_file = os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')
    with open(courts_file) as fh:
        self.courts = get_courts_from_json(json.load(fh))
    dates_file = os.path.join(TESTS_ROOT_EXAMPLES_PACER,
                              'dates/valid_free_opinion_dates.json')
    with open(dates_file) as fh:
        self.valid_dates = json.load(fh)
    self.reports = {}
    for entry in self.courts:
        cid = get_court_id_from_url(entry['court_link'])
        self.reports[cid] = FreeOpinionReport(cid, pacer_session)
def setUpClass(cls):
    """Prepare a PACER session, the court list, date fixtures, and reports."""
    pacer_session = PacerSession()
    if PACER_USERNAME and PACER_PASSWORD:
        # CAND chosen at random
        pacer_session = PacerSession(username=PACER_USERNAME,
                                     password=PACER_PASSWORD)
    with open(os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')) as src:
        cls.courts = get_courts_from_json(json.load(src))
    with open(os.path.join(
            TESTS_ROOT, 'fixtures/valid_free_opinion_dates.json')) as src:
        cls.valid_dates = json.load(src)
    court_ids = [get_court_id_from_url(c['court_link'])
                 for c in cls.courts]
    cls.reports = dict(
        (cid, FreeOpinionReport(cid, pacer_session)) for cid in court_ids)
def parse_files(self, path_root, file_ext, test_class,
                initialize_with_court=True):
    """Can we do a simple query and parse?"""
    fixtures = sorted(
        os.path.join(base, fn)
        for base, _, fns in os.walk(path_root)
        for fn in fnmatch.filter(fns, file_ext)
    )
    pad = max(len(p) for p in fixtures) + 2
    for n, fixture in enumerate(fixtures):
        began = time.time()
        sys.stdout.write("%s. Doing %s" % (n, fixture.ljust(pad)))
        folder, fname = os.path.split(fixture)
        stem = fname.split('.')[0]
        json_path = os.path.join(folder, '%s.json' % stem)
        if initialize_with_court:
            # Fixture names lead with the court id, e.g. "cand_1.html".
            report = test_class(stem.split('_')[0])
        else:
            report = test_class()
        with open(fixture, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        if not os.path.exists(json_path):
            # First run: record the parse so a human can vet it.
            with open(json_path, 'w') as f:
                print("Creating new file at %s" % json_path)
                json.dump(data, f, indent=2, sort_keys=True)
            continue
        with open(json_path) as f:
            self.assertEqual(json.load(f), data)
        warn_or_crash_slow_parser(time.time() - began, max_duration=2)
        sys.stdout.write("✓\n")
def parse_files(self, path_root, file_ext, test_class,
                initialize_with_court=True):
    """Can we do a simple query and parse?

    Walks ``path_root`` for fixtures matching ``file_ext``, parses each
    one with ``test_class``, and compares the result to the sibling
    ``.json`` file (creating it on the first run).
    """
    paths = []
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, file_ext):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        t1 = time.time()
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        if initialize_with_court:
            # Fixture names lead with the court id, e.g. "cand_1.html".
            court = filename_sans_ext.split('_')[0]
            report = test_class(court)
        else:
            report = test_class()
        # Open in binary mode: the bytes are decoded from UTF-8 by hand,
        # so text mode would decode twice (and fail outright on
        # Python 3). This matches the other parse_files variant.
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        if not os.path.exists(json_path):
            with open(json_path, 'w') as f:
                print("Creating new file at %s" % json_path)
                json.dump(data, f, indent=2, sort_keys=True)
            continue
        with open(json_path) as f:
            j = json.load(f)
        self.assertEqual(j, data)
        t2 = time.time()
        duration = t2 - t1
        warn_or_crash_slow_parser(duration, max_duration=2)
        sys.stdout.write("✓\n")
def run_parsers_on_path(
        self, path_root,
        required_fields=('date_filed', 'case_name', 'docket_number')):
    """Test all the parsers, faking the network query.

    :param path_root: Directory tree to walk for ``*.html`` fixtures.
    :param required_fields: Field names that must be truthy in every
        non-empty parse. (A tuple, not a list, so the default argument
        cannot be mutated across calls.)
    """
    paths = []
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.html'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        t1 = time.time()
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        court = filename_sans_ext.split('_')[0]
        report = DocketReport(court)
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        if data != {}:
            # If the docket is a valid docket, make sure some required
            # fields are populated.
            for field in required_fields:
                self.assertTrue(
                    data[field],
                    msg="Unable to find truthy value for field %s" % field,
                )
            self.assertEqual(data['court_id'], court)
            # Party-specific tests...
            for party in data['parties']:
                self.assertTrue(
                    party.get('name', False),
                    msg="Every party must have a name attribute. Did not "
                        "get a value for:\n\n%s" % party)
                # Protect against effed up adversary proceedings cases that
                # don't parse properly. See: cacb, 2:08-ap-01570-BB
                self.assertNotIn('----', party['name'])
        if not os.path.isfile(json_path):
            bar = "*" * 50
            print("\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                  "\n\n %s\n\n"
                  "Please test the data in this file before assuming "
                  "everything worked.\n%s\n" % (bar, json_path, bar))
            with open(json_path, 'w') as f:
                json.dump(data, f, indent=2, sort_keys=True)
            continue
        with open(json_path) as f:
            j = json.load(f)
            if j != {}:
                # Compare docket entries and parties first, for easier
                # debugging, then compare whole objects to be sure.
                self.assertEqual(j['docket_entries'],
                                 data['docket_entries'])
                self.assertEqual(j['parties'], data['parties'])
            self.assertEqual(j, data)
        t2 = time.time()
        duration = t2 - t1
        warn_or_crash_slow_parser(duration, max_duration=1)
        sys.stdout.write("✓ - %0.1fs\n" % (t2 - t1))
def run_parsers_on_path(
        self, path_root,
        required_fields=('date_filed', 'case_name', 'docket_number')):
    """Test all the parsers, faking the network query.

    :param path_root: Directory tree to walk for ``*.html`` fixtures.
    :param required_fields: Field names that must be truthy in every
        non-empty parse. (A tuple, not a list, so the default argument
        cannot be mutated across calls.)
    """
    paths = []
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, '*.html'):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        t1 = time.time()
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split('.')[0]
        json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
        court = filename_sans_ext.split('_')[0]
        report = DocketReport(court)
        # Open in binary mode: the bytes are decoded from UTF-8 by hand,
        # so text mode would decode twice (and fail outright on
        # Python 3). Matches the sibling run_parsers_on_path variant.
        with open(path, 'rb') as f:
            report._parse_text(f.read().decode('utf-8'))
        data = report.data
        if data != {}:
            # If the docket is a valid docket, make sure some required
            # fields are populated.
            for field in required_fields:
                self.assertTrue(
                    data[field],
                    msg="Unable to find truthy value for field %s" % field,
                )
            self.assertEqual(data['court_id'], court)
            # Party-specific tests...
            for party in data['parties']:
                self.assertTrue(
                    party.get('name', False),
                    msg="Every party must have a name attribute. Did not "
                        "get a value for:\n\n%s" % party
                )
                # Protect against effed up adversary proceedings cases that
                # don't parse properly. See: cacb, 2:08-ap-01570-BB
                self.assertNotIn('----', party['name'])
        if not os.path.isfile(json_path):
            bar = "*" * 50
            print("\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                  "\n\n %s\n\n"
                  "Please test the data in this file before assuming "
                  "everything worked.\n%s\n" % (bar, json_path, bar))
            with open(json_path, 'w') as f:
                json.dump(data, f, indent=2, sort_keys=True)
            continue
        with open(json_path) as f:
            j = json.load(f)
            if j != {}:
                # Compare docket entries and parties first, for easier
                # debugging, then compare whole objects to be sure.
                self.assertEqual(j['docket_entries'],
                                 data['docket_entries'])
                self.assertEqual(j['parties'], data['parties'])
            self.assertEqual(j, data)
        t2 = time.time()
        duration = t2 - t1
        warn_or_crash_slow_parser(duration, max_duration=1)
        sys.stdout.write("✓ - %0.1fs\n" % (t2 - t1))
def roundtrip(input):
    """Dump *input* via jsondate into a buffer and load it back."""
    buf = six.StringIO()
    jsondate.dump(input, buf)
    buf.seek(0)
    return jsondate.load(buf)
def test_dump_datetime_roundtrips(self):
    """A dict holding a date survives a jsondate dump/load round trip."""
    original = dict(created_at=datetime.date(2011, 1, 1))
    buf = StringIO.StringIO()
    jsondate.dump(original, buf)
    buf.seek(0)
    restored = jsondate.load(buf)
    self.assertEqual(original, restored)