Example #1
    def test_parsing_results(self):
        """Can we do a simple query and parse?"""
        paths = []
        path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                                 "attachment_pages")
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.html'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
            court = filename_sans_ext.split('_')[0]

            report = AttachmentPage(court)
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data
            if not os.path.exists(json_path):
                with open(json_path, 'w') as f:
                    print("Creating new file at %s" % json_path)
                    json.dump(data, f, indent=2, sort_keys=True)
                    continue
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)

            sys.stdout.write("✓\n")
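
Most of these examples share one golden-file (snapshot) pattern: parse a fixture, then either create the expected JSON on the first run or compare against it on later runs. Below is a minimal sketch of that pattern distilled from the examples; the GoldenFileMixin class and the assert_matches_golden helper are illustrative names, not part of the original suite.

import json
import os
import unittest


class GoldenFileMixin(unittest.TestCase):
    def assert_matches_golden(self, data, json_path):
        """Compare data against its golden JSON file, creating the file on
        the first run. Hypothetical helper distilled from this listing."""
        if not os.path.exists(json_path):
            # First run: record the output as the new expected value.
            with open(json_path, 'w') as f:
                print("Creating new file at %s" % json_path)
                json.dump(data, f, indent=2, sort_keys=True)
            return
        # Later runs: the output must match the recorded snapshot exactly.
        with open(json_path) as f:
            self.assertEqual(json.load(f), data)
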
Example #2
    def test_parsing_results(self):
        """Can we do a simple query and parse?"""
        paths = []
        path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                                 "attachment_pages")
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.html'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
            court = filename_sans_ext.split('_')[0]

            report = AttachmentPage(court)
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)

            sys.stdout.write("✓\n")
Example #3
    def test_parsing_results(self):
        """Can we do a simple query and parse?"""
        paths = []
        path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                                 "possible_case_numbers")
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.xml'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)

            report = PossibleCaseNumberApi('anything')
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data(case_name=filename_sans_ext)
            if os.path.exists(json_path):
                with open(json_path) as f:
                    j = json.load(f)
                    self.assertEqual(j, data)
            else:
                # If no json file, data should be None.
                self.assertIsNone(
                    data,
                    msg="No json file detected and response is not None. "
                        "Either create a json file for this test or make sure "
                        "you get back valid results."
                )

            sys.stdout.write("✓\n")
Example #4
    def parse_files(self, path_root, file_ext):
        """Run check_if_logged_in_page() on each matching file and compare
        the result against the golden JSON file stored beside it."""
        paths = []
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, file_ext):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            t1 = time.time()
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)

            with open(path, 'rb') as f:
                text = f.read()

            result = check_if_logged_in_page(text)

            if not os.path.exists(json_path):
                with open(json_path, 'w') as f:
                    print("Creating new file at %s" % json_path)
                    json.dump(result, f, indent=2, sort_keys=True)
                continue
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, result)

            t2 = time.time()
            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=0.5)

            sys.stdout.write("✓\n")
Example #5
    def test_parsing_results(self):
        """Can we do a simple query and parse?"""
        paths = []
        path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                                 "possible_case_numbers")
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.xml'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)

            report = PossibleCaseNumberApi('anything')
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data(case_name=filename_sans_ext)
            if os.path.exists(json_path):
                with open(json_path) as f:
                    j = json.load(f)
                    self.assertEqual(j, data)
            else:
                # If no json file, data should be None.
                self.assertIsNone(
                    data,
                    msg="No json file detected and response is not None. "
                    "Either create a json file for this test or make sure "
                    "you get back valid results.")

            sys.stdout.write("✓\n")
Example #6
    def test_json_output(self, diff_res, expected):
        diff_json = diff_res[0].dump_json(indent=4)

        if os.environ.get('REGENERATE', 'false') == 'true':
            expected.write(diff_json)
            return

        assert jsondate.loads(diff_json) == jsondate.load(expected)
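
The REGENERATE environment variable above is a convenient switch for rebuilding expected fixtures instead of hand-editing them. Here is a sketch of the same toggle factored into a reusable helper; check_or_record is a hypothetical name, and jsondate is the same drop-in json wrapper used above.

import os

import jsondate


def check_or_record(actual_json, expected_file):
    """Hypothetical helper mirroring the test above: with REGENERATE=true
    set in the environment, rewrite the expected fixture; otherwise
    compare the actual output against it."""
    if os.environ.get('REGENERATE', 'false') == 'true':
        expected_file.write(actual_json)
        return
    assert jsondate.loads(actual_json) == jsondate.load(expected_file)

Running the suite once with REGENERATE=true refreshes every snapshot; running it without the variable asserts against them.
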
Example #7
 def init_data():
     # Return the cached copy if it has already been loaded.
     try:
         return Data.data
     except AttributeError:
         # Python 2's file() builtin is gone in Python 3; use open() in a
         # context manager and catch only the expected failures.
         try:
             with open(fname) as f:
                 return json.load(f)
         except (IOError, ValueError):
             return {}
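
The same lazy-load-and-fallback idea can be written without nested try blocks. A sketch using an explicit module-level cache; fname is carried over from the example, which does not show where it (or Data) is defined.

import json

_data_cache = None


def init_data(fname):
    """Load the JSON file once and memoize the result, falling back to an
    empty dict when the file is missing or malformed. Sketch only."""
    global _data_cache
    if _data_cache is None:
        try:
            with open(fname) as f:
                _data_cache = json.load(f)
        except (IOError, ValueError):
            _data_cache = {}
    return _data_cache
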
Example #8
    def setUpClass(cls):
        pacer_session = PacerSession()

        if PACER_USERNAME and PACER_PASSWORD:
            # CAND chosen at random
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)

        with open(os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')) as j:
            cls.courts = get_courts_from_json(json.load(j))

        path = os.path.join(TESTS_ROOT, 'fixtures/valid_free_opinion_dates.json')
        with open(path) as j:
            cls.valid_dates = json.load(j)

        cls.reports = {}
        for court in cls.courts:
            court_id = get_court_id_from_url(court['court_link'])
            cls.reports[court_id] = FreeOpinionReport(court_id, pacer_session)
Example #9
    def run_parsers_on_path(self, path):
        """Test all the parsers on a given local path

        :param path: The path where you can find the files
        """
        file_paths = glob.glob(path)
        file_paths.sort()
        path_max_len = max(len(path) for path in file_paths) + 2
        for i, path in enumerate(file_paths):
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            t1 = time.time()
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split(".")[0]
            json_path = os.path.join(
                dirname, "%s_result.json" % filename_sans_ext
            )

            lasc = LASCSearch(session=None)
            with open(path, "rb") as f:
                data = json.load(f)
                clean_data = lasc._parse_case_data(data)

            if not os.path.isfile(json_path):
                # First time testing this docket
                bar = "*" * 50
                print(
                    "\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                    "\n\n  %s\n\n"
                    "Please test the data in this file before assuming "
                    "everything worked.\n%s\n" % (bar, json_path, bar)
                )
                with open(json_path, "w") as f:
                    json.dump(clean_data, f, indent=2, sort_keys=True)
                    continue

            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, clean_data)

            t2 = time.time()
            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=1)
            sys.stdout.write("✓ - %0.1fs\n" % (t2 - t1))
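
Several examples time each file and pass the duration to warn_or_crash_slow_parser, whose body is not shown in this listing. Judging only from the call sites, a plausible stand-in could look like the sketch below; the warn-versus-fail split and the 2x threshold are assumptions, not the library's actual code.

import warnings


def warn_or_crash_slow_parser(duration, max_duration=1):
    # Assumed behavior inferred from the call sites: warn when a parse is
    # over budget, fail outright when it is far over. Illustrative only.
    if duration > 2 * max_duration:
        raise AssertionError("Parser took %0.1fs; budget was %0.1fs" %
                             (duration, max_duration))
    if duration > max_duration:
        warnings.warn("Slow parser: %0.1fs against a budget of %0.1fs" %
                      (duration, max_duration))
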
Example #10
    def setUp(self):
        pacer_session = PacerSession()

        if pacer_credentials_are_defined():
            # CAND chosen at random
            pacer_session = get_pacer_session()
            pacer_session.login()

        with open(os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')) as j:
            self.courts = get_courts_from_json(json.load(j))

        path = os.path.join(TESTS_ROOT_EXAMPLES_PACER,
                            'dates/valid_free_opinion_dates.json')
        with open(path) as j:
            self.valid_dates = json.load(j)

        self.reports = {}
        for court in self.courts:
            court_id = get_court_id_from_url(court['court_link'])
            self.reports[court_id] = FreeOpinionReport(court_id, pacer_session)
Example #11
    def setUpClass(cls):
        pacer_session = PacerSession()

        if PACER_USERNAME and PACER_PASSWORD:
            # CAND chosen at random
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)

        with open(os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')) as j:
            cls.courts = get_courts_from_json(json.load(j))

        path = os.path.join(TESTS_ROOT,
                            'fixtures/valid_free_opinion_dates.json')
        with open(path) as j:
            cls.valid_dates = json.load(j)

        cls.reports = {}
        for court in cls.courts:
            court_id = get_court_id_from_url(court['court_link'])
            cls.reports[court_id] = FreeOpinionReport(court_id, pacer_session)
Example #12
    def parse_files(self,
                    path_root,
                    file_ext,
                    test_class,
                    initialize_with_court=True):
        """Can we do a simple query and parse?"""
        paths = []
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, file_ext):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            t1 = time.time()
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)

            if initialize_with_court:
                court = filename_sans_ext.split('_')[0]
                report = test_class(court)
            else:
                report = test_class()
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data
            if not os.path.exists(json_path):
                with open(json_path, 'w') as f:
                    print("Creating new file at %s" % json_path)
                    json.dump(data, f, indent=2, sort_keys=True)
                continue
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)
            t2 = time.time()
            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=2)

            sys.stdout.write("✓\n")
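
A hypothetical call site for this parse_files helper, using the fixture path and report class from Example #1; the test name is illustrative.

    def test_attachment_pages(self):
        # Hypothetical call site; AttachmentPage is initialized with the
        # court id taken from each file name prefix, as in Example #1.
        path_root = os.path.join(TESTS_ROOT, "examples", "pacer",
                                 "attachment_pages")
        self.parse_files(path_root, '*.html', AttachmentPage,
                         initialize_with_court=True)
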
Example #13
    def parse_files(self, path_root, file_ext, test_class,
                    initialize_with_court=True):
        """Can we do a simple query and parse?"""
        paths = []
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, file_ext):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):
            t1 = time.time()
            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)

            if initialize_with_court:
                court = filename_sans_ext.split('_')[0]
                report = test_class(court)
            else:
                report = test_class()
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data
            if not os.path.exists(json_path):
                with open(json_path, 'w') as f:
                    print("Creating new file at %s" % json_path)
                    json.dump(data, f, indent=2, sort_keys=True)
                continue
            with open(json_path) as f:
                j = json.load(f)
                self.assertEqual(j, data)
            t2 = time.time()
            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=2)

            sys.stdout.write("✓\n")
Example #14
    def run_parsers_on_path(
            self,
            path_root,
            required_fields=['date_filed', 'case_name', 'docket_number']):
        """Test all the parsers, faking the network query."""
        paths = []
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.html'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):

            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            t1 = time.time()
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
            court = filename_sans_ext.split('_')[0]

            report = DocketReport(court)
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data

            if data != {}:
                # If the docket is a valid docket, make sure some required
                # fields are populated.
                for field in required_fields:
                    self.assertTrue(
                        data[field],
                        msg="Unable to find truthy value for field %s" % field,
                    )

                self.assertEqual(data['court_id'], court)

                # Party-specific tests...
                for party in data['parties']:
                    self.assertTrue(
                        party.get('name', False),
                        msg="Every party must have a name attribute. Did not "
                        "get a value for:\n\n%s" % party)
                    # Protect against effed up adversary proceedings cases that
                    # don't parse properly. See: cacb, 2:08-ap-01570-BB
                    self.assertNotIn('----', party['name'])

            if not os.path.isfile(json_path):
                bar = "*" * 50
                print("\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                      "\n\n  %s\n\n"
                      "Please test the data in this file before assuming "
                      "everything worked.\n%s\n" % (bar, json_path, bar))
                with open(json_path, 'w') as f:
                    json.dump(data, f, indent=2, sort_keys=True)
                    #self.assertFalse(True)
                    continue

            with open(json_path) as f:
                j = json.load(f)
                if j != {}:
                    # Compare docket entries and parties first, for easier
                    # debugging, then compare whole objects to be sure.
                    self.assertEqual(j['docket_entries'],
                                     data['docket_entries'])
                    self.assertEqual(j['parties'], data['parties'])
                self.assertEqual(j, data)
            t2 = time.time()

            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=1)
            sys.stdout.write("✓ - %0.1fs\n" % (t2 - t1))
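
One style note on the signature above (and on its variant in the next example): required_fields=[...] is a mutable default argument. It is harmless here because the list is only read, but if a parser ever appended to it, the same list object would leak across calls. The conventional defensive spelling, as a sketch:

    def run_parsers_on_path(self, path_root, required_fields=None):
        if required_fields is None:
            # A fresh list per call instead of one shared default object.
            required_fields = ['date_filed', 'case_name', 'docket_number']
        ...
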
Example #15
    def run_parsers_on_path(self, path_root,
                            required_fields=[
                                'date_filed', 'case_name', 'docket_number']):
        """Test all the parsers, faking the network query."""
        paths = []
        for root, dirnames, filenames in os.walk(path_root):
            for filename in fnmatch.filter(filenames, '*.html'):
                paths.append(os.path.join(root, filename))
        paths.sort()
        path_max_len = max(len(path) for path in paths) + 2
        for i, path in enumerate(paths):

            sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
            t1 = time.time()
            dirname, filename = os.path.split(path)
            filename_sans_ext = filename.split('.')[0]
            json_path = os.path.join(dirname, '%s.json' % filename_sans_ext)
            court = filename_sans_ext.split('_')[0]

            report = DocketReport(court)
            with open(path, 'rb') as f:
                report._parse_text(f.read().decode('utf-8'))
            data = report.data

            if data != {}:
                # If the docket is a valid docket, make sure some required
                # fields are populated.
                for field in required_fields:
                    self.assertTrue(
                        data[field],
                        msg="Unable to find truthy value for field %s" % field,
                    )

                self.assertEqual(data['court_id'], court)

                # Party-specific tests...
                for party in data['parties']:
                    self.assertTrue(
                        party.get('name', False),
                        msg="Every party must have a name attribute. Did not "
                            "get a value for:\n\n%s" % party
                    )
                    # Protect against effed up adversary proceedings cases that
                    # don't parse properly. See: cacb, 2:08-ap-01570-BB
                    self.assertNotIn('----', party['name'])

            if not os.path.isfile(json_path):
                bar = "*" * 50
                print("\n\n%s\nJSON FILE DID NOT EXIST. CREATING IT AT:"
                      "\n\n  %s\n\n"
                      "Please test the data in this file before assuming "
                      "everything worked.\n%s\n" % (bar, json_path, bar))
                with open(json_path, 'w') as f:
                    json.dump(data, f, indent=2, sort_keys=True)
                    #self.assertFalse(True)
                    continue

            with open(json_path) as f:
                j = json.load(f)
                if j != {}:
                    # Compare docket entries and parties first, for easier
                    # debugging, then compare whole objects to be sure.
                    self.assertEqual(j['docket_entries'], data['docket_entries'])
                    self.assertEqual(j['parties'], data['parties'])
                self.assertEqual(j, data)
            t2 = time.time()

            duration = t2 - t1
            warn_or_crash_slow_parser(duration, max_duration=1)
            sys.stdout.write("✓ - %0.1fs\n" % (t2-t1))
Example #16
 def roundtrip(value):
     """Dump a value through jsondate and load it straight back."""
     fileobj = six.StringIO()
     jsondate.dump(value, fileobj)
     fileobj.seek(0)
     return jsondate.load(fileobj)
Example #17
 def test_dump_datetime_roundtrips(self):
     orig_dict = dict(created_at=datetime.date(2011, 1, 1))
     # io.StringIO replaces the Python 2-only StringIO.StringIO module.
     fileobj = io.StringIO()
     jsondate.dump(orig_dict, fileobj)
     fileobj.seek(0)
     self.assertEqual(orig_dict, jsondate.load(fileobj))
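
For context, jsondate is a drop-in wrapper around the standard json module that writes datetime.date and datetime.datetime values as ISO 8601 strings and converts them back into date objects on load, which is why the roundtrip assertions above hold. A small usage sketch of the roundtrip helper from Example #16:

import datetime

original = {'created_at': datetime.date(2011, 1, 1)}
# The date survives the dump/load cycle as a datetime.date, not a string.
assert roundtrip(original) == original
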