Example #1
0
def fetch(fmgr):
    """
    Fetch verdicts for every pending FJUD record held by ``fmgr``.

    Records are taken from ``fmgr`` one at a time for as long as
    ``fmgr.has_fjud_record()`` is true.  Each record's court, needle and
    start year are passed to one shared ``FJUDCrawler``; the record is
    removed from ``fmgr`` once the crawl call has returned, and the loop
    then pauses for 60 seconds before handling the next record.

    The crawler's ``quit()`` is always called, including when a crawl or a
    manager call raises.  The exception itself still propagates to the
    caller.
    """
    crawler = FJUDCrawler()
    try:
        while fmgr.has_fjud_record():
            rec = fmgr.pop_fjud_record()
            crawler.fetch(court=rec.court,
                          needle=rec.needle,
                          year=rec.get_start_year())
            # Reached only when the fetch above returned without raising.
            fmgr.remove_fjud_record(rec)
            print("Sleep 60s for polite interaction with the server.")
            time.sleep(1*60)
    finally:
        # Shut the crawler down on every exit path; previously an exception
        # in the loop skipped quit() (presumably leaving the web driver
        # running -- confirm against FJUDCrawler.quit).
        crawler.quit()
Example #2
0
class FJUDCrawlerTest(unittest.TestCase):
    def setUp(self):
        self.crawler = FJUDCrawler(enable_unittest=True)
        self.crawler.driver = WebdriverStub()

    def tearDown(self):
        self.crawler.quit()

    def test_is_already_written(self):
        test_dir = os.path.abspath(os.path.join(__file__, ".."))
        self.assertFalse(fjud.is_already_written(ur"89,台上,2581", test_dir))
        self.assertTrue(fjud.is_already_written(ur"query_result.html",
                        test_dir))
        self.assertFalse(fjud.is_already_written(ur"query_result.html"))

    def test_save_as(self):
        output_path = fjud.get_default_output_path()
        serial = ur"89,台上,2581____test_only"
        fjud.save_as(serial, "test only")
        output_file = os.path.join(output_path,
                                   fjud.normalize_serial(serial))
        self.assertTrue(os.path.isfile(output_file))
        os.unlink(output_file)
        self.assertFalse(os.path.isfile(output_file))

    def test_get_courts(self):
        all_courts = fjud.get_courts()
        self.assertTrue(len(all_courts) > 0)

    def test_normalize(self):
        serial = ur"89,台上,2581"
        self.assertEqual(
            serial,
            fjud.denormalize_serial(fjud.normalize_serial(serial)))

    @unittest.skip("disable temporarily")
    def disable_test_query(self):
        crawler = FJUDCrawler()
        ret = crawler.query()
        with file("ret.txt", "w") as fp:
            fp.write(ret)
        print ret
def main():
    """
    Search for bad verdicts and download them again.

    Serials come from ``files_without_word(u"<pre>")``.  Each one is decoded
    from UTF-8 and fetched again with ``overwrite=True`` using a fresh
    ``FJUDCrawler``, followed by a 3 second pause.  The crawler is always
    quit, even when the fetch raises; the exception still propagates.
    """

    # If <pre> exists, the file has verdict text in it.
    serials = files_without_word(u"<pre>")
    for serial in serials:
        serial = serial.decode("utf-8")
        crawler = FJUDCrawler()
        # Serials containing "台上" are requested from the Supreme Court
        # ("最高法院"); every other serial goes to the Taiwan High Court
        # ("臺灣高等法院").
        court = u"最高法院" if u"台上" in serial else u"臺灣高等法院"
        try:
            crawler.fetch(court=court, serial=serial, overwrite=True)
        finally:
            # Previously a failing fetch skipped quit() for this crawler.
            crawler.quit()
        time.sleep(3)
Example #4
0
 def setUp(self):
     """Build the crawler under test in unittest mode with a stub driver."""
     # Bind the instance attribute first, then replace the driver on it.
     crawler = self.crawler = FJUDCrawler(enable_unittest=True)
     crawler.driver = WebdriverStub()
Example #5
0
 def disable_test_query(self):
     """
     Run ``query()`` on a crawler built with default arguments and dump the
     result to ``ret.txt`` and to stdout.

     The name does not start with ``test`` so it is not collected by default.
     """
     crawler = FJUDCrawler()
     try:
         ret = crawler.query()
     finally:
         # The locally created crawler was never quit before; release it on
         # every exit path.
         crawler.quit()
     # open() is the documented way to open files; file() is deprecated.
     with open("ret.txt", "w") as fp:
         fp.write(ret)
     print(ret)