def setup_method(self, method):
    """Build the Defendant_Information section text for the test docket.

    Runs ``sectionize.parse`` on the docket's PDF under
    ``tests/testDocs/testPDFs``, converts the stitched result to an XML
    tree, stores the stripped text of the first ``Defendant_Information``
    section on ``self.section_text``, and writes that same text to
    ``tests/testDocs/defendantSectionTexts/<docket>.txt``.

    Args:
        method: Not used in this body.
            NOTE(review): presumably the test method the runner is about
            to execute (pytest-style hook) -- confirm.

    Raises:
        IndexError: If the XML has no ``Defendant_Information`` section.
    """
    self.docket = self.docket_to_test = "CP-51-CR-0000001-2011"
    self.birth_date = "07/24/1964"
    path = "tests/testDocs/testPDFs/%s.pdf" % self.docket_to_test
    text = sectionize.parse(path)
    sections = etree.XML(sectionize.stitch(text))
    self.section_text = sections.xpath("//section[@name='Defendant_Information']")[0].text.strip()
    # The file is only written, so plain "w" is enough; the ``with`` block
    # closes it on exit, which makes an explicit close() unnecessary.
    with open("tests/testDocs/defendantSectionTexts/%s.txt" % self.docket_to_test, "w") as f:
        f.write(self.section_text)
# Example no. 2
# 0
def just_parse():
    """Time ``parse()`` over every PDF matching ``./testDocs/test_two/pdfs/*.pdf``.

    Calls ``logging.basicConfig`` with ``parse_timing.md`` as the log file,
    logs a header line of timing column names, runs ``parse`` on each
    matching file, and prints the total elapsed time and the average time
    per docket.  When no file matches, the average is skipped instead of
    dividing by zero.

    Original author's note: this is the slow one.  For 115 dockets, it took
    80 seconds, or .76 seconds per docket.  This is the stumbling block in
    the whole thing.  Why is this method so slow?!
    """
    logging.basicConfig(filename="parse_timing.md", level=logging.DEBUG)
    # NOTE(review): presumably parse() logs one row per docket under these
    # column names -- confirm against parse().
    logging.info("pdf2text_time, create_grammar_time, parse_grammar_time, node_visitor_time")
    print("Testing time of parse(), which includes pdf2text.")
    pattern = "./testDocs/test_two/pdfs/*.pdf"
    start = datetime.now()
    counter = 0
    for pdf_path in glob.iglob(pattern):
        parse(pdf_path)
        counter += 1
    end = datetime.now()
    print("Finished.")
    # total_seconds() keeps fractions of a second and any whole days;
    # timedelta.seconds is only the seconds component of the delta.
    duration = (end - start).total_seconds()
    print("Processed {} dockets in {} seconds.".format(counter, duration))
    if counter:
        print("{} seconds per docket.".format(duration / counter))
    else:
        # Nothing was parsed, so there is no per-docket average to report.
        print("No dockets matched {}; nothing to average.".format(pattern))
    print("Thanks for playing our game.")