Ejemplo n.º 1
0
 def test_external_process_works(self):
     """Feed three lines to ``grep 3`` and expect only the matching line back."""
     sample = 'Line 1\nLine 2\nLine 3\n'
     result = utils.external_process(['grep', '3'], input_data=sample)
     exit_code, stdout, stderr = result
     # A match was found, so grep is expected to report success.
     self.assertEqual(exit_code, 0)
     self.assertEqual(stdout, 'Line 3\n')
     self.assertEqual(stderr, '')
Ejemplo n.º 2
0
   def extract(self, data, deps):
      """Collect the lines of ``data`` that start with a digit.

      ``data`` is handed to ``awk`` as ``input_data`` through
      ``utils.external_process``; the awk program prints every line
      matching ``/^[0-9]/``. Each non-empty line of awk's output becomes
      one ``<line>`` child of an ``<extraction>`` root element.

      Args:
         data: input passed to awk as ``input_data``.
         deps: not used by this method.

      Returns:
         An ``ExtractorResult`` whose ``xml_result`` is the ``<extraction>``
         element.

      Raises:
         RunnableError: if ``utils.external_process`` raises
            ``subprocess.TimeoutExpired`` (called with ``timeout=5``).
      """
      try:
         # Only stdout is needed; status and stderr are deliberately ignored.
         _status, stdout, _stderr = utils.external_process(
            ['awk', '/^[0-9]/ {print;}', '-'], input_data=data, timeout=5)
      except subprocess.TimeoutExpired as err:
         # Chain the original exception so the timeout details stay visible.
         raise RunnableError('awk timed out') from err

      root = ET.Element('extraction')
      for line in stdout.split("\n"):
         if not line:
            # Skip empty pieces, e.g. the one after a trailing newline.
            continue
         ET.SubElement(root, 'line').text = line

      return ExtractorResult(xml_result=root)
Ejemplo n.º 3
0
    def extract(self, data, deps):
        """Build an XML listing of the digit-led lines found in ``data``.

        Runs ``awk '/^[0-9]/ {print;}' -`` via ``utils.external_process``
        with ``data`` as its ``input_data``, then wraps every non-empty
        output line in a ``<line>`` element under an ``<extraction>`` root.

        Args:
            data: input passed to awk as ``input_data``.
            deps: not used by this method.

        Returns:
            An ``ExtractorResult`` carrying the ``<extraction>`` element as
            ``xml_result``.

        Raises:
            RunnableError: if the awk call raises
                ``subprocess.TimeoutExpired`` (it is invoked with
                ``timeout=5``).
        """
        awk_command = ['awk', '/^[0-9]/ {print;}', '-']
        try:
            # The exit status and stderr are not consulted by this extractor.
            _, stdout, _ = utils.external_process(
                awk_command, input_data=data, timeout=5)
        except subprocess.TimeoutExpired as timeout_error:
            # Explicit chaining keeps the underlying timeout in the traceback.
            raise RunnableError('awk timed out') from timeout_error

        root = ET.Element('extraction')
        # Splitting on "\n" leaves an empty piece after a trailing newline;
        # the filter drops it along with any other blank lines.
        for matched in filter(None, stdout.split("\n")):
            ET.SubElement(root, 'line').text = matched

        return ExtractorResult(xml_result=root)
Ejemplo n.º 4
0
   def extract(self, data, dep_results):
      """Extract plain text from ``data`` by running PDFBox's ExtractText.

      ``data`` is written to a temporary ``.pdf`` file with
      ``utils.temp_file``, the PDFBox jar named by ``config.PDF_BOX_JAR`` is
      run on that file with console output in UTF-8, and the temporary
      file is removed afterwards whether or not the run succeeded.

      Args:
         data: content written to the temporary file (presumably the raw
            PDF document -- confirm against callers).
         dep_results: not used by this method.

      Returns:
         An ``ExtractorResult`` whose ``xml_result`` is a ``<file>`` element
         with text ``plain_text.txt`` and whose ``files`` maps
         ``'plain_text.txt'`` to PDFBox's standard output.

      Raises:
         RunnableError: if the PDFBox call raises
            ``subprocess.TimeoutExpired`` (it is invoked with
            ``timeout=30``) or returns a non-zero status.
      """
      file_path = utils.temp_file(data, suffix='.pdf')

      command = [
         'java', '-jar', config.PDF_BOX_JAR, 'ExtractText',
         '-console', '-encoding', 'UTF-8', file_path,
      ]
      try:
         status, stdout, stderr = utils.external_process(command, timeout=30)
      except subprocess.TimeoutExpired as err:
         # Chain the original exception so the timeout details stay visible.
         raise RunnableError('PDFBox timed out while processing document') from err
      finally:
         # Always clean up the temporary file, on success and on failure.
         os.remove(file_path)

      if status != 0:
         raise RunnableError('PDFBox returned error status code {0}.\nPossible error:\n{1}'.format(status, stderr))

      # The XML result only points at the file that holds the plain text.
      root = ET.Element('file')
      root.text = 'plain_text.txt'

      files = {'plain_text.txt': stdout}

      return ExtractorResult(xml_result=root, files=files)
Ejemplo n.º 5
0
 def test_external_process_works(self):
    """Check status, stdout and stderr of ``grep 3`` run on three lines."""
    command = ['grep', '3']
    exit_code, captured_out, captured_err = utils.external_process(
       command, input_data='Line 1\nLine 2\nLine 3\n')
    self.assertEqual(exit_code, 0)
    # Only the line containing "3" should come back.
    self.assertEqual(captured_out, 'Line 3\n')
    self.assertEqual(captured_err, '')
Ejemplo n.º 6
0
 def test_external_process_timesout(self):
    """``sleep 3`` with ``timeout=2`` must raise; without a timeout it must not."""
    with self.assertRaises(subprocess.TimeoutExpired):
       utils.external_process(['sleep', '3'], timeout=2)
    # No timeout is given here, so the call is expected to finish without raising.
    utils.external_process(['sleep', '3'])
Ejemplo n.º 7
0
 def test_external_process_returns_status_code(self):
    """The first element of the result must be 0 for ``true`` and 1 for ``false``."""
    expectations = ((['true'], 0), (['false'], 1))
    for command, expected_status in expectations:
       status, out, err = utils.external_process(command)
       self.assertEqual(status, expected_status)
Ejemplo n.º 8
0
 def test_external_process_timesout(self):
     """Verify that a too-short timeout raises ``subprocess.TimeoutExpired``."""
     expected_error = subprocess.TimeoutExpired
     with self.assertRaises(expected_error):
         # The timeout value (2) is smaller than the sleep argument (3).
         utils.external_process(['sleep', '3'], timeout=2)
     # Same command without a timeout: it must complete without raising.
     utils.external_process(['sleep', '3'])
Ejemplo n.º 9
0
 def test_external_process_returns_status_code(self):
     """Check the status element of the result for ``true`` and ``false``."""
     # ``true`` is expected to yield status 0.
     success_code, _out, _err = utils.external_process(['true'])
     self.assertEqual(success_code, 0)
     # ``false`` is expected to yield status 1.
     failure_code, _out, _err = utils.external_process(['false'])
     self.assertEqual(failure_code, 1)