def test_external_process_works(self):
    """Feed three lines to ``grep 3`` and verify status, stdout and stderr."""
    command = ['grep', '3']
    piped_text = 'Line 1\nLine 2\nLine 3\n'

    result = utils.external_process(command, input_data=piped_text)
    exit_code, captured_out, captured_err = result

    # Only the third line contains a "3", so grep should succeed with
    # exactly that line on stdout and nothing on stderr.
    self.assertEqual(exit_code, 0)
    self.assertEqual(captured_out, 'Line 3\n')
    self.assertEqual(captured_err, '')
def extract(self, data, deps):
    """Keep the lines of ``data`` that start with a digit, wrapped in XML.

    ``data`` is piped to ``awk '/^[0-9]/ {print;}' -`` through
    ``utils.external_process`` with a 5 second timeout. Every non-empty
    line awk prints becomes a ``<line>`` child of an ``<extraction>``
    root element.

    Args:
        data: Input handed to awk as ``input_data``.
        deps: Unused by this extractor.

    Returns:
        An ``ExtractorResult`` whose ``xml_result`` is the ``<extraction>``
        element.

    Raises:
        RunnableError: If awk does not finish within the timeout. The
            original ``subprocess.TimeoutExpired`` is attached as the cause.
    """
    try:
        # Only stdout is consumed; exit status and stderr are not inspected.
        _status, stdout, _stderr = utils.external_process(
            ['awk', '/^[0-9]/ {print;}', '-'], input_data=data, timeout=5)
    except subprocess.TimeoutExpired as exc:
        # Chain explicitly so the timeout details stay visible in tracebacks.
        raise RunnableError('awk timed out') from exc

    root = ET.Element('extraction')
    for line in stdout.split("\n"):
        if not line:
            # Skip blanks, including the empty tail after a final newline.
            continue
        ele = ET.SubElement(root, 'line')
        ele.text = line

    return ExtractorResult(xml_result=root)
def extract(self, data, deps):
    """Run awk over ``data`` and return the digit-led lines as XML.

    The awk program ``/^[0-9]/ {print;}`` reads ``data`` from stdin (``-``)
    via ``utils.external_process`` and is given 5 seconds to finish. Each
    non-empty output line is stored as the text of a ``<line>`` element
    under an ``<extraction>`` root.

    Args:
        data: Input passed to awk as ``input_data``.
        deps: Not used here.

    Returns:
        An ``ExtractorResult`` carrying the ``<extraction>`` element as
        ``xml_result``.

    Raises:
        RunnableError: When awk exceeds the timeout; chained from the
            underlying ``subprocess.TimeoutExpired``.
    """
    awk_command = ['awk', '/^[0-9]/ {print;}', '-']
    try:
        # Exit status and stderr are returned but not needed by this extractor.
        _status, stdout, _stderr = utils.external_process(
            awk_command, input_data=data, timeout=5)
    except subprocess.TimeoutExpired as exc:
        # Preserve the timeout as the explicit cause of the RunnableError.
        raise RunnableError('awk timed out') from exc

    # Drop empty strings, e.g. the one produced by a trailing newline.
    lines = [line for line in stdout.split("\n") if line]

    root = ET.Element('extraction')
    for line in lines:
        ele = ET.SubElement(root, 'line')
        ele.text = line
    return ExtractorResult(xml_result=root)
def extract(self, data, dep_results):
    """Extract plain text from ``data`` by running PDFBox's ExtractText.

    ``data`` is written to a temporary ``.pdf`` file with
    ``utils.temp_file``, PDFBox is run on it with a 30 second timeout,
    and the temporary file is removed whether or not the run succeeds.

    Args:
        data: Document content to write to the temporary PDF file.
        dep_results: Unused by this extractor.

    Returns:
        An ``ExtractorResult`` whose XML is a ``<file>`` element naming
        ``plain_text.txt`` and whose ``files`` mapping holds PDFBox's
        stdout under that name.

    Raises:
        RunnableError: If PDFBox times out (chained from the underlying
            ``subprocess.TimeoutExpired``) or exits with a non-zero status.
    """
    file_path = utils.temp_file(data, suffix='.pdf')
    try:
        status, stdout, stderr = utils.external_process(
            ['java', '-jar', config.PDF_BOX_JAR, 'ExtractText',
             '-console', '-encoding', 'UTF-8', file_path],
            timeout=30)
    except subprocess.TimeoutExpired as te:
        # Chain explicitly so the timeout details stay visible in tracebacks.
        raise RunnableError('PDFBox timed out while processing document') from te
    finally:
        # Always clean up the temporary PDF, even on timeout.
        os.remove(file_path)

    if status != 0:
        raise RunnableError('PDFBox returned error status code {0}.\nPossible error:\n{1}'.format(status, stderr))

    # create xml result file that just points towards the file with plain text results
    root = ET.Element('file')
    root.text = 'plain_text.txt'
    files = {'plain_text.txt': stdout}

    return ExtractorResult(xml_result=root, files=files)
def test_external_process_timesout(self):
    """A 3 second sleep must trip a 2 second timeout, but not an absent one."""
    sleep_command = ['sleep', '3']

    with self.assertRaises(subprocess.TimeoutExpired):
        utils.external_process(sleep_command, timeout=2)

    # This shouldn't timeout and thus shouldn't raise an error
    utils.external_process(['sleep', '3'])
def test_external_process_returns_status_code(self):
    """The first element of the result mirrors the command's exit status."""
    # `true` exits with status 0.
    success_code, _out, _err = utils.external_process(['true'])
    self.assertEqual(success_code, 0)

    # `false` exits with status 1.
    failure_code, _out, _err = utils.external_process(['false'])
    self.assertEqual(failure_code, 1)