import sys

from ConvStorage.conversion_client import TDocConversionClient
from common.logging_wrapper import setup_logging


def main():
    """Run the document conversion client from the command line.

    Sets up logging to ``convert_pdf.log``, builds a TDocConversionClient
    from the command-line arguments, starts its conversion thread and
    processes the files.

    Returns:
        The value returned by ``process_files()``, used as the exit code.
    """
    conversion_logger = setup_logging(log_file_name="convert_pdf.log")
    cli_options = TDocConversionClient.parse_args(sys.argv[1:])
    converter = TDocConversionClient(cli_options, conversion_logger)
    converter.start_conversion_thread()
    return converter.process_files()


if __name__ == '__main__':
    sys.exit(main())
class TTestConvBase(TestCase):
    """Base class for tests that need a conversion server and a client.

    It prepares a per-test data folder, creates an empty conversion
    database, starts a TConvertProcessor either in a thread of this
    process or as a separate ``conv_storage_server.py`` process, and offers
    helpers to run a TDocConversionClient against it and to shut
    everything down again.
    """

    def __init__(self, methodName='runTest'):
        """Initialise the fixture state.

        Raises:
            Exception: if the ``pdf.ocr`` or ``pdf.ocr.out`` folder next to
                this file does not exist.
        """
        super().__init__(methodName)
        self.port = 8081
        self.name = None
        self.data_folder = None
        self.server_address = "localhost:{}".format(self.port)
        # Exactly one of (server, server_thread) or server_process is set by
        # setup_server(), depending on its start_process flag.
        self.server = None
        self.server_thread = None
        self.server_process = None
        self.client = None
        self.converters = TExternalConverters(enable_smart_parser=False,
                                              enable_calibre=False,
                                              enable_cat_doc=False,
                                              enable_xls2csv=False,
                                              enable_office_2_txt=False)

        test_dir = os.path.dirname(__file__)
        self.pdf_ocr_folder = os.path.join(test_dir, "pdf.ocr")
        self.pdf_ocr_out_folder = os.path.join(test_dir, "pdf.ocr.out")
        if not os.path.exists(self.pdf_ocr_folder) or not os.path.exists(
                self.pdf_ocr_out_folder):
            raise Exception(
                "run python update_finereader_task.py, and upload test.hft to finreader hot folder"
            )

        self.project_file = "converted_file_storage.json"
        self.server_args = None
        # Number of clients created so far; used to build unique logger names.
        self.client_count = 0

    def start_server_thread(self):
        """Create a TConvertProcessor from ``self.server_args`` and run it in a new thread."""
        self.server = TConvertProcessor(
            TConvertProcessor.parse_args(self.server_args))
        self.server_thread = threading.Thread(target=start_server,
                                              args=(self.server, ))
        self.server_thread.start()

    def setup_server(self, name, addit_server_args=None, start_process=False):
        """Prepare a clean working folder and start the conversion server.

        Args:
            name: test name; the working folder is ``data.<name>`` next to
                this file.
            addit_server_args: extra command-line arguments appended to the
                server arguments (``None`` means no extra arguments).
            start_process: if true, start ``conv_storage_server.py`` as a
                separate process; otherwise run the server in a thread.

        Side effects:
            Changes the current working directory to the data folder,
            recreates the input/database folders, removes an old log file
            and clears both OCR folders.
        """
        # None instead of a mutable default avoids sharing one list object
        # between calls.
        extra_args = [] if addit_server_args is None else list(addit_server_args)

        self.name = name
        self.data_folder = os.path.join(os.path.dirname(__file__),
                                        "data.{}".format(name))
        recreate_folder(self.data_folder)
        os.chdir(self.data_folder)

        # Relative path: resolved against the data folder we just entered.
        input_files = "input_files"
        recreate_folder(input_files)

        db_converted_files = os.path.join(self.data_folder,
                                          "db_converted_files")
        recreate_folder(db_converted_files)

        db_input_files = os.path.join(self.data_folder, "db_input_files")
        recreate_folder(db_input_files)

        log_file = "db_conv.log"
        if os.path.exists(log_file):
            os.unlink(log_file)

        clear_folder_with_retry(self.pdf_ocr_folder)
        clear_folder_with_retry(self.pdf_ocr_out_folder)
        TConvertStorage.create_empty_db(db_input_files, db_converted_files,
                                        self.project_file)

        self.server_args = [
            "--server-address", self.server_address,
            '--logfile', log_file,
            '--db-json', self.project_file,
            '--disable-killing-winword',
            '--ocr-input-folder', self.pdf_ocr_folder,
            '--ocr-output-folder', self.pdf_ocr_out_folder,
            '--disable-telegram',
        ] + extra_args

        if start_process:
            server_script = os.path.join(os.path.dirname(__file__), "..",
                                         "conv_storage_server.py")
            args = ["python", server_script] + self.server_args
            self.server_process = subprocess.Popen(args,
                                                   stderr=subprocess.DEVNULL,
                                                   stdout=subprocess.DEVNULL)
        else:
            self.start_server_thread()

    def restart_server(self):
        """Stop the in-thread HTTP server and start a fresh one with the same arguments."""
        self.server.stop_http_server()
        # join(0) returns immediately; it does not wait for the thread to end.
        self.server_thread.join(0)
        self.start_server_thread()

    def process_with_client(self,
                            input_files,
                            timeout=None,
                            rebuild=False,
                            skip_receiving=False,
                            log_name="client",
                            input_task_timeout=5):
        """Run a conversion client over ``input_files`` against the test server.

        Args:
            input_files: list of input file paths passed to the client.
            timeout: optional conversion timeout; when given it is passed
                as an additional ``--conversion-timeout`` after the default
                of 180.
            rebuild: pass ``--rebuild`` to the client.
            skip_receiving: pass ``--skip-receiving`` to the client.
            log_name: logger name; the default ``"client"`` gets the
                current client counter appended.
            input_task_timeout: value assigned to the client's
                ``input_task_timeout`` attribute.

        Returns:
            The expected output file names (``<basename>.docx``), relative
            to the current directory. Stale files with these names are
            removed before the client runs.
        """
        output_files = [os.path.basename(i) + ".docx" for i in input_files]
        for o in output_files:
            if os.path.exists(o):
                os.unlink(o)

        client_args = [
            "--server-address", self.server_address,
            "--conversion-timeout", "180",
            "--output-folder", ".",
        ] + input_files
        if timeout is not None:
            client_args.extend(['--conversion-timeout', str(timeout)])
        if rebuild:
            client_args.append('--rebuild')
        if skip_receiving:
            client_args.append('--skip-receiving')

        if self.client_count >= 0 and log_name == "client":
            log_name = log_name + str(self.client_count)
        logger = setup_logging(logger_name=log_name)
        try:
            self.client_count += 1
            self.client = TDocConversionClient(
                TDocConversionClient.parse_args(client_args), logger=logger)
            self.client.input_task_timeout = input_task_timeout
            self.client.start_conversion_thread()
            self.client.process_files()
            return output_files
        finally:
            # Always release the logger, even if the client failed.
            close_logger(logger)

    def list2reason(self, exc_list):
        """Return the message of the last entry of ``exc_list`` if it belongs to this test.

        Args:
            exc_list: list of ``(test, message)`` pairs.

        Returns:
            The message of the last pair when its test is ``self``;
            otherwise ``None`` (also for an empty list).
        """
        if exc_list and exc_list[-1][0] is self:
            return exc_list[-1][1]
        return None

    def tear_down(self):
        """Stop the client and the server and return to this file's folder."""
        # NOTE(review): _feedErrorsToResult and _outcome.errors are private
        # unittest internals - confirm they exist on the target Python version.
        result = self.defaultTestResult()
        self._feedErrorsToResult(result, self._outcome.errors)
        error = self.list2reason(result.errors)
        failure = self.list2reason(result.failures)
        # True when the test neither errored nor failed; not used in the
        # visible part of this method.
        delete_temp_files = not error and not failure

        if self.client is not None:
            self.client.stop_conversion_thread(1)
            self.client = None

        if self.server is not None:
            self.server.stop_http_server()
            self.server_thread.join(0)
            self.server = None
        elif self.server_process is not None:
            # Guarded: nothing to kill if setup_server() never started a
            # process. wait() reaps the killed child.
            self.server_process.kill()
            self.server_process.wait()
            self.server_process = None

        time.sleep(5)

        os.chdir(os.path.dirname(__file__))