Esempio n. 1
0
from ConvStorage.conversion_client import TDocConversionClient
from common.logging_wrapper import setup_logging

import sys

if __name__ == '__main__':
    # Command-line entry point: convert the files named on the command line
    # through the conversion client and exit with the client's result code.
    logger = setup_logging(log_file_name="convert_pdf.log")
    cli_args = TDocConversionClient.parse_args(sys.argv[1:])
    client = TDocConversionClient(cli_args, logger)
    # The conversion thread must be running before files are processed.
    client.start_conversion_thread()
    sys.exit(client.process_files())
class TTestConvBase(TestCase):
    def __init__(self, methodName='runTest'):
        """Initialise the fixture state shared by conversion-server tests.

        Only attributes are set here and the OCR folders are checked; no
        server or client is started.

        Args:
            methodName: name of the test method, forwarded unchanged to the
                ``TestCase`` constructor.

        Raises:
            Exception: if the ``pdf.ocr`` or ``pdf.ocr.out`` folder does not
                exist next to this file.
        """
        super().__init__(methodName)
        here = os.path.dirname(__file__)

        # Server/client state; filled in later by setup_server() and
        # process_with_client().
        self.port = 8081
        self.name = None
        self.data_folder = None
        self.server_address = "localhost:{}".format(self.port)
        self.server = None
        self.server_thread = None
        self.server_process = None
        self.client = None

        # Every optional external converter is switched off for these tests.
        self.converters = TExternalConverters(
            enable_smart_parser=False,
            enable_calibre=False,
            enable_cat_doc=False,
            enable_xls2csv=False,
            enable_office_2_txt=False,
        )

        # OCR input/output folders live beside this file and must already
        # exist; they are not created here.
        self.pdf_ocr_folder = os.path.join(here, "pdf.ocr")
        self.pdf_ocr_out_folder = os.path.join(here, "pdf.ocr.out")
        for required_folder in (self.pdf_ocr_folder, self.pdf_ocr_out_folder):
            if not os.path.exists(required_folder):
                raise Exception(
                    "run python update_finereader_task.py, and upload test.hft to finreader hot folder"
                )

        self.project_file = "converted_file_storage.json"
        self.server_args = None
        self.client_count = 0

    def start_server_thread(self):
        """Build a conversion server from ``self.server_args`` and run it in a new thread.

        The server object is stored in ``self.server`` and the thread that
        runs ``start_server`` on it is stored in ``self.server_thread``.
        """
        parsed_args = TConvertProcessor.parse_args(self.server_args)
        self.server = TConvertProcessor(parsed_args)
        worker = threading.Thread(target=start_server, args=(self.server, ))
        self.server_thread = worker
        worker.start()

    def setup_server(self,
                     name,
                     addit_server_args=list(),
                     start_process=False):
        self.name = name
        self.data_folder = os.path.join(os.path.dirname(__file__),
                                        "data.{}".format(name))

        recreate_folder(self.data_folder)

        os.chdir(self.data_folder)
        input_files = "input_files"
        recreate_folder(input_files)

        db_converted_files = os.path.join(self.data_folder,
                                          "db_converted_files")
        recreate_folder(db_converted_files)

        db_input_files = os.path.join(self.data_folder, "db_input_files")
        recreate_folder(db_input_files)

        log_file = "db_conv.log"
        if os.path.exists(log_file):
            os.unlink(log_file)

        clear_folder_with_retry(self.pdf_ocr_folder)
        clear_folder_with_retry(self.pdf_ocr_out_folder)
        TConvertStorage.create_empty_db(db_input_files, db_converted_files,
                                        self.project_file)

        self.server_args = [
            "--server-address", self.server_address, '--logfile', log_file,
            '--db-json', self.project_file, '--disable-killing-winword',
            '--ocr-input-folder', self.pdf_ocr_folder, '--ocr-output-folder',
            self.pdf_ocr_out_folder, '--disable-telegram'
        ] + addit_server_args

        if start_process:
            server_script = os.path.join(os.path.dirname(__file__), "..",
                                         "conv_storage_server.py")
            args = ["python", server_script] + self.server_args
            self.server_process = subprocess.Popen(args,
                                                   stderr=subprocess.DEVNULL,
                                                   stdout=subprocess.DEVNULL)
        else:
            self.start_server_thread()

    def restart_server(self):
        self.server.stop_http_server()
        self.server_thread.join(0)
        self.start_server_thread()

    def process_with_client(self,
                            input_files,
                            timeout=None,
                            rebuild=False,
                            skip_receiving=False,
                            log_name="client",
                            input_task_timeout=5):
        """Run a conversion client over ``input_files`` against the test server.

        Args:
            input_files: list of input file paths; appended to the client
                command line as positional arguments.
            timeout: if not ``None``, passed as a second
                ``--conversion-timeout`` value after the default ``180``.
            rebuild: if true, add ``--rebuild`` to the client arguments.
            skip_receiving: if true, add ``--skip-receiving``.
            log_name: logger name; the default ``"client"`` gets the current
                client counter appended.
            input_task_timeout: value assigned to the client's
                ``input_task_timeout`` attribute before it starts.

        Returns:
            The expected output file names, i.e. the base name of each input
            with ``.docx`` appended, relative to the current directory.
        """
        output_files = [os.path.basename(path) + ".docx" for path in input_files]

        # Drop leftovers from an earlier run before the client writes to ".".
        for stale_output in output_files:
            if os.path.exists(stale_output):
                os.unlink(stale_output)

        client_args = [
            "--server-address", self.server_address,
            "--conversion-timeout", "180",
            "--output-folder", ".",
        ] + input_files
        if timeout is not None:
            client_args += ['--conversion-timeout', str(timeout)]
        if rebuild:
            client_args += ['--rebuild']
        if skip_receiving:
            client_args += ['--skip-receiving']

        # Each default-named client gets its own numbered logger name.
        if log_name == "client" and self.client_count >= 0:
            log_name = "{}{}".format(log_name, self.client_count)

        logger = setup_logging(logger_name=log_name)
        try:
            self.client_count += 1
            parsed_args = TDocConversionClient.parse_args(client_args)
            self.client = TDocConversionClient(parsed_args, logger=logger)
            self.client.input_task_timeout = input_task_timeout
            self.client.start_conversion_thread()
            self.client.process_files()
            return output_files
        finally:
            # The logger is closed whether or not the conversion succeeded.
            close_logger(logger)

    def list2reason(self, exc_list):
        if exc_list and exc_list[-1][0] is self:
            return exc_list[-1][1]

    def tear_down(self):
        result = self.defaultTestResult()
        self._feedErrorsToResult(result, self._outcome.errors)
        error = self.list2reason(result.errors)
        failure = self.list2reason(result.failures)
        delete_temp_files = not error and not failure

        if self.client is not None:
            self.client.stop_conversion_thread(1)
            self.client = None

        if self.server is not None:
            self.server.stop_http_server()
            self.server_thread.join(0)
            self.server = None
        else:
            self.server_process.kill()
            self.server_process = None

        time.sleep(5)

        os.chdir(os.path.dirname(__file__))