def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None):
    """Convert a word-processing file to PDF with headless LibreOffice.

    The input is copied into a private temporary directory, LibreOffice is
    run there (up to five attempts), and the resulting PDF is optionally
    passed to pdf_to_pdfa() and pdf_encrypt() before being copied to
    out_file.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the finished PDF is copied to.
        pdfa: if true, pdf_to_pdfa() is applied to the PDF.
        password: if truthy, pdf_encrypt() is applied with this password.

    Returns:
        True if LibreOffice exited with status 0, produced the PDF, and the
        PDF was copied to out_file; False otherwise.
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    # try/finally so the working directory is removed even when copying,
    # converting or post-processing raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            result = subprocess.call(subprocess_arguments, cwd=tempdir)
            # The output file, not the exit status, decides whether to retry.
            if os.path.isfile(to_file):
                break
            result = 1
            tries += 1
            # Randomized back-off, but only if another attempt will follow.
            if tries < max_tries:
                time.sleep(2 + tries*random.random())
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_references=False):
    """Convert a word-processing file to PDF with headless LibreOffice.

    The input is copied into a private temporary directory and LibreOffice
    is run there (up to five attempts). The resulting PDF is optionally
    passed to pdf_to_pdfa() and pdf_encrypt() before being copied to
    out_file.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the finished PDF is copied to.
        pdfa: if true, pdf_to_pdfa() is applied to the PDF.
        password: if truthy, pdf_encrypt() is applied with this password.
        update_references: if true, the Standard.Module1.PysIndexerPdf
            LibreOffice macro is invoked instead of --convert-to.

    Returns:
        True if LibreOffice exited with status 0, produced the PDF, and the
        PDF was copied to out_file; False otherwise.
    """
    max_tries = 5
    tempdir = tempfile.mkdtemp()
    # try/finally so the working directory is removed even when copying,
    # converting or post-processing raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        # The command line does not change between attempts; build it once.
        if update_references:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.PysIndexerPdf(' + from_file + ',' + to_file + ')']
        else:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            result = subprocess.call(subprocess_arguments, cwd=tempdir)
            # The output file, not the exit status, decides whether to retry.
            if os.path.isfile(to_file):
                break
            result = 1
            tries += 1
            # Back off and announce a retry only if one will actually happen.
            if tries < max_tries:
                time.sleep(2 + tries*random.random())
                logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
        if result != 0:
            return False
        if pdfa:
            pdf_to_pdfa(to_file)
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
def convert_to_file(self, question):
    """Run pandoc on self.input_content and store the result as a file.

    The content is pre-filtered according to self.output_format, written to
    a temporary markdown file, and converted by pandoc. RTF output is
    post-processed with rtf_filter() and, for 'rtf to docx', converted with
    rtf_to_docx(). On success self.output_filename names the result
    (the caller-supplied path if one was set, otherwise the temporary
    output file) and self.output_content is reset to None.

    Args:
        question: passed through to the docassemble.base.filter functions.

    Raises:
        Exception: if pandoc exits with an error, the latex conversion
            directory cannot be created, or RTF-to-DOCX conversion fails.
        IOError: if pandoc did not leave an output file behind.
    """
    # Flatten metadata (a dict, or a list of dicts) into a single dict.
    metadata_as_dict = dict()
    if isinstance(self.metadata, dict):
        metadata_as_dict = self.metadata
    elif isinstance(self.metadata, list):
        for data in self.metadata:
            if isinstance(data, dict):
                metadata_as_dict.update(data)
    # 'rtf to docx' is produced as RTF first and converted afterwards.
    if self.output_format == 'rtf to docx':
        self.output_extension = 'rtf'
    else:
        self.output_extension = self.output_format
    # Fall back to the standard templates when none was supplied.
    if self.output_format in ('rtf', 'rtf to docx') and self.template_file is None:
        self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.rtf')
    if self.output_format == 'docx' and self.reference_file is None:
        self.reference_file = docassemble.base.functions.standard_template_filename('Legal-Template.docx')
    if self.output_format in ('pdf', 'tex') and self.template_file is None:
        self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.tex')
    yaml_to_use = list()
    if self.output_format in ('rtf', 'rtf to docx'):
        self.input_content = docassemble.base.filter.rtf_prefilter(self.input_content, metadata=metadata_as_dict)
    if self.output_format == 'docx':
        self.input_content = docassemble.base.filter.docx_filter(self.input_content, metadata=metadata_as_dict, question=question)
    if self.output_format in ('pdf', 'tex'):
        if len(self.initial_yaml) == 0:
            standard_file = docassemble.base.functions.standard_template_filename('Legal-Template.yml')
            if standard_file is not None:
                self.initial_yaml.append(standard_file)
        yaml_to_use.extend(yaml_file for yaml_file in self.initial_yaml if yaml_file is not None)
        yaml_to_use.extend(yaml_file for yaml_file in self.additional_yaml if yaml_file is not None)
        self.input_content = docassemble.base.filter.pdf_filter(self.input_content, metadata=metadata_as_dict, question=question)
    # delete=False: pandoc reopens both files by name after we close them.
    temp_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".md", delete=False)
    temp_file.write(self.input_content.encode('utf8'))
    temp_file.close()
    temp_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix="." + str(self.output_extension), delete=False)
    temp_outfile.close()
    the_temp_dir = tempfile.gettempdir()
    latex_conversion_directory = os.path.join(the_temp_dir, 'latex_convert')
    if not os.path.isdir(latex_conversion_directory):
        os.makedirs(latex_conversion_directory)
    if not os.path.isdir(latex_conversion_directory):
        raise Exception("Could not create latex conversion directory")
    # Make sure the ICC profile is present in the directory pandoc runs in.
    icc_profile_in_temp = os.path.join(the_temp_dir, 'sRGB_IEC61966-2-1_black_scaled.icc')
    if not os.path.isfile(icc_profile_in_temp):
        shutil.copyfile(docassemble.base.functions.standard_template_filename('sRGB_IEC61966-2-1_black_scaled.icc'), icc_profile_in_temp)
    subprocess_arguments = [PANDOC_PATH, '--smart', '-M', 'latextmpdir=' + os.path.join('latex_convert', ''), '-M', 'pdfa=' + ('true' if self.pdfa else 'false')]
    if len(yaml_to_use) > 0:
        subprocess_arguments.extend(yaml_to_use)
    if self.template_file is not None:
        subprocess_arguments.extend(['--template=%s' % self.template_file])
    if self.reference_file is not None:
        subprocess_arguments.extend(['--reference-docx=%s' % self.reference_file])
    subprocess_arguments.extend(['-s', '-o', temp_outfile.name])
    subprocess_arguments.extend([temp_file.name])
    subprocess_arguments.extend(self.arguments)
    try:
        msg = subprocess.check_output(subprocess_arguments, cwd=the_temp_dir, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        raise Exception("Failed to assemble file: " + unicode(err.output))
    finally:
        # Remove the markdown input whether or not pandoc succeeded.
        os.remove(temp_file.name)
    if msg:
        self.pandoc_message = msg
    if not os.path.exists(temp_outfile.name):
        # The original referenced an undefined name here and raised NameError.
        raise IOError("Failed creating file: %s" % temp_outfile.name)
    if self.output_format in ('rtf', 'rtf to docx'):
        with open(temp_outfile.name) as the_file:
            file_contents = the_file.read()
        file_contents = docassemble.base.filter.rtf_filter(file_contents, metadata=metadata_as_dict, styles=get_rtf_styles(self.template_file), question=question)
        with open(temp_outfile.name, "wb") as the_file:
            the_file.write(file_contents)
        if self.output_format == 'rtf to docx':
            docx_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False)
            # Only the name is needed; close the handle before the converter
            # writes to that path.
            docx_outfile.close()
            success = rtf_to_docx(temp_outfile.name, docx_outfile.name)
            if not success:
                raise Exception("Could not convert RTF to DOCX.")
            temp_outfile = docx_outfile
    if self.output_filename is not None:
        shutil.copyfile(temp_outfile.name, self.output_filename)
    else:
        self.output_filename = temp_outfile.name
    self.output_content = None
    if self.output_format == 'pdf' and self.password:
        pdf_encrypt(self.output_filename, self.password)
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_refs=False, tagged=False):
    """Convert a word-processing file to PDF via convertapi or LibreOffice.

    The input is copied into a private temporary directory. If the
    'convertapi secret' configuration key is set, convertapi_to_pdf() does
    the conversion; otherwise headless LibreOffice is run. Up to five
    attempts are made. On success the PDF is optionally encrypted with
    pdf_encrypt() and copied to out_file.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the finished PDF is copied to.
        pdfa: selects the 'pdfa' method for the LibreOffice macro.
        password: if truthy, pdf_encrypt() is applied with this password.
        update_refs: if true, update_references() is called before each
            convertapi attempt, or 'True' is passed to the LibreOffice
            ConvertToPdf macro.
        tagged: selects the 'tagged' macro method when pdfa is false.

    Returns:
        True if the conversion reported success, the PDF exists, and it was
        copied to out_file; False otherwise.
    """
    max_tries = 5
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    use_libreoffice = daconfig.get('convertapi secret', None) is None
    tempdir = tempfile.mkdtemp()
    # try/finally so the working directory is removed even when copying,
    # converting or encrypting raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = None
        if use_libreoffice:
            # Plain --convert-to covers the default case; anything else goes
            # through the ConvertToPdf macro.
            if update_refs or method != 'default':
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.ConvertToPdf(' + from_file + ',' + to_file + ',' + ('True' if update_refs else 'False') + ',' + method + ')']
            else:
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            if use_libreoffice:
                initialize_libreoffice()
                result = subprocess.call(subprocess_arguments, cwd=tempdir)
            else:
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    # Not a bare except: KeyboardInterrupt/SystemExit must
                    # propagate, and the cause is worth logging.
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            # The output file, not the status, decides whether to retry.
            if os.path.isfile(to_file):
                break
            result = 1
            tries += 1
            # Back off and announce a retry only if one will actually happen.
            if tries < max_tries:
                time.sleep(2 + tries * random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
                else:
                    logmessage("Retrying convertapi")
        if result != 0:
            return False
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_refs=False, tagged=False, filename=None, retry=True):
    """Convert a word-processing file to PDF.

    The backend is chosen from configuration: convertapi if
    'convertapi secret' is set, else cloudconvert if 'cloudconvert secret'
    is set, else the LibreOffice ConvertToPdf macro (run while holding the
    'libreoffice' application lock, with a 120 second timeout).

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the finished PDF is copied to.
        pdfa: selects the 'pdfa' macro method; also passed to cloudconvert.
        password: if truthy, the PDF is encrypted. With cloudconvert the
            password is passed to cloudconvert_to_pdf() and pdf_encrypt()
            is skipped (presumably cloudconvert applies it itself);
            otherwise pdf_encrypt() is called locally.
        update_refs: if true, update_references() is called before each
            convertapi attempt, or 'True' is passed to the macro.
        tagged: selects the 'tagged' macro method when pdfa is false.
        filename: base name for the working files (default 'file'); it is
            passed through secure_filename().
        retry: if true, up to five attempts are made; otherwise one.

    Returns:
        True if the conversion reported success, the PDF exists, and it was
        copied to out_file; False otherwise.
    """
    if filename is None:
        filename = 'file'
    filename = docassemble.base.functions.secure_filename(filename)
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    num_tries = 5 if retry else 1
    use_convertapi = daconfig.get('convertapi secret', None) is not None
    use_cloudconvert = not use_convertapi and daconfig.get('cloudconvert secret', None) is not None
    use_libreoffice = not (use_convertapi or use_cloudconvert)
    # Keep `password` intact across retries. The original cleared it after
    # the first cloudconvert attempt, so a retried conversion was sent no
    # password and the output ended up unencrypted.
    encrypt_locally = bool(password) and not use_cloudconvert
    tempdir = tempfile.mkdtemp()
    # try/finally so the working directory is removed even when copying,
    # converting or encrypting raises.
    try:
        from_file = os.path.join(tempdir, filename + "." + in_format)
        to_file = os.path.join(tempdir, filename + ".pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = None
        if use_libreoffice:
            subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.ConvertToPdf(' + from_file + ',' + to_file + ',' + ('True' if update_refs else 'False') + ',' + method + ')']
        result = 1
        tries = 0
        while tries < num_tries:
            if use_convertapi:
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    # Not a bare except: KeyboardInterrupt/SystemExit must
                    # propagate, and the cause is worth logging.
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            elif use_cloudconvert:
                try:
                    cloudconvert_to_pdf(in_format, from_file, to_file, pdfa, password)
                    result = 0
                except Exception as err:
                    logmessage("Call to cloudconvert failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            else:
                initialize_libreoffice()
                start_time = time.time()
                docassemble.base.functions.server.applock('obtain', 'libreoffice')
                logmessage("Obtained libreoffice lock after {:.4f} seconds.".format(time.time() - start_time))
                try:
                    result = subprocess.run(subprocess_arguments, cwd=tempdir, timeout=120).returncode
                except subprocess.TimeoutExpired:
                    logmessage("word_to_pdf: libreoffice took too long")
                    result = 1
                    # A timeout ends the retry loop.
                    tries = num_tries
                finally:
                    # Release the lock even if launching LibreOffice raised
                    # something other than a timeout.
                    logmessage("Finished libreoffice after {:.4f} seconds.".format(time.time() - start_time))
                    docassemble.base.functions.server.applock('release', 'libreoffice')
            # Check for the PDF up to five times, 0.1 seconds apart, before
            # treating the attempt as failed.
            found = os.path.isfile(to_file)
            polls = 0
            while not found and polls < 4:
                time.sleep(0.1)
                found = os.path.isfile(to_file)
                polls += 1
            if found:
                break
            result = 1
            tries += 1
            if tries < num_tries:
                time.sleep(tries*random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
                elif use_convertapi:
                    logmessage("Retrying convertapi")
                else:
                    logmessage("Retrying cloudconvert")
        if result != 0:
            return False
        if encrypt_locally:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
def word_to_pdf(in_file, in_format, out_file, pdfa=False, password=None, update_refs=False, tagged=False):
    """Convert a word-processing file to PDF via convertapi or LibreOffice.

    The input is copied into a private temporary directory. If the
    'convertapi secret' configuration key is set, convertapi_to_pdf() does
    the conversion; otherwise headless LibreOffice is run. Up to five
    attempts are made. On success the PDF is optionally encrypted with
    pdf_encrypt() and copied to out_file.

    Args:
        in_file: path of the document to convert.
        in_format: file extension (without the dot) given to the working copy.
        out_file: path the finished PDF is copied to.
        pdfa: selects the 'pdfa' method for the LibreOffice macro.
        password: if truthy, pdf_encrypt() is applied with this password.
        update_refs: if true, update_references() is called before each
            convertapi attempt, or 'True' is passed to the LibreOffice
            ConvertToPdf macro.
        tagged: selects the 'tagged' macro method when pdfa is false.

    Returns:
        True if the conversion reported success, the PDF exists, and it was
        copied to out_file; False otherwise.
    """
    max_tries = 5
    if pdfa:
        method = 'pdfa'
    elif tagged:
        method = 'tagged'
    else:
        method = 'default'
    use_libreoffice = daconfig.get('convertapi secret', None) is None
    tempdir = tempfile.mkdtemp()
    # try/finally so the working directory is removed even when copying,
    # converting or encrypting raises.
    try:
        from_file = os.path.join(tempdir, "file." + in_format)
        to_file = os.path.join(tempdir, "file.pdf")
        shutil.copyfile(in_file, from_file)
        subprocess_arguments = None
        if use_libreoffice:
            # Plain --convert-to covers the default case; anything else goes
            # through the ConvertToPdf macro.
            if update_refs or method != 'default':
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--invisible', 'macro:///Standard.Module1.ConvertToPdf(' + from_file + ',' + to_file + ',' + ('True' if update_refs else 'False') + ',' + method + ')']
            else:
                subprocess_arguments = [LIBREOFFICE_PATH, '--headless', '--convert-to', 'pdf', from_file]
        result = 1
        tries = 0
        while tries < max_tries:
            if use_libreoffice:
                initialize_libreoffice()
                result = subprocess.call(subprocess_arguments, cwd=tempdir)
            else:
                if update_refs:
                    update_references(from_file)
                try:
                    convertapi_to_pdf(from_file, to_file)
                    result = 0
                except Exception as err:
                    # Not a bare except: KeyboardInterrupt/SystemExit must
                    # propagate, and the cause is worth logging.
                    logmessage("Call to convertapi failed")
                    logmessage(err.__class__.__name__ + ": " + str(err))
                    result = 1
            # The output file, not the status, decides whether to retry.
            if os.path.isfile(to_file):
                break
            result = 1
            tries += 1
            # Back off and announce a retry only if one will actually happen.
            if tries < max_tries:
                time.sleep(2 + tries*random.random())
                if use_libreoffice:
                    logmessage("Retrying libreoffice with " + repr(subprocess_arguments))
                else:
                    logmessage("Retrying convertapi")
        if result != 0:
            return False
        if password:
            pdf_encrypt(to_file, password)
        shutil.copyfile(to_file, out_file)
        return True
    finally:
        shutil.rmtree(tempdir)
def convert_to_file(self, question):
    """Run pandoc on self.input_content and store the result as a file.

    The content is pre-filtered according to self.output_format, written to
    a temporary markdown file, and converted by pandoc. RTF output is
    post-processed with rtf_filter() and, for 'rtf to docx', converted with
    rtf_to_docx(). On success self.output_filename names the result
    (the caller-supplied path if one was set, otherwise the temporary
    output file) and self.output_content is reset to None.

    Args:
        question: passed through to the docassemble.base.filter functions.

    Raises:
        Exception: if pandoc exits with an error, the latex conversion
            directory cannot be created, or RTF-to-DOCX conversion fails.
        IOError: if pandoc did not leave an output file behind.
    """
    # Flatten metadata (a dict, or a list of dicts) into a single dict.
    metadata_as_dict = dict()
    if isinstance(self.metadata, dict):
        metadata_as_dict = self.metadata
    elif isinstance(self.metadata, list):
        for data in self.metadata:
            if isinstance(data, dict):
                metadata_as_dict.update(data)
    # 'rtf to docx' is produced as RTF first and converted afterwards.
    if self.output_format == 'rtf to docx':
        self.output_extension = 'rtf'
    else:
        self.output_extension = self.output_format
    # Fall back to the standard templates when none was supplied.
    if self.output_format in ('rtf', 'rtf to docx') and self.template_file is None:
        self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.rtf')
    if self.output_format == 'docx' and self.reference_file is None:
        self.reference_file = docassemble.base.functions.standard_template_filename('Legal-Template.docx')
    if self.output_format in ('pdf', 'tex') and self.template_file is None:
        self.template_file = docassemble.base.functions.standard_template_filename('Legal-Template.tex')
    yaml_to_use = list()
    if self.output_format in ('rtf', 'rtf to docx'):
        self.input_content = docassemble.base.filter.rtf_prefilter(self.input_content, metadata=metadata_as_dict)
    if self.output_format == 'docx':
        self.input_content = docassemble.base.filter.docx_filter(self.input_content, metadata=metadata_as_dict, question=question)
    if self.output_format in ('pdf', 'tex'):
        if len(self.initial_yaml) == 0:
            standard_file = docassemble.base.functions.standard_template_filename('Legal-Template.yml')
            if standard_file is not None:
                self.initial_yaml.append(standard_file)
        yaml_to_use.extend(yaml_file for yaml_file in self.initial_yaml if yaml_file is not None)
        yaml_to_use.extend(yaml_file for yaml_file in self.additional_yaml if yaml_file is not None)
        self.input_content = docassemble.base.filter.pdf_filter(self.input_content, metadata=metadata_as_dict, question=question)
        # Whitespace-only content is replaced with an empty bold run.
        # NOTE(review): presumably so LaTeX still has a body to typeset -- confirm.
        if not re.search(r'[^\s]', self.input_content):
            self.input_content = u"\\textbf{}\n"
    # delete=False: pandoc reopens both files by name after we close them.
    if PY3:
        temp_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".md", delete=False, encoding='utf-8')
        temp_file.write(self.input_content)
    else:
        temp_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="w", suffix=".md", delete=False)
        with open(temp_file.name, 'w', encoding='utf-8') as fp:
            fp.write(self.input_content)
    temp_file.close()
    temp_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix="." + str(self.output_extension), delete=False)
    temp_outfile.close()
    the_temp_dir = tempfile.gettempdir()
    latex_conversion_directory = os.path.join(the_temp_dir, 'conv')
    if not os.path.isdir(latex_conversion_directory):
        os.makedirs(latex_conversion_directory)
    if not os.path.isdir(latex_conversion_directory):
        raise Exception("Could not create latex conversion directory")
    # Make sure the ICC profile is present in the directory pandoc runs in.
    icc_profile_in_temp = os.path.join(the_temp_dir, 'sRGB_IEC61966-2-1_black_scaled.icc')
    if not os.path.isfile(icc_profile_in_temp):
        shutil.copyfile(docassemble.base.functions.standard_template_filename('sRGB_IEC61966-2-1_black_scaled.icc'), icc_profile_in_temp)
    subprocess_arguments = [PANDOC_PATH, PANDOC_ENGINE]
    if PANDOC_OLD:
        subprocess_arguments.append("--smart")
    subprocess_arguments.extend(['-M', 'latextmpdir=' + os.path.join('.', 'conv'), '-M', 'pdfa=' + ('true' if self.pdfa else 'false')])
    if len(yaml_to_use) > 0:
        subprocess_arguments.extend(yaml_to_use)
    if self.template_file is not None:
        subprocess_arguments.extend(['--template=%s' % self.template_file])
    if self.reference_file is not None:
        # The option name depends on the PANDOC_OLD flag.
        if PANDOC_OLD:
            subprocess_arguments.extend(['--reference-docx=%s' % self.reference_file])
        else:
            subprocess_arguments.extend(['--reference-doc=%s' % self.reference_file])
    subprocess_arguments.extend(['-s', '-o', temp_outfile.name])
    subprocess_arguments.extend([temp_file.name])
    subprocess_arguments.extend(self.arguments)
    try:
        msg = subprocess.check_output(subprocess_arguments, cwd=the_temp_dir, stderr=subprocess.STDOUT).decode('utf-8', 'ignore')
    except subprocess.CalledProcessError as err:
        raise Exception("Failed to assemble file: " + err.output.decode())
    finally:
        # Remove the markdown input whether or not pandoc succeeded.
        os.remove(temp_file.name)
    if msg:
        self.pandoc_message = msg
    if not os.path.exists(temp_outfile.name):
        # The original referenced an undefined name here and raised NameError.
        raise IOError("Failed creating file: %s" % temp_outfile.name)
    if self.output_format in ('rtf', 'rtf to docx'):
        with open(temp_outfile.name, encoding='utf-8') as the_file:
            file_contents = the_file.read()
        file_contents = docassemble.base.filter.rtf_filter(file_contents, metadata=metadata_as_dict, styles=get_rtf_styles(self.template_file), question=question)
        with open(temp_outfile.name, "wb") as the_file:
            the_file.write(bytearray(file_contents, encoding='utf-8'))
        if self.output_format == 'rtf to docx':
            docx_outfile = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False)
            # Only the name is needed; close the handle before the converter
            # writes to that path.
            docx_outfile.close()
            success = rtf_to_docx(temp_outfile.name, docx_outfile.name)
            if not success:
                raise Exception("Could not convert RTF to DOCX.")
            temp_outfile = docx_outfile
    if self.output_filename is not None:
        shutil.copyfile(temp_outfile.name, self.output_filename)
    else:
        self.output_filename = temp_outfile.name
    self.output_content = None
    if self.output_format == 'pdf' and self.password:
        pdf_encrypt(self.output_filename, self.password)