Example #1
0
def concatenate_files(path_list):
    """Merge word-processing files into one DOCX file and return its path.

    Each path's type is guessed from its name with ``mimetypes.guess_type``.
    RTF, DOC and ODT inputs are first converted to DOCX through
    ``docassemble.base.pandoc.convert_file``; DOCX inputs are used as they
    are; paths of any other type are skipped.

    :param path_list: iterable of file paths to concatenate, in order.
    :return: the path of the single usable file when there is only one,
        otherwise the path of a newly created temporary DOCX file.
    :raises DAError: if none of the paths has a supported type.

    Temporary files are created with ``delete=False``, so removing them is
    left to the caller.
    """
    convertible = {
        'application/rtf': 'rtf',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    docx_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    new_path_list = []
    for path in path_list:
        mimetype, _encoding = mimetypes.guess_type(path)
        if mimetype in convertible:
            # Only the reserved name is needed; close the handle right away
            # so it is not leaked and the converter can write to the path.
            with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False) as new_docx_file:
                converted_path = new_docx_file.name
            docassemble.base.pandoc.convert_file(path, converted_path, convertible[mimetype], 'docx')
            new_path_list.append(converted_path)
        elif mimetype == docx_mimetype:
            new_path_list.append(path)
    if not new_path_list:
        raise DAError("concatenate_files: no valid files to concatenate")
    if len(new_path_list) == 1:
        return new_path_list[0]
    composer = Composer(docx.Document(new_path_list[0]))
    for extra_path in new_path_list[1:]:
        composer.append(docx.Document(extra_path))
    with tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False) as docx_file:
        output_path = docx_file.name
    composer.save(output_path)
    return output_path
Example #2
0
def _combine_docx(target_file, files):
    """Merge the documents in ``files`` and save the result to ``target_file``.

    The first entry of ``files`` is the master document; every following
    entry is appended to it in order.
    """
    composer = Composer(Document_compose(files[0]))
    for extra_path in files[1:]:
        composer.append(Document_compose(extra_path))
    composer.save(target_file)
Example #3
0
def merge_word_documents(indir,
                         outfile=search_book_name(),
                         add_page_break=True,
                         delete_merged_files=True):
    """
    Merge the ``.docx`` files found in a directory into a single document.

    :param indir: The directory where the documents are stored.
    :param outfile: The file name of the merged document; it is written
        inside ``indir``. NOTE: the default is computed once, when the
        function is defined, not on each call.
    :param add_page_break: flag for adding page breaks between documents.
    :param delete_merged_files: flag determining if the merged source files
        should be deleted afterwards.
    :raises ValueError: if ``indir`` contains no ``.docx`` file to merge.
    """
    # os.listdir returns names in arbitrary order; sort so the merge order is
    # reproducible. A stale output file from an earlier run is left out,
    # otherwise it would be merged into itself and then deleted below.
    names = sorted(
        f for f in os.listdir(indir)
        if f.endswith(".docx") and f != outfile
    )
    if not names:
        raise ValueError(f"no .docx files to merge in {indir!r}")
    files = [f"{indir}/{name}" for name in names]

    first, *rest = files
    master = Document_compose(first)
    # Breaks go *between* documents only, so a lone document gets none.
    if add_page_break and rest:
        master.add_page_break()
    composer = Composer(master)
    last_index = len(rest) - 1
    for i, path in enumerate(rest):
        tmp = Document_compose(path)
        if add_page_break and i < last_index:
            tmp.add_page_break()
        composer.append(tmp)
    composer.save(f"{indir}/{outfile}")

    if delete_merged_files:
        for path in files:
            os.remove(path)
Example #4
0
def compose_wholepsa_for_member(parameters: dict):
    """Build one combined document from six templates and save it to ``out_path``.

    Each template is looked up by name in ``ALL_DOCUMENTS``, run through
    ``find_and_replace`` with ``parameters``, and appended in a fixed order.
    Every section except the last one is followed by a page break.

    :param parameters: passed unchanged to ``find_and_replace`` for every
        section.
    """
    section_names = (
        "Arbeitskleidung",
        "Einsatzkleidung",
        "Handschuhe",
        "Helm",
        "Kopfschutzhaube",
        "Schuhe",
    )
    final_section = section_names[-1]

    composed_master = None
    for section_name in section_names:
        section = docx.Document(ALL_DOCUMENTS[section_name])
        find_and_replace(section, parameters)
        if section_name != final_section:
            section.add_page_break()
        # The first section becomes the master that the rest is appended to.
        if composed_master is None:
            composed_master = Composer(section)
        else:
            composed_master.append(section)

    composed_master.save(out_path)
Example #5
0
def make_master_file(docx_filepaths, filename, summ=True):
    '''
    Build a single master document from a list of article file paths.

    A fresh document holding a 'TABLE OF CONTENTS' paragraph is passed through
    make_toc and used as the master. Each entry of docx_filepaths is turned
    into a document with create_doc(article, summ) and appended in order. The
    result is saved to filename and then handed to update_toc.

    docx_filepaths: iterable of items accepted by create_doc
    filename: where the master document is saved
    summ: forwarded to create_doc as its second argument
    '''
    contents_page = Document()
    contents_page.add_paragraph('TABLE OF CONTENTS')
    master = make_toc(contents_page)

    # All articles are built before any of them is appended.
    articles = [create_doc(article, summ) for article in docx_filepaths]

    composer = Composer(master)
    for article_document in articles:
        composer.append(article_document)

    composer.save(filename)

    update_toc(filename)
Example #6
0
    def post(self):
        """
        Merge multiple docx documents into one

        The request's JSON body must have a ``contents`` list holding at
        least two base64-encoded documents. Each one is decoded and written
        to a temporary folder, then the documents are merged in list order
        and the merged file is returned as an attachment.

        Returns a ``(body, 400)`` pair when fewer than two documents are
        given or when an entry is not valid base64.
        """
        log.info("received a request to merge docx")
        contents = request.json['contents']
        if len(contents) < 2:
            return {"message": "contents must be at least 2 elements"}, 400

        tempFolder = generate_temp_folder()
        ext = '.docx'

        paths = []
        for i, content in enumerate(contents):
            try:
                payload = base64.b64decode(content)
            except (ValueError, TypeError):
                # binascii.Error (a ValueError subclass) signals bad padding
                # or characters; TypeError signals a non-string entry.
                return {"message": "contents[%d] is not valid base64" % i}, 400
            path = tempFolder + "/file" + str(i) + ext
            # The with-block closes the file even if the write fails.
            with open(path, "wb") as fp:
                fp.write(payload)
            paths.append(path)

        composer = Composer(Document(paths[0]))
        for path in paths[1:]:
            composer.append(Document(path))

        output_file = tempFolder + '/final' + ext
        composer.save(output_file)
        return send_file(output_file, as_attachment=True)
Example #7
0
def _collect_parts(workers, composer, progress):
    """Join each worker in order and append the part file numbered ``progress``.

    Returns the updated ``progress`` counter.
    """
    for worker in workers:
        worker.join()
        composer.append(Document('docx/tmp/part{}.docx'.format(progress)))
        progress += 1
    return progress


def gen_doc(graph_data, style, template=None, max_processes=os.cpu_count()+1):
    """Generate ``docx/final.docx`` from per-item parts built in subprocesses.

    One ``multiprocessing.Process`` running ``task(i, graph_data[i], style)``
    is started per item. After a worker is joined, the file
    ``docx/tmp/part<k>.docx`` is appended, where ``k`` counts the joined
    workers in start order (presumably ``task`` writes that file -- confirm
    against ``task``).

    :param graph_data: sequence of items, one per generated part.
    :param style: forwarded to ``task``.
    :param template: optional argument for ``Document``; when ``None`` an
        empty document is the base.
    :param max_processes: maximum number of workers running at once.
    """
    doc = Document() if template is None else Document(template)

    composer = Composer(doc)
    workers = []
    progress = 0
    for i, data in enumerate(graph_data):
        process = multiprocessing.Process(target=task, args=(i, data, style))
        workers.append(process)
        process.start()
        # Drain whenever the batch is full. Comparing the loop index to the
        # limit would trigger only once and leave later batches unbounded.
        if len(workers) >= max_processes:
            progress = _collect_parts(workers, composer, progress)
            workers = []
    _collect_parts(workers, composer, progress)

    composer.save('docx/final.docx')
    print("Document generated in docx/final.docx.\n")
Example #8
0
def combine_all_docx(filename_master, files_list):
    """Append every file in ``files_list`` to the master document.

    The merged document is always saved as ``combined_file.docx``.
    """
    composer = Composer(Document_compose(filename_master))
    for section_path in files_list:
        composer.append(Document_compose(section_path))
    composer.save("combined_file.docx")
Example #9
0
def mergeDoc(filename_master, files_list):
    """Merge the files in ``files_list`` into a new document.

    The master is an empty document; each listed file is appended in order
    and the result is saved to ``filename_master``.
    """
    composer = Composer(Document_compose())
    for part_path in files_list:
        composer.append(Document_compose(part_path))
    composer.save(filename_master)
Example #10
0
def compose_files(parser, parsed_args):
    """Append ``parsed_args.files`` to ``parsed_args.master`` and save the result.

    The output path is read from ``parsed_args.ouput_document`` (the
    attribute name is spelled that way). After saving, ``parser.exit`` is
    called with a success message.
    """
    master_document = Document(parsed_args.master)
    composer = Composer(master_document)
    for extra_path in parsed_args.files:
        composer.append(Document(extra_path))

    destination = parsed_args.ouput_document
    composer.save(destination)
    parser.exit(
        message='successfully composed file at {}\n'.format(destination))
Example #11
0
def combine_report(docs_list, output_path):
    """Merge the documents in ``docs_list`` and save them to ``output_path``.

    The first entry is the master; the remaining entries, if any, are
    appended in order.
    """
    first_path, remaining_paths = docs_list[0], docs_list[1:]
    composer = Composer(Document(first_path))
    for item_path in remaining_paths:
        composer.append(Document(item_path))
    composer.save(output_path)
Example #12
0
def combine_all_docx(filename_master, files_list):
    """Combine documents into one file.

    ``filename_master`` is the master document. The entries of ``files_list``
    from index 1 onward are appended to it; index 0 is skipped (presumably
    because it is the master itself -- confirm against the caller).
    """
    composer = Composer(Document_compose(filename_master))
    for certificate_path in files_list[1:]:
        composer.append(Document_compose(certificate_path))
    composer.save("Все сертификаты в одном файле.docx")
Example #13
0
def combine_all_docx(filename_master, files_list):
    """Append every file in ``files_list`` to the master document.

    The merged document is saved as ``ALL_SERTIFICATES.docx``. Before saving,
    an empty ``1.txt`` is created (or truncated); its purpose is not evident
    from this function -- TODO confirm whether it is still needed.
    """
    composer = Composer(Document_compose(filename_master))
    for section_path in files_list:
        composer.append(Document_compose(section_path))
    # Keep the create/truncate side effect, but close the handle instead of
    # leaking it.
    with open('1.txt', 'w'):
        pass
    composer.save("ALL_SERTIFICATES.docx")
Example #14
0
def combine_resumes_and_other_stuff(resume_list):
    """Merge all resumes, followed by one closing document, into one file.

    The master document, the closing document and the output path come from
    the names ``filename_master``, ``last`` and ``resume_master``, which are
    defined outside this function.
    """
    composer = Composer(Document_compose(filename_master))
    for resume_path in resume_list:
        composer.append(Document_compose(resume_path))
    closing_document = Document_compose(last)
    composer.append(closing_document)
    composer.save(resume_master)
Example #15
0
def fusion2(word1, word2, dep):
    """Merge two Word documents, separated by a page break.

    ``word1`` is the master and ``word2`` is appended after a page break.
    The result is saved under ``reports_word/`` with ``dep`` in the file
    name, and that path is returned.
    """
    first_document = Document(word1)
    first_document.add_page_break()
    composer = Composer(first_document)
    composer.append(Document(word2))
    name_fusion = "reports_word/Suivi_Territorial_plan_relance_{}.docx".format(
        dep)
    composer.save(name_fusion)
    return name_fusion
Example #16
0
def mergeDocx_pyDocx(destFileName, file_list):
    """Merge the documents in ``file_list`` and save them to ``destFileName``.

    The first entry is the master; every other entry is appended in order.
    """
    docx_composer = Composer(Document(file_list[0]))
    for extra_path in file_list[1:]:
        docx_composer.append(Document(extra_path))
    docx_composer.save(destFileName)
Example #17
0
 def combine_word_documents(cls,word_files, ds_name):
     """Merge ``word_files`` onto the document at ``EMPTY_DOC_PATH``.

     The result is saved as ``<ds_name>.docx`` in the directory of the first
     input file. A ``PermissionError`` raised while saving is printed and
     not re-raised.
     """
     composer = Composer(Document(EMPTY_DOC_PATH))
     for word_file in word_files:
         composer.append(Document(word_file))
     target_dir = os.path.dirname(word_files[0])
     target_path = os.path.join(target_dir, f'{ds_name}.docx')
     try:
         composer.save(target_path)
     except PermissionError as error:
         print(str(error))
Example #18
0
def compose_doc(created, copy, pages):
    """Write ``created`` as ``pages`` repetitions of the document ``copy``.

    An empty document is round-tripped through ``temporary_main.docx`` and
    used as the master. The document loaded from ``copy`` is appended to it
    ``pages`` times. Afterwards ``temporary_main.docx`` and
    ``temporary_copy.docx`` are removed.

    NOTE(review): ``temporary_copy.docx`` is not created here; presumably the
    caller passes it as ``copy`` -- confirm.
    """
    scratch_path = "temporary_main.docx"
    Document().save(scratch_path)
    composer = Composer(Document(scratch_path))
    page_document = Document(copy)
    for _ in range(pages):
        composer.append(page_document)
    composer.save(created)
    remove(scratch_path)
    remove("temporary_copy.docx")
Example #19
0
def append():
    """Merge every entry of ``documents`` into ``combined.docx``.

    Each entry's ``"path"`` is loaded and a page break is added to the end of
    every document, including the last. The result is saved inside ``path``
    and a message box reports completion. ``documents`` and ``path`` are
    defined outside this function.
    """
    base_document = Document(documents[0]["path"])
    base_document.add_page_break()
    composer = Composer(base_document)
    for entry in documents[1:]:
        extra_document = Document(entry["path"])
        extra_document.add_page_break()
        composer.append(extra_document)
    composer.save(os.path.join(path, "combined.docx"))
    messagebox.showinfo("POTEITOES!", "Ho finito!")
Example #20
0
def combine_all_env(filename_master,files_list):
    """Append the existing files of ``files_list`` to the master document.

    Entries that are not existing files are skipped. The result is saved in
    the ``print`` folder under the current working directory; the file name
    uses ``name``, which is defined outside this function.
    """
    savepath = os.getcwd() + "\\print\\" + name + "_combined_envelope.docx"
    composer = Composer(Document_compose(filename_master))
    for envelope_path in files_list:
        if not os.path.isfile(envelope_path):
            continue
        composer.append(Document_compose(envelope_path))
    composer.save(savepath)
Example #21
0
def merge_documents(files, output_doc, output_pdf):
    """Merge ``files`` into ``output_doc`` and optionally convert it to PDF.

    Starting from an empty document, each file is appended (with ``False`` as
    the second positional argument of ``append``) and then the last paragraph
    of the merged document is deleted. When ``output_pdf`` is not ``None``
    the saved document is passed to ``docx_to_pdf``.
    """
    merger = Composer(Document())

    for source_path in files:
        merger.append(Document(source_path), False)
        merged_paragraphs = merger.doc.paragraphs
        if merged_paragraphs:
            delete_paragraph(merged_paragraphs[-1])

    merger.save(output_doc)
    if output_pdf is not None:
        docx_to_pdf(output_doc, output_pdf)
Example #22
0
def combine_all_docx(filename_master, files_list, output):
    """Append ``files_list`` to the master document and save to ``output``.

    Progress is printed for the master and for each appended file, followed
    by an ``hr`` heading and the output path.
    """
    print('Merging: %s' % filename_master)
    composer = Composer(Document_compose(filename_master))
    for section_path in files_list:
        composer.append(Document_compose(section_path))
        print('Merging: %s' % (section_path))
    composer.save(output)
    hr('OUTPUT:')
    print('Output file: %s' % output)
Example #23
0
    def compose_files(self, files, output_file_path):
        """
        Merge multiple Word files into a single file.

        :param files: the files to merge, appended in order onto an empty
            document
        :param output_file_path: where the merged document is saved
        :return: None
        """
        merged = Composer(Document())
        for source_file in files:
            merged.append(Document(source_file))

        # Save to a new file
        merged.save(output_file_path)
Example #24
0
def assemble_documents(files, output_doc, output_pdf):
    """Merge ``files`` into ``output_doc`` and optionally convert it to PDF.

    Starting from an empty document, each file is appended (with ``False`` as
    the second positional argument of ``append``) and then the last paragraph
    of the merged document is deleted. The PDF conversion is skipped when
    ``output_pdf`` is ``None``, empty, or whitespace only.
    """
    merger = Composer(Document())

    for source_path in files:
        merger.append(Document(source_path), False)
        merged_paragraphs = merger.doc.paragraphs
        if merged_paragraphs:
            _delete_paragraph(merged_paragraphs[-1])

    merger.save(output_doc)
    if output_pdf is None or output_pdf == '' or output_pdf.isspace():
        return
    _docx_to_pdf(output_doc, output_pdf)
Example #25
0
def combine_all_docx(filename_master, files_list):
    """Append ``files_list`` to the master and save a timestamped copy.

    The result is written to the parent directory as
    ``mats_<YYYYmmdd-HHMMSS>.docx`` and the path is printed.
    """
    from datetime import datetime

    composer = Composer(Document(filename_master))
    for section_path in files_list:
        composer.append(Document(section_path))
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    outputfile = os.path.join(r"..", f"mats_{timestamp}.docx")
    composer.save(outputfile)
    print(outputfile)
def _check_fields_for_page(checks, page_number, rows_per_page=9):
    """Return the placeholder -> value mapping for one page of checks.

    Keys are ``<column name><slot>`` with slots ``1..rows_per_page``. Slots
    (or columns) with no data in ``checks`` map to an empty string.
    """
    column_names = ("test_date", "remark", "testVision", "testFunction",
                    "tester")
    first_row = (page_number - 1) * rows_per_page
    fields = {}
    for slot in range(1, rows_per_page + 1):
        for column, column_name in enumerate(column_names):
            try:
                value = checks[first_row + slot - 1][column]
            except IndexError:
                # Past the end of the checks, or a row that is too short.
                value = ""
            fields[column_name + str(slot)] = value
    return fields


def compose_single_equipment(parameters: dict):
    """Build the equipment document, nine checks per page, and save it.

    For every page a fresh ``BaseEquipment`` template is filled with
    ``parameters``, the page number and that page's slice of
    ``parameters["checks"]``. At least one page is always produced. The
    pages are merged and saved to ``out_path``.

    :param parameters: replacement values; ``parameters["checks"]`` is a
        sequence of rows indexed by column position 0-4.
    :return: the ``Composer`` holding the merged document.
    """
    checks = parameters["checks"]
    page_count = max(1, math.ceil(len(checks) / 9))

    composed_master = None
    for page_number in range(1, page_count + 1):
        doc = docx.Document(ALL_DOCUMENTS["BaseEquipment"])
        find_and_replace(doc, parameters)
        find_and_replace(doc, {"pagenumber": str(page_number)})
        find_and_replace(doc, _check_fields_for_page(checks, page_number))
        # The first page becomes the master that later pages are appended to.
        if composed_master is None:
            composed_master = Composer(doc)
        else:
            composed_master.append(doc)

    composed_master.save(out_path)
    return composed_master
Example #27
0
 def compose_public(empty_template_dcmt, docx_full_path_sequence,
                    target_dcmt):
     """Shared docx-merging routine; returns ``target_dcmt``.

     Each path in ``docx_full_path_sequence`` is appended onto the document
     loaded from ``empty_template_dcmt``. A document that fails to load or
     append is reported (traceback plus its path) and skipped. The result is
     saved to ``target_dcmt``.
     """
     composer = Composer(Document(empty_template_dcmt))
     for source_path in docx_full_path_sequence:
         try:
             composer.append(Document(source_path))
         except Exception:
             # Best effort: report the failing document and carry on.
             traceback.print_exc()
             print(source_path + ' runs into Exception ')
     composer.save(target_dcmt)
     return target_dcmt
Example #28
0
def concat_docx(dir_p, dst_p, suffix, name='codes'):
    """Concatenate the documents found by ``finder(dir_p)`` into one file.

    Each document is preceded by the document returned by
    ``head(path, suffix)``. A page break follows the first document, and
    every later document except the last. The result is saved as
    ``<name>.docx`` inside ``dst_p``.
    """
    source_paths = finder(dir_p)
    total = len(source_paths)

    leading_path = source_paths[0]
    leading_body = read_docx(leading_path)
    leading_body.add_page_break()
    compose = Composer(head(leading_path, suffix))
    compose.append(leading_body)

    # Positions are 1-based, so the last document has position == total.
    for position, source_path in enumerate(source_paths[1:], start=2):
        compose.append(head(source_path, suffix))
        body = read_docx(source_path)
        if position < total:
            body.add_page_break()
        compose.append(body)
    compose.save(os.path.join(dst_p, f'{name}.docx'))
Example #29
0
def compose_wholepsa(parameters):
    """Build one document covering every member in ``parameters``.

    ``compose_wholepsa_for_member`` is called per member and its output is
    read back from ``out_path``. The first member is the master; every member
    except the last is followed by a page break. The merged result is saved
    to ``out_path``.
    """
    member_count: int = len(parameters)
    final_index = member_count - 1

    compose_wholepsa_for_member(parameters[0])
    first_member = docx.Document(out_path)
    if member_count > 1:
        first_member.add_page_break()
    composed_master = Composer(first_member)

    for position, member_parameters in enumerate(parameters[1:], start=1):
        compose_wholepsa_for_member(member_parameters)
        member_document = docx.Document(out_path)
        if position != final_index:
            member_document.add_page_break()
        composed_master.append(member_document)

    composed_master.save(out_path)
Example #30
0
    def publish(self):
        """Render the main document and back page, merge them, and clean up.

        Uses ``dest``, ``context``, ``backpage``, ``output_path`` and
        ``temp_path``, which are defined outside this method.
        """
        # End the main document with a page break, then render it.
        trailing_run = dest.paragraphs[-1].add_run()
        trailing_run.add_break(WD_BREAK.PAGE)
        dest.render(context)

        # Render the back page from its template.
        back_cover = DocxTemplate(backpage)
        back_cover.render(context)

        # Main document first, back page appended after it.
        merged = Composer(dest)
        merged.append(back_cover)

        # Save the output, then delete the temp folder with all its contents.
        merged.save(output_path)
        print(f'Saved at {output_path}')
        shutil.rmtree(temp_path)
Example #31
0
class DocxMergeTool(object):
    """The docx merge tool merges docx documents with the docxcompose composer.
    The merge is based on a master document, if remove_property_fields is False
    the master's doc-properties will be preserved.

    It is used as a context manager and accepts bytes of documents.
    Calling the DocxMergeTool object merges the documents and returns the
    resulting bytes.

    The files are merged in the order they were added, or inserted at the
    position given by the index passed to insert.

    All intermediate files live in a temporary directory that is created on
    entering the context and removed on leaving it.
    """

    def __init__(self, master, remove_property_fields=True):
        self._remove_property_fields = remove_property_fields
        self._master = master

    def __enter__(self):
        self._tempdir_path = Path(tempfile.mkdtemp(prefix='opengever.core.doxcmerge_'))
        self._index = 0
        try:
            self._composer = Composer(self._as_document(self._master))
        except BaseException:
            # __exit__ is not called when __enter__ raises, so remove the
            # temporary directory here to avoid leaking it.
            self._tempdir_path.rmtree_p()
            raise

        return self

    def __exit__(self, exc_type, exc_value, tb):
        self._tempdir_path.rmtree_p()

    def __call__(self):
        """Merge the registered docx files and return the resulting bytes.
        """
        result_path = self._tempdir_path.joinpath('result.docx')
        self._composer.save(result_path)
        return result_path.bytes()

    def add(self, file_data):
        """Append the document given as bytes to the end of the merge."""
        self._composer.append(
            self._as_document(file_data),
            remove_property_fields=self._remove_property_fields)

    def insert(self, index, file_data):
        """Insert the document given as bytes at ``index`` of the merge."""
        self._composer.insert(
            index, self._as_document(file_data),
            remove_property_fields=self._remove_property_fields)

    def _as_document(self, file_data):
        """Convert bytes to a document.

        Also make sure property field values are up to date before they are
        removed.
        """
        path = self._get_next_path()
        path.write_bytes(file_data)
        document = Document(path)

        if self._remove_property_fields:
            CustomProperties(document).update_all()

        return document

    def _get_next_path(self):
        """Return a fresh, numbered ``.docx`` path inside the temp directory."""
        path = self._tempdir_path.joinpath('{0}.docx'.format(self._index))
        self._index += 1
        return path