def main(event, context):
    """Split every uploaded document named in an S3 event into one-page PDFs.

    For each record the object is downloaded to ``/tmp``, converted to PDF
    with ``convert``, split page by page, and each page is uploaded back to
    the same bucket. The page key is the original key with ``'.docx'``
    replaced by ``'/<page index>.pdf'`` and ``'raw'`` replaced by
    ``'staging'``.

    Args:
        event: Notification payload; only ``event['Records']`` is read.
        context: Not used by this function.
    """
    tmp_split_path = '/tmp/split'
    # os.mkdir() raised FileExistsError for the second record of an event and
    # whenever the directory was left over from an earlier invocation.
    os.makedirs(tmp_split_path, exist_ok=True)

    for record in event['Records']:
        # Name of the uploaded object.
        bucket = record['s3']['bucket']['name']
        key = unquote_plus(record['s3']['object']['key'])
        print(bucket)
        print(key)

        # Local scratch paths; '/' is stripped so the key is a flat file name.
        tmpkey = key.replace('/', '')
        tmp_download_path = '/tmp/{}{}'.format(uuid.uuid4(), tmpkey)
        tmp_converted_path = '/tmp/converted-{}'.format(tmpkey)

        s3.download_file(bucket, key, tmp_download_path)
        convert(tmp_download_path, tmp_converted_path)

        # Keep the converted PDF open only while its pages are being read.
        with open(tmp_converted_path, "rb") as converted_pdf:
            document = PdfFileReader(converted_pdf)
            for i in range(document.numPages):
                output = PdfFileWriter()
                output.addPage(document.getPage(i))
                tmp_page_path = '{}/pdf-{}-{}'.format(tmp_split_path, i, tmpkey)
                with open(tmp_page_path, "wb") as outputStream:
                    output.write(outputStream)
                new_key = key.replace('.docx', '/{}.pdf'.format(i))
                s3.upload_file(tmp_page_path, bucket,
                               new_key.replace('raw', 'staging'))
def QRstoPDF(path):
    """Lay out the QR images listed in a CSV file into a document and open the PDF.

    The first CSV row is skipped. For every following row the image
    ``QRimages/<column 0>QR.jpg`` is added to the current run; after six
    images a new paragraph is started. The document is saved as
    ``QRGenerated.docx``, converted to a PDF named after column 2 of the last
    row read, the DOCX is removed, the PDF is opened with ``os.startfile``
    and ``QRData.csv`` is deleted.

    Args:
        path: Path of the CSV file to read.

    Returns:
        None. Returns early, producing nothing, when no data row could be
        read (missing CSV or a CSV with only the skipped first row).
    """
    doc = docx.Document()
    p = doc.add_paragraph()
    r = p.add_run()
    section = None  # column 2 of the most recent row; names the PDF
    try:
        with open(path, "r") as csv_file:
            csv_reader = csv.reader(csv_file)
            next(csv_reader)  # skip the first row
            count = 0
            for line in csv_reader:
                count += 1
                FileNameQR = "QRimages/" + line[0] + "QR.jpg"
                section = line[2]
                r.add_picture(FileNameQR,
                              width=docx.shared.Inches(3),
                              height=docx.shared.Inches(3))
                if count == 6:
                    # Six pictures placed: continue in a fresh paragraph.
                    p = doc.add_paragraph()
                    r = p.add_run()
                    count = 0
    except FileNotFoundError:
        print("FileNOtFound")

    if section is None:
        # Previously this fell through to `line[2]` with `line` unbound and
        # died with NameError; without a row there is no PDF name to build.
        return

    pdfname = "C:/Users/ROLINE JOHN AGUILAR/Downloads/" + section + "QRs" + ".pdf"
    wordname = "QRGenerated.docx"
    doc.save(wordname)
    convert(wordname, pdfname)
    os.remove(wordname)
    os.startfile(pdfname)
    os.remove("QRData.csv")
def docx_to_pdf(docxfile: str):
    """Hand a DOCX file to ``docx2pdf.convert`` with no explicit output path.

    Args:
        docxfile (str): The name of the DOCX file to convert.
    """
    source_name = docxfile
    docx2pdf.convert(source_name)
def slicing_original_documents_docx(self):
    """Convert the DOCX to a PDF, then delegate to the PDF slicing method.

    Stores the base name of ``self.filename`` in ``self.docx_filepath`` and
    ``self.raw_filename``, runs ``convert`` on that base name and returns
    whatever ``self.slicing_original_documents_pdf()`` returns.
    """
    # Only the base name is handed to convert().
    # NOTE(review): this presumably relies on the working directory being the
    # file's folder; confirm with the caller.
    docx_name = os.path.basename(self.filename)
    self.docx_filepath = docx_name
    convert(self.docx_filepath)

    self.raw_filename = os.path.basename(self.filename)
    return self.slicing_original_documents_pdf()
def process_docx(path):
    """Convert a DOCX to PDF, run ``process_pdf`` on it and delete the PDF.

    Args:
        path: Path of the DOCX file.

    Returns:
        Whatever ``process_pdf`` returns for the generated PDF.
    """
    convert(path)
    # Swap only the extension. str.replace("docx", "pdf") also rewrote any
    # "docx" inside folder names, and for a path without "docx" it left the
    # name unchanged so the input itself was later removed.
    new_path = os.path.splitext(path)[0] + ".pdf"
    print(new_path)
    output_string = process_pdf(new_path)
    os.remove(new_path)
    return output_string
def convert_docx_to_pdf(docx_file):
    """Convert ``docx_file`` and record it in ``temporary_docx_pdf_files``."""
    convert(docx_file)

    # Short pause that works around a problem seen when converting small
    # .docx files.
    pause_seconds = 1
    time.sleep(pause_seconds)

    temporary_docx_pdf_files.append(docx_file)
def main(self):
    """Mail-merge one DOCX per data row and convert each one to PDF.

    Prints the template's merge fields, loads the data via
    ``self.read_files()`` and then, for every row of ``self.df_data``, merges
    the row into ``self.document_1``, writes ``<path>xlsx/<姓名>.docx`` and
    converts it to ``<path><性别>/<姓名>.pdf``. The template is closed once
    all rows are processed.
    """
    print(self.document_1.get_merge_fields())
    self.read_files()

    docx_dir = self.path + 'xlsx/'
    for _, n_row in self.df_data.iterrows():
        # Named merge_values so the builtin `dict` is no longer shadowed.
        merge_values = {
            "姓名": str(n_row['姓名']),
            "年龄": str(n_row['年龄']),
            "生日": str(n_row['生日']),
            "folder": 'folder1'
        }
        self.document_1.merge(**merge_values)

        # exist_ok replaces the check-then-create pair.
        os.makedirs(docx_dir, exist_ok=True)
        file_docx = docx_dir + n_row['姓名'] + '.docx'
        self.document_1.write(file_docx)

        # PDFs are grouped into one folder per value of the '性别' column.
        pdf_dir = self.path + n_row['性别'] + '/'
        os.makedirs(pdf_dir, exist_ok=True)
        file_pdf = pdf_dir + n_row['姓名'] + '.pdf'
        convert(file_docx, file_pdf, keep_active=True)

    self.document_1.close()
def download_file(filename, pi_no):
    """Render the template for one ``pi_no`` and convert the result to PDF.

    Fetches the records for ``pi_no`` from the remote service, renders the
    DOCX template ``./<filename>`` with each item whose ``pi_no`` matches,
    saves it as ``./static/file.docx`` and converts it to
    ``./static/file.pdf``.

    Args:
        filename: Template file name, relative to the current directory.
        pi_no: Identifier to look up; compared as a string.

    Returns:
        The rendered ``upload_file.html`` template.
    """
    print(pi_no)
    # Pass the value through `params` so it is URL-encoded instead of being
    # pasted into the query string.
    response = requests.get(
        'http://151.80.237.86:1251/ords/zkt/pi_doc/doc',
        params={'pi_no': str(pi_no)})
    data = response.json()

    doc = DocxTemplate("./{}".format(filename))
    pythoncom.CoInitialize()

    wanted = '{}'.format(pi_no)
    for item in data['items']:
        if item['pi_no'].strip() == wanted:
            doc.render(item)
            doc.save('./static/file.docx')
            convert('./static/file.docx', './static/file.pdf')
            flash("Your file has been created, Now you can download!!")
    return render_template('upload_file.html')
def saveaspdf(self, parent, fname):
    """Convert the document named by the global ``tx`` to ``tx + ".pdf"``.

    ``parent`` and ``fname`` are accepted but not used by this method.
    """
    from docx2pdf import convert

    print(tx)
    pdf_target = tx + ".pdf"
    convert(tx, pdf_target)
def convert_docx_to_pdf(docx_filename: str, output_pdf_filename: str) -> str:
    """Convert a temporary DOCX to PDF when the platform allows it.

    Args:
        docx_filename: Path of the DOCX that was generated.
        output_pdf_filename: Path the PDF should be written to.

    Returns:
        ``output_pdf_filename`` after a successful conversion (the DOCX is
        deleted unless ``convert_vars.args.debug`` is set), otherwise
        ``docx_filename`` after logging a warning.
    """
    logging.debug(
        f" --- docx_file = {docx_filename}\n--- starting pdf conversion now.")

    converted: bool = False
    error_detail: str = ""
    if convert_vars.can_convert_to_pdf:
        # Use docx2pdf for windows and mac with MS Word installed
        try:
            docx2pdf.convert(docx_filename, output_pdf_filename)
        except Exception as e:
            error_detail = f"\nConvert error: {e}"
        else:
            converted = True

    if not converted:
        warning_text: str = (
            "Error. A temporary docx file was created in the output folder but cannot be converted "
            f"to pdf (yet) on operating system: {sys.platform}\n"
            "This does work on Windows and Mac with MS Word installed.")
        logging.warning(warning_text + error_detail)
        return docx_filename

    # If not debugging then delete the temp file
    if not convert_vars.args.debug:
        os.remove(docx_filename)
    return output_pdf_filename
def openFile():
    """Ask the user for files and convert each selected one.

    The selection is stored in the module-level ``filez``. ``'done'`` is
    printed only when at least one file was chosen.
    """
    global filez
    filez = askopenfilenames(title='Choose a file')
    if filez:
        # Iterate the selection directly instead of indexing via range(len()).
        for selected in filez:
            convert(selected)
        print('done')
def doc_to_pdf(path=r'C:\小现代化(1)'):
    """Convert every .docx/.doc file found under ``path`` to PDF.

    ``.doc`` files are first turned into DOCX with ``doc2docx``. Each PDF is
    written next to its DOCX with the same base name.

    Args:
        path: Folder to scan with ``get_filelist``. Defaults to the folder
            that used to be hard-coded.
    """
    import os

    file_list = get_filelist(path)

    # Collect the DOCX files to convert.
    docx_files = []
    for file in file_list:
        if file.endswith(".docx"):
            docx_files.append(file)
        elif file.endswith(".doc"):
            docx_files.append(doc2docx(file))

    for docx in docx_files:
        print(docx)
    print('docx文件数:', len(docx_files))

    # Convert the files.
    print('-----------------开始转换 docx 文件-----------------')
    for file in docx_files:
        print(file)
        # splitext swaps only the final extension; split('.docx')[0] cut the
        # path at the first ".docx" anywhere, including folder names.
        convert(file, os.path.splitext(file)[0] + '.pdf')
def fill_template(self):
    """Render the first-page template and return the path of the PDF.

    The template ``<root>/src/pdf_creation/template.docx`` is rendered with
    ``self.var_template()``, saved as ``First_page.docx`` under
    ``<root>/reports/Daily_PDF``, converted, and the DOCX is removed.

    Returns:
        The normcased path ``<root>/reports/Daily_PDF/First_page.pdf``.
    """
    context = self.var_template()
    template = DocxTemplate(
        os.path.normpath(f'{self.root}/src/pdf_creation/template.docx'))
    template.render(context)

    # Date parts are still computed; nothing below reads them.
    date_str, year, month = (
        self.date.strftime(fmt) for fmt in ("%Y-%m-%d", "%Y", "%m - %B"))

    file_dir = f"{self.root}/reports/Daily_PDF"
    file_name = "First_page"
    file_fct.creation_folder('', [file_dir])

    stem = f'{file_dir}/{file_name}'
    docx_path = os.path.normcase(f'{stem}.docx')
    template.save(docx_path)
    convert(docx_path)
    os.remove(docx_path)
    return os.path.normcase(f'{stem}.pdf')
def generate_contract_list(request, *args, **kwargs):
    """Render the contract-status template and return it as a PDF response.

    Args:
        request: Incoming request (not read here).
        **kwargs: Must contain ``contract_number_from``,
            ``contract_number_to``, ``contract_status`` and
            ``contract_zone``; a missing key raises ``KeyError``.

    Returns:
        ``FileResponse`` streaming the generated PDF.
    """
    base_url = MEDIA_ROOT + '/contract/template/'
    download_dir = MEDIA_ROOT + '/contract/download/'

    # Looked up so that a missing URL argument still fails fast; the values
    # are not yet passed to the template.
    contract_number_from = kwargs['contract_number_from']
    contract_number_to = kwargs['contract_number_to']
    contract_status = kwargs['contract_status']
    contract_zone = kwargs['contract_zone']

    template_name = base_url + 'CNT_ContractStatus.docx'
    file_name = "contract_list_report"
    context = {
        'customer': "TEST",
    }
    tpl = DocxTemplate(template_name)
    tpl.render(context)

    # Save and convert through the SAME absolute paths. The DOCX used to be
    # written under MEDIA_ROOT but converted from a cwd-relative,
    # backslash-separated "media\..." path, which only matched when the
    # process ran from the project root on Windows.
    docx_file = path.abspath(download_dir + file_name + ".docx")
    pdf_file = path.abspath(download_dir + file_name + ".pdf")
    tpl.save(docx_file)
    convert(docx_file, pdf_file)
    return FileResponse(open(pdf_file, 'rb'), content_type='application/pdf')
def proses():
    """Fill the certificate template from the form fields and convert it.

    Reads the entry widgets, renders ``Python.docx`` with those values,
    saves the result as ``CPA2021FTR <ref> <name> <ship>.docx``, converts it
    into the fixed output location and adds a completion label to
    ``cen_ut``.
    """
    # Read the widgets in their original order and normalise to str once.
    nama = str(asma.get())
    nomor_ref = str(ref.get())
    ttl_text = str(ttl.get())
    ttt_text = str(ttt.get())
    kapal = str(prau.get())
    durasi = str(dowo.get())
    model_ecdis = str(selhas())

    tpl = DocxTemplate('Python.docx')
    context = {
        "nama": "%s" % nama,
        "TTL": "%s" % ttl_text,
        "ref": "Ref: CPA-2021-FTR-%s" % nomor_ref,
        "TTT": "%s" % ttt_text,
        "ecdis": "ECDIS Model %s" % model_ecdis,
        "ship": "Onboard %s" % kapal,
        "dura": "%s" % durasi,
    }

    # " <ref> <name> <ship>": each part is preceded by a single space.
    jenber = " ".join(("", nomor_ref, nama, kapal))
    docx_name = "CPA2021FTR%s.docx" % jenber

    tpl.render(context)
    tpl.save(docx_name)
    convert(
        docx_name,
        "C:/Users/QylaMeisya/Documents/Sertifikat Training Familiarrisasi ECDIS Photosop/sertfampy"
    )

    label = teka.Label(
        cen_ut,
        text="Sert. %s Selesai ! Input Entry lagi / Tutup Program !" % nama)
    label.pack()
def make_custom_report(input_map, template):
    """Fill ``template`` from ``input_map`` and convert the result to PDF.

    The filled document and the PDF target are the module-level
    ``TEMP_REPLACED_DOC`` and ``PDF_FILE_NAME``.
    """
    # Step 1: substitute the words in the template.
    replace_words_in_document(input_map, template)

    # Step 2: convert the substituted document.
    filled_doc, pdf_target = TEMP_REPLACED_DOC, PDF_FILE_NAME
    convert(filled_doc, pdf_target)
def certificate(name, fio, date1, date2, choose):
    """Create one certificate PDF per person and bundle them in ``sample.zip``.

    Args:
        name: Programme name placed in the template.
        fio: Person names separated by ``"\\r\\n"``.
        date1: First date placed in the template.
        date2: Second date placed in the template.
        choose: ``"Base"`` produces ``<person>_base`` files; any other value
            produces ``<person>_project`` files.
    """
    f = fio.split("\r\n")
    print(f)

    # The two former branches differed only in this file-name suffix.
    suffix = "base" if choose == "Base" else "project"

    # The context manager closes the archive even if a conversion fails.
    with ZipFile('sample.zip', 'w') as zipObj:
        for raw_person in f:
            person = raw_person.rstrip()
            if not person:
                # A blank line (e.g. trailing newline) used to produce a
                # nameless "_base" / "_project" certificate.
                continue
            doc = DocxTemplate("Sertifikat_ryzhiiy2.docx")
            context = {
                'Program': name,
                'Name': person,
                'Date1': date1,
                'Date2': date2,
                'Choose': choose
            }
            doc.render(context)
            docx_name = "{}_{}.docx".format(person, suffix)
            doc.save(docx_name)
            convert(docx_name)
            zipObj.write("{}_{}.pdf".format(person, suffix))
def docxToPdf(inDir, file):
    """Convert ``inDir/file`` into ``inDir/converted/<name>.pdf``.

    Args:
        inDir: Folder containing the DOCX.
        file: File name of the DOCX inside ``inDir``.

    Returns:
        Path of the PDF that ``convert`` was asked to write.
    """
    convertDir = inDir + "/converted"
    os.makedirs(convertDir, exist_ok=True)

    # splitext drops only the final extension; split(".")[0] turned
    # "report.v2.docx" into "report.pdf".
    opFileName = os.path.splitext(file)[0] + ".pdf"
    output_path = os.path.join(convertDir, opFileName)
    convert(os.path.join(inDir, file), output_path)
    return output_path
def word2pdf(files_path):
    """Convert the matching docx files of a folder to same-named pdf files.

    Only entries whose name ends with ``docx`` and contains ``转让登记表``
    are converted; each PDF is written into the same folder.
    """
    for entry in os.listdir(files_path):
        if not entry.endswith('docx') or '转让登记表' not in entry:
            continue
        pdf_name = os.path.splitext(entry)[0] + ".pdf"
        source = os.path.join(files_path, entry)
        target = os.path.join(files_path, pdf_name)
        convert(source, target)
def convert_single(src_file, dst_file):
    """
    Convert a single file from Word to PDF.

    :param src_file: source file, xxx.docx
    :param dst_file: target file, xxx.pdf
    :return: None
    """
    source, target = src_file, dst_file
    docx2pdf.convert(source, target)
def convert_doc_to_pdf(doc_file):
    """Convert ``doc_file`` to a PDF with the same base name.

    Returns:
        Path of the PDF: ``doc_file`` with its extension replaced by ``.pdf``.
    """
    pdf_file = os.path.splitext(doc_file)[0] + '.pdf'
    docx2pdf.convert(doc_file, pdf_file)
    return pdf_file
def wordToPdf(requireDocPause):
    """Convert ``outputDocName`` to ``outputPDFName``.

    Args:
        requireDocPause: When truthy, first wait for the user to open,
            re-save and close the document, then press enter.
    """
    print("Converting from Word doc to pdf...")
    if requireDocPause:
        prompt = (
            "Please open " + outputDocName +
            ", save over the original output file, then close it and press enter"
        )
        input(prompt)
    convert(outputDocName, outputPDFName)
def doc_pdf(self):
    """
    Convert ``self.src`` to PDF.

    When ``self.dst`` is the empty string only the source is passed to
    ``convert``; otherwise ``self.dst`` is passed as the second argument.

    :return: None
    """
    if self.dst != "":
        convert(self.src, self.dst)
        return
    convert(self.src)
def convert_pdf(input_dir="output/", output_dir="output_pdf/"):
    """
    Convert all the docx files stored in a folder to PDF and save them in
    the given folder.

    :param input_dir: folder name, default: output/ (the output folder)
    :param output_dir: folder name, default: output_pdf/ (the output_pdf folder)
    :return: None; the PDFs for the input folder are saved in the output folder
    """
    source_dir, target_dir = input_dir, output_dir
    convert(source_dir, target_dir)
def making_a_doc_function(request):
    """Build a DOCX report, convert it to PDF and return the PDF.

    The report is built from ``saf_defect_table`` row ``pk=3``. When the row
    does not exist the document contains only a "no text" heading. Otherwise
    it contains the row's two numbers as headings, a bar chart of the
    uploaded ``pftest4.csv`` (if that upload exists) and a horizontal bar
    chart of the two numbers.

    Args:
        request: Incoming request (not read here).

    Returns:
        ``FileResponse`` wrapping the open ``output.pdf``.
    """
    doc = docx.Document()
    pk = 3
    if not saf_defect_table.objects.filter(pk=pk).exists():
        doc.add_heading("no text")
        doc.save('thisisdoc.docx')
    else:
        saf_defect_table_variable = saf_defect_table.objects.get(pk=pk)
        doc.add_heading(str(saf_defect_table_variable.saf_number_in_table))
        doc.add_heading(str(saf_defect_table_variable.defect_number_in_table))

        # Plot built from a CSV file that was uploaded to the DB table.
        if FileUpload.objects.filter(
                attribution_file_upload='pftest4.csv').exists():
            life3 = pd.read_csv(
                FileUpload.objects.get(attribution_file_upload='pftest4.csv').
                attribution_file_upload)
            life3.plot(kind='bar', x='status', y='item')
            plt.title('my plot title')
            plt.xlabel('years')
            plt.ylabel('Age')
            # In-memory buffer so the image never touches the disk; the
            # context manager closes it even if add_picture fails.
            with io.BytesIO() as memfile:
                plt.savefig(memfile)
                doc.add_picture(memfile, width=Inches(4))
            plt.clf()  # clear the figure before drawing the next plot

        # Plot built from the numbers already stored in the table row.
        y1 = saf_defect_table_variable.saf_number_in_table
        y2 = saf_defect_table_variable.defect_number_in_table
        korea_scores = (y1, y2)
        col_count = 2
        bar_width = .2
        index = np.arange(col_count)
        plt.barh(index, korea_scores, bar_width, alpha=.4, label="Korea")
        with io.BytesIO() as memfile2:
            plt.savefig(memfile2)
            doc.add_picture(memfile2, width=Inches(4))
        # The second plot was never cleared, so its bars stayed on the
        # current figure and leaked into the next call.
        plt.clf()
        doc.save('thisisdoc.docx')

    # Convert the generated docx into a pdf file.
    convert("thisisdoc.docx", "output.pdf")
    pdf = open('output.pdf', 'rb')
    response = FileResponse(pdf)
    return response
#----------------end of section 2--------------------------------------------
def get_path_value(self):
    """Convert the files of every sub-folder to PDF and merge them per folder.

    Reads the root folder from ``self.path_value``. For each sub-folder,
    images (.jpg/.jpeg/.png) are saved as PDFs next to the originals, .docx
    files are passed to ``convert``, and afterwards every .pdf in the
    sub-folder is re-saved and appended to one merged PDF written to
    ``<root>\\<sub-folder> - <root name>.pdf``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    known_suffixes = image_suffixes + (".docx", ".pdf")

    self.path = self.path_value.get()
    self.folder_name = self.path.rsplit("\\", 1)[1]

    for folder_entry in os.scandir(self.path):
        # Skip plain files in the root (including merged PDFs written by an
        # earlier run); os.scandir() on them raised NotADirectoryError.
        if not folder_entry.is_dir():
            continue
        self.folder_name = Path(folder_entry).stem
        mergedObject = PdfFileMerger()

        # Pass 1: turn images and Word documents into PDFs.
        for file_entry in os.scandir(folder_entry):
            self.file_name = Path(file_entry).stem
            print("Filenname", self.file_name)
            if not file_entry.is_file():
                continue
            if not file_entry.path.endswith(known_suffixes):
                print("Was ist das? Konvertier ich nicht " + file_entry.path)
            elif file_entry.path.endswith(image_suffixes):
                image = Image.open(file_entry.path)
                i = image.convert('RGB')
                i.save(os.path.splitext(file_entry.path)[0] + ".pdf")
            elif file_entry.path.endswith(".docx"):
                convert(file_entry)

        # Pass 2: re-scan so PDFs created in pass 1 are picked up too.
        for file_entry in os.scandir(folder_entry):
            if file_entry.path.endswith(".pdf") and file_entry.is_file():
                # Re-save the PDF in place before handing it to the merger.
                with Pdf.open(file_entry.path,
                              allow_overwriting_input=True) as pdf:
                    pdf.save(file_entry.path)
                # NOTE(review): "rb" is passed as the reader's second
                # positional argument, not as a file mode; kept unchanged.
                mergedObject.append(PdfFileReader(file_entry.path, "rb"))

        mergedObject.write(self.path + "\\" + Path(folder_entry).stem +
                           " - " + Path(self.path).stem + ".pdf")
        mergedObject.close()  # release the input handles held by the merger
def ConvertToPdfFromFile(file_path, output_path):
    """Convert an image, Word, text, CSV or Excel file to PDF.

    The converter is chosen from the file extension (case-insensitive):
    jpg/jpeg/png via img2pdf, doc/docx via docx2pdf, txt via FPDF, csv via
    pdfkit, xlsx/xlsm/xls via Excel automation.

    Args:
        file_path: Path of the file to convert.
        output_path: Path of the PDF to write.

    Returns:
        None. For an unsupported extension a message is printed and nothing
        is written.
    """
    import os

    # Take the extension from the end of the path; split(".")[1] picked the
    # wrong piece for paths such as "C:/Users/john.doe/a.docx" or "a.b.png".
    img_type = os.path.splitext(file_path)[1][1:].lower()

    if img_type in ("jpg", "jpeg", "png"):
        from PIL import Image
        # Opening with PIL checks that the file really is an image.
        image = Image.open(file_path)
        try:
            pdf_bytes = img2pdf.convert(image.filename)
            with open(output_path, "wb") as out_file:
                out_file.write(pdf_bytes)
        finally:
            image.close()
    elif img_type in ("doc", "docx"):
        from docx2pdf import convert
        convert(file_path, output_path)
    elif img_type == "txt":
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=15)
        # One centred cell per line of the text file.
        with open(file_path, "r") as text_file:
            for x in text_file:
                pdf.cell(200, 10, txt=x, ln=1, align='C')
        pdf.output(output_path)
    elif img_type == "csv":
        config = pdfkit.configuration(
            wkhtmltopdf=
            r'C:\Users\A118090\AppData\Local\Programs\Python\Python36\Lib\site-packages\wkhtmltopdf'
        )
        pdfkit.from_file(file_path, output_path, configuration=config)
    elif img_type in ("xlsx", "xlsm", "xls"):
        from win32com import client
        xlApp = client.Dispatch("Excel.Application")
        books = xlApp.Workbooks.Open(file_path)
        # Select every worksheet (1-based indices) so the export covers the
        # whole workbook.
        ws_index_list = list(range(1, books.Worksheets.Count + 1))
        books.WorkSheets(ws_index_list).Select()
        books.ActiveSheet.ExportAsFixedFormat(0, output_path)
        books.Close(True, file_path)
        print("file_path closed..")
    else:
        # This message used to be assigned and discarded, after which the
        # success line below was printed although nothing was converted.
        text = "file format does not match"
        print(text)
        return

    print("Converted file: " + file_path + "\n\tto: " + output_path)
def docx2pdf():
    """Convert the DOCX named by ``sourcevar`` and report progress in the UI.

    The PDF is written next to the source, with the extension replaced by
    ``.pdf``. Progress is shown on ``resultlab`` and printed to stdout.
    """
    import os

    resultlab.configure(text= 'Please Wait... Converting Docx to PDF')
    print('Converting Docx to PDF')
    sleep(1)

    # Swap only the final extension. replace('.docx', '.pdf') rewrote every
    # occurrence in the path, and left the name unchanged (destination ==
    # source) when the suffix was spelled differently.
    destvar = os.path.splitext(sourcevar)[0] + '.pdf'
    convert(sourcevar, destvar)

    print('\nConverted Successfully!!\n')
    print(f'File Sucessfully Saved as {destvar}')
    resultlab.configure(text= f'File Sucessfully Saved as {destvar}')
def convertToPdf(self, path=None):
    """Convert word document to pdf.

    Args:
        path: Document to convert. When falsy (``None`` or empty),
            ``self.path`` is converted instead.
    """
    # The former `x.replace('.docx', '.pdf')` statements discarded their
    # result (strings are immutable), so they were dead code and are gone.
    # convert() is called with the source only, exactly as before.
    convert(path if path else self.path)
def convertAndDelete():
    """Convert the DOCX files of the working directory, then delete them.

    PDFs go to ``<cwd>/dataPDF``. A DOCX is deleted only when a PDF with the
    same base name exists in that folder afterwards.
    """
    directory = os.getcwd()
    output_dir = str(directory) + "/dataPDF"

    # Make sure the target folder exists before the batch conversion.
    os.makedirs(output_dir, exist_ok=True)
    convert(str(directory), output_dir)

    for file in os.listdir(directory):
        if not file.endswith(".docx"):
            continue
        # NOTE(review): assumes the converter names each PDF
        # "<docx base name>.pdf"; confirm against docx2pdf.
        pdf_path = os.path.join(output_dir, os.path.splitext(file)[0] + ".pdf")
        if os.path.exists(pdf_path):
            # Sources used to be removed unconditionally, losing any document
            # whose conversion had not produced a PDF.
            os.remove(os.path.join(directory, file))