def write_images_to_ppt(images, title, file=''):
    """Append one picture slide per image to a presentation and save it.

    Args:
        images: Iterable of image sources; each is passed unchanged to
            ``shapes.add_picture``.
        title: Base name for a new deck. When ``file`` is empty the deck is
            saved as ``title + '.pptx'``.
        file: Path of an existing presentation to extend and overwrite. The
            default ``''`` starts from a new presentation instead.
    """
    if file == '':
        deck, target = pptx.Presentation(), title + '.pptx'
    else:
        deck, target = pptx.Presentation(file), file

    for picture_source in images:
        # Layout index 6 is used for every added slide.
        page = deck.slides.add_slide(deck.slide_layouts[6])
        # Each picture is anchored one inch from the left and top edges.
        offset = pptx.util.Inches(1)
        page.shapes.add_picture(picture_source, offset, offset)

    deck.save(target)
def file_response(request):
    """Fill slides 28 and 32 of the template deck and return it as a file.

    POST requests are acknowledged with an empty 200 response. For any other
    method the query string must carry ``doc_id``, ``family_foundation_1``,
    ``family_foundation_2``, ``policy_years`` (comma-separated integers) and
    ``paymentType``.

    The deck is built entirely in memory, so concurrent requests no longer
    overwrite one shared ``Family Planning Presentation.pptx`` on disk.

    Args:
        request: The incoming Django request.

    Returns:
        ``HttpResponse(status=400)`` when a numeric parameter is missing or
        malformed, ``HttpResponse(status=404)`` when the Excel document cannot
        be resolved, otherwise a ``FileResponse`` streaming the generated deck.
    """
    import io

    if request.method == "POST":
        return HttpResponse(status=200)

    query = request.GET
    try:
        foundation_1 = float(query.get("family_foundation_1"))
        foundation_2 = float(query.get("family_foundation_2"))
        policy_years = [int(x) for x in query.get("policy_years").split(",")]
    except (TypeError, ValueError, AttributeError):
        # A missing parameter is None (TypeError / AttributeError); a
        # malformed number raises ValueError. Both are client errors.
        return HttpResponse(status=400)

    try:
        workbook_path = ExcelDoc.objects.get(id=query.get("doc_id")).doc.path
    except (ExcelDoc.DoesNotExist, ValueError):
        return HttpResponse(status=404)

    prd = PresentationRequestData(
        workbook_path,
        foundation_1,
        foundation_2,
        policy_years,
        query.get("paymentType"))

    filename = "Family Planning Presentation.pptx"
    prs = pptx.Presentation('ppt/templates/sample_ppt_template_1.pptx')

    # HANDLE 28
    py1 = prd.slide_28.first_policy_year_data
    py2 = prd.slide_28.second_policy_year_data
    text_box_1_text = (
        f"Age {prd.slide_28.age_1} = {py1['tlv']} Total Legacy Value "
        f"({py1['nth']} Net to heirs +\n {py1['family foundation']} to Family Foundation)*\n"
        f"Total out of pocket at age {prd.slide_28.age_1}= {py1['toop']} "
        f"(Annually at approx. {py1['annually']})"
    )
    # NOTE(review): the original literal held a raw line-separator control
    # character right after "approx. ". It is written here as an explicit
    # vertical tab (PowerPoint's soft line break) -- confirm against the
    # original file.
    text_box_2_text = (
        f"Age {prd.slide_28.age_2} = {py2['tlv']} Total Legacy Values "
        f"({py2['nth']} net heirs + {py2['family foundation']} to Family Foundation)*\n"
        f" Cost out of pocket approx. {py2['coop']} "
        f"(Annually at approx. \x0b{py2['annually']})"
    )
    checkrecursivelyfortext(prs.slides[27].shapes, [], text_box_1_text,
                            text_box_2_text)

    # The original saved and reopened the deck between the two slides. That
    # round trip is kept, but through memory instead of a shared file.
    intermediate = io.BytesIO()
    prs.save(intermediate)
    intermediate.seek(0)
    prs = pptx.Presentation(intermediate)

    # HANDLE 32
    py1 = prd.slide_32.first_policy_year_data
    py2 = prd.slide_32.second_policy_year_data
    text_box_1_text = (
        f"Age {prd.slide_32.age_1} = {py1['tlv']} Total Legacy Value* "
        f"({py1['nth']} Net To Heirs + {py1['family foundation']} to\n"
        f" Family Foundation)*\n Total Collateral Today {py1['tct']} "
        f"(Annually at {py1['annually']})"
    )
    text_box_2_text = (
        f"Age {prd.slide_32.age_2} = {py2['tlv']} Total Legacy Values "
        f"({py2['nth']} Net To Heirs +\n {py2['family foundation']} to Family Foundation)*\n"
        f" Total Collateral Today {py2['tct']} "
        f"(Annually at {py2['annually']})"
    )
    checkrecursivelyfortext(prs.slides[31].shapes, [], text_box_1_text,
                            text_box_2_text)

    output = io.BytesIO()
    prs.save(output)
    output.seek(0)
    # The explicit filename gives the response the same download name the
    # on-disk file used to provide.
    return FileResponse(output, filename=filename)
def plot_comments():
    """Append a full-width picture slide to ``chart-01.pptx`` per matched image.

    Opens ``chart-01.pptx``, sets the slide width to 9144000 EMU, adds one
    slide (layout index 6) for every path matched by the glob pattern, places
    an empty text box and the image scaled to the slide width, then saves the
    deck back over ``chart-01.pptx``.

    The body runs on both Python 2 and Python 3: ``print`` is called as a
    function and the text-box height uses floor division.
    """
    prs = pptx.Presentation('chart-01.pptx')
    prs.slide_width = 9144000
    pic_left = int(prs.slide_width * 0.001)
    pic_top = int(prs.slide_height * 0.01)
    pic_width = int(prs.slide_width)
    for image_path in glob.glob("python/Capture5.PNG"):
        print(image_path)
        slide = prs.slides.add_slide(prs.slide_layouts[6])
        # Floor division keeps the EMU height an integer on Python 3 as well.
        slide.shapes.add_textbox(0, 0, prs.slide_width, pic_top // 2)
        # NOTE(review): scipy.misc.imread no longer exists in current SciPy
        # releases; it is only used here to read the pixel dimensions.
        img = scipy.misc.imread(image_path)
        # Scale the height so the picture keeps its aspect ratio at full width.
        pic_height = int(pic_width * img.shape[0] // img.shape[1])
        slide.shapes.add_picture(image_path, pic_left, pic_top, pic_width,
                                 pic_height)
    prs.save('chart-01.pptx')
def pic_to_ppt(filename):
    """Build ``<filename>/<folder name>.pptx`` from the numbered PNGs in a folder.

    Precondition: the part of every ``.png`` file name before the first dot
    must be an integer, because the pictures are ordered by that number. A
    non-numeric name makes the sort raise ``ValueError``.

    Args:
        filename: Folder holding the pictures. It is created when missing.
            Only its top level is scanned.
    """
    if not os.path.exists(filename):
        # makedirs also handles a nested folder path.
        os.makedirs(filename)

    ppt = pptx.Presentation()
    names = [n for n in next(os.walk(filename))[2] if n.endswith('.png')]
    names.sort(key=lambda item: int(item.split('.')[0]))

    for name in names:
        slide = ppt.slides.add_slide(ppt.slide_layouts[1])
        slide.shapes.add_picture('{}/{}'.format(filename, name), Inches(0),
                                 Inches(0), Inches(10), Inches(7.5))

    # Name the deck after the last path component only, so a nested or
    # trailing-slash folder still yields a valid path.
    deck_name = os.path.basename(os.path.normpath(filename))
    fname = '{}/{}.pptx'.format(filename, deck_name)
    ppt.save(fname)
    print('生成的文件在 {} 文件夹下的 {}.pptx 中'.format(filename, deck_name))
def make_pptx(images_dir_path,
              labels_path,
              output_file_path,
              in_log=True,
              target_long_edge_len=500):
    """Create a deck with one numbered slide per labelled image.

    Each row of the labels file is ``<image path> <ratio>``. The image is
    added, passed to ``stretch`` with its ratio, rescaled so its longer edge
    equals ``px_to_emu(target_long_edge_len)``, and placed at the bottom-left
    corner of the slide.

    Args:
        images_dir_path: NOTE(review): not used by this function; image paths
            are taken from the labels file as-is. Confirm whether they were
            meant to be joined with this directory.
        labels_path: Text file readable by ``np.loadtxt`` with two columns.
        output_file_path: Where the presentation is saved.
        in_log: Forwarded to ``stretch``.
        target_long_edge_len: Desired long-edge length, converted with
            ``px_to_emu``.
    """
    prs = pptx.Presentation()
    slide_layout = prs.slide_layouts[5]  # title only layout
    # ndmin=2 keeps a one-line labels file as a single (path, ratio) row
    # instead of a flat array that cannot be unpacked.
    labels = np.loadtxt(labels_path, str, ndmin=2)
    for number, (image_path, ratio) in enumerate(labels, start=1):
        slide = prs.slides.add_slide(slide_layout)
        slide.shapes[0].text = str(number)

        picture = slide.shapes.add_picture(image_path, 0, 0)
        stretch(picture, float(ratio), in_log=in_log)

        actual_long_edge_len = max(picture.height, picture.width)
        scale = px_to_emu(target_long_edge_len) / float(actual_long_edge_len)
        picture.height = int(picture.height * scale)
        picture.width = int(picture.width * scale)
        picture.left = 0
        picture.top = prs.slide_height - picture.height
    prs.save(output_file_path)
def extract(self, filename, **kwargs):
    """Apply a language's text replacements to every run and save a copy.

    The replacement pairs come from ``processCors(language).cor_list`` and are
    applied longest ``"from"`` first. Each run's text is rewritten in place,
    so the run keeps all of its formatting. The result is saved next to the
    input as ``<name>_converted.pptx``.

    Args:
        filename: Path of the presentation to read.
        **kwargs: ``language`` selects the replacement table. When it is
            absent or ``None`` the text is copied unchanged.

    Returns:
        The (possibly converted) text of every run, joined by blank lines.
    """
    import os

    language = kwargs.get("language")
    cors = []
    if language is not None:
        cors = processCors(language).cor_list
        cors.sort(key=lambda x: len(x["from"]), reverse=True)

    converted_filename = os.path.splitext(filename)[0] + '_converted.pptx'
    presentation = pptx.Presentation(filename)
    text_runs = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    original = run.text
                    converted = original
                    for kv in cors:
                        converted = converted.replace(kv["from"], kv["to"])
                    # Writing into the existing run preserves its font
                    # settings. The previous empty-and-re-add approach copied
                    # only five attributes and could reuse a stale colour.
                    if converted != original:
                        run.text = converted
                    text_runs.append(converted)

    presentation.save(converted_filename)
    return '\n\n'.join(text_runs)
def import_trans(self, powerpoint, translation):
    """Write translated paragraph texts into a presentation and save a copy.

    For row ``i`` of ``translation``, the paragraph addressed by ``SlideNo``,
    ``ShapeNo`` and ``ParagraphNo`` receives the ``Translation`` text in its
    first run; every further run of that paragraph is emptied. The result is
    saved as ``output/Translation_<input_pptx_name><timestamp>.pptx``.

    Args:
        powerpoint: Path (or file-like object) of the presentation to update.
        translation: Table indexable as ``translation[column][i]`` with the
            columns named above (presumably a pandas DataFrame -- confirm).
    """
    import datetime

    prs = pptx.Presentation(powerpoint)
    for i in range(len(translation)):
        slide = prs.slides[translation['SlideNo'][i]]
        shape = slide.shapes[translation['ShapeNo'][i]]
        paragraph = shape.text_frame.paragraphs[translation['ParagraphNo'][i]]
        # Replace the double marker first. Otherwise "<br><br> " is half
        # consumed by the "<br> " rule and a literal "<br>" is left behind.
        trans_text = (str(translation['Translation'][i])
                      .replace('<br><br>', '\n\n')
                      .replace('<br> ', '\n'))
        try:
            for r, run in enumerate(paragraph.runs):
                try:
                    run.text = trans_text if r == 0 else ''
                    print('working on line ', i, ' run ', r)
                except Exception:
                    # Best effort: report the run and carry on with the rest.
                    print('Run Error in line', i)
        except Exception:
            print('Error on line ', i, ': --> ', str(paragraph.text))

    # Same "YYYY-MM-DD_HHMM" stamp as before, without the removed pd.datetime.
    stamp = datetime.datetime.today().strftime('%Y-%m-%d_%H%M')
    prs.save('output/Translation_' + input_pptx_name + stamp + '.pptx')
def _replace_in_shapes(shapes, find, replace):
    """Replace every substring in ``find`` with ``replace`` in the shapes' runs."""
    for shape in shapes:
        if not shape.has_text_frame:
            continue
        for par in shape.text_frame.paragraphs:
            for run in par.runs:
                for f in find:
                    run.text = run.text.replace(f, replace)


def replaceStr(file, find, replace, fixyear=False):
    """Replace substrings in all slide and slide-layout text, then save a copy.

    Args:
        file: Path of the presentation to read.
        find: Iterable of substrings to search for. Each element is replaced
            in turn, so a plain string would be treated character by
            character.
        replace: Replacement text for every match.
        fixyear: Selects the output folder: ``output/fixedYear`` when true,
            otherwise ``output/replacedStr``.

    The copy keeps the input's base name and is written into the output
    folder, which is created when missing.
    """
    import ntpath

    prs = pptx.Presentation(file)
    for slide in prs.slides:
        _replace_in_shapes(slide.shapes, find, replace)

    for slidemaster in prs.slide_masters:
        for slide_layout in slidemaster.slide_layouts:
            # A layout's shapes already include its placeholders. A separate
            # placeholder pass would apply the replacement twice, which
            # corrupts the text when `replace` contains a `find` string.
            _replace_in_shapes(slide_layout.shapes, find, replace)

    outfolder = "fixedYear" if fixyear else "replacedStr"
    out_dir = os.path.join('output', outfolder)
    os.makedirs(out_dir, exist_ok=True)
    # ntpath.basename splits on both "\\" and "/", on every platform.
    prs.save(os.path.join(out_dir, ntpath.basename(file)))
def exchange(self):
    """Extract the text of ``self.route`` (a .pptx file) into a text file.

    The output path comes from ``self.change_type(<file name without
    extension>)``. For each slide, the contents of all ``<a:t>`` elements in
    the slide XML are concatenated, followed by ``"\\t\\n\\n"``. The text is
    taken verbatim from the XML, so entities are not unescaped. The result is
    written as UTF-8 and a summary record is passed to ``save_log_dict``.

    Any failure is logged, now including the traceback, and swallowed; the
    method always returns ``None``.
    """
    try:
        # Split the route into file name, bare name and directory.
        file_all_name = self.route.split("/")[-1]
        file_name = ".".join(file_all_name.split(".")[:-1])
        only_route = "/".join(self.route.split("/")[:-1])
        logging.info("正在提取" + " {}".format(file_all_name))
        route, save_only_route = self.change_type(file_name)

        # Open the presentation and collect the text slide by slide.
        ppt_file = pptx.Presentation(self.route)
        chunks = []
        for slide in ppt_file.slides:
            # Non-greedy, so two <a:t> elements on one line stay separate.
            fragments = re.findall(r"<a:t>(.*?)</a:t>", slide.element.xml)
            chunks.append("".join(fragments) + "\t\n\n")
        total_con = "".join(chunks)

        # open(..., "wb") exists on Python 2 and 3 (the old file() builtin
        # does not) and the with-block closes the handle even on error.
        with open(route, "wb") as f:
            f.write(total_con.encode('utf-8'))

        # Report that extraction of this file has finished.
        logging.info("{} ".format(file_all_name) + "提取结束!")
        # Record, in order: 1. file name 2. source directory 3. output
        # directory 4. source size 5. output size 6. file format.
        dict_info_list = [
            file_name, only_route, save_only_route,
            get_size(self.route),
            get_size(route), "pptx"
        ]
        save_log_dict(dict_info_list)
    except Exception:
        logging.info("{}-{}".format("抽取失败!", self.route), exc_info=True)
def build(self):
    """Assemble the matched slide images into a PPTX file and save it.

    The deck is sized ``self.width`` x ``self.height`` points. Every image
    matching ``self.slides_path + self.slide_pattern`` (natural sort order)
    gets its own slide with the picture at the top-left corner on a solid
    green background. The file is written to
    ``<root_path><slug>-<width>x<height>.pptx``.
    """
    logging.info("Generating PPTX")
    deck = pptx.Presentation()
    deck.slide_height = Pt(self.height)
    deck.slide_width = Pt(self.width)

    logging.info("Grabbing slides")
    image_paths = natsorted(glob.glob(self.slides_path + self.slide_pattern))

    logging.info("Adding slides")
    for image_path in image_paths:
        page = deck.slides.add_slide(deck.slide_layouts[6])
        page.shapes.add_picture(image_path, Pt(0), Pt(0))
        backdrop = page.background.fill
        backdrop.solid()
        backdrop.fore_color.rgb = RGBColor(0, 177, 64)  # green-screen colour
        logging.info("added slide")

    filename = "".join((self.root_path, self.slug, "-", str(self.width), "x",
                        str(self.height), ".pptx"))
    logging.info("Saving PPTX: " + filename)
    deck.save(filename)
def save_picture_to_ppt(saveImgNum, url, saveUrl, goal_path):
    """Add one captioned picture slide per screenshot to the template deck.

    Pictures are read from ``<goal_path>/<todayDate>/<n>.png`` for each ``n``
    in ``saveImgNum``. Missing or unreadable pictures are reported and
    skipped. The deck starts from ``libPath + 'template.pptx'`` and is saved
    as ``<goal_path>/<todayDate>.pptx`` when at least one picture was added.

    The body runs on both Python 2 and Python 3 (``print`` is called as a
    function).

    Args:
        saveImgNum: Iterable of picture numbers (file stems).
        url: Prefix of every slide caption.
        saveUrl: Sequence of caption suffixes, indexed by the number of
            pictures added so far.
        goal_path: Output folder, created when missing.
    """
    if not os.path.exists(goal_path):  # make sure the output folder exists
        os.makedirs(goal_path)
    goal_ppt = goal_path + '/' + todayDate + ".pptx"
    pptFile = pptx.Presentation(libPath + 'template.pptx')  # start from the template

    count = 0
    for i in saveImgNum:
        fn = goal_path + '/' + todayDate + '/' + str(i) + '.png'
        if not os.path.exists(fn):
            print(fn + " is not exits! Please Check the picture ... ")
            continue
        print(fn)

        # Read the picture before adding a slide, so an unreadable file does
        # not leave an empty slide behind (cv2.imread returns None then).
        img = cv2.imread(fn)
        if img is None:
            print(fn + " is not a readable image! Please Check the picture ... ")
            continue

        slide = pptFile.slides.add_slide(pptFile.slide_layouts[5])  # slide layout
        # NOTE(review): `count` only advances for pictures actually added. If
        # saveUrl is parallel to saveImgNum, a skipped picture shifts all
        # later captions -- confirm the intended pairing.
        slide.shapes.placeholders[0].text = url + ": " + saveUrl[count]  # caption

        sp = img.shape
        imgWidth = 9
        # Height in inches that keeps the picture's aspect ratio at 9" wide.
        imgHigth = round(float(sp[0]) / float(sp[1]), 2) * 9
        slide.shapes.add_picture(fn, Inches(0.5), Inches(1.5),
                                 Inches(imgWidth), Inches(imgHigth))
        count += 1

    if count != 0:
        # Saving once after the loop gives the same file as saving per slide.
        pptFile.save(goal_ppt)
        print("Save Pictures to " + goal_ppt + " Success! ... \n")
    else:
        print("No Pictures saved to " + goal_ppt + " Failed! ... \n")
def make_power_point(text, title):
    ''' makes a full powerpoint out of a list of paragraphs

    Every non-empty entry of ``text`` is handed to ``add_slide`` together
    with the presentation. The user is then asked on stdin whether to save.
    Answering ``y`` or ``yes`` (any letter case, surrounding whitespace
    ignored) saves the presentation to the path ``title``.
    '''
    prs = pptx.Presentation()

    for verse in text:
        if verse != '':
            add_slide(prs, verse)

    print("Save?\n")
    answer = input()
    if answer.strip().lower() in {'y', 'yes'}:
        prs.save(title)
def extract(self, filename, **kwargs):
    """Return the text of every run in the presentation.

    With the keyword ``page=True`` the text is grouped per slide and returned
    as a JSON object mapping the zero-based slide index to that slide's runs
    joined by blank lines. Otherwise all runs of all slides are joined by
    blank lines into one string.
    """
    deck = pptx.Presentation(filename)

    def run_texts(slide):
        # Text of each run of each paragraph of each text-bearing shape.
        return [
            run.text
            for shape in slide.shapes
            if shape.has_text_frame
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        ]

    if kwargs.get('page', False):
        per_slide = {
            position: '\n\n'.join(run_texts(slide))
            for position, slide in enumerate(deck.slides)
        }
        return json.dumps(per_slide)

    collected = []
    for slide in deck.slides:
        collected.extend(run_texts(slide))
    return '\n\n'.join(collected)
def get_pptx_info(path):
    """Return the core document properties of the presentation at ``path``.

    The result is a dict with one entry per core property, keyed by the
    property name, in the order listed below.
    """
    property_names = (
        'author', 'category', 'comments', 'content_status', 'created',
        'identifier', 'keywords', 'language', 'last_modified_by',
        'last_printed', 'modified', 'revision', 'subject', 'title', 'version',
    )
    core = pptx.Presentation(path).core_properties
    return {name: getattr(core, name) for name in property_names}
def parse(cls, filename: str) -> List[dict]:
    """Parse a ``.pptx`` file into one text record per slide.

    Args:
        filename (str): Path of the ``.pptx`` file.

    Raises:
        Exception: The path does not exist.

    Returns:
        List[dict]: One dict per slide with ``"page"`` (1-based slide number
        as a string) and ``"context"`` (the slide's run texts joined by blank
        lines).
    """
    if not os.path.exists(filename):
        raise Exception(f"Not Found: {filename}")

    pages = []
    with open(filename, 'rb') as stream:
        deck = pptx.Presentation(stream)
        for number, slide in enumerate(deck.slides, start=1):
            pieces = [
                run.text
                for shape in slide.shapes
                if shape.has_text_frame
                for paragraph in shape.text_frame.paragraphs
                for run in paragraph.runs
            ]
            pages.append({
                "page": str(number),
                "context": "\n\n".join(pieces),
            })
    return pages
def make_fix_pptx(model,
                  labels_path,
                  output_file_path,
                  target_long_edge_len=500):
    """Create a deck of model-corrected images, one numbered slide per label row.

    Each row of the labels file is ``<image path> <ratio>``. The image is
    distorted by ``exp(ratio)``, a centre crop of it is fed to
    ``model.predict``, and the distorted image is then corrected with the
    inverse of ``exp(prediction)``. The corrected image is written to
    ``<save_path>/<n>.jpg`` and placed at the bottom-left of slide ``n``,
    rescaled so its longer edge equals ``px_to_emu(target_long_edge_len)``.

    Args:
        model: Object whose ``predict(batch, True)`` returns the value passed
            to ``np.exp``.
        labels_path: Text file readable by ``np.loadtxt`` with two columns.
        output_file_path: Where the presentation is saved.
        target_long_edge_len: Desired long-edge length, converted with
            ``px_to_emu``.
    """
    prs = pptx.Presentation()
    slide_layout = prs.slide_layouts[5]  # title only layout
    # ndmin=2 keeps a one-line labels file as a single (path, ratio) row
    # instead of a flat array that cannot be unpacked.
    labels = np.loadtxt(labels_path, str, ndmin=2)
    for number, (image_path, ratio) in enumerate(labels, start=1):
        slide = prs.slides.add_slide(slide_layout)
        slide.shapes[0].text = str(number)

        # Distort the source image, then build the model input from it.
        img = plt.imread(image_path)
        dis_img = utility.change_aspect_ratio(img, np.exp(float(ratio)), 1)
        square = utility.crop_center(dis_img)
        resize = cv2.resize(square, (256, 256))
        crop = utility.crop_224(resize)
        # Add a leading batch axis and move the last axis to position 1.
        batch = np.transpose(crop[None, ...], (0, 3, 1, 2)).astype(np.float32)

        y = model.predict(batch, True)
        r = np.exp(y)
        fix = utility.change_aspect_ratio(dis_img, 1 / r, 1)
        fix_image_path = os.path.join(save_path, str(number)) + '.jpg'
        plt.imsave(fix_image_path, fix)

        picture = slide.shapes.add_picture(fix_image_path, 0, 0)
        actual_long_edge_len = max(picture.height, picture.width)
        scale = px_to_emu(target_long_edge_len) / float(actual_long_edge_len)
        picture.height = int(picture.height * scale)
        picture.width = int(picture.width * scale)
        picture.left = 0
        picture.top = prs.slide_height - picture.height
    prs.save(output_file_path)
def __init__(self, powerpoint, properties, options):
    """Open the presentation and wrap each of its slides.

    Every slide is wrapped as ``Slide(slide, properties, options)`` and kept
    in document order, keyed by the slide's ``slide_id``.
    """
    self.__pptx = pptx.Presentation(powerpoint)
    self.__seen = []
    self.__slides_by_id = OrderedDict(
        (source_slide.slide_id, Slide(source_slide, properties, options))
        for source_slide in self.__pptx.slides
    )
def generate_ppt(points_list):
    """Fill one template slide per point with a picture, a hyperlink and GPS text.

    Slide ``i`` of the template receives the picture
    ``pic@lng=<point[0]>,lat=<point[1]>.jpg`` from the fixed picture folder.
    Its shape at index 8 gets ``point[2]`` as click hyperlink, and its shape
    at index 9 gets ``'lng:<point[0]>lat:<point[1]>'`` appended to its text.

    Args:
        points_list: Sequence of indexable points (``[0]`` and ``[1]`` are
            used as the lng / lat values, ``[2]`` as the hyperlink address).
            The template must have at least as many slides as there are
            points.
    """
    # load a ppt template
    _prs = pptx.Presentation('slide_template_20190305_Chen.pptm')
    # folder holding the pictures to insert
    _file_dir = 'D://工作//AUDI_L3_WP1_Virtual_Drive//Beijing_Virtual_Drive//airport_express//ae_tocity//resized_full//'

    for i, point in enumerate(points_list):
        _slide = _prs.slides[i]

        _file_name = 'pic@lng=' + str(point[0]) + ',lat=' + str(
            point[1]) + '.jpg'
        _file_path = _file_dir + _file_name
        # add_picture opens the file itself. The extra Image.open() the
        # original made here was never used and never closed.
        _slide.shapes.add_picture(_file_path, 0.75 * left, 2.5 * top)

        # add hyperlink
        _hyperlink = _slide.shapes[8].click_action.hyperlink
        _hyperlink.address = str(point[2])
        print('hyperlink is ' + _hyperlink.address)

        # add GPS data
        _gps_box = _slide.shapes[9]
        _gps_box.text = _gps_box.text + 'lng:' + str(
            point[0]) + 'lat:' + str(point[1])
    # NOTE(review): the modified presentation is neither saved nor returned
    # by this function -- confirm where it is meant to be written.
def makePresentation(images: list, saveDirectory: Path):
    """Create ``images.pptx`` with one centred, slide-filling picture per image.

    For every image a temporary full-slide rectangle serves as the target
    box. The picture is scaled by the largest factor that still fits the box,
    centred in it, and the rectangle is removed again.

    Args:
        images: Image paths; each must provide ``as_posix()``.
        saveDirectory: Folder in which ``images.pptx`` is written.
    """
    deck = pptx.Presentation()
    # For reference, slide heights in EMU: 6858000 at 4:3, 5143500 at 16:9.

    for image_path in tqdm(images):
        page = deck.slides.add_slide(deck.slide_layouts[6])
        frame = page.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, 0,
                                      deck.slide_width, deck.slide_height)
        photo = page.shapes.add_picture(image_path.as_posix(), frame.left,
                                        frame.top)

        # Largest uniform scale at which the picture still fits the frame.
        fit = min(frame.width / float(photo.width),
                  frame.height / float(photo.height))
        photo.height = int(photo.height * fit)
        photo.width = int(photo.width * fit)

        # Centre the scaled picture inside the frame.
        photo.left = frame.left + ((frame.width - photo.width) // 2)
        photo.top = frame.top + ((frame.height - photo.height) // 2)

        # The rectangle was only a sizing aid; drop it from the slide XML.
        frame_xml = frame.element
        frame_xml.getparent().remove(frame_xml)

    pptxSaveLocation = saveDirectory / "images.pptx"
    deck.save(pptxSaveLocation)
    print(f"PPt successfully saved to {pptxSaveLocation.as_posix()}")
def CountPagesPDFWorking():
    """Tabulate page counts of PDFs and slide counts of PPTX files.

    The user is asked for a directory twice: first for the tree scanned for
    ``.pdf`` files, then for the tree scanned for ``.pptx`` files. The first
    rows of the table are printed after each scan.

    Returns:
        A DataFrame with the columns ``fileName``, ``fileLocation`` and
        ``pageNumber``: PDF rows first, then PowerPoint rows.
    """
    columns = ['fileName', 'fileLocation', 'pageNumber']
    rows = []

    # PDF rows
    for root, dirs, files in os.walk(filedialog.askdirectory()):
        for f in files:
            if f.endswith(".pdf"):
                location = os.path.join(root, f)
                # The with-block closes each PDF once its pages are counted.
                with open(location, 'rb') as handle:
                    page_count = PdfFileReader(handle).getNumPages()
                rows.append([f, location, page_count])
    df = pd.DataFrame(rows, columns=columns)
    print(df.head())

    # PowerPoint rows
    for root, dirs, files in os.walk(filedialog.askdirectory()):
        for f in files:
            if f.endswith(".pptx"):
                location = os.path.join(root, f)
                # Open the file that was found. Presentation() without a path
                # creates a fresh default deck, which is why the slide count
                # was wrong before.
                slide_count = len(pptx.Presentation(location).slides)
                rows.append([f, location, slide_count])
    df = pd.DataFrame(rows, columns=columns)
    print(df.head())
    return df
def list_run_text(self, pptx_path, pptx_path2):
    """Tabulate the runs of a translated deck next to the original paragraphs.

    Args:
        pptx_path: Path of the translated PowerPoint file. Its runs are
            listed via ``self.pptx_to_text_list``.
        pptx_path2: Path of the original PowerPoint file. Its paragraph texts
            fill the "Original Paragraph Text" column.

    Returns:
        The DataFrame that is also written to
        ``<self.directory>/<self.file>_revised.csv`` and printed.
    """
    prs2 = pptx.Presentation(pptx_path2)
    text_list = self.pptx_to_text_list(pptx_path)

    cols = [
        "Slide No", "Shape No", "Paragraph No", "Paragraph Text", "Run No",
        "Run Text"
    ]
    df = pd.DataFrame()
    for col_i, col in enumerate(cols):
        df[col] = [pair[col_i] for pair in text_list]
    # Editable copy of the run text.
    df["Revised Run Text"] = df["Run Text"]

    # Look up each row's paragraph in the original deck. The whole column is
    # built first and inserted once; the previous per-cell chained assignment
    # is not guaranteed by pandas to write into the frame.
    original_texts = [
        prs2.slides[slide_i].shapes[shape_i].text_frame.paragraphs[para_i].text
        for slide_i, shape_i, para_i in zip(df["Slide No"], df["Shape No"],
                                            df["Paragraph No"])
    ]
    df.insert(3, "Original Paragraph Text", original_texts)

    df.to_csv(os.path.join(self.directory, f"{self.file}_revised.csv"),
              encoding="utf-8-sig",
              index=False)
    print(df)
    return df
def ppt_to_txt(ppt_path):
    """Dump the shape texts of a presentation into ``<name>.txt``.

    The text of every shape that exposes ``.text`` is collected, passed
    through ``beautfy`` and written to ``<base name of ppt_path>.txt`` in the
    current directory. Encoding is gb18030, with UTF-8 as the fallback.

    Args:
        ppt_path: Path of the presentation. Both ``\\`` and ``/`` separators
            are understood when deriving the output name.

    Returns:
        None. When the file cannot be opened as a presentation a warning is
        printed and nothing is written.
    """
    import ntpath

    # Open the presentation.
    try:
        ppt_file = pptx.Presentation(ppt_path)
    except Exception:
        print('警告:\r\t无法处理"{}"'.format(ppt_path))
        print('\t此文件可能不是ppt文件')
        print('\t或者,你需要把该文件另保存为"Power Point pptx文档"\n')
        return

    pieces = []
    for slide in ppt_file.slides:
        for shap in slide.shapes:
            try:
                pieces.append(shap.text + '\n')
            except Exception:
                # Best effort: shapes that cannot provide text are skipped.
                continue

    # Tidy up the collected text.
    txt_content = beautfy(''.join(pieces))

    # ntpath.basename strips directories for "\\" and "/" alike, so the file
    # really lands in the current directory, as the message below says.
    ppt_name = ntpath.basename(ppt_path)
    try:
        with open(ppt_name + '.txt', 'w', encoding='gb18030') as f:
            f.write(txt_content)
    except UnicodeError:
        with open(ppt_name + '.txt', 'w', encoding='utf-8') as f:
            f.write(txt_content)
    print('\n提示:\n\t"{}" 处理成功!\n\t"{}.txt" 保存在当前目录下\n'.format(
        ppt_path, ppt_name))
def get_from_pptx(filename: str, words: dict):
    """Pass every top-level shape of every slide in ``filename`` to ``track_shape``.

    Args:
        filename: Path of the presentation to open.
        words: Dict handed unchanged to ``track_shape`` with each shape.
    """
    deck = _pptx.Presentation(filename)
    for slide in deck.slides:
        for shape in slide.shapes:
            track_shape(shape, words)
def get_pptx(self, url):
    """Fetch a presentation and collect the text of its first slide.

    Returns:
        A ``(presentation, text)`` tuple. ``text`` maps the name of each
        text-bearing shape on the first slide to that shape's text.
    """
    payload = self.get(url).content
    deck = pptx.Presentation(io.BytesIO(payload))

    first_slide_text = {}
    for shp in deck.slides[0].shapes:
        if shp.has_text_frame:
            first_slide_text[shp.name] = shp.text
    return deck, first_slide_text
def open_powerpoint_file(path_to_input):
    """Open a PowerPoint file, showing an error dialog on failure.

    Args:
        path_to_input: Path of the presentation to open.

    Returns:
        The opened presentation.

    Raises:
        IOError: The package could not be found at ``path_to_input``. The
            error carries the dialog message and the path, and is chained to
            the underlying ``PackageNotFoundError``.
    """
    try:
        return pptx.Presentation(path_to_input)
    except pptx.exc.PackageNotFoundError as err:
        refresh_error_message = "Failed to open the Powerpoint file"
        tkinter.messagebox.showerror(title="Critical error",
                                     message=refresh_error_message)
        raise IOError("{}: {}".format(refresh_error_message,
                                      path_to_input)) from err
def write_to_powerpoint(self):
    """Write ``self.message`` as the title of a one-slide deck at ``self.ppt_file``."""
    print("Write to Powerpoint...")
    deck = pptx.Presentation()  # new presentation
    first_layout = deck.slide_layouts[0]
    title_slide = deck.slides.add_slide(first_layout)  # new slide
    title_slide.shapes.title.text = self.message  # slide title
    deck.save(self.ppt_file)  # save the presentation
def _pptx_to_text(self, filename):
    """Return the concatenated text of every shape in the presentation.

    Each top-level shape of each slide is converted with
    ``self._pptx_txt_from_shape`` and the pieces are concatenated in document
    order without a separator.
    """
    import pptx

    deck = pptx.Presentation(filename)
    return "".join(
        self._pptx_txt_from_shape(shape)
        for slide in deck.slides
        for shape in slide.shapes
    )
def pres_verify(*args):
    """Sort a path into the "opens as a presentation" or "does not" list.

    Positional arguments, in order:
        path: The file to try to open with ``pptx.Presentation``.
        readable: List that receives ``path`` when opening succeeds.
        unreadable: List that receives ``path`` when opening fails.

    Returns:
        The ``(readable, unreadable)`` lists, which are modified in place.
    """
    path, readable, unreadable = args[0], args[1], args[2]
    try:
        pptx.Presentation(path)
    except Exception:
        # Any failure to open counts as "not a usable presentation".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        unreadable.append(path)
    else:
        readable.append(path)
    return readable, unreadable
def create_presentation_1():
    """Add a title slide to the template deck and save it as ``presentation_1.pptx``.

    The slide uses the template's first layout. Its title and its placeholder
    at index 1 are filled with fixed sample text.
    """
    deck = pptx.Presentation("input_data/template.pptx")
    new_slide = deck.slides.add_slide(deck.slide_layouts[0])

    new_slide.shapes.title.text = "Meaningful Title"
    new_slide.placeholders[1].text = (
        "Some text for the placeholder defined in the layout")

    deck.save("./output_data/presentation_1.pptx")
def __init__(self, filename: _Pathlike) -> None:
    """Create a template modifier for the given presentation file.

    Args:
        filename (path-like): File name or path of the template. It is
            remembered as given and opened immediately.
    """
    self._template_path = filename
    self._presentation = pptx.Presentation(self._template_path)