def _save(self):
    """Rewrite the PDF at ``self.path`` with the current metadata attached."""
    merger = PdfFileMerger()
    with open(self.path, 'rb') as file:
        merger.append(file)
    # HACK: Some Apple software includes this field when producing PDFs.
    # Its value is an array, but (at least as of version 1.7) the PDF spec
    # forbids custom document info fields from holding anything but strings,
    # and PyPDF crashes when asked to write an array there. Drop it if present.
    self._meta.pop('/AAPL:Keywords', None)
    merger.addMetadata(self._meta)
    with open(self.path, 'wb') as file:
        merger.write(file)
class PDFWriter:
    """Merge PDF and image files (.jpg/.png) into a single PDF document."""

    def __init__(self) -> None:
        self._merger = PdfFileMerger()
        # Scratch directory for image->PDF conversions; removed in save().
        self._tmp_dir = tempfile.TemporaryDirectory(dir=".")

    def merge_files(self, *files: Union[str, Path]) -> None:
        """Append the given files to the merge queue.

        Images (.jpg/.png) are converted to single-page PDFs in the
        temporary directory first.
        """
        logger.debug("Merging %d files", len(files))
        for _file in files:
            # if it's an image, convert it to pdf first
            if str(_file).lower().endswith((".jpg", ".png")):
                image = Image.open(_file)
                # convert() returns a NEW image; the original discarded the
                # result, so alpha/palette modes were never flattened to RGB.
                image = image.convert("RGB")
                # save it as a tmp file
                _file = Path(self._tmp_dir.name) / f"{Path(_file).stem}.pdf"
                logger.debug("Save image as tmp pdf file '%s'", _file)
                image.save(_file)
            with open(_file, "rb") as f:
                self._merger.append(f)

    def save(self, out_path: Union[str, Path]) -> Path:
        """Write the merged document to ``out_path`` and return its Path.

        A ``.pdf`` extension is appended when missing; the temporary
        directory is removed afterwards.
        """
        # add extension if it doesn't exist; go through str() so that a
        # Path argument does not blow up on `Path + str` concatenation
        if not str(out_path).lower().endswith(".pdf"):
            out_path = str(out_path) + ".pdf"
        logger.debug("Saving '%s' file", out_path)
        with open(out_path, "wb") as fd:
            self._merger.write(fd)
        # clean up via the TemporaryDirectory API (also disarms its finalizer,
        # unlike the previous direct shutil.rmtree call)
        self._tmp_dir.cleanup()
        logger.debug("Tmp folder '%s' deleted", self._tmp_dir.name)
        return Path(out_path)
def post(self):
    """Merge two uploaded PDFs and return the result.

    Accepts either base64-encoded documents (``pdf1_64``/``pdf2_64``) or
    regular file uploads (``pdf1``/``pdf2``). When ``type == "64"`` the
    merged PDF is returned base64-encoded in JSON; otherwise it is sent
    back as a downloadable ``application/pdf`` response.
    """
    data = self.reqparse.parse_args()
    # Check whether the files were posted as base64 or as regular uploads.
    if data["pdf1_64"] and data["pdf2_64"]:
        input1 = io.BytesIO(base64.b64decode(data["pdf1_64"]))
        input2 = io.BytesIO(base64.b64decode(data["pdf2_64"]))
        input1.seek(0)
        input2.seek(0)
    elif data['pdf1'] and data['pdf2']:
        # Bind the inputs to the posted file objects.
        input1 = data["pdf1"]
        input2 = data["pdf2"]
    else:
        return {
            'status': 'error',
            'message': 'No files found'
        }
    # In-memory output file. NOTE: PdfFileMerger takes no output argument --
    # its first parameter is `strict`, so the old PdfFileMerger(output) call
    # silently passed the buffer as the strict flag.
    output = io.BytesIO()
    merger = PdfFileMerger()
    # Merge, write into the virtual file and release the merger's resources.
    merger.append(input1)
    merger.append(input2)
    merger.write(output)
    merger.close()
    # Rewind the virtual file before reading it back.
    output.seek(0)
    print("Successfully merged")
    # If the output should be JSON + base64, encode the PDF accordingly.
    if data['type'] == "64":
        output64 = base64.b64encode(output.getvalue()).decode()
        # Return the pdf as a base64 string.
        return {
            'status': 'success',
            'message': output64
        }
    # Otherwise trigger the browser's "save file" download dialog.
    else:
        return make_response(send_file(output, mimetype="application/pdf"), 200)
def main(what):
    """Merge every PDF in directory ``what`` into ``what``/final.pdf.

    Files are appended in sorted filename order, without importing their
    bookmarks. Any existing final.pdf is excluded from the inputs.
    """
    output = f"{what}/final.pdf"
    files = sorted(
        f"{what}/{name}"
        for name in os.listdir(what)
        # endswith() instead of a substring test so e.g. "x.pdf.bak" is
        # skipped; exclude the output file itself from a previous run.
        if name.endswith(".pdf") and name != "final.pdf"
    )
    print("merging", *files)
    merger = PdfFileMerger()
    try:
        # Pass paths directly: the merger opens/reads them itself, so no
        # handles are leaked (the old version opened every file and the
        # output without ever closing them; it also built an unused
        # PdfFileReader for the first file).
        for path in files:
            merger.append(path, import_bookmarks=False)
        with open(output, "wb") as out_file:
            merger.write(out_file)
    finally:
        merger.close()
    print("Output successfully written to", output)
def post(self):
    """Merge the PDFs listed in the request body into one output file.

    Expected JSON body: ``{"input": [<paths>], "output": <path>}``.
    Returns ``({'message': 'OK'}, 200)`` on success, or a JSON object
    with the error message otherwise.
    """
    try:
        payload = request.get_json()  # renamed: don't shadow the `json` module
        files = payload['input']
        output_path = payload['output']
        merge = PdfFileMerger()
        for filename in files:
            # Validate that each file is a pdf file.
            input_path = validatePDF(filename)
            # Append the path itself: handing the merger a PdfFileReader
            # backed by a file we close before write() risks a
            # "seek of closed file" error at write time.
            merge.append(input_path)
        merge.write(output_path)
        merge.close()
        return {'message': 'OK'}, 200
    except Exception as error:
        return {'error': str(error)}
def merge_pdfs(input_files: list, page_range: tuple, output_file: str,
               bookmark: bool = True):
    """Merge a list of PDF files and save the combined result into `output_file`.

    `page_range` selects a range of pages (behaving like Python's range()
    function) from the input files, e.g. (0, 2) -> first 2 pages,
    (0, 6, 2) -> pages 1, 3, 5.

    `bookmark` -> add bookmarks to the output file to navigate directly to
    the input file section within the output file.
    """
    # strict=False -> ignore PdfReadError (Illegal Character) in sloppy inputs
    merger = PdfFileMerger(strict=False)
    try:
        for input_file in input_files:
            bookmark_name = os.path.splitext(
                os.path.basename(input_file))[0] if bookmark else None
            # `pages` controls which pages are appended from this file.
            # Pass the path so the merger manages the handle itself -- the
            # previous version opened every input (and the output) without
            # ever closing them.
            merger.append(fileobj=input_file, pages=page_range,
                          bookmark=bookmark_name)
        with open(output_file, 'wb') as out:
            merger.write(fileobj=out)
    finally:
        merger.close()
# Index each PDF by its numeric file name (e.g. "12.pdf" -> 12).
for pdf_file in glob.glob(new_pathfiles):
    f_path, f_name = os.path.split(pdf_file)
    name, _ = os.path.splitext(f_name)
    pdfs[int(name)] = pdf_file

# Consecutive boundaries in file_groups define each group's number range.
ranges = list(zip(file_groups, file_groups[1:]))

groups = {}
for n, r in enumerate(ranges, start=1):
    for i in range(*r):
        # None (not the string 'None') as the missing-file sentinel.
        file = pdfs.get(i)
        if file is not None:
            groups.setdefault(n, []).append(file)

# Merge each group's files into merged\<group><ext>.
for group, files in groups.items():
    merger = PdfFileMerger()
    for file in files:
        merger.append(file)
    output_path = path + 'merged\\' + str(group) + new_ext
    merger.write(output_path)
    merger.close()

print('Completed')
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Merge two base64-encoded PDFs posted as JSON fields pdf1/pdf2.

    Returns the merged document as application/pdf, or a JSON error with
    HTTP 400 when the body is missing or cannot be parsed.
    """
    logging.info('Python HTTP trigger function processed a request.')

    try:
        req_body = req.get_json()
    except ValueError:
        # Invalid/missing JSON body: fall through to the 400 response below
        # instead of crashing with a NameError on req_body (the old code
        # `pass`ed here and then dereferenced the unbound name).
        req_body = {}

    pdf1 = req_body.get('pdf1')
    pdf2 = req_body.get('pdf2')

    if pdf1 and pdf2:
        input1 = io.BytesIO(base64.b64decode(pdf1))
        input2 = io.BytesIO(base64.b64decode(pdf2))
        input1.seek(0)
        input2.seek(0)

        output = io.BytesIO()
        # PdfFileMerger's first parameter is `strict`; the output buffer
        # belongs in write() below, not in the constructor.
        merger = PdfFileMerger()
        # Merge both inputs, write into the in-memory buffer, clean up.
        merger.append(input1)
        merger.append(input2)
        merger.write(output)
        merger.close()
        # Rewind the buffer before handing its contents back.
        output.seek(0)
        print("Successfully merged")
        return func.HttpResponse(
            output.getvalue(),
            headers={'Content-Type': 'application/pdf'}
        )
    else:
        return func.HttpResponse(
            json.dumps({
                'status': 'error',
                'content': 'Unable to parse pdf1 and pdf2'
            }),
            mimetype="application/json",
            status_code=400
        )
def mergeBrachyPlanFile(ptPdfInfos, ptJpgInfos, mergFolderPath):
    """Assemble a patient's brachytherapy plan PDF from its component files.

    Merges, in order: the Oncentra plan document, its DVH (printed within
    5 hours of the plan), any plan images (modified within 5 hours) and the
    2nd-check document (within 24 hours). The result is written to
    <mergFolderPath>/<patient>.pdf. Returns early -- after closing the
    merger -- when the output already exists or any required part is missing.
    """
    patient = ptPdfInfos[0].patient
    print('%s Start creating plan for patient %s.' % (datetime.now().strftime('%m/%d %H:%M'), patient))
    mergFilePath = os.path.join(mergFolderPath, patient + '.pdf')
    if os.path.isfile(mergFilePath):
        print('%s --The merged file already exists in %s.' % (datetime.now().strftime('%m/%d %H:%M'), mergFilePath))
        return

    mergPdf = PdfFileMerger(strict=False)

    # Find the Oncentra plan; its print time anchors all the other searches.
    pdfInfo = next(
        (info for info in ptPdfInfos if info.type == 'plandocOncentra'), None)
    if pdfInfo is None:
        print('%s --Cannot find the plan file.' % datetime.now().strftime('%m/%d %H:%M'))
        mergPdf.close()
        return
    plnTime = pdfInfo.printTime
    mergPdf.append(pdfInfo.fileName)

    # Find the Oncentra DVH printed within 5 hours of the plan.
    findDVHFile = False
    for pdfInfo in ptPdfInfos:
        if pdfInfo.type == 'DVHOncentra' and abs(plnTime - pdfInfo.printTime) < timedelta(hours=5):
            findDVHFile = True
            mergPdf.append(pdfInfo.fileName)
            break
    if not findDVHFile:
        print('%s --Cannot find the DVH file within 5 hours time frame.' % datetime.now().strftime('%m/%d %H:%M'))
        mergPdf.close()
        return

    # Find plan image(s); multiple images may be merged.
    findImgFile = False
    for imgInfo in ptJpgInfos:
        if abs(plnTime - imgInfo.modifiedTime) < timedelta(hours=5):
            imgFilePath = imgInfo.convertPdf()
            if imgFilePath:
                mergPdf.append(imgFilePath)
                findImgFile = True
    if not findImgFile:
        print('%s --Cannot find the image file within 5 hours time frame.' % datetime.now().strftime('%m/%d %H:%M'))
        mergPdf.close()
        return

    # Find the 2nd check printed within one day of the plan.
    find2ndCalcFile = False
    for pdfInfo in ptPdfInfos:
        if pdfInfo.type == '2ndchkbrachy' and abs(plnTime - pdfInfo.printTime) < timedelta(hours=24):
            find2ndCalcFile = True
            mergPdf.append(pdfInfo.fileName)
            break
    if not find2ndCalcFile:
        print('%s --Cannot find the 2nd check file within 1 day time frame.'
              % datetime.now().strftime('%m/%d %H:%M'))
        mergPdf.close()
        return

    try:
        mergPdf.write(mergFilePath)
        print('%s --Merged plan completed.' % datetime.now().strftime('%m/%d %H:%M'))
    except Exception:
        # Narrowed from a bare `except:`; still best-effort, so only log.
        print('%s --Fail to save merged plan file.' % datetime.now().strftime('%m/%d %H:%M'))
    finally:
        # The original leaked the merger when write() failed.
        mergPdf.close()
    return
def __init__(self) -> None:
    """Prepare an empty PDF merger and a scratch directory for temp files."""
    # The temporary directory is created under the current working dir.
    self._tmp_dir = tempfile.TemporaryDirectory(dir=".")
    self._merger = PdfFileMerger()
def merger(input_paths, output_path):
    """Concatenate the PDFs at ``input_paths`` (in order) into ``output_path``."""
    pdf_merger = PdfFileMerger()
    try:
        for path in input_paths:
            pdf_merger.append(path)
        with open(output_path, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # Release the handles the merger opened for each appended path;
        # the original never closed the merger.
        pdf_merger.close()
# NOTE(review): the next four lines are the tail of a helper whose `def` (and
# the `with open(...) as file:` presumably feeding `file`/`items`) lies before
# this chunk -- confirm against the full source.
    reader = csv.reader(file)
    for row in reader:
        # assumes the wanted value is in the second CSV column -- TODO confirm
        items.append(row[1])
    return items


def search_file():
    """Return paths of resume PDFs whose names contain one of the search items."""
    item = search_item()
    pdf = []
    directory = "C:\\Users\\Gizem\\Desktop\\cv\\Resumes"
    qty = len(item)
    # NOTE(review): range starts at 1, so the first item is skipped --
    # presumably a CSV header row; verify against search_item().
    for i in range(1, qty):
        for filename in os.listdir(directory):
            if item[i] in filename and filename.endswith(".pdf"):
                pdf.append(os.path.join(directory, filename))
    return pdf


# Merge every matching resume into a single result PDF on the desktop.
pdfs = search_file()
print(pdfs)
merger = PdfFileMerger()
for k in pdfs:
    merger.append(k)
merger.write("C:\\Users\\Gizem\\Desktop\\result.pdf")
merger.close()
import sys

from PyPDF4 import PdfFileMerger

# Concatenate the PDFs named on the command line into combined.pdf.
# Slicing sys.argv[1:] skips the script name without mutating the global
# argv list (the old sys.argv.pop(0) destructively altered it).
merger = PdfFileMerger()
for pdf in sys.argv[1:]:
    merger.append(pdf)
merger.write('combined.pdf')
merger.close()
def dphtmpd(month: int, year: int):
    '''dphtmpd = detailed payroll html to multi-page pdf document

    Renders one detailed payroll page (A5 landscape PDF, via pdfkit) per
    active salaried employee for the given month/year, then merges all
    pages into templates/pdf/payroll_<month>_<year>.pdf and returns that
    path.
    '''
    multipdf = f'templates/pdf/payroll_{month}_{year}.pdf'
    # create the multi-page pdf manager
    merger = PdfFileMerger()
    # all required data
    leave_kind = ('unpaid_leave', 'paid_leave', 'maternity_leave')
    # column headers for the payroll table (Polish labels, rendered as-is)
    heads = ['Kwota brutto', 'Podstawa', 'Urlop', 'Nadgodziny', 'Za soboty',
             'Za niedziele', 'Zaliczka', 'Do wypłaty']
    total_work_days = len(list(workingdays(year, month)))
    holidays = holiday(year).values()
    payroll = payroll_set(month, year)
    # create context shared by every employee page
    context = {'month': month, 'year': year, 'heads': heads,
               'total_work_days': total_work_days,}
    # wkhtmltopdf options used to create each pdf page
    options = {'page-size': 'A5', 'margin-top': '0.25in',
               'margin-right': '0.2in', 'margin-bottom': '0.1in',
               'margin-left': '0.2in', 'encoding': "UTF-8",
               'orientation': 'landscape', 'no-outline': None, 'quiet': '',}
    # holiday workhours query (shift starts AND ends on a holiday date)
    holq = Q(start_work__date__in=list(holidays)) & Q(end_work__date__in=list(holidays))
    # Saturday workhours query (week_day=7 is Saturday; a shift may end Sunday)
    satq = Q(start_work__week_day=7) & (Q(end_work__week_day=7) | Q(end_work__week_day=1))
    # Sunday workhours query (week_day=1 is Sunday)
    sunq = Q(start_work__week_day=1) & Q(end_work__week_day=1)
    # leaves taken in the requested month
    leaq = Q(leave_date__year=year, leave_date__month=month)
    # create a dataset (one pdf page) for each active employee
    for key, value in payroll.items():
        worker, payroll_val = key, value
        # payroll data
        salary = payroll_val['salary']
        # eliminate workers without a salary
        if salary > 0:
            employeedata = get_object_or_404(EmployeeData, worker=worker)
            # main query: this worker's work evidence for the month
            mainquery = WorkEvidence.objects.filter(worker=worker,
                                                    start_work__year=year,
                                                    start_work__month=month)
            # Saturdays (holiday shifts excluded so they are not counted twice)
            saturday_hours = mainquery.filter(satq).exclude(holq)
            saturday_hours = saturday_hours.aggregate(sh=Sum('jobhours'))['sh']
            # Sundays (holiday shifts excluded as above)
            sunday_hours = mainquery.filter(sunq).exclude(holq)
            sunday_hours = sunday_hours.aggregate(sh=Sum('jobhours'))['sh']
            # holidays
            holiday_hours = mainquery.filter(holq)
            holiday_hours = holiday_hours.aggregate(hh=Sum('jobhours'))['hh']
            # total workhours
            total_work_hours = mainquery.aggregate(twh=Sum('jobhours'))['twh']
            # leaves: per-kind counts for the year and the month, plus the
            # month's individual leave dates
            year_leaves = EmployeeLeave.objects.filter(worker=worker,
                                                       leave_date__year=year)
            mls = EmployeeLeave.objects.filter(worker=worker).filter(leaq).order_by('leave_date')
            month_leaves = {kind: mls.filter(leave_flag=kind).count() for kind in leave_kind}
            month_dates = {kind: [item.leave_date for item in mls.filter(leave_flag=kind)] for kind in leave_kind}
            year_leaves = {kind: year_leaves.filter(leave_flag=kind).count() for kind in leave_kind}
            # update the context with this worker's data
            context.update({'worker': worker, 'payroll': payroll_val,
                            'salary': salary, 'employeedata': employeedata,
                            'saturday_hours': saturday_hours,
                            'month_leaves': month_leaves,
                            'month_dates': month_dates,
                            'sunday_hours': sunday_hours,
                            'year_leaves': year_leaves,
                            'total_work_hours': total_work_hours,
                            'holiday_hours': holiday_hours})
            # create this worker's pdf page with the options above
            template = get_template('evidence/monthly_detailed_payroll_pdf.html')
            html = template.render(context)
            pdfile = f'templates/pdf/{worker.surname} {worker.forename} lp_{month}_{year}.pdf'
            pdfkit.from_string(html, pdfile, options=options, css=settings.CSS_FILE)
            # merge this page into the multi-page document
            merger.append(pdfile)
    # write the multi-page pdf file
    merger.write(multipdf)
    return multipdf
from PyPDF4 import PdfFileMerger

pdfs = ["1.pdf", "2.pdf"]

# Append by path so the merger manages (and releases) the input handles
# itself; the previous version called open() inline and leaked every file.
merger = PdfFileMerger()
for pdf in pdfs:
    merger.append(pdf)

with open("result.pdf", "wb") as fout:
    merger.write(fout)
merger.close()
def merge_pdfs(self):
    '''Merge the files in self.paths_list into self.output_file_path.

    Image entries (.png/.jpg/.jpeg) are converted to single-page PDFs
    via self._image_to_page_; PDF entries may carry an optional page
    range decoded by self._path_decople_.
    # todo: check file type
    '''
    merged_pdf = PdfFileMerger()
    try:
        for file_path in self.paths_list:
            file_name, page_range = self._path_decople_(file_path)
            if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                merged_pdf.append(fileobj=self._image_to_page_(file_name))
            elif page_range:
                merged_pdf.append(fileobj=file_name, pages=page_range)
            else:
                merged_pdf.append(fileobj=file_name)
        # `with` guarantees the output file is closed even when write()
        # raises -- the original open()/close() pair leaked it on error.
        with open(self.output_file_path, 'wb') as output:
            merged_pdf.write(output)
    finally:
        merged_pdf.close()
def _visualize(self):
    """
    Run the visualization of candidates.

    Produces one figure per (page, plot_type) combination with up to
    nplot_per_side**2 candidate panels each, saves them as individual
    PDFs in self.output_dir, merges them into CB<beam>.pdf and copies
    the merged file to self.result_dir.
    """
    ncand = len(self.files)
    self.logger.debug(f"Visualizing {ncand} candidates")
    # get max galactic DM
    dmgal = util.get_ymw16(self.obs_config['parset'], self.obs_config['beam'], self.logger)
    # DMgal is zero if something failed, in that case set the value to infinity so no plots are marked, instead of
    # all
    if dmgal == 0:
        dmgal = np.inf
    # get plot order
    order = self._get_plot_order()
    # get the number of plot pages
    nplot_per_page = self.config.nplot_per_side**2
    npage = int(np.ceil(len(order) / nplot_per_page))
    # order files, then split per page
    # NOTE(review): indexing with `order` assumes self.files is a numpy
    # array (fancy indexing) -- confirm against the class initializer.
    try:
        files = self.files[order]
    except IndexError:
        self.logger.error("Failed to get plot order")
        return
    num_full_page, nplot_last_incomplete_page = divmod(
        len(files), nplot_per_page)
    files_split = []
    for page in range(num_full_page):
        files_split.append(files[page * nplot_per_page:(page + 1) * nplot_per_page])
    if nplot_last_incomplete_page != 0:
        files_split.append(files[-nplot_last_incomplete_page:])
    for page in range(npage):
        for plot_type in self.config.plot_types:
            # create figure
            fig, axes = plt.subplots(nrows=self.config.nplot_per_side,
                                     ncols=self.config.nplot_per_side,
                                     figsize=(self.config.figsize, self.config.figsize))
            axes = axes.flatten()
            # loop over the files
            for i, fname in enumerate(files_split[page]):
                # load the data and parameters
                data, params = self._load_data(fname, plot_type)
                # 2D data: time is the second axis; 1D data: its length
                try:
                    ntime = data.shape[1]
                except IndexError:
                    ntime = len(data)
                # time axis in ms, centred on the candidate
                times = np.arange(-ntime / 2, ntime / 2) * params['tsamp'] * 1e3
                ax = axes[i]
                xlabel = 'Time (ms)'
                if plot_type == 'freq_time':
                    nfreq = data.shape[0]
                    ylabel = 'Frequency (MHz)'
                    title = 'p:{prob_freqtime:.2f} DM:{dm:.2f} t:{toa:.2f}\n' \
                            'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                    freqs = np.linspace(
                        0, BANDWIDTH.to(u.MHz).value, nfreq) + self.obs_config['min_freq']
                    X, Y = np.meshgrid(times, freqs)
                    ax.pcolormesh(X, Y, data, cmap=self.config.cmap_freqtime,
                                  shading='nearest')
                    # Add DM 0 curve
                    delays = util.dm_to_delay(
                        params['dm'] * u.pc / u.cm**3,
                        freqs[0] * u.MHz, freqs * u.MHz).to(u.ms).value
                    ax.plot(times[0] + delays, freqs, c='r', alpha=.5)
                elif plot_type == 'dm_time':
                    ylabel = r'DM (pc cm$^{-3}$)'
                    title = 'p:{prob_dmtime:.2f} DM:{dm:.2f} t:{toa:.2f}\n' \
                            'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                    X, Y = np.meshgrid(times, params['dms'])
                    ax.pcolormesh(X, Y, data, cmap=self.config.cmap_dmtime,
                                  shading='nearest')
                    # add line if DM 0 is in plot range
                    if min(params['dms']) <= 0 <= max(params['dms']):
                        ax.axhline(0, c='r', alpha=.5)
                elif plot_type == '1d_time':
                    ylabel = 'Power (norm.)'
                    title = 'DM:{dm:.2f} t:{toa:.2f}\n' \
                            'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                    ax.plot(times, data, c=self.config.colour_1dtime)
                else:
                    raise ProcessorException(
                        f"Unknown plot type: {plot_type}, should not be able to get here!"
                    )
                # add plot title
                ax.set_title(title)
                # ylabel only the first column
                if ax.is_first_col():
                    ax.set_ylabel(ylabel)
                # xlabel only the last row. This is a bit tricky: on the last page, this is not necessarily
                # the last possible row
                if (page != npage - 1) and ax.is_last_row():
                    ax.set_xlabel(xlabel)
                else:
                    # a plot is the bottom one in a column if the number of remaining plots is less than a full row
                    nplot_remaining = len(files_split[page]) - i - 1
                    if nplot_remaining < self.config.nplot_per_side:
                        ax.set_xlabel(xlabel)
                ax.set_xlim(times[0], times[-1])
                # add red border if DM > DMgal
                if params['dm'] > dmgal:
                    plt.setp(ax.spines.values(), color='red', linewidth=2,
                             alpha=0.85)
            # on the last page, disable the remaining plots if there are any
            if page == npage - 1:
                remainder = nplot_per_page - nplot_last_incomplete_page
                if remainder > 0:
                    for ax in axes[-remainder:]:
                        ax.axis('off')
            fig.set_tight_layout(True)
            # ensure the number of digits used for the page index is always the same, and large enough
            # then sorting works as expected
            page_str = str(page).zfill(len(str(npage)))
            fig_fname = os.path.join(self.output_dir,
                                     f'ranked_{plot_type}_{page_str}.pdf')
            fig.savefig(fig_fname)
    # merge the plots: all pages of one plot type, then the next type
    output_file = f"{self.output_dir}/CB{self.obs_config['beam']:02d}.pdf"
    merger = PdfFileMerger()
    for plot_type in self.config.plot_types:
        fnames = glob.glob(f'{self.output_dir}/*{plot_type}*.pdf')
        fnames.sort()
        for fname in fnames:
            merger.append(fname)
    merger.write(output_file)
    # copy the file to the central output directory
    self.logger.info(
        f"Saving plots to {self.result_dir}/{os.path.basename(output_file)}"
    )
    copy(output_file, self.result_dir)
def merge(files):
    """Merge two or more pdfs into one.

    Appends the first 3 pages of files[0], splices the first page of
    files[1] in after page 2, then appends all of files[1]; the result
    is written to PyPDF-Merging-Output.pdf.
    """
    # Setting the file name of output
    output = "PyPDF-Merging-Output.pdf"
    # Create the merger instance. PdfFileMerger's first parameter is
    # `strict`; the old code passed an open output handle there, which both
    # misused the API and leaked the handle.
    merger = PdfFileMerger()
    # Open the two input files; `with` guarantees they are closed.
    with open(files[0], "rb") as input1, open(files[1], "rb") as input2:
        # Add the first 3 pages of input1 to output
        merger.append(fileobj=input1, pages=(0, 3))
        # Insert the first page of input2 into the output beginning after the second page
        merger.merge(position=2, fileobj=input2, pages=(0, 1))
        # Append entire input2 document to the end of the output document
        merger.append(input2)
        # Write the output while the inputs are still open
        merger.write(output)
    print("Output successfully written to", output)
    merger.close()