Exemple #1
0
 def _save(self):
     """Rewrite the PDF at ``self.path`` with the metadata in ``self._meta``.

     The merged document is rendered into memory first and only then written
     over the original file, so a failure while merging does not leave a
     truncated file behind.
     """
     import io

     # HACK: Some Apple software includes this field when producing PDFs.
     # The value is an array. However, at least as of version 1.7, the PDF spec
     # forbids custom document info fields from having anything but string values.
     # PyPDF will crash if we try to have it write an array to a document info field.
     self._meta.pop('/AAPL:Keywords', None)

     merger = PdfFileMerger()
     buffer = io.BytesIO()
     try:
         with open(self.path, 'rb') as file:
             merger.append(file)
         merger.addMetadata(self._meta)
         merger.write(buffer)
     finally:
         # Release the streams held by the merger even if merging failed.
         merger.close()

     with open(self.path, 'wb') as file:
         file.write(buffer.getvalue())
Exemple #2
0
class PDFWriter:
    """Collect PDF and image files and write them out as a single PDF."""

    def __init__(self) -> None:
        self._merger = PdfFileMerger()

        # Converted images are stored here until the merged file is saved.
        self._tmp_dir = tempfile.TemporaryDirectory(dir=".")

    def merge_files(self, *files: Union[str, Path]) -> None:
        """Append every given file to the merger, in order.

        Files ending in ``.jpg``, ``.jpeg`` or ``.png`` are first converted to
        a single-page PDF inside the temporary directory.
        """
        logger.debug("Merging %d files", len(files))
        for _file in files:
            # if it's image convert it to pdf first
            if str(_file).lower().endswith((".jpg", ".jpeg", ".png")):
                pdf_path = Path(self._tmp_dir.name) / f"{Path(_file).stem}.pdf"
                logger.debug("Save image as tmp pdf file '%s'", pdf_path)

                with Image.open(_file) as image:
                    # convert() returns a new image; the result must be used,
                    # otherwise modes such as RGBA cannot be saved as PDF.
                    image.convert("RGB").save(pdf_path)
                _file = pdf_path

            with open(_file, "rb") as f:
                self._merger.append(f)

    def save(self, out_path: Union[str, Path]) -> Path:
        """Write the merged document to ``out_path`` and remove temp files.

        ``.pdf`` is appended when ``out_path`` does not already end with it.
        Returns the path that was actually written.
        """
        # add extension if doesn't exist (works for both str and Path input)
        if not str(out_path).lower().endswith(".pdf"):
            out_path = f"{out_path}.pdf"
        out_path = Path(out_path)

        logger.debug("Saving '%s' file", out_path)

        with open(out_path, "wb") as fd:
            self._merger.write(fd)

        # clean up
        self._tmp_dir.cleanup()
        logger.debug("Tmp folder '%s' deleted", self._tmp_dir.name)

        return out_path
Exemple #3
0
    def post(self):
        """Merge two posted PDF files and return the merged document.

        The inputs are read either from the base64 fields ``pdf1_64`` /
        ``pdf2_64`` or from the regular fields ``pdf1`` / ``pdf2``. When
        ``type`` is ``"64"`` the merged PDF is returned base64-encoded in a
        JSON body; otherwise it is sent back as an ``application/pdf`` file.
        """
        data = self.reqparse.parse_args()

        # Check whether the files were posted as base64 or as regular uploads.
        if data["pdf1_64"] and data["pdf2_64"]:
            input1 = io.BytesIO(base64.b64decode(data["pdf1_64"]))
            input2 = io.BytesIO(base64.b64decode(data["pdf2_64"]))
        elif data['pdf1'] and data['pdf2']:
            # Use the posted file objects directly as merger inputs.
            input1 = data["pdf1"]
            input2 = data["pdf2"]
        else:
            return {
                'status': 'error',
                'message': 'No files found'
            }

        # In-memory output file. The merger's first argument is ``strict``,
        # not an output stream; the original passed the (truthy) buffer there,
        # so strict=True keeps the effective behavior.
        output = io.BytesIO()
        merger = PdfFileMerger(strict=True)

        # Merge the inputs and write the result, always releasing the merger.
        try:
            merger.append(input1)
            merger.append(input2)
            merger.write(output)
        finally:
            merger.close()
        # Rewind the in-memory file so it can be read from the start.
        output.seek(0)
        print("Successfully merged")

        # If JSON/base64 output was requested, encode the PDF as base64.
        if data['type'] == "64":
            output64 = base64.b64encode(output.getvalue()).decode()
            # Return the PDF as a base64 string.
            return {
                'status': 'success',
                'message': output64
            }
        # Otherwise send the PDF itself so the browser offers it for download.
        return make_response(send_file(output, mimetype="application/pdf"), 200)
Exemple #4
0
def main(what):
    """Merge every PDF in directory ``what`` into ``what/final.pdf``.

    Files are merged in sorted name order; a previously written ``final.pdf``
    is not used as an input. Nothing is written when the directory contains
    no PDF files.
    """
    output_name = "final.pdf"
    output = f"{what}/{output_name}"

    # Match on the real extension and the exact output name rather than on
    # substrings, which would skip or include unrelated files.
    files = sorted(
        f"{what}/{zw}" for zw in os.listdir(what)
        if zw.endswith(".pdf") and zw != output_name
    )
    if not files:
        print("no PDF files found in", what)
        return

    print("merging", *files)

    merger = PdfFileMerger()
    try:
        for fil in files:
            # The merger reads the stream during append, so the handle can be
            # closed immediately afterwards.
            with open(fil, "rb") as handle:
                merger.append(handle, import_bookmarks=False)

        with open(output, "wb") as out:
            merger.write(out)
    finally:
        merger.close()

    print("Output successfully written to", output)
Exemple #5
0
	def post(self):
		"""Merge the PDF files named in the JSON request body.

		The body must contain ``input`` (a list of file names) and ``output``
		(the path to write). Returns ``({'message': 'OK'}, 200)`` on success
		and ``({'error': <text>}, 500)`` when anything fails.
		"""
		merge = None
		try:
			payload = request.get_json()
			files = payload['input']
			output_path = payload['output']
			merge = PdfFileMerger()
			for filename in files:
				# Validate that each file is pdf file
				input_path = validatePDF(filename)
				with open(input_path, 'rb') as fh:
					merge.append(PdfFileReader(fh))
			merge.write(output_path)
			return {'message': 'OK'}, 200
		except Exception as error:
			# Report failures with an error status instead of an implicit 200.
			return {'error': str(error)}, 500
		finally:
			if merge is not None:
				merge.close()
def merge_pdfs(input_files: list,
               page_range: tuple,
               output_file: str,
               bookmark: bool = True):
    """
    Merge a list of PDF files and save the combined result into the `output_file`.
    `page_range` to select a range of pages (behaving like Python's range() function) from the input files
        e.g (0,2) -> First 2 pages 
        e.g (0,6,2) -> pages 1,3,5
    bookmark -> add bookmarks to the output file to navigate directly to the input file section within the output file.

    Every input file and the output file are closed before returning, also
    when merging fails.
    """
    # strict = False -> To ignore PdfReadError - Illegal Character error
    merger = PdfFileMerger(strict=False)
    try:
        for input_file in input_files:
            bookmark_name = os.path.splitext(
                os.path.basename(input_file))[0] if bookmark else None
            # pages To control which pages are appended from a particular file.
            with open(input_file, 'rb') as source:
                merger.append(fileobj=source,
                              pages=page_range,
                              bookmark=bookmark_name)
        # Write the combined document.
        with open(output_file, 'wb') as target:
            merger.write(fileobj=target)
    finally:
        merger.close()
Exemple #7
0
# Index every matching PDF by the integer encoded in its file name.
for pdf_path in glob.glob(new_pathfiles):
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    pdfs[int(stem)] = pdf_path

# Each pair of neighbouring boundaries in file_groups forms one (start, stop) range.
ranges = [
    (file_groups[pos], file_groups[pos + 1])
    for pos in range(len(file_groups) - 1)
]

# Group number (counted from 1, one per range) -> PDFs whose number is in the range.
groups = {}
for number, (start, stop) in enumerate(ranges, start=1):
    for index in range(start, stop):
        candidate = pdfs.get(index, 'None')
        if candidate == 'None':
            continue
        groups.setdefault(number, []).append(candidate)

# Write one merged PDF per non-empty group.
for number, members in groups.items():
    combined = PdfFileMerger()
    for member in members:
        combined.append(member)
    combined.write(path + 'merged\\' + str(number) + new_ext)
    combined.close()

print('Completed')
Exemple #8
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Merge two base64-encoded PDFs posted as JSON and return the result.

    The JSON body must contain ``pdf1`` and ``pdf2``. On success the merged
    document is returned as ``application/pdf``. A missing or invalid body,
    missing fields, or undecodable base64 yields a 400 JSON error response.
    """
    logging.info('Python HTTP trigger function processed a request.')

    def error_response() -> func.HttpResponse:
        return func.HttpResponse(
            json.dumps({
                'status': 'error',
                'content': 'Unable to parse pdf1 and pdf2'
            }),
            mimetype="application/json",
            status_code=400
            )

    try:
        req_body = req.get_json()
    except ValueError:
        # Previously swallowed, which left req_body unbound further down.
        return error_response()
    if not isinstance(req_body, dict):
        return error_response()

    pdf1 = req_body.get('pdf1')
    pdf2 = req_body.get('pdf2')
    if not (pdf1 and pdf2):
        return error_response()

    try:
        input1 = io.BytesIO(base64.b64decode(pdf1))
        input2 = io.BytesIO(base64.b64decode(pdf2))
    except (ValueError, TypeError):
        # binascii.Error (bad padding/characters) is a ValueError subclass.
        return error_response()

    # In-memory output file. The merger's first argument is ``strict``, not an
    # output stream; the original passed the (truthy) buffer there, so
    # strict=True keeps the effective behavior.
    output = io.BytesIO()
    merger = PdfFileMerger(strict=True)

    # Merge the inputs and write the result, always releasing the merger.
    try:
        merger.append(input1)
        merger.append(input2)
        merger.write(output)
    finally:
        merger.close()
    print("Successfully merged")

    return func.HttpResponse(output.getvalue(),
        headers={'Content-Type':'application/pdf'}
        )
Exemple #9
0
def mergeBrachyPlanFile(ptPdfInfos, ptJpgInfos, mergFolderPath):
    """Assemble one patient's plan documents into ``<patient>.pdf``.

    The merged file is made of, in order: the Oncentra plan document, the
    first DVH document printed within 5 hours of the plan, every image
    modified within 5 hours of the plan (converted via ``convertPdf``), and
    the first 2nd-check document printed within 24 hours of the plan. If any
    part is missing, or the merged file already exists, nothing is written.
    Progress is reported with ``print``; the function always returns None.
    """
    def stamp():
        return datetime.now().strftime('%m/%d %H:%M')

    patient = ptPdfInfos[0].patient
    print('%s  Start creating plan for patient %s.' % (stamp(), patient))
    mergFilePath = os.path.join(mergFolderPath, patient + '.pdf')
    if os.path.isfile(mergFilePath):
        print('%s  --The merged file already exists in %s.' %
              (stamp(), mergFilePath))
        return

    mergPdf = PdfFileMerger(strict=False)
    try:
        # find Oncentra plan
        plnInfo = next(
            (info for info in ptPdfInfos if info.type == 'plandocOncentra'),
            None)
        if plnInfo is None:
            print('%s  --Cannot find the plan file.' % stamp())
            return
        plnTime = plnInfo.printTime
        mergPdf.append(plnInfo.fileName)

        # find Oncentra DVH plan
        dvhInfo = next(
            (info for info in ptPdfInfos
             if info.type == 'DVHOncentra'
             and abs(plnTime - info.printTime) < timedelta(hours=5)),
            None)
        if dvhInfo is None:
            print('%s  --Cannot find the DVH file within 5 hours time frame.' %
                  stamp())
            return
        mergPdf.append(dvhInfo.fileName)

        # find plan image(s), can merge multiple images
        findImgFile = False
        for imgInfo in ptJpgInfos:
            if abs(plnTime - imgInfo.modifiedTime) < timedelta(hours=5):
                imgFilePath = imgInfo.convertPdf()
                if imgFilePath:
                    mergPdf.append(imgFilePath)
                    findImgFile = True
        if not findImgFile:
            print('%s  --Cannot find the image file within 5 hours time frame.' %
                  stamp())
            return

        # find 2nd check
        chkInfo = next(
            (info for info in ptPdfInfos
             if info.type == '2ndchkbrachy'
             and abs(plnTime - info.printTime) < timedelta(hours=24)),
            None)
        if chkInfo is None:
            print('%s  --Cannot find the 2nd check file within 1 day time frame.' %
                  stamp())
            return
        mergPdf.append(chkInfo.fileName)

        try:
            mergPdf.write(mergFilePath)
            print('%s  --Merged plan completed.' % stamp())
        except Exception:
            # Keep the best-effort behavior, but no longer swallow
            # KeyboardInterrupt/SystemExit as the bare except did.
            print('%s  --Fail to save merged plan file.' % stamp())
    finally:
        # Close the merger on every path, including a failed write.
        mergPdf.close()
Exemple #10
0
    def __init__(self) -> None:
        """Create an empty PDF merger and a temporary working directory."""
        # Merger that accumulates the documents to combine.
        self._merger = PdfFileMerger()

        # Temporary directory created inside the current working directory.
        self._tmp_dir = tempfile.TemporaryDirectory(dir=".")
Exemple #11
0
def merger(input_paths, output_path):
    """Concatenate the PDFs at ``input_paths`` into ``output_path``.

    Inputs are appended in the given order. The merger is always closed so
    the input files it opened are released, even when merging fails.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_paths:
            pdf_merger.append(path)
        with open(output_path, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        pdf_merger.close()
Exemple #12
0
        reader = csv.reader(file)

        for row in reader:
            items.append(row[1])

    return items


def search_file():
    """Return paths of PDFs in the resumes folder whose name contains a search item.

    The first entry returned by ``search_item()`` is skipped. Results are
    ordered by search item, then by directory listing order; a file matching
    several items appears once per match.
    """
    terms = search_item()
    directory = "C:\\Users\\Gizem\\Desktop\\cv\\Resumes"
    return [
        os.path.join(directory, filename)
        for position in range(1, len(terms))
        for filename in os.listdir(directory)
        if terms[position] in filename and filename.endswith(".pdf")
    ]


# Collect the matching resumes and combine them into a single PDF.
pdfs = search_file()
print(pdfs)

merger = PdfFileMerger()
try:
    for k in pdfs:
        merger.append(k)

    merger.write("C:\\Users\\Gizem\\Desktop\\result.pdf")
finally:
    # Release the input files even if appending or writing fails.
    merger.close()
Exemple #13
0
import sys
from PyPDF4 import PdfFileMerger

# Merge every PDF named on the command line into combined.pdf.
merger = PdfFileMerger()
try:
    # Skip the script name without mutating sys.argv.
    for pdf in sys.argv[1:]:
        merger.append(pdf)

    merger.write('combined.pdf')
finally:
    # Release the input files even if appending or writing fails.
    merger.close()
Exemple #14
0
def dphtmpd(month:int, year:int):
	'''dphtmpd = detailed payroll html to multi-page pdf document

	Renders one PDF per employee with a positive salary for the given month
	and year, merges them into a single multi-page PDF and returns the path
	of the merged file.
	'''
	multipdf = f'templates/pdf/payroll_{month}_{year}.pdf'
	# create multi-page pdf manager
	merger = PdfFileMerger()
	# all required data
	leave_kind = ('unpaid_leave', 'paid_leave', 'maternity_leave')
	heads = ['Kwota brutto', 'Podstawa', 'Urlop', 'Nadgodziny', 'Za soboty', 'Za niedziele', 'Zaliczka', 'Do wypłaty']
	total_work_days = len(list(workingdays(year, month)))
	holidays = holiday(year).values()
	payroll = payroll_set(month, year)
	# create context
	context = {'month': month, 'year': year, 'heads': heads, 'total_work_days': total_work_days,}
	# options to create pdf file
	options = {'page-size': 'A5', 'margin-top': '0.25in', 'margin-right': '0.2in', 'margin-bottom': '0.1in',
			   'margin-left': '0.2in', 'encoding': "UTF-8", 'orientation': 'landscape','no-outline': None, 'quiet': '',}

	# holidays workhours query
	holq =Q(start_work__date__in=list(holidays)) & Q(end_work__date__in=list(holidays))
	# Saturday workhours query
	satq = Q(start_work__week_day=7) & (Q(end_work__week_day=7) | Q(end_work__week_day=1))
	# Sunday workhours query
	sunq = Q(start_work__week_day=1) & Q(end_work__week_day=1)
	# leaves
	leaq = Q(leave_date__year=year, leave_date__month=month)

	try:
		# create dataset for each active employee
		for worker, payroll_val in payroll.items():
			# payroll data
			salary = payroll_val['salary']
			# eliminate no salary worker
			if not salary > 0:
				continue
			employeedata = get_object_or_404(EmployeeData, worker=worker)
			# main query
			mainquery = WorkEvidence.objects.filter(worker=worker, start_work__year=year, start_work__month=month)
			# Saturdays
			saturday_hours = mainquery.filter(satq).exclude(holq)
			saturday_hours = saturday_hours.aggregate(sh=Sum('jobhours'))['sh']
			# Sundays
			sunday_hours = mainquery.filter(sunq).exclude(holq)
			sunday_hours = sunday_hours.aggregate(sh=Sum('jobhours'))['sh']
			# holidays
			holiday_hours = mainquery.filter(holq)
			holiday_hours = holiday_hours.aggregate(hh=Sum('jobhours'))['hh']
			# total workhours
			total_work_hours = mainquery.aggregate(twh=Sum('jobhours'))['twh']
			# leaves
			year_qs = EmployeeLeave.objects.filter(worker=worker, leave_date__year=year)
			mls = EmployeeLeave.objects.filter(worker=worker).filter(leaq).order_by('leave_date')
			month_leaves = {kind:mls.filter(leave_flag=kind).count() for kind in leave_kind}
			month_dates = {kind:[item.leave_date for item in mls.filter(leave_flag=kind)] for kind in leave_kind}
			year_leaves = {kind:year_qs.filter(leave_flag=kind).count() for kind in leave_kind}
			# update context
			context.update({'worker': worker, 'payroll': payroll_val, 'salary': salary,
							'employeedata': employeedata, 'saturday_hours': saturday_hours,
							'month_leaves': month_leaves, 'month_dates': month_dates,
							'sunday_hours': sunday_hours, 'year_leaves': year_leaves,
							'total_work_hours': total_work_hours, 'holiday_hours': holiday_hours})
			# create pdf file with following options
			template = get_template('evidence/monthly_detailed_payroll_pdf.html')
			html = template.render(context)
			pdfile = f'templates/pdf/{worker.surname} {worker.forename} lp_{month}_{year}.pdf'
			pdfkit.from_string(html, pdfile, options=options, css=settings.CSS_FILE)
			# merge all pdf file
			merger.append(pdfile)

		# write multi-page pdf file
		merger.write(multipdf)
	finally:
		# release the per-employee pdf files opened by the merger
		merger.close()

	return multipdf
Exemple #15
0
from PyPDF4 import PdfFileMerger

pdfs = ["1.pdf", "2.pdf"]

merger = PdfFileMerger()
try:
    for pdf in pdfs:
        # The merger reads the stream during append, so each input can be
        # closed as soon as it has been added.
        with open(pdf, "rb") as fin:
            merger.append(fin)

    with open("result.pdf", "wb") as fout:
        merger.write(fout)
finally:
    merger.close()
Exemple #16
0
    def merge_pdfs(self):
        '''
        merge files into a PDF

        Every entry of ``self.paths_list`` is split into a file name and an
        optional page range. Images (.png/.jpg/.jpeg) are converted to a page
        first; other files are appended whole or restricted to the page range.
        The result is written to ``self.output_file_path``.

        # todo: check file type
        '''
        merged_pdf = PdfFileMerger()
        try:
            for file_path in self.paths_list:
                file_name, page_range = self._path_decople_(file_path)
                if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    merged_pdf.append(fileobj=self._image_to_page_(file_name))
                elif page_range:
                    merged_pdf.append(fileobj=file_name, pages=page_range)
                else:
                    merged_pdf.append(fileobj=file_name)

            # write merge content to the output file
            with open(self.output_file_path, 'wb') as output:
                merged_pdf.write(output)
        finally:
            # release the inputs even when appending or writing fails
            merged_pdf.close()
Exemple #17
0
    def _visualize(self):
        """
        Run the visualization of candidates

        For every page of candidates and every configured plot type, a grid of
        plots is saved as ``ranked_<plot_type>_<page>.pdf`` in the output
        directory. All pages are then merged into ``CB<beam>.pdf``, which is
        copied to the result directory.
        """
        ncand = len(self.files)
        self.logger.debug(f"Visualizing {ncand} candidates")

        # get max galactic DM
        dmgal = util.get_ymw16(self.obs_config['parset'],
                               self.obs_config['beam'], self.logger)
        # DMgal is zero if something failed, in that case set the value to infinity so no plots are marked, instead of
        # all
        if dmgal == 0:
            dmgal = np.inf

        # get plot order
        order = self._get_plot_order()
        # get the number of plot pages
        nplot_per_page = self.config.nplot_per_side**2
        npage = int(np.ceil(len(order) / nplot_per_page))
        # order files, then split per page
        try:
            files = self.files[order]
        except IndexError:
            self.logger.error("Failed to get plot order")
            return

        num_full_page, nplot_last_incomplete_page = divmod(
            len(files), nplot_per_page)
        files_split = []
        for page in range(num_full_page):
            files_split.append(files[page * nplot_per_page:(page + 1) *
                                     nplot_per_page])
        if nplot_last_incomplete_page != 0:
            files_split.append(files[-nplot_last_incomplete_page:])

        for page in range(npage):
            page_files = files_split[page]
            for plot_type in self.config.plot_types:
                # create figure; squeeze=False keeps axes an array even for a 1x1 grid
                fig, axes = plt.subplots(nrows=self.config.nplot_per_side,
                                         ncols=self.config.nplot_per_side,
                                         figsize=(self.config.figsize,
                                                  self.config.figsize),
                                         squeeze=False)
                axes = axes.flatten()
                # loop over the files
                for i, fname in enumerate(page_files):
                    # load the data and parameters
                    data, params = self._load_data(fname, plot_type)
                    try:
                        ntime = data.shape[1]
                    except IndexError:
                        ntime = len(data)
                    times = np.arange(-ntime / 2,
                                      ntime / 2) * params['tsamp'] * 1e3

                    ax = axes[i]
                    xlabel = 'Time (ms)'
                    if plot_type == 'freq_time':
                        nfreq = data.shape[0]
                        ylabel = 'Frequency (MHz)'
                        title = 'p:{prob_freqtime:.2f} DM:{dm:.2f} t:{toa:.2f}\n' \
                                'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                        freqs = np.linspace(
                            0,
                            BANDWIDTH.to(u.MHz).value,
                            nfreq) + self.obs_config['min_freq']
                        X, Y = np.meshgrid(times, freqs)
                        ax.pcolormesh(X,
                                      Y,
                                      data,
                                      cmap=self.config.cmap_freqtime,
                                      shading='nearest')
                        # Add the delay curve computed from the candidate DM
                        delays = util.dm_to_delay(
                            params['dm'] * u.pc / u.cm**3, freqs[0] * u.MHz,
                            freqs * u.MHz).to(u.ms).value
                        ax.plot(times[0] + delays, freqs, c='r', alpha=.5)
                    elif plot_type == 'dm_time':
                        ylabel = r'DM (pc cm$^{-3}$)'
                        title = 'p:{prob_dmtime:.2f} DM:{dm:.2f} t:{toa:.2f}\n' \
                                'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                        X, Y = np.meshgrid(times, params['dms'])
                        ax.pcolormesh(X,
                                      Y,
                                      data,
                                      cmap=self.config.cmap_dmtime,
                                      shading='nearest')
                        # add line if DM 0 is in plot range
                        if min(params['dms']) <= 0 <= max(params['dms']):
                            ax.axhline(0, c='r', alpha=.5)
                    elif plot_type == '1d_time':
                        ylabel = 'Power (norm.)'
                        title = 'DM:{dm:.2f} t:{toa:.2f}\n' \
                                'S/N:{snr:.2f} width:{downsamp} SB:{sb}'.format(**params)
                        ax.plot(times, data, c=self.config.colour_1dtime)
                    else:
                        raise ProcessorException(
                            f"Unknown plot type: {plot_type}, should not be able to get here!"
                        )

                    # add plot title
                    ax.set_title(title)
                    # NOTE(review): Axes.is_first_col/is_last_row were deprecated in Matplotlib 3.4 and
                    # later removed - confirm the pinned Matplotlib version before upgrading.
                    # ylabel only the first column
                    if ax.is_first_col():
                        ax.set_ylabel(ylabel)
                    # xlabel only the last row. This is a bit tricky: on the last page, this is not necessarily
                    # the last possible row
                    if (page != npage - 1) and ax.is_last_row():
                        ax.set_xlabel(xlabel)
                    else:
                        # a plot is the bottom one in a column if the number of remaining plots is less than a full row
                        nplot_remaining = len(page_files) - i - 1
                        if nplot_remaining < self.config.nplot_per_side:
                            ax.set_xlabel(xlabel)
                    ax.set_xlim(times[0], times[-1])
                    # add red border if DM > DMgal
                    if params['dm'] > dmgal:
                        plt.setp(ax.spines.values(),
                                 color='red',
                                 linewidth=2,
                                 alpha=0.85)

                # disable the axes that received no plot. Slicing by the number of files on this page
                # leaves a completely full last page untouched (previously all of its axes were disabled).
                for unused_ax in axes[len(page_files):]:
                    unused_ax.axis('off')

                fig.set_tight_layout(True)
                # ensure the number of digits used for the page index is always the same, and large enough
                # then sorting works as expected
                page_str = str(page).zfill(len(str(npage)))
                fig_fname = os.path.join(self.output_dir,
                                         f'ranked_{plot_type}_{page_str}.pdf')
                fig.savefig(fig_fname)
                # release the figure; pyplot keeps every open figure alive otherwise
                plt.close(fig)
        # merge the plots
        output_file = f"{self.output_dir}/CB{self.obs_config['beam']:02d}.pdf"
        merger = PdfFileMerger()
        try:
            for plot_type in self.config.plot_types:
                fnames = glob.glob(f'{self.output_dir}/*{plot_type}*.pdf')
                fnames.sort()
                for fname in fnames:
                    merger.append(fname)
            merger.write(output_file)
        finally:
            # release the per-page PDF files opened by the merger
            merger.close()
        # copy the file to the central output directory
        self.logger.info(
            f"Saving plots to {self.result_dir}/{os.path.basename(output_file)}"
        )
        copy(output_file, self.result_dir)
Exemple #18
0
def merge(files):
    """Merge two pdfs into one

    Only ``files[0]`` and ``files[1]`` are used. The output, written to
    ``PyPDF-Merging-Output.pdf``, consists of the first 3 pages of the first
    file, with the first page of the second file inserted after page 2,
    followed by the whole second file.
    """
    # Setting the file name of output
    output = "PyPDF-Merging-Output.pdf"
    # Create the merger instance. Its first argument is ``strict``, not an
    # output stream; the original passed an open (truthy) file handle there
    # and leaked it, so strict=True keeps the effective behavior.
    merger = PdfFileMerger(strict=True)
    try:
        # Opening two input files
        with open(files[0], "rb") as input1, open(files[1], "rb") as input2:
            # Add the first 3 pages of input1 to output
            merger.append(fileobj=input1, pages=(0, 3))
            # Insert the first page of input2 into the output beginning after the second page
            merger.merge(position=2, fileobj=input2, pages=(0, 1))
            # Append entire input2 document to the end of the output document
            merger.append(input2)
        # Output and close the file
        merger.write(output)
    finally:
        merger.close()
    print("Output successfully written to", output)