Ejemplo n.º 1
0
def pdf_encrypt(filename, password):
    """Encrypt the PDF at ``filename`` in place by running pdftk.

    ``password`` may be:

    * a scalar (str, unicode, bool, int or float), used as both the owner
      and the user password;
    * a list, whose first item is the owner password and whose second item
      is the user password;
    * a dict with optional ``owner`` and ``user`` keys, each defaulting to
      the literal string ``'password'``.

    Raises DAError if ``password`` is of any other type or if pdftk exits
    with a non-zero status.
    """
    if isinstance(password, (str, unicode, bool, int, float)):
        owner_password = user_password = unicode(password).strip()
    elif isinstance(password, list):
        owner_password = unicode(password[0]).strip()
        user_password = unicode(password[1]).strip()
    elif isinstance(password, dict):
        owner_password = unicode(password.get('owner', 'password')).strip()
        user_password = unicode(password.get('user', 'password')).strip()
    else:
        raise DAError("pdf_encrypt: invalid password")
    outfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    # Only the reserved path is needed: pdftk writes to it by name, so the
    # Python-side handle is closed immediately instead of being leaked.
    outfile.close()
    commands = ['pdftk', filename, 'output', outfile.name]
    # owner_pw is only passed when it differs from the user password.
    if owner_password != user_password:
        commands.extend(['owner_pw', owner_password])
    commands.extend(['user_pw', user_password, 'allow', 'printing'])
    try:
        subprocess.check_output(commands, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        raise DAError("pdf_encrypt: error running pdftk.  " + err.output)
    # Replace the original file with the encrypted copy.
    shutil.move(outfile.name, filename)
Ejemplo n.º 2
0
def concatenate_files(path_list, pdfa=False, password=None):
    """Concatenate the files in ``path_list`` into one PDF and return its path.

    Images are converted to PDF with ImageMagick's ``convert``; RTF, DOCX,
    DOC and ODT files are converted with
    ``docassemble.base.pandoc.word_to_pdf``; PDF files are used as they are.
    Files of any other type, or whose type cannot be guessed from the file
    name, are skipped.

    If ``pdfa`` is true the combined file is passed to ``pdf_to_pdfa``; if
    ``password`` is truthy it is passed to ``pdf_encrypt``.

    Raises DAError when no usable input files remain or when pdftk fails.
    """
    word_formats = {
        'application/rtf': 'rtf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # External tools write to the reserved path by name; the Python handle
    # is not needed and would otherwise be leaked.
    pdf_file.close()
    subprocess_arguments = [PDFTK_PATH]
    new_path_list = list()
    for path in path_list:
        mimetype, encoding = mimetypes.guess_type(path)
        if mimetype is None:
            # guess_type() returns None for unrecognised names; calling
            # .startswith() on it would raise AttributeError.
            logmessage("concatenate_files: could not determine type of " + str(path))
            continue
        if mimetype.startswith('image'):
            new_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                       mode="wb",
                                                       suffix=".pdf",
                                                       delete=False)
            new_pdf_file.close()
            args = ["convert", path, new_pdf_file.name]
            result = call(args)
            if result != 0:
                logmessage("failed to convert image to PDF: " + " ".join(args))
                continue
            new_path_list.append(new_pdf_file.name)
        elif mimetype in word_formats:
            new_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                       mode="wb",
                                                       suffix=".pdf",
                                                       delete=False)
            new_pdf_file.close()
            # pdfa=False here: PDF/A conversion is applied once, to the
            # concatenated output, further below.
            docassemble.base.pandoc.word_to_pdf(path,
                                                word_formats[mimetype],
                                                new_pdf_file.name,
                                                pdfa=False)
            new_path_list.append(new_pdf_file.name)
        elif mimetype == 'application/pdf':
            new_path_list.append(path)
    if len(new_path_list) == 0:
        raise DAError("concatenate_files: no valid files to concatenate")
    subprocess_arguments.extend(new_path_list)
    subprocess_arguments.extend(['cat', 'output', pdf_file.name])
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to concatenate PDF files")
        raise DAError(
            "Call to pdftk failed for concatenation where arguments were " +
            " ".join(subprocess_arguments))
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    if password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
Ejemplo n.º 3
0
 def get_modtime(self, **kwargs):
     """Return the modification time of a file belonging to this object.

     The optional ``filename`` keyword selects the file; it defaults to
     ``self.filename``.  When cloud storage is configured and the object
     has not been fixed locally, the value comes from the cloud key's
     ``get_epoch_modtime()``; otherwise it is ``os.path.getmtime`` of the
     file inside ``self.directory``.

     Raises DAError if the file cannot be found.
     """
     filename = kwargs.get('filename', self.filename)
     if cloud is None or self.fixed:
         # Local copy is authoritative.
         local_path = os.path.join(self.directory, filename)
         if os.path.isfile(local_path):
             return os.path.getmtime(local_path)
         raise DAError("get_modtime: file " + filename + " in " + self.section + " did not exist")
     cloud_key = cloud.search_key(str(self.section) + '/' + str(self.file_number) + '/' + path_to_key(filename))
     if cloud_key is not None and cloud_key.does_exist:
         return cloud_key.get_epoch_modtime()
     raise DAError("get_modtime: file " + filename + " in " + self.section + " did not exist")
Ejemplo n.º 4
0
def concatenate_files(path_list, pdfa=False, password=None):
    """Concatenate the files in ``path_list`` into one PDF and return its path.

    Images are converted to PDF with ImageMagick (the executable named by
    the ``imagemagick`` configuration key, default ``convert``); RTF, DOCX,
    DOC and ODT files are converted with ``word_to_pdf``; PDF files are
    used as they are.  Files of any other type, or whose type cannot be
    guessed from the file name, are skipped.

    If ``pdfa`` is true the combined file is passed to ``pdf_to_pdfa``.
    The first input, the output and ``password`` are then handed to
    ``replicate_js_and_calculations``.

    Raises DAError when no usable input files remain or when pdftk fails
    or takes longer than 60 seconds.
    """
    word_formats = {
        'application/rtf': 'rtf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
    # External tools write to the reserved path by name; the Python handle
    # is not needed and would otherwise be leaked.
    pdf_file.close()
    subprocess_arguments = [PDFTK_PATH]
    new_path_list = list()
    for path in path_list:
        mimetype, encoding = mimetypes.guess_type(path)
        if mimetype is None:
            # guess_type() returns None for unrecognised names; calling
            # .startswith() on it would raise AttributeError.
            logmessage("concatenate_files: could not determine type of " + str(path))
            continue
        if mimetype.startswith('image'):
            new_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
            new_pdf_file.close()
            args = [daconfig.get('imagemagick', 'convert'), path, new_pdf_file.name]
            try:
                result = subprocess.run(args, timeout=60).returncode
            except subprocess.TimeoutExpired:
                logmessage("concatenate_files: convert took too long")
                result = 1
            if result != 0:
                logmessage("failed to convert image to PDF: " + " ".join(args))
                continue
            new_path_list.append(new_pdf_file.name)
        elif mimetype in word_formats:
            new_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
            new_pdf_file.close()
            # pdfa=False here: PDF/A conversion is applied once, to the
            # concatenated output, further below.
            word_to_pdf(path, word_formats[mimetype], new_pdf_file.name, pdfa=False)
            new_path_list.append(new_pdf_file.name)
        elif mimetype == 'application/pdf':
            new_path_list.append(path)
    if len(new_path_list) == 0:
        raise DAError("concatenate_files: no valid files to concatenate")
    subprocess_arguments.extend(new_path_list)
    subprocess_arguments.extend(['cat', 'output', pdf_file.name])
    try:
        result = subprocess.run(subprocess_arguments, timeout=60).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("concatenate_files: call to cat took too long")
    if result != 0:
        logmessage("Failed to concatenate PDF files")
        raise DAError("Call to pdftk failed for concatenation where arguments were " + " ".join(subprocess_arguments))
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    replicate_js_and_calculations(new_path_list[0], pdf_file.name, password)
    return pdf_file.name
Ejemplo n.º 5
0
def concatenate_files(path_list):
    """Merge word-processing files into a single DOCX and return its path.

    RTF, DOC and ODT inputs are first converted to DOCX with
    ``docassemble.base.pandoc.convert_file``; DOCX inputs are used as they
    are; anything else is ignored.  When exactly one usable file remains,
    its path is returned without building a new document.

    Raises DAError if no usable files are found.
    """
    docx_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    convertible = {
        'application/rtf': 'rtf',
        'application/msword': 'doc',
        'application/vnd.oasis.opendocument.text': 'odt',
    }
    docx_paths = []
    for path in path_list:
        mimetype = mimetypes.guess_type(path)[0]
        if mimetype in convertible:
            converted = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False)
            docassemble.base.pandoc.convert_file(path, converted.name, convertible[mimetype], 'docx')
            docx_paths.append(converted.name)
        elif mimetype == docx_mimetype:
            docx_paths.append(path)
    if not docx_paths:
        raise DAError("concatenate_files: no valid files to concatenate")
    first_path, remaining_paths = docx_paths[0], docx_paths[1:]
    if not remaining_paths:
        return first_path
    # The first document is the base; every other one is appended to it.
    composer = Composer(docx.Document(first_path))
    for extra_path in remaining_paths:
        composer.append(docx.Document(extra_path))
    merged = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".docx", delete=False)
    composer.save(merged.name)
    return merged.name
Ejemplo n.º 6
0
def pdf_encrypt(filename, password):
    """Encrypt the PDF at ``filename`` in place by running pdftk.

    ``password`` is interpreted by ``get_passwords``, which yields the
    owner and user passwords.  The owner password is only passed to pdftk
    when it differs from the user password.  Printing is the only
    permission granted.

    Raises DAError if pdftk exits with a non-zero status.
    """
    (owner_password, user_password) = get_passwords(password)
    outfile = tempfile.NamedTemporaryFile(prefix="datemp",
                                          suffix=".pdf",
                                          delete=False)
    # Only the reserved path is needed: pdftk writes to it by name, so the
    # Python-side handle is closed immediately instead of being leaked.
    outfile.close()
    commands = ['pdftk', filename, 'output', outfile.name]
    if owner_password != user_password:
        commands.extend(['owner_pw', owner_password])
    commands.extend(['user_pw', user_password, 'allow', 'printing'])
    try:
        subprocess.check_output(commands, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        # check_output captures bytes; decode before concatenating so the
        # failure surfaces as DAError rather than a str + bytes TypeError.
        output = err.output
        if isinstance(output, bytes):
            output = output.decode('utf-8', 'replace')
        raise DAError("pdf_encrypt: error running pdftk.  " + str(output)) from err
    # Replace the original file with the encrypted copy.
    shutil.move(outfile.name, filename)
Ejemplo n.º 7
0
def apply_qpdf(filename):
    """Repair the PDF at ``filename`` in place with qpdf if pypdf rejects it.

    If ``pypdf.PdfFileReader`` can open the file, nothing is done.
    Otherwise qpdf rewrites the file to a temporary path, the result is
    checked with pypdf again, and on success it is copied over
    ``filename``.

    Raises DAError if qpdf fails, takes longer than 60 seconds, or
    produces a file that still cannot be read.
    """
    try:
        # The reader is only constructed as a validity check, so the handle
        # can be closed as soon as the constructor returns.
        with open(filename, 'rb') as source:
            pypdf.PdfFileReader(source, overwriteWarnings=False)
    except pypdf.utils.PdfReadError:
        pass
    else:
        return
    try:
        new_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                               mode="wb",
                                               suffix=".pdf",
                                               delete=False)
        # qpdf writes to the path by name; the handle itself is not needed.
        new_file.close()
        qpdf_subprocess_arguments = [QPDF_PATH, filename, new_file.name]
        try:
            result = subprocess.run(qpdf_subprocess_arguments,
                                    timeout=60,
                                    check=False).returncode
        except subprocess.TimeoutExpired:
            result = 1
            logmessage("apply_qpdf: call to qpdf took too long")
        if result != 0:
            logmessage("Failed to convert PDF " + str(filename))
            logmessage("Call to qpdf failed for " + str(filename) +
                       " where arguments were " +
                       " ".join(qpdf_subprocess_arguments))
            raise Exception("qpdf error")
        with open(new_file.name, 'rb') as repaired:
            pypdf.PdfFileReader(repaired, overwriteWarnings=False)
    except Exception as err:
        # Any failure in the repair attempt is reported uniformly; unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit propagate.
        raise DAError("Could not fix PDF") from err
    shutil.copyfile(new_file.name, filename)
Ejemplo n.º 8
0
 def finalize(self):
     """Push local changes in ``self.directory`` to cloud storage.

     Does nothing when no cloud backend is configured.  Every regular file
     in the directory is uploaded unless it is already tracked in
     ``self.keydict`` with an unchanged modification time.  Keys created
     here get their ``content_type`` set from the file name.  Tracked keys
     whose file no longer appears in the directory are deleted.

     Raises DAError if called before the object has been fixed.
     """
     if cloud is None:
         return
     if not self.fixed:
         raise DAError("SavedFile: finalize called before fix")
     existing_files = os.listdir(self.directory)
     for filename in existing_files:
         fullpath = os.path.join(self.directory, filename)
         if not os.path.isfile(fullpath):
             continue
         if filename in self.keydict:
             key = self.keydict[filename]
             # Skip the upload when the file is unchanged since it was fetched.
             needs_upload = self.modtimes[filename] != os.path.getmtime(fullpath)
         else:
             key = cloud.get_key(
                 str(self.section) + '/' + str(self.file_number) + '/' +
                 str(filename))
             if self.extension is not None and filename == self.filename:
                 name_for_type = filename + '.' + self.extension
             else:
                 name_for_type = filename
             extension, mimetype = get_ext_and_mimetype(name_for_type)
             key.content_type = mimetype
             needs_upload = True
         if needs_upload:
             key.set_contents_from_filename(fullpath)
     for filename, key in self.keydict.iteritems():
         if filename in existing_files:
             continue
         key.delete()
Ejemplo n.º 9
0
def alchemy_url(db_config):
    """Build a SQLAlchemy connection URL from ``daconfig[db_config]``.

    The configuration section must be a dict containing at least ``name``.
    Optional keys are ``user``, ``password``, ``host``, ``port`` and
    ``prefix`` (default ``'postgresql+psycopg2://'``).  When a user is
    given without a host, the host defaults to ``'localhost'``.  The
    database name is appended unless the prefix starts with ``'oracle'``.

    Raises Exception if the section is missing or malformed, and DAError
    if the database name is None.
    """
    if db_config not in daconfig or (not isinstance(daconfig[db_config], dict)) or 'name' not in daconfig[db_config]:
        raise Exception("alchemy_connection_string: missing or invalid configuration for " + db_config)
    settings = daconfig[db_config]
    dbuser = settings.get('user', None)
    dbpassword = settings.get('password', None)
    dbhost = settings.get('host', None)
    if dbhost is None and dbuser is not None:
        dbhost = 'localhost'
    dbport = settings.get('port', None)
    dbprefix = settings.get('prefix', 'postgresql+psycopg2://')
    dbname = settings['name']

    alchemy_connect_string = ""
    if dbprefix is not None:
        alchemy_connect_string += dbprefix
    if dbuser is not None:
        alchemy_connect_string += dbuser
    # The ':' separator is always emitted, with or without a password.
    alchemy_connect_string += ":"
    if dbpassword is not None:
        alchemy_connect_string += dbpassword
    # Likewise the '@' separator is always emitted.
    alchemy_connect_string += '@'
    if dbhost is not None:
        alchemy_connect_string += dbhost
        if dbport is not None:
            alchemy_connect_string += ':' + str(dbport)
    # An explicit null prefix is tolerated above, so it must not be
    # dereferenced here either.
    if dbprefix is None or not dbprefix.startswith('oracle'):
        if dbname is None:
            raise DAError("No database name provided")
        alchemy_connect_string += "/" + dbname
    return alchemy_connect_string
Ejemplo n.º 10
0
 def __init__(self, section='', project='default'):
     """Set up a Playground object for the current logged-in user.

     Stores the user's id, the current thread info, ``section`` and
     ``project`` on the instance, then refreshes the file list.

     Raises DAError if the current user is anonymous.
     """
     thread_info = docassemble.base.functions.this_thread.current_info
     user_info = thread_info['user']
     if user_info['is_anonymous']:
         raise DAError("Users must be logged in to create Playground objects")
     self.user_id = user_info['theid']
     self.current_info = thread_info
     self.section = section
     self.project = project
     self._update_file_list()
Ejemplo n.º 11
0
 def size_in_bytes(self, **kwargs):
     """Return the size of a file belonging to this object.

     The optional ``filename`` keyword selects the file; it defaults to
     ``self.filename``.  When cloud storage is configured and the object
     has not been fixed locally, the cloud key's ``size`` is returned;
     otherwise ``os.path.getsize`` of the file in ``self.directory``.

     Raises DAError if the cloud key cannot be found.
     """
     filename = kwargs.get('filename', self.filename)
     if cloud is None or self.fixed:
         local_path = os.path.join(self.directory, filename)
         return os.path.getsize(local_path)
     key_name = str(self.section) + '/' + str(self.file_number) + '/' + path_to_key(filename)
     key = cloud.search_key(key_name)
     if key is not None and key.does_exist:
         return key.size
     raise DAError("size_in_bytes: file " + filename + " in " + self.section + " did not exist")
Ejemplo n.º 12
0
def fill_template(template, data_strings=[], data_names=[], hidden=[], readonly=[], images=[], pdf_url=''):
    """Fill the PDF form ``template`` with pdftk and return the output path.

    ``data_strings``, ``data_names``, ``hidden``, ``readonly`` and
    ``pdf_url`` are passed straight to ``fdfgen.forge_fdf``; the resulting
    FDF is applied with pdftk's ``fill_form`` and the form is flattened.

    ``images`` is an iterable of ``(field, file_info)`` pairs.  For every
    field that is a ``/Sig`` field of the template, the image at
    ``file_info['fullpath']`` is trimmed, scaled to the field rectangle
    and merged onto the field's page.  Fields that are not found, and
    images that ImageMagick cannot process, are logged and skipped.

    The list defaults are kept for interface compatibility; this function
    itself does not mutate them.

    Raises DAError if pdftk fails.
    """
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    fdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".fdf", delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
    subprocess_arguments = [PDFTK_PATH, template, 'fill_form', fdf_file.name, 'output', pdf_file.name, 'flatten']
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) + " where arguments were " + " ".join(subprocess_arguments))
    if len(images):
        # Collect the location of every signature field in the template.
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) == '/Sig':
                fields[field] = {'pageno': pageno, 'rect': rect}
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) + " not found in PDF file")
                continue
            logmessage("Need to put image on page " + str(fields[field]['pageno']))
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            args = ["convert", file_info['fullpath'], "-trim", "+repage", temp_png.name]
            result = call(args)
            # Any non-zero exit status is a failure, not only status 1.
            if result != 0:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle along each axis;
            # the larger ratio is used for both axes.
            dppx = width/(xtwo-xone)
            dppy = height/(ytwo-yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            extent_x, extent_y = xone*dpp+width, yone*dpp+height
            overlay_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf")
            args = ["convert", temp_png.name, "-background", "none", "-density", str(int(dpp*72)), "-gravity", "NorthEast", "-extent", str(int(extent_x)) + 'x' + str(int(extent_y)), overlay_pdf_file.name]
            result = call(args)
            if result != 0:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf")
            with open(pdf_file.name, "rb") as inFile, open(overlay_pdf_file.name, "rb") as overlay:
                original = pypdf.PdfFileReader(inFile)
                # Page numbers from read_fields are 1-based; getPage is 0-based.
                background = original.getPage(fields[field]['pageno']-1)
                foreground = pypdf.PdfFileReader(overlay).getPage(0)
                background.mergePage(foreground)
                writer = pypdf.PdfFileWriter()
                for i in range(original.getNumPages()):
                    page = original.getPage(i)
                    writer.addPage(page)
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Carry the merged result forward so later images stack on it.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    return pdf_file.name
Ejemplo n.º 13
0
 def fetch_url_post(self, url, post_args, **kwargs):
     """POST ``post_args`` to ``url`` and store the response body as a file.

     The optional ``filename`` keyword names the destination inside
     ``self.directory``; it defaults to ``self.filename``.  The object is
     fixed before the request and saved after the file is written.

     Raises DAError if the response status is not 200.
     """
     filename = kwargs.get('filename', self.filename)
     self.fix()
     r = requests.post(url_sanitize(url), data=post_args)
     if r.status_code != 200:
         # The separating space was missing, gluing "failed" to the URL.
         raise DAError('fetch_url_post: retrieval from ' + url + ' failed')
     with open(os.path.join(self.directory, filename), 'wb') as fp:
         for block in r.iter_content(1024):
             fp.write(block)
     self.save()
Ejemplo n.º 14
0
def overlay_pdf_multi(main_file, logo_file, out_file):
    """Stamp ``logo_file`` onto ``main_file`` with pdftk ``multistamp``.

    The result is written to ``out_file``.  Raises DAError if pdftk exits
    with a non-zero status or runs for more than 60 seconds.
    """
    subprocess_arguments = [PDFTK_PATH, main_file, 'multistamp', logo_file, 'output', out_file]
    try:
        completed = subprocess.run(subprocess_arguments, timeout=60, check=False)
    except subprocess.TimeoutExpired:
        logmessage("overlay_pdf_multi: call to pdftk took too long")
        succeeded = False
    else:
        succeeded = completed.returncode == 0
    if succeeded:
        return
    logmessage("Failed to overlay PDF")
    raise DAError("Call to pdftk failed for overlay where arguments were " + " ".join(subprocess_arguments))
Ejemplo n.º 15
0
def flatten_pdf(filename):
    """Flatten the form fields of the PDF at ``filename`` in place.

    pdftk writes the flattened document to a temporary file, which then
    replaces ``filename``.

    Raises DAError if pdftk exits with a non-zero status.
    """
    outfile = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".pdf", delete=False)
    subprocess_arguments = [PDFTK_PATH, filename, 'output', outfile.name, 'flatten']
    result = call(subprocess_arguments)
    if result != 0:
        # These messages used an undefined name ``template``, so a pdftk
        # failure surfaced as NameError instead of DAError.
        logmessage("Failed to flatten PDF form " + str(filename))
        raise DAError("Call to pdftk failed for template " + str(filename) + " where arguments were " + " ".join(subprocess_arguments))
    shutil.move(outfile.name, filename)
Ejemplo n.º 16
0
def pdf_to_pdfa(filename):
    """Convert the PDF at ``filename`` to PDF/A in place using Ghostscript.

    Ghostscript runs inside a scratch working directory and writes to a
    temporary file, which then replaces ``filename``.  The scratch
    directory is removed afterwards whether or not Ghostscript succeeded.

    Raises DAError if Ghostscript exits with a non-zero status.
    """
    logmessage("pdf_to_pdfa: running")
    outfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    # Ghostscript writes to the path by name; the handle is not needed.
    outfile.close()
    directory = tempfile.mkdtemp()
    commands = ['gs', '-dPDFA', '-dBATCH', '-dNOPAUSE', '-sProcessColorModel=DeviceCMYK', '-sDEVICE=pdfwrite', '-sPDFACompatibilityPolicy=1', '-sOutputFile=' + outfile.name, filename]
    try:
        output = subprocess.check_output(commands, cwd=directory, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        output = err.output
        # Where bytes and str are distinct types, decode before
        # concatenating so the failure is reported as DAError.
        if isinstance(output, bytes) and not isinstance(output, str):
            output = output.decode('utf-8', 'replace')
        raise DAError("pdf_to_pdfa: error running ghostscript.  " + output)
    finally:
        # mkdtemp() leaves removal of the directory to the caller.
        shutil.rmtree(directory, ignore_errors=True)
    if isinstance(output, bytes) and not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    logmessage(output)
    shutil.move(outfile.name, filename)
Ejemplo n.º 17
0
 def __init__(self, section=''):
     """Set up a Playground object for the current logged-in user.

     Stores the user's id, the current thread info and ``section`` on the
     instance, creates the backing ``SavedFile`` area for the section
     ``'playground' + section``, then refreshes the file list.

     Raises DAError if the current user is anonymous.
     """
     thread_info = docassemble.base.functions.this_thread.current_info
     user_info = thread_info['user']
     if user_info['is_anonymous']:
         raise DAError("Users must be logged in to create Playground objects")
     self.user_id = user_info['theid']
     self.current_info = thread_info
     self.section = section
     area_section = 'playground' + self.section
     self.area = SavedFile(self.user_id, fix=True, section=area_section)
     self._update_file_list()
Ejemplo n.º 18
0
def flatten_pdf(filename):
    """Flatten the form fields of the PDF at ``filename`` in place.

    pdftk writes the flattened document to a temporary file, which then
    replaces ``filename``.  Raises DAError if pdftk exits with a non-zero
    status or runs for more than 60 seconds.
    """
    flattened = tempfile.NamedTemporaryFile(prefix="datemp", suffix=".pdf", delete=False)
    subprocess_arguments = [PDFTK_PATH, filename, 'output', flattened.name, 'flatten']
    try:
        completed = subprocess.run(subprocess_arguments, timeout=60, check=False)
    except subprocess.TimeoutExpired:
        logmessage("flatten_pdf: call to pdftk took too long")
        succeeded = False
    else:
        succeeded = completed.returncode == 0
    if not succeeded:
        logmessage("Failed to flatten PDF form")
        raise DAError("Call to pdftk failed for template where arguments were " + " ".join(subprocess_arguments))
    shutil.move(flattened.name, filename)
Ejemplo n.º 19
0
 def finalize(self):
     """Push local changes in ``self.directory`` to cloud storage.

     Does nothing when no cloud backend is configured.  Every regular file
     reported by ``listfiles`` is uploaded unless it is already tracked in
     ``self.keydict`` with an unchanged modification time; after an upload
     the recorded modification time is refreshed from the key.  Tracked
     keys whose local file no longer exists are deleted on a best-effort
     basis.

     Raises DAError if called before the object has been fixed.
     """
     if cloud is None:
         return
     if not self.fixed:
         raise DAError("SavedFile: finalize called before fix")
     for filename in listfiles(self.directory):
         fullpath = os.path.join(self.directory, filename)
         if os.path.isfile(fullpath):
             save = True
             if filename in self.keydict:
                 key = self.keydict[filename]
                 # Unchanged since it was fetched: nothing to upload.
                 if self.modtimes[filename] == os.path.getmtime(fullpath):
                     save = False
             else:
                 key = cloud.get_key(
                     str(self.section) + '/' + str(self.file_number) + '/' +
                     path_to_key(filename))
             if save:
                 if self.extension is not None and filename == self.filename:
                     extension, mimetype = get_ext_and_mimetype(
                         filename + '.' + self.extension)
                 else:
                     extension, mimetype = get_ext_and_mimetype(filename)
                 key.content_type = mimetype
                 # The file may have vanished since the check above.
                 if not os.path.isfile(fullpath):
                     continue
                 try:
                     key.set_contents_from_filename(fullpath)
                     self.modtimes[filename] = key.get_epoch_modtime()
                 except FileNotFoundError:
                     sys.stderr.write("finalize: error while saving " +
                                      str(self.section) + '/' +
                                      str(self.file_number) + '/' +
                                      str(filename) + "; path " +
                                      str(fullpath) + " disappeared\n")
     for filename, key in self.keydict.items():
         if not os.path.isfile(os.path.join(self.directory, filename)):
             sys.stderr.write("finalize: deleting " + str(self.section) +
                              '/' + str(self.file_number) + '/' +
                              path_to_key(filename) + "\n")
             try:
                 key.delete()
             except Exception as err:
                 # Deletion stays best-effort, but the failure is reported
                 # and KeyboardInterrupt/SystemExit are no longer swallowed.
                 sys.stderr.write("finalize: could not delete " +
                                  str(self.section) + '/' +
                                  str(self.file_number) + '/' +
                                  path_to_key(filename) + ": " +
                                  str(err) + "\n")
Ejemplo n.º 20
0
def safe_pypdf_reader(filename):
    """Return a ``pypdf.PdfFileReader`` for ``filename``, repairing it if needed.

    If pypdf raises ``PdfReadError`` on the file, qpdf rewrites it to a
    temporary path and a reader for that copy is returned instead.

    The file handle behind the returned reader is left open, because the
    reader is handed to the caller; handles for failed attempts are closed.

    Raises DAError if qpdf exits with a non-zero status or runs for more
    than 60 seconds.
    """
    source = open(filename, 'rb')
    try:
        return pypdf.PdfFileReader(source, overwriteWarnings=False)
    except pypdf.utils.PdfReadError:
        # Unreadable as is: release the handle and fall through to repair.
        source.close()
    except Exception:
        source.close()
        raise
    new_filename = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=".pdf", delete=False)
    # qpdf writes to the path by name; the handle itself is not needed.
    new_filename.close()
    qpdf_subprocess_arguments = [QPDF_PATH, filename, new_filename.name]
    try:
        result = subprocess.run(qpdf_subprocess_arguments, timeout=60, check=False).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("safe_pypdf_reader: call to qpdf took too long")
    if result != 0:
        logmessage("Failed to convert PDF template " + str(filename))
        raise DAError("Call to qpdf failed for template " + str(filename) + " where arguments were " + " ".join(qpdf_subprocess_arguments))
    repaired = open(new_filename.name, 'rb')
    try:
        return pypdf.PdfFileReader(repaired, overwriteWarnings=False)
    except Exception:
        repaired.close()
        raise
Ejemplo n.º 21
0
def get_passwords(password):
    """Normalise ``password`` into an ``(owner_password, user_password)`` pair.

    * ``None`` gives ``(None, None)``.
    * A str, bool, int or float is used for both passwords.
    * A list supplies the owner password as item 0 and the user password
      as item 1.
    * A dict supplies them under the ``owner`` and ``user`` keys, each
      defaulting to the literal string ``'password'``.

    Values are converted with ``str`` and stripped of surrounding
    whitespace.  Raises DAError for any other type.
    """
    if password is None:
        return (None, None)
    if isinstance(password, dict):
        owner_raw = password.get('owner', 'password')
        user_raw = password.get('user', 'password')
    elif isinstance(password, list):
        owner_raw, user_raw = password[0], password[1]
    elif isinstance(password, (str, bool, int, float)):
        owner_raw = user_raw = password
    else:
        raise DAError("get_passwords: invalid password")
    return (str(owner_raw).strip(), str(user_raw).strip())
Ejemplo n.º 22
0
def get_passwords(password):
    """Normalise ``password`` into an ``(owner_password, user_password)`` pair.

    * ``None`` gives ``(None, None)``.
    * A value whose exact type is str, unicode, bool, int or float is used
      for both passwords.
    * A list supplies the owner password as item 0 and the user password
      as item 1.
    * A dict supplies them under the ``owner`` and ``user`` keys, each
      defaulting to the literal string ``'password'``.

    Values are converted with ``text_type`` and stripped of surrounding
    whitespace.  Raises DAError for any other type.
    """
    if password is None:
        return (None, None)
    kind = type(password)
    if kind in (str, unicode, bool, int, float):
        owner_raw = user_raw = password
    elif kind is list:
        owner_raw, user_raw = password[0], password[1]
    elif kind is dict:
        owner_raw = password.get('owner', 'password')
        user_raw = password.get('user', 'password')
    else:
        raise DAError("get_passwords: invalid password")
    return (text_type(owner_raw).strip(), text_type(user_raw).strip())
Ejemplo n.º 23
0
dbpassword = daconfig['db'].get('password', None)
dbhost = daconfig['db'].get('host', None)
if dbhost is None and dbuser is not None:
    dbhost = 'localhost'
dbport = daconfig['db'].get('port', None)
dbprefix = daconfig['db'].get('prefix', 'postgresql+psycopg2://')
dbname = daconfig['db'].get('name', 'docassemble')
dbtableprefix = daconfig['db'].get('table prefix', None)
if not dbtableprefix:
    dbtableprefix = ''

connect_string = ""
if dbname is not None:
    connect_string += "dbname=" + dbname
else:
    raise DAError("No database name provided")
if dbuser is not None:
    connect_string += " user="******" password="******""
if dbprefix is not None:
    alchemy_connect_string += dbprefix
if dbuser is not None:
    alchemy_connect_string += dbuser
if dbpassword is not None:
    alchemy_connect_string += ":" + dbpassword
else:
    alchemy_connect_string += ":"
if dbhost is not None:
Ejemplo n.º 24
0
def _run_ocr_command(params, tool_name):
    """Run one external OCR step; raise Exception on failure or timeout."""
    try:
        result = subprocess.run(params, timeout=60 * 60).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("ocr_pdf: call to " + tool_name + " took too long")
    if result != 0:
        raise Exception("ocr_pdf: failed to run " + tool_name +
                        " with command " + " ".join(params))


def ocr_pdf(*pargs,
            target=None,
            filename=None,
            lang=None,
            psm=6,
            dafilelist=None,
            preserve_color=False):
    """OCR one or more documents into a searchable PDF stored in ``target``.

    Positional arguments may be DAFileList, DAFileCollection, DAStaticFile,
    DAFile or str objects.  If none are given, ``target`` itself is the
    input.  Multiple inputs, or any input not known to be a PDF, are first
    combined with ``docassemble.base.util.pdf_concatenate``.

    A PDF input is rasterised to TIFF with Ghostscript (``tiff48nc`` when
    ``preserve_color`` is true, ``tiffgray`` otherwise) before being passed
    to tesseract; image inputs are passed to tesseract directly.

    ``filename`` defaults to ``'file.pdf'`` and ``psm`` to 6.  ``lang`` is
    passed to tesseract as ``str(lang)``.  ``dafilelist`` is accepted but
    not used by this function.

    Returns ``target`` after re-initialising it with the OCR result, or
    None if the document to process has no ``extension`` attribute.

    Raises DAError if ``target`` is not a DAFile, if a DAFileCollection
    has no ``pdf`` attribute, or if the document is not a pdf/png/jpg/gif;
    raises Exception if Ghostscript or tesseract fails or times out.
    """
    if preserve_color:
        device = 'tiff48nc'
    else:
        device = 'tiffgray'
    docs = []
    all_pdf = True
    if not isinstance(target, DAFile):
        raise DAError("ocr_pdf: target must be a DAFile")
    for other_file in pargs:
        if isinstance(other_file, DAFileList):
            for other_file_sub in other_file.elements:
                # Check each element, not the enclosing list.
                if not other_file_sub._is_pdf():
                    all_pdf = False
                docs.append(other_file_sub)
        elif isinstance(other_file, DAFileCollection):
            if not hasattr(other_file, 'pdf'):
                raise DAError(
                    'ocr_pdf: DAFileCollection object did not have pdf attribute.'
                )
            docs.append(other_file.pdf)
        elif isinstance(other_file, DAStaticFile):
            if not other_file._is_pdf():
                all_pdf = False
            docs.append(other_file)
        elif isinstance(other_file, (str, DAFile)):
            all_pdf = False
            docs.append(other_file)
    if len(docs) == 0:
        if not target._is_pdf():
            all_pdf = False
        docs.append(target)
    if len(docs) > 1 or not all_pdf:
        import docassemble.base.util
        doc = docassemble.base.util.pdf_concatenate(docs)
    else:
        doc = docs[0]
    if psm is None:
        psm = 6
    if filename is None:
        filename = 'file.pdf'
    if not hasattr(doc, 'extension'):
        return None
    if doc.extension not in ['pdf', 'png', 'jpg', 'gif']:
        raise DAError("ocr_pdf: not a readable image file")
    path = doc.path()
    # Only the unique base name is needed; the result is later read from
    # that name with '.pdf' appended.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           delete=False)
    pdf_file.close()
    if doc.extension == 'pdf':
        tiff_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                mode="wb",
                                                suffix=".tiff",
                                                delete=False)
        # Ghostscript writes to the path by name; the handle is not needed.
        tiff_file.close()
        _run_ocr_command([
            'gs', '-q', '-dNOPAUSE', '-sDEVICE=' + device, '-r600',
            '-sOutputFile=' + tiff_file.name, path, '-c', 'quit'
        ], 'gs')
        ocr_input = tiff_file.name
    else:
        ocr_input = path
    _run_ocr_command([
        'tesseract', ocr_input, pdf_file.name, '-l',
        str(lang), '--psm',
        str(psm), 'pdf'
    ], 'tesseract')
    target.initialize(filename=filename,
                      extension='pdf',
                      mimetype='application/pdf',
                      reinitialize=True)
    shutil.copyfile(pdf_file.name + '.pdf', target.file_info['path'])
    del target.file_info
    target._make_pdf_thumbnail(1, both_formats=True)
    target.commit()
    target.retrieve()
    return target
Ejemplo n.º 25
0
# Fill in the form fields of the PDF `template` and return the path of the
# temporary PDF file that holds the result.
#
# Parameters (as used by the visible code):
#   template              -- passed to read_fields() and, when editable, to
#                            replicate_js_and_calculations(); also used in
#                            log/error messages.
#   data_strings          -- iterable of (field name, value) pairs.
#   data_names, hidden,
#   readonly              -- defaulted to [] but only referenced by the
#                            commented-out fdfgen call below.
#   images                -- iterable of (field name, file_info) pairs;
#                            file_info['fullpath'] is the image to place.
#   pdf_url               -- handed to Xfdf; defaults to 'file.pdf' and gets
#                            '.pdf' appended if it does not already end so.
#   editable              -- selects 'need_appearances' vs. 'flatten' and
#                            the final post-processing step.
#   pdfa                  -- when true, pdf_to_pdfa() is run on the output.
#   password              -- forwarded to replicate_js_and_calculations()
#                            (editable) or pdf_encrypt() (non-editable).
#   template_password     -- when not None, a qpdf '--decrypt' command is
#                            prepared (see NOTE(review) below).
#   default_export_value  -- fallback export value for button fields.
#
# Raises DAError when the template has no fields or when the form-filling
# subprocess returns a non-zero code (or times out).
def fill_template(template,
                  data_strings=None,
                  data_names=None,
                  hidden=None,
                  readonly=None,
                  images=None,
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None,
                  default_export_value=None):
    # Replace None with fresh containers so no list is shared between calls.
    if data_strings is None:
        data_strings = []
    if data_names is None:
        data_names = []
    if hidden is None:
        hidden = []
    if readonly is None:
        readonly = []
    if images is None:
        images = []
    if pdf_url is None:
        pdf_url = 'file.pdf'
    if not pdf_url.endswith('.pdf'):
        pdf_url += '.pdf'
    # read_fields() yields 6-tuples:
    # (field, default, pageno, rect, field_type, export_value).
    the_fields = read_fields(template)
    if len(the_fields) == 0:
        raise DAError("PDF template has no fields in it.")
    # Record the "checked" export value of every button (/Btn) field.  The
    # field type is normalized first by dropping every character that is not
    # a letter or a slash, so "/'Btn'" and "/Btn" compare alike.
    export_values = {}
    for field, default, pageno, rect, field_type, export_value in the_fields:
        field_type = re.sub(r'[^/A-Za-z]', '', str(field_type))
        if field_type in ('/Btn', "/'Btn'"):
            export_values[
                field] = export_value or default_export_value or 'Yes'
    # Translate the values supplied for button fields: a truthy-looking value
    # (or the export value itself) becomes the field's export value; anything
    # else becomes an "off" spelling chosen to match the export value's style.
    if len(export_values) > 0:
        new_data_strings = []
        for key, val in data_strings:
            if key in export_values:
                if str(val) in ('Yes', 'yes', 'True', 'true', 'On', 'on',
                                export_values[key]):
                    val = export_values[key]
                else:
                    if export_values[key] == 'On':
                        val = 'Off'
                    elif export_values[key] == 'on':
                        val = 'off'
                    elif export_values[key] == 'yes':
                        val = 'no'
                    else:
                        val = 'No'
            new_data_strings.append((key, val))
        data_strings = new_data_strings
    # Collapse the pairs into a dict (a later duplicate key overrides an
    # earlier one) and build the Xfdf object from it.
    data_dict = {}
    for key, val in data_strings:
        data_dict[key] = val
    fdf = Xfdf(pdf_url, data_dict)
    #fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # The temp file is created only to reserve a name: it is closed empty and
    # write_xfdf() then writes to that path.  delete=False keeps it on disk.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".xfdf",
                                           delete=False)
    #fdf_file.write(fdf)
    fdf_file.close()
    fdf.write_xfdf(fdf_file.name)
    #     if False:
    #         fdf_dict = {}
    #         for key, val in data_strings:
    #             fdf_dict[key] = val
    #         xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
    #         xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
    # ".xfdf", delete=False)
    #         shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; its path is what this function returns.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # NOTE(review): the statement below is corrupted.  The text following
    # '--password=' appears to have been replaced by a redaction marker
    # ('******'), which swallowed the rest of the qpdf argument list, whatever
    # code ran qpdf, and the opening of the `subprocess_arguments` list that
    # the code further down relies on.  As written this does not parse and
    # `subprocess_arguments` is never assigned.  Presumably the lost code
    # decrypted `template` into `template_file` and then started a pdftk
    # 'fill_form' command line -- restore from the upstream source.
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # Keep the form fields when the result must stay editable or when images
    # still have to be overlaid; otherwise flatten in this same invocation.
    if editable or len(images) > 0:
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    # A timeout (10 minutes) is folded into the same failure path as a
    # non-zero return code.
    try:
        result = subprocess.run(subprocess_arguments, timeout=600,
                                check=False).returncode
    except subprocess.TimeoutExpired:
        result = 1
        logmessage("fill_template: call to pdftk fill_form took too long")
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if len(images) > 0:
        # Index the signature (/Sig) fields by name with their page number
        # and rectangle; images can only be placed on these fields.
        fields = {}
        for field, default, pageno, rect, field_type, export_value in the_fields:
            if str(field_type) in ('/Sig', "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        image_todo = []
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # No delete=False here, so this file is removed automatically
            # once the NamedTemporaryFile object is closed or collected.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            # First convert pass: trim the source image and write it to the
            # temporary PNG.  The executable name comes from the 'imagemagick'
            # configuration key, defaulting to 'convert'.
            args = [
                daconfig.get('imagemagick',
                             'convert'), file_info['fullpath'], "-trim",
                "+repage", "+profile", '*', '-density', '0', temp_png.name
            ]
            try:
                result = subprocess.run(args, timeout=60,
                                        check=False).returncode
            except subprocess.TimeoutExpired:
                logmessage("fill_template: convert took too long")
                result = 1
            # NOTE(review): only return code 1 is treated as a failure here;
            # any other non-zero code falls through to Image.open().
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle along each axis;
            # the larger ratio is used for both axes.  A zero-width or
            # zero-height rectangle would divide by zero here.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if dppx > dppy:
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size for the overlay: the rectangle's (xone, yone)
            # offset scaled by dpp, plus the image's own dimensions.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second convert pass: render the trimmed PNG into a one-page
            # overlay PDF on a transparent canvas of the computed extent.
            args = [
                daconfig.get('imagemagick', 'convert'), temp_png.name,
                "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            try:
                result = subprocess.run(args, timeout=60,
                                        check=False).returncode
            except subprocess.TimeoutExpired:
                result = 1
                logmessage("fill_template: call to convert took too long")
            # NOTE(review): as above, only return code 1 counts as failure.
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            image_todo.append({
                'overlay_file': overlay_pdf_file.name,
                'pageno': fields[field]['pageno']
            })
        if len(image_todo) > 0:
            # Auto-deleting temp file used only for its name; it is reopened
            # by path below while this handle is still open.
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            original = safe_pypdf_reader(pdf_file.name)
            original.idnum_to_page = get_page_hash(original.trailer)
            catalog = original.trailer["/Root"]
            writer = DAPdfFileWriter()
            # Remember the catalog entries listed in pdf_parts so they can be
            # copied onto the new document's root object.
            tree = {}
            for part in pdf_parts:
                if part in catalog:
                    tree[part] = catalog[part]
            # Merge each overlay onto its target page (pageno is 1-based,
            # the page index is 0-based).
            for i in range(original.getNumPages()):
                for item in image_todo:
                    if (item['pageno'] - 1) == i:
                        page = original.getPage(i)
                        foreground_file = safe_pypdf_reader(
                            item['overlay_file'])
                        foreground_page = foreground_file.getPage(0)
                        page.mergePage(foreground_page)
            # Copy every page, merged or not, into the writer.
            for i in range(original.getNumPages()):
                newpage = original.getPage(i)
                writer.addPage(newpage)
            for key, val in tree.items():
                writer._root_object.update(
                    {pypdf.generic.NameObject(key): val})
            writer.page_list = []
            recursive_get_pages(writer._root_object['/Pages'],
                                writer.page_list)
            # Bookmark copying is best-effort: any exception is discarded.
            # NOTE(review): the bare except also swallows KeyboardInterrupt
            # and SystemExit.
            try:
                recursive_add_bookmark(original, writer,
                                       original.getOutlines())
            except:
                pass
            with open(new_pdf_file.name, "wb") as outFile:
                writer.write(outFile)
            # Replace the filled form with the version that has the overlays.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
    # Flattening was postponed above when images were present; do it now.
    if (not editable) and len(images) > 0:
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    # The password goes to replicate_js_and_calculations() for editable
    # output; otherwise the file is encrypted only if a password was given.
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name
Ejemplo n.º 26
0
# Fill in the form fields of the PDF `template` using an FDF file generated
# by fdfgen, and return the path of the temporary PDF that holds the result.
#
# Parameters (as used by the visible code):
#   template           -- passed to read_fields() and, when editable, to
#                         replicate_js_and_calculations(); also used in
#                         log/error messages.
#   data_strings,
#   data_names, hidden,
#   readonly           -- forwarded unchanged to fdfgen.forge_fdf().
#   images             -- iterable of (field name, file_info) pairs;
#                         file_info['fullpath'] is the image to place.
#   pdf_url            -- forwarded to forge_fdf(); None becomes ''.
#   editable           -- selects 'need_appearances' vs. 'flatten' and the
#                         final post-processing step.
#   pdfa               -- when true, pdf_to_pdfa() is run on the output.
#   password           -- forwarded to replicate_js_and_calculations()
#                         (editable) or pdf_encrypt() (non-editable).
#   template_password  -- when not None, a qpdf '--decrypt' command is
#                         prepared (see NOTE(review) below).
#
# Raises DAError when the form-filling subprocess returns a non-zero code.
#
# NOTE(review): the list defaults below are mutable objects shared between
# calls.  The visible code only reads them, but a callee that mutates one
# would leak state into later calls.
def fill_template(template,
                  data_strings=[],
                  data_names=[],
                  hidden=[],
                  readonly=[],
                  images=[],
                  pdf_url=None,
                  editable=True,
                  pdfa=False,
                  password=None,
                  template_password=None):
    if pdf_url is None:
        pdf_url = ''
    fdf = fdfgen.forge_fdf(pdf_url, data_strings, data_names, hidden, readonly)
    # delete=False keeps the FDF on disk after close() so it can be passed
    # by name to the external command.
    fdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".fdf",
                                           delete=False)
    fdf_file.write(fdf)
    fdf_file.close()
    # Dead code: this branch can never execute.
    if False:
        fdf_dict = dict()
        for key, val in data_strings:
            fdf_dict[key] = val
        xfdf_temp_filename = pypdftk.gen_xfdf(fdf_dict)
        xfdf_file = tempfile.NamedTemporaryFile(prefix="datemp", mode="wb", suffix=\
".xfdf", delete=False)
        shutil.copyfile(xfdf_temp_filename, xfdf_file.name)
    # Output file; its path is what this function returns.
    pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                           mode="wb",
                                           suffix=".pdf",
                                           delete=False)
    # NOTE(review): the statement below is corrupted.  The text following
    # '--password=' appears to have been replaced by a redaction marker
    # ('******'), which swallowed the rest of the qpdf argument list, whatever
    # code ran qpdf, and the opening of the `subprocess_arguments` list that
    # the code further down relies on.  As written this does not parse and
    # `subprocess_arguments` is never assigned.  Presumably the lost code
    # decrypted `template` into `template_file` and then started a pdftk
    # 'fill_form' command line -- restore from the upstream source.
    if template_password is not None:
        template_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                    mode="wb",
                                                    suffix=".pdf",
                                                    delete=False)
        qpdf_subprocess_arguments = [
            QPDF_PATH, '--decrypt', '--password='******'fill_form', fdf_file.name, 'output',
        pdf_file.name
    ]
    #logmessage("Arguments are " + str(subprocess_arguments))
    # Keep the form fields when the result must stay editable or when images
    # still have to be overlaid; otherwise flatten in this same invocation.
    if editable or len(images):
        subprocess_arguments.append('need_appearances')
    else:
        subprocess_arguments.append('flatten')
    # `call` is presumably subprocess.call imported at file level -- TODO
    # confirm.  No timeout is applied.
    result = call(subprocess_arguments)
    if result != 0:
        logmessage("Failed to fill PDF form " + str(template))
        raise DAError("Call to pdftk failed for template " + str(template) +
                      " where arguments were " +
                      " ".join(subprocess_arguments))
    if len(images):
        # Index the signature (/Sig) fields by name with their page number
        # and rectangle.  read_fields() is unpacked as 5-tuples here.
        fields = dict()
        for field, default, pageno, rect, field_type in read_fields(template):
            if str(field_type) in ('/Sig', "/u'Sig'", "/'Sig'"):
                fields[field] = {'pageno': pageno, 'rect': rect}
        image_todo = list()
        for field, file_info in images:
            if field not in fields:
                logmessage("field name " + str(field) +
                           " not found in PDF file")
                continue
            #logmessage("Need to put image on page " + str(fields[field]['pageno']))
            # No delete=False here, so this file is removed automatically
            # once the NamedTemporaryFile object is closed or collected.
            temp_png = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
            # First convert pass: trim the source image into the temp PNG.
            args = [
                "convert", file_info['fullpath'], "-trim", "+repage",
                temp_png.name
            ]
            result = call(args)
            # NOTE(review): only return code 1 is treated as a failure here;
            # any other non-zero code falls through to Image.open().
            if result == 1:
                logmessage("failed to trim file: " + " ".join(args))
                continue
            im = Image.open(temp_png.name)
            width, height = im.size
            xone, yone, xtwo, ytwo = fields[field]['rect']
            # Image pixels per unit of the field rectangle along each axis;
            # the larger ratio is used for both axes.  A zero-width or
            # zero-height rectangle would divide by zero here.
            dppx = width / (xtwo - xone)
            dppy = height / (ytwo - yone)
            if (dppx > dppy):
                dpp = dppx
            else:
                dpp = dppy
            # Canvas size for the overlay: the rectangle's (xone, yone)
            # offset scaled by dpp, plus the image's own dimensions.
            extent_x, extent_y = xone * dpp + width, yone * dpp + height
            overlay_pdf_file = tempfile.NamedTemporaryFile(prefix="datemp",
                                                           mode="wb",
                                                           suffix=".pdf",
                                                           delete=False)
            # Second convert pass: render the trimmed PNG into a one-page
            # overlay PDF on a transparent canvas of the computed extent.
            args = [
                "convert", temp_png.name, "-background", "none", "-density",
                str(int(dpp * 72)), "-gravity", "NorthEast", "-extent",
                str(int(extent_x)) + 'x' + str(int(extent_y)),
                overlay_pdf_file.name
            ]
            result = call(args)
            # NOTE(review): as above, only return code 1 counts as failure.
            if result == 1:
                logmessage("failed to make overlay: " + " ".join(args))
                continue
            # The overlay is kept as an open binary stream; every stream is
            # closed after the merged document has been written.  If an
            # exception is raised before that point the streams stay open.
            image_todo.append({
                'overlay_stream':
                open(overlay_pdf_file.name, "rb"),
                'pageno':
                fields[field]['pageno']
            })
        if len(image_todo):
            # Auto-deleting temp file used only for its name; it is reopened
            # by path below while this handle is still open.
            new_pdf_file = tempfile.NamedTemporaryFile(mode="wb",
                                                       suffix=".pdf")
            # The reader is used only inside this `with` block, while the
            # input file is open; the output is written before it closes.
            with open(pdf_file.name, "rb") as inFile:
                original = pypdf.PdfFileReader(inFile)
                original.idnum_to_page = get_page_hash(original.trailer)
                catalog = original.trailer["/Root"]
                writer = DAPdfFileWriter()
                # Remember the catalog entries listed in pdf_parts so they
                # can be copied onto the new document's root object.
                tree = dict()
                for part in pdf_parts:
                    if part in catalog:
                        tree[part] = catalog[part]
                # Merge each overlay onto its target page (pageno is
                # 1-based, the page index is 0-based).
                for i in range(original.getNumPages()):
                    for item in image_todo:
                        if (item['pageno'] - 1) == i:
                            page = original.getPage(i)
                            foreground_file = pypdf.PdfFileReader(
                                item['overlay_stream'])
                            foreground_page = foreground_file.getPage(0)
                            page.mergePage(foreground_page)
                # Copy every page, merged or not, into the writer.
                for i in range(original.getNumPages()):
                    newpage = original.getPage(i)
                    writer.addPage(newpage)
                for key, val in tree.items():
                    writer._root_object.update(
                        {pypdf.generic.NameObject(key): val})
                writer.page_list = list()
                recursive_get_pages(writer._root_object['/Pages'],
                                    writer.page_list)
                # Not guarded: a failure while copying bookmarks propagates
                # to the caller.
                recursive_add_bookmark(original, writer,
                                       original.getOutlines())
                with open(new_pdf_file.name, "wb") as outFile:
                    writer.write(outFile)
            # Replace the filled form with the version that has the overlays.
            shutil.copyfile(new_pdf_file.name, pdf_file.name)
            for item in image_todo:
                item['overlay_stream'].close()
    # Flattening was postponed above when images were present; do it now.
    if (not editable) and len(images):
        flatten_pdf(pdf_file.name)
    if pdfa:
        pdf_to_pdfa(pdf_file.name)
    # The password goes to replicate_js_and_calculations() for editable
    # output; otherwise the file is encrypted only if a password was given.
    if editable:
        replicate_js_and_calculations(template, pdf_file.name, password)
    elif password:
        pdf_encrypt(pdf_file.name, password)
    return pdf_file.name