def get_fields(the_file, include_attributes=False):
    """Get the list of fields needed inside a template file (PDF or Docx Jinja tags).

    If ``the_file`` (or, for a ``DAFileList``, its first element) has the
    mimetype ``application/pdf``, the first item of every entry returned by
    ``get_pdf_fields()`` is returned. Otherwise the file at
    ``the_file.path()`` is converted with ``word_to_markdown(..., 'docx')``
    and the resulting text is scanned for ``{{ ... }}`` expressions and for
    the iterable of ``{% for x in ... %}`` tags.

    Args:
        the_file: A ``DAFileList`` or an object exposing ``mimetype``,
            ``get_pdf_fields()`` and ``path()``.
        include_attributes: Not used by this function; kept so existing
            callers that pass it keep working.

    Returns:
        A list of field-name strings (in no particular order, since they are
        collected in a set). Names that still contain ``(`` are dropped.
        An empty list is returned when the conversion yields ``None``.
    """
    if isinstance(the_file, DAFileList):
        if the_file[0].mimetype == 'application/pdf':
            return [field[0] for field in the_file[0].get_pdf_fields()]
    else:
        if the_file.mimetype == 'application/pdf':
            return [field[0] for field in the_file.get_pdf_fields()]

    result_file = word_to_markdown(the_file.path(), 'docx')
    if result_file is None:
        return []

    # Plain 'r' instead of 'rU': the 'U' flag had no effect in Python 3 and
    # raises ValueError on Python 3.11+.
    with open(result_file.name, 'r', encoding='utf-8') as fp:
        result = fp.read()

    fields = set()
    # NOTE(review): the trailing "()" after on_one_line is an unescaped empty
    # group, so this also matches ".address.on_one_line" followed by anything
    # (e.g. arguments). Left as-is to keep the existing matching behavior.
    addresses = r"(\b\S*)(((\.address_block\(\))|(\.address\.on_one_line())))"
    methods = r"(.*)(\..*\(\))"

    # look for variables inside {{ }} tags
    for variable in re.findall(r'{{ *([^\} ]+) *}}', result):
        variable = variable.replace("\\", "")
        # If the expression ends in a method call, record the object the
        # method is called on rather than the call itself.
        method_match = re.match(methods, variable)
        if method_match:
            fields.add(method_match.groups()[0])
        else:
            fields.add(variable)

        # check for implicit reference to address fields in common methods
        address_match = re.match(addresses, variable)
        if address_match:
            fields.add(address_match.groups()[0] + '.address.address')

    # look for the iterable of every {% for ... in ... %} tag
    for variable in re.findall(r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}', result):
        variable = variable.replace("\\", "")
        # same test for method as above
        method_match = re.match(methods, variable)
        if method_match:
            fields.add(method_match.groups()[0])
        else:
            fields.add(variable)

    # The former ``del matches`` was removed: it raised UnboundLocalError when
    # the document contained no tags at all, because no loop had bound it.
    return [x for x in fields if "(" not in x]  # strip out functions/method calls
def is_fillable_docx(self, filename):
    """Tell whether ``filename`` is a DOCX file containing Jinja fields.

    Returns ``False`` when the extension reported by
    ``get_ext_and_mimetype`` is not ``"docx"``, when ``self.file_exists``
    says the file is missing, or when ``word_to_markdown`` returns ``None``.
    Otherwise returns ``True`` if at least one ``{{ ... }}`` expression or
    ``{% for x in ... %}`` iterable is found in the converted text.
    """
    extension, _mimetype = get_ext_and_mimetype(filename)
    if extension != "docx" or not self.file_exists(filename):
        return False

    converted = word_to_markdown(self.get_file(filename), 'docx')
    if converted is None:
        return False

    with open(converted.name, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # Scanned in this order: plain expressions first, then for-loop iterables.
    tag_patterns = (
        r'{{ *([^\} ]+) *}}',
        r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}',
    )
    found = {
        docx_variable_fix(name)
        for pattern in tag_patterns
        for name in re.findall(pattern, text)
    }
    return bool(found)
def is_fillable_docx(self, filename):
    """Return True if ``filename`` is a DOCX file that contains Jinja fields.

    Args:
        filename: Name of the file, as understood by ``self.file_exists``
            and ``self.get_file``.

    Returns:
        ``False`` if the extension is not ``"docx"``, the file does not
        exist, or ``word_to_markdown`` returns ``None``; otherwise whether
        any ``{{ ... }}`` expression or ``{% for x in ... %}`` iterable was
        found in the converted text.
    """
    extension, mimetype = get_ext_and_mimetype(filename)
    if extension != "docx":
        return False
    if not self.file_exists(filename):
        return False
    path = self.get_file(filename)
    result_file = word_to_markdown(path, 'docx')
    if result_file is None:
        return False
    # Plain 'r' instead of 'rU': the 'U' flag had no effect in Python 3 and
    # raises ValueError on Python 3.11+.
    with open(result_file.name, 'r', encoding='utf-8') as fp:
        result = fp.read()
    fields = set()
    for variable in re.findall(r'{{ *([^\} ]+) *}}', result):
        fields.add(docx_variable_fix(variable))
    for variable in re.findall(r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}', result):
        fields.add(docx_variable_fix(variable))
    return bool(fields)
def convert_file_to_md(self, filename, convert_variables=True):
    """Convert ``filename`` to a Markdown file stored next to it.

    The source format is looked up first by mimetype in
    ``convertible_mimetypes`` and then by extension in
    ``convertible_extensions``. The file is converted with
    ``word_to_markdown`` and written under the same base name with a
    ``.md`` extension.

    Args:
        filename: Name of the file, as understood by ``self.file_exists``
            and ``self.get_file``.
        convert_variables: When true, the converted text is passed through
            ``replace_square_brackets.sub(fix_variable_name, ...)`` and saved
            with ``self.write_file``; when false, the converted file is
            copied unchanged to ``self.get_file(out_filename)``.

    Returns:
        The output filename, or ``None`` if the format is not convertible,
        the file does not exist, or the conversion returns ``None``.
    """
    extension, mimetype = get_ext_and_mimetype(filename)
    if mimetype and mimetype in convertible_mimetypes:
        the_format = convertible_mimetypes[mimetype]
    elif extension and extension in convertible_extensions:
        the_format = convertible_extensions[extension]
    else:
        return None
    if not self.file_exists(filename):
        return None
    path = self.get_file(filename)
    temp_file = word_to_markdown(path, the_format)
    if temp_file is None:
        return None
    out_filename = os.path.splitext(filename)[0] + '.md'
    if convert_variables:
        # Plain 'r' instead of 'rU': the 'U' flag had no effect in Python 3
        # and raises ValueError on Python 3.11+.
        with open(temp_file.name, 'r', encoding='utf-8') as fp:
            self.write_file(out_filename, replace_square_brackets.sub(fix_variable_name, fp.read()))
    else:
        shutil.copyfile(temp_file.name, self.get_file(out_filename))
    return out_filename