def extract_metadata(report):
    """Extract document metadata for a report and attach it to the report.

    The extractor is chosen from the lower-cased ``report['file_type']``:

    * ``pdf``  -> ``utils.metadata_from_pdf``
    * ``doc``  -> ``utils.metadata_from_doc``
    * ``docx`` -> ``utils.metadata_from_docx``

    A truthy result is stored on ``report`` under the matching key
    (``'pdf'``, ``'doc'`` or ``'docx'``) and returned.

    For PDFs, ``utils.check_pdf_decryption`` is called on the absolute path
    first. When it is truthy, metadata is read from a ``.decrypted.pdf``
    sibling of the report, which is produced with ``utils.decrypt_pdf`` if
    it is not already on disk.

    Args:
        report: Mutable mapping with a ``'file_type'`` entry plus whatever
            ``path_for`` needs to build the report's path.

    Returns:
        The extracted metadata when it is truthy; otherwise ``None``
        (HTML-type reports, unknown file types, failed decryption, or an
        extractor that produced nothing).
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        # The filesystem checks use the absolute path; the metadata helper
        # is handed the path as returned by path_for (presumably it resolves
        # it against the data directory itself -- confirm in utils).
        real_report_path = os.path.abspath(
            os.path.expandvars(os.path.join(utils.data_dir(), report_path)))

        if utils.check_pdf_decryption(real_report_path):
            # [:-4] drops the last four characters, i.e. it assumes the path
            # ends in a four-character suffix such as ".pdf".
            real_decrypted_path = real_report_path[:-4] + ".decrypted.pdf"
            decrypted_path = report_path[:-4] + ".decrypted.pdf"

            # Reuse an existing decrypted copy; only decrypt when missing.
            if (os.path.isfile(real_decrypted_path)
                    or utils.decrypt_pdf(real_report_path,
                                         real_decrypted_path)):
                metadata = utils.metadata_from_pdf(decrypted_path)
            else:
                metadata = None
        else:
            metadata = utils.metadata_from_pdf(report_path)

        if metadata:
            report['pdf'] = metadata
            return metadata
        return None

    if file_type_lower == "doc":
        metadata = utils.metadata_from_doc(report_path)
        if metadata:
            report['doc'] = metadata
            return metadata
        return None

    if file_type_lower == "docx":
        metadata = utils.metadata_from_docx(report_path)
        if metadata:
            report['docx'] = metadata
            return metadata
        return None

    if file_type_lower in FILE_EXTENSIONS_HTML:
        # No metadata is extracted for HTML-type reports.
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning("Unknown file type, don't know how to extract metadata!")
    return None
def extract_metadata(report):
    """Pull metadata out of a report's file and record it on the report.

    Dispatches on the lower-cased ``report['file_type']``. PDF, DOC and
    DOCX files go to ``utils.metadata_from_pdf`` / ``metadata_from_doc`` /
    ``metadata_from_docx`` respectively; a truthy result is saved as
    ``report['pdf']``, ``report['doc']`` or ``report['docx']``.

    PDFs for which ``utils.check_pdf_decryption`` is truthy are read from a
    ``.decrypted.pdf`` copy next to the original. That copy is created with
    ``utils.decrypt_pdf`` unless the file already exists.

    Args:
        report: Mutable mapping holding ``'file_type'`` and the fields that
            ``path_for`` uses to locate the report.

    Returns:
        The metadata when the extractor returned something truthy, else
        ``None``. ``None`` is also returned for file types listed in
        ``FILE_EXTENSIONS_HTML`` and for unknown file types (the latter
        after logging a warning).
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        real_report_path = os.path.abspath(
            os.path.expandvars(os.path.join(utils.data_dir(), report_path)))

        if not utils.check_pdf_decryption(real_report_path):
            metadata = utils.metadata_from_pdf(report_path)
        else:
            # Slicing off four characters assumes a ".pdf"-length suffix.
            real_decrypted_path = real_report_path[:-4] + ".decrypted.pdf"
            decrypted_path = report_path[:-4] + ".decrypted.pdf"

            # Short-circuit: decrypt_pdf only runs when no decrypted copy
            # is on disk yet.
            have_decrypted = (
                os.path.isfile(real_decrypted_path)
                or utils.decrypt_pdf(real_report_path, real_decrypted_path)
            )
            if have_decrypted:
                metadata = utils.metadata_from_pdf(decrypted_path)
            else:
                metadata = None

        if metadata:
            report['pdf'] = metadata
            return metadata
        return None

    if file_type_lower == "doc":
        metadata = utils.metadata_from_doc(report_path)
        if metadata:
            report['doc'] = metadata
            return metadata
        return None

    if file_type_lower == "docx":
        metadata = utils.metadata_from_docx(report_path)
        if metadata:
            report['docx'] = metadata
            return metadata
        return None

    if file_type_lower in FILE_EXTENSIONS_HTML:
        # HTML-type reports: nothing to extract.
        return None

    # Use logging.warning; logging.warn is a deprecated alias.
    logging.warning("Unknown file type, don't know how to extract metadata!")
    return None
def extract_metadata(report):
    """Extract PDF metadata for a report and store it on the report.

    Only PDFs are processed: ``utils.metadata_from_pdf`` is called with the
    path built by ``path_for``, and a truthy result is saved as
    ``report["pdf"]``. Reports whose file type is ``htm`` or ``html`` are
    skipped, and any other type triggers a warning.

    Args:
        report: Mutable mapping with a ``"file_type"`` entry and the fields
            ``path_for`` needs.

    Returns:
        The PDF metadata when truthy, otherwise ``None``.
    """
    report_path = path_for(report, report["file_type"])
    file_type_lower = report["file_type"].lower()

    if file_type_lower == "pdf":
        metadata = utils.metadata_from_pdf(report_path)
        if metadata:
            report["pdf"] = metadata
            return metadata
        return None

    if file_type_lower in ("htm", "html"):
        # No metadata is extracted for HTML reports.
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning("Unknown file type, don't know how to extract metadata!")
    return None
def extract_metadata(report):
    """Read PDF metadata for a report and attach it as ``report['pdf']``.

    The lower-cased ``report['file_type']`` decides what happens:

    * ``pdf``: ``utils.metadata_from_pdf`` is called on the path from
      ``path_for``; a truthy result is stored on the report and returned.
    * any value in ``FILE_EXTENSIONS_HTML``: skipped silently.
    * anything else: a warning is logged.

    Args:
        report: Mutable mapping containing ``'file_type'`` and whatever
            ``path_for`` reads to build the path.

    Returns:
        The PDF metadata when truthy, otherwise ``None``.
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        metadata = utils.metadata_from_pdf(report_path)
        if metadata:
            report['pdf'] = metadata
            return metadata
        return None

    if file_type_lower in FILE_EXTENSIONS_HTML:
        # HTML-type reports carry no metadata to extract here.
        return None

    # Use logging.warning; logging.warn is a deprecated alias.
    logging.warning("Unknown file type, don't know how to extract metadata!")
    return None
def extract_metadata(report):
    """Extract PDF or DOC metadata for a report and attach it to the report.

    ``report['file_type']`` (compared case-insensitively) selects the
    extractor: ``pdf`` uses ``utils.metadata_from_pdf`` and ``doc`` uses
    ``utils.metadata_from_doc``, both called with the path from
    ``path_for``. A truthy result is stored under ``report['pdf']`` or
    ``report['doc']``. File types in ``FILE_EXTENSIONS_HTML`` are skipped;
    any other type logs a warning.

    Args:
        report: Mutable mapping with a ``'file_type'`` entry and the fields
            ``path_for`` needs.

    Returns:
        The extracted metadata when truthy, otherwise ``None``.
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        metadata = utils.metadata_from_pdf(report_path)
        if metadata:
            report['pdf'] = metadata
            return metadata
        return None

    if file_type_lower == "doc":
        metadata = utils.metadata_from_doc(report_path)
        if metadata:
            report['doc'] = metadata
            return metadata
        return None

    if file_type_lower in FILE_EXTENSIONS_HTML:
        # Nothing to extract for HTML-type reports.
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning("Unknown file type, don't know how to extract metadata!")
    return None