Python text_from_html Exemples

Langage de programmation: Python

Espace de nommage/Pack: utils.utils

Méthode/Fonction: text_from_html

Exemples sur hotexamples.com : 6

Python text_from_html - 6 exemples trouvés. Ce sont les exemples réels les mieux notés de utils.utils.text_from_html extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Associées

Bike

save_running_to_startup

get_language_name

CouchBase

command

Corpus

defaultNeuronTransform

detect_filetype

getIsoTimestamp

get_attributes

Associés dans d'autres langages

no_special_char (PHP)

TaxRate (PHP)

RSSFeedParser (C#)

SHA3 (C#)

spl_map (C++)

AddAliasName (C++)

glUniform4iv (Go)

RegisterSetup (Go)

TextGraphics (Java)

Exemple #1

0

Afficher le fichier

Fichier : inspector.py Projet : harrisj/inspectors-general

def extract_report(report):
    """Extract the text of a downloaded report into a ``.txt`` file.

    The text file path is the report path (as returned by ``path_for``)
    with its extension replaced by ``.txt``. Both paths are resolved
    against ``utils.data_dir()`` before touching the filesystem.

    Args:
        report: Mapping that provides at least a ``'file_type'`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        The text path (not resolved against the data directory) when the
        text already exists or an extractor was invoked; ``None`` when the
        file type is not recognized.
    """
    report_path = path_for(report, report['file_type'])
    real_report_path = os.path.abspath(
        os.path.expandvars(os.path.join(utils.data_dir(), report_path)))

    text_path = "%s.txt" % os.path.splitext(report_path)[0]
    real_text_path = os.path.abspath(
        os.path.expandvars(os.path.join(utils.data_dir(), text_path)))

    if os.path.exists(real_text_path):
        # This report has already had its text extracted
        return text_path

    file_type_lower = report['file_type'].lower()

    # NOTE(review): the utils.text_from_* helpers are presumed to write the
    # extracted text to their second argument -- confirm in utils.
    if file_type_lower == "pdf":
        utils.text_from_pdf(real_report_path, real_text_path)
    elif file_type_lower == "doc":
        utils.text_from_doc(real_report_path, real_text_path)
    elif file_type_lower in FILE_EXTENSIONS_HTML:
        utils.text_from_html(real_report_path, real_text_path)
    else:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning("Unknown file type, don't know how to extract text!")
        return None

    return text_path

Exemple #2

0

Afficher le fichier

Fichier : inspector.py Projet : unitedstates/inspectors-general

def extract_report(report):
    """Extract the text of a downloaded report into a ``.txt`` file.

    The text file path is the report path (as returned by ``path_for``)
    with its extension replaced by ``.txt``. Both paths are resolved
    against ``utils.data_dir()`` before touching the filesystem.

    Args:
        report: Mapping that provides at least a ``'file_type'`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        The text path (not resolved against the data directory) when the
        text already exists or an extractor was invoked; ``None`` when the
        file type is not recognized or when a PDF flagged by
        ``utils.check_pdf_decryption`` could not be decrypted.
    """
    report_path = path_for(report, report['file_type'])
    real_report_path = os.path.abspath(
        os.path.expandvars(os.path.join(utils.data_dir(), report_path)))

    text_path = "%s.txt" % os.path.splitext(report_path)[0]
    real_text_path = os.path.abspath(
        os.path.expandvars(os.path.join(utils.data_dir(), text_path)))

    if os.path.exists(real_text_path):
        # This report has already had its text extracted
        return text_path

    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        # NOTE(review): check_pdf_decryption presumably returns a truthy
        # value when the PDF must be decrypted first -- confirm in utils.
        if utils.check_pdf_decryption(real_report_path):
            # Strips the last four characters, i.e. assumes a 4-character
            # suffix such as ".pdf" on the report path.
            real_decrypted_path = real_report_path[:-4] + ".decrypted.pdf"
            # Reuse an existing decrypted copy, otherwise try to create one.
            if (os.path.isfile(real_decrypted_path)
                    or utils.decrypt_pdf(real_report_path,
                                         real_decrypted_path)):
                utils.text_from_pdf(real_decrypted_path, real_text_path)
                return text_path
            # Decryption failed: no text was produced. The original fell off
            # the end of the function here; make the None result explicit.
            return None
        utils.text_from_pdf(real_report_path, real_text_path)
        return text_path
    elif file_type_lower == "doc":
        utils.text_from_doc(real_report_path, real_text_path)
        return text_path
    elif file_type_lower == "docx":
        utils.text_from_docx(real_report_path, real_text_path)
        return text_path
    elif file_type_lower in FILE_EXTENSIONS_HTML:
        utils.text_from_html(real_report_path, real_text_path)
        return text_path
    else:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning("Unknown file type, don't know how to extract text!")
        return None

Exemple #3

0

Afficher le fichier

Fichier : inspector.py Projet : ericalthatcher/inspectors-general

def extract_report(report):
    """Dispatch text extraction for a report based on its file type.

    Args:
        report: Mapping that provides at least a ``'file_type'`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        Whatever ``utils.text_from_pdf`` / ``utils.text_from_html`` returns
        for the report path, or ``None`` when the file type is neither
        ``"pdf"`` nor something starting with ``"htm"``. The comparison is
        case-sensitive.
    """
    file_type = report['file_type']
    report_path = path_for(report, file_type)

    if file_type == "pdf":
        return utils.text_from_pdf(report_path)
    elif file_type.startswith("htm"):
        return utils.text_from_html(report_path)
    else:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning("Unknown file type, don't know how to extract text!")
        return None

Exemple #4

0

Afficher le fichier

Fichier : inspector.py Projet : BunsenMcDubbs/inspectors-general

def extract_report(report):
    """Dispatch text extraction for a report based on its file type.

    The file type is lower-cased before comparison, so matching is
    case-insensitive.

    Args:
        report: Mapping that provides at least a ``"file_type"`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        Whatever ``utils.text_from_pdf`` / ``utils.text_from_html`` returns
        for the report path, or ``None`` when the file type is neither
        ``"pdf"`` nor something starting with ``"htm"``.
    """
    report_path = path_for(report, report["file_type"])
    file_type_lower = report["file_type"].lower()

    if file_type_lower == "pdf":
        return utils.text_from_pdf(report_path)
    if file_type_lower.startswith("htm"):
        return utils.text_from_html(report_path)

    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling.
    logging.warning("Unknown file type, don't know how to extract text!")
    return None

Exemple #5

0

Afficher le fichier

Fichier : inspector.py Projet : JaimeLynSchatz/inspectors-general

def extract_report(report):
    """Dispatch text extraction for a report based on its file type.

    The file type is lower-cased before comparison, so matching is
    case-insensitive.

    Args:
        report: Mapping that provides at least a ``'file_type'`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        Whatever ``utils.text_from_pdf`` / ``utils.text_from_html`` returns
        for the report path, or ``None`` when the file type is neither
        ``"pdf"`` nor a member of ``FILE_EXTENSIONS_HTML``.
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        return utils.text_from_pdf(report_path)
    if file_type_lower in FILE_EXTENSIONS_HTML:
        return utils.text_from_html(report_path)

    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling.
    logging.warning("Unknown file type, don't know how to extract text!")
    return None

Exemple #6

0

Afficher le fichier

def extract_report(report):
    """Dispatch text extraction for a report based on its file type.

    The file type is lower-cased before comparison, so matching is
    case-insensitive.

    Args:
        report: Mapping that provides at least a ``'file_type'`` key; it is
            also passed to ``path_for`` to locate the report file.

    Returns:
        Whatever the matching ``utils.text_from_*`` helper returns for the
        report path, or ``None`` when the file type is not ``"pdf"``,
        ``"doc"`` or a member of ``FILE_EXTENSIONS_HTML``.
    """
    report_path = path_for(report, report['file_type'])
    file_type_lower = report['file_type'].lower()

    if file_type_lower == "pdf":
        return utils.text_from_pdf(report_path)
    if file_type_lower == "doc":
        return utils.text_from_doc(report_path)
    if file_type_lower in FILE_EXTENSIONS_HTML:
        return utils.text_from_html(report_path)

    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling.
    logging.warning("Unknown file type, don't know how to extract text!")
    return None