def getText(word_document_content):
    """
    Convert Word document content using the configured 'doc' converter.

    @param[in] word_document_content the content of the Word document to
                                     convert
    @returns the second element of the pair returned by pipe_content
             (presumably the converted text -- the exact output format
             depends on the command stored in CMD_CONV['doc']; confirm)
    """
    doc_converter = CMD_CONV['doc']
    # pipe_content yields a pair; only its second element is of interest.
    _ignored, converted = pipe_content(doc_converter, word_document_content)
    return converted
def getText(word_document_content):
    """
    Run Word document content through the converter registered under 'doc'.

    @param[in] word_document_content the content of the Word document to
                                     convert
    @returns the second item of the two-item result of pipe_content
             (presumably the converted text; the format is determined by
             the command in CMD_CONV['doc'] -- TODO confirm)
    """
    conversion_result = pipe_content(CMD_CONV['doc'], word_document_content)
    # Unpack the pair and drop the first item, which is not used here.
    _first, output = conversion_result
    return output
def getText(pdf_content):
    """
    Convert PDF content using the configured 'pdf' converter.

    @param[in] pdf_content the content of the PDF document to convert
    @returns the second element of the pair returned by pipe_content
             (presumably the extracted text -- depends on the command
             stored in CMD_CONV['pdf']; confirm)
    """
    pdf_converter = CMD_CONV['pdf']
    # pipe_content yields a pair; only its second element is of interest.
    _ignored, extracted = pipe_content(pdf_converter, pdf_content)
    return extracted
def getText(pdf_content):
    """
    Run PDF content through the converter registered under 'pdf'.

    @param[in] pdf_content the content of the PDF document to convert
    @returns the second item of the two-item result of pipe_content
             (presumably the extracted text; the format is determined by
             the command in CMD_CONV['pdf'] -- TODO confirm)
    """
    conversion_result = pipe_content(CMD_CONV['pdf'], pdf_content)
    # Unpack the pair and drop the first item, which is not used here.
    _first, output = conversion_result
    return output
def getText(html_content, encoding="utf8"):
    """
    Convert HTML content using the configured 'html' converter.

    Content that lacks a "<" or lacks a ">" is treated as not being HTML
    and is returned unchanged, without invoking the converter.

    @param[in] html_content the content of the html page to convert
                            (text or bytes)
    @param[in] encoding     the document encoding; accepted for interface
                            compatibility but not used by this function
    @returns the second element of the pair returned by pipe_content
             (presumably the text representation of the page -- depends
             on the command in CMD_CONV['html']; confirm), or
             html_content itself when it does not look like HTML
    """
    # Use markers of the same type as the input: on Python 3 a str-in-bytes
    # membership test raises TypeError instead of answering the question.
    if isinstance(html_content, (bytes, bytearray)):
        opening, closing = b"<", b">"
    else:
        opening, closing = "<", ">"

    # check whether this is really a html file
    if opening not in html_content or closing not in html_content:
        return html_content

    _, converted = pipe_content(CMD_CONV['html'], html_content)
    return converted