Exemple #1
0
 def getText(word_document_content):
     """ Run Word document content through the CMD_CONV['doc'] converter.

         @param[in] word_document_content the content of the Word document to convert
         @returns the second element of the pipe_content() result
                  (presumably the converted text -- confirm against pipe_content)
     """
     # pipe_content() yields a pair; only its second item is handed back.
     _unused, converted = pipe_content(CMD_CONV['doc'], word_document_content)
     return converted
Exemple #2
0
 def getText(word_document_content):
     """ Pipe the given Word document content through the converter
         registered under CMD_CONV['doc'].

         @param[in] word_document_content the content of the Word document to convert
         @returns the second item of the pair produced by pipe_content()
                  (presumably the extracted text -- confirm against pipe_content)
     """
     doc_converter = CMD_CONV['doc']
     # The first item of the pair is not needed here.
     _ignored, output = pipe_content(doc_converter, word_document_content)
     return output
Exemple #3
0
    def getText(pdf_content):
        """ Run PDF content through the CMD_CONV['pdf'] converter.

            @param[in] pdf_content the content of the PDF document to convert
            @returns the second element of the pipe_content() result
                     (presumably the extracted text -- confirm against pipe_content)
        """
        # pipe_content() yields a pair; only its second item is handed back.
        _unused, extracted = pipe_content(CMD_CONV['pdf'], pdf_content)
        return extracted
Exemple #4
0
    def getText(pdf_content):
        """ Pipe the given PDF content through the converter registered
            under CMD_CONV['pdf'].

            @param[in] pdf_content the content of the PDF document to convert
            @returns the second item of the pair produced by pipe_content()
                     (presumably the extracted text -- confirm against pipe_content)
        """
        pdf_converter = CMD_CONV['pdf']
        # The first item of the pair is not needed here.
        _ignored, output = pipe_content(pdf_converter, pdf_content)
        return output
Exemple #5
0
    def getText(html_content, encoding="utf8"):
        """ Convert HTML content via the CMD_CONV['html'] converter.

            Input that lacks either a "<" or a ">" is not treated as HTML
            and is handed back unchanged.

            @param[in] html_content the content of the html page to convert
            @param[in] encoding the document encoding (not read by this function's body)
            @returns html_content itself when it holds no "<" or no ">";
                     otherwise the second element of the pipe_content() result
        """
        looks_like_html = "<" in html_content and ">" in html_content
        if looks_like_html:
            _unused, converted = pipe_content(CMD_CONV['html'], html_content)
            return converted

        # no angle-bracket pair present: pass the input through untouched
        return html_content
Exemple #6
0
    def getText(html_content, encoding="utf8"):
        """ Pipe HTML content through the converter registered under
            CMD_CONV['html'], skipping input that does not look like HTML.

            @param[in] html_content the content of the html page to convert
            @param[in] encoding the document encoding (not read by this function's body)
            @returns the unchanged html_content when "<" or ">" is missing from it;
                     otherwise the second item of the pair produced by pipe_content()
        """
        # Guard: without both angle brackets this cannot be markup.
        if "<" not in html_content or ">" not in html_content:
            return html_content

        html_converter = CMD_CONV['html']
        _ignored, output = pipe_content(html_converter, html_content)
        return output