Python extractTitle Examples, zoundry.base.xhtml.xhtmlutil.extractTitle Python Examples

Example #1

0

Show file

File: mshtmlcontrol.py Project: Tidosho/zoundryraven

    def _loadStringFromFile(self, html):
        u"""Writes the given html to a temporary file and loads that file via _loadFile().

        The MSHTML control requires a <head> and <title> element, so content that
        has no non-empty title, or that has no <html element at all, is first
        wrapped with wrapHtmlBody() using the title "ZoundryDocument".""" #$NON-NLS-1$
        docTitle = getNoneString( extractTitle(html) )
        # The <html lookup is only evaluated when a title was found (short-circuit).
        if not docTitle or html.find(u"<html") < 0: #$NON-NLS-1$
            # Only body content was supplied (or head/title were missing or empty), so build
            # <html><head><title>ZoundryDocument</title></head><body> CONTENT </body></html>
            html = wrapHtmlBody(html, u"ZoundryDocument") #$NON-NLS-1$

        # Normalise CRLF to LF: inside <pre> blocks a leftover \r shows up as an extra line.
        html = html.replace(u"\r\n", u"\n")  #$NON-NLS-1$  #$NON-NLS-2$

        # Use the user profile's temp directory when an application model exists;
        # the hard coded directory keeps the test-harness working without one.
        if getApplicationModel():
            tempDir = getApplicationModel().getUserProfile().getTempDirectory()
        else:
            tempDir = u"c:/temp" #$NON-NLS-1$

        # The current time stamp is embedded in the temp file name.
        stamp = str(time.time())
        tempName = u"_z_raven_mshtml_%s_tmp.xhtml" % stamp #$NON-NLS-1$
        tempPath = os.path.join(tempDir, tempName)

        out = codecs.open(tempPath, u"w") #$NON-NLS-1$
        try:
            # Lead with the utf-8 byte order marker for wintel platforms.
            out.write(codecs.BOM_UTF8)
            out.write( convertToUtf8(html) )
            # Close before loading so the complete content is on disk for the control.
            out.close()
            self._loadFile(tempPath)
        finally:
            # Covers the failure paths; closing again after a successful close is harmless.
            out.close()

Example #2

0

Show file

    def _loadStringFromFile(self, html):
        u"""Writes the given html to a temporary file and loads that file via _loadFile().

        The MSHTML control requires a <head> and <title> element, so content that
        has no non-empty title, or that has no <html element at all, is first
        wrapped with wrapHtmlBody() using the title "ZoundryDocument".""" #$NON-NLS-1$
        docTitle = getNoneString(extractTitle(html))
        # The <html lookup is only evaluated when a title was found (short-circuit).
        if not docTitle or html.find(u"<html") < 0:  #$NON-NLS-1$
            # Only body content was supplied (or head/title were missing or empty), so build
            # <html><head><title>ZoundryDocument</title></head><body> CONTENT </body></html>
            html = wrapHtmlBody(html, u"ZoundryDocument")  #$NON-NLS-1$

        # Normalise CRLF to LF: inside <pre> blocks a leftover \r shows up as an extra line.
        html = html.replace(u"\r\n", u"\n")  #$NON-NLS-1$  #$NON-NLS-2$

        # Use the user profile's temp directory when an application model exists;
        # the hard coded directory keeps the test-harness working without one.
        if getApplicationModel():
            tempDir = getApplicationModel().getUserProfile().getTempDirectory()
        else:
            tempDir = u"c:/temp"  #$NON-NLS-1$

        # The current time stamp is embedded in the temp file name.
        stamp = str(time.time())
        tempName = u"_z_raven_mshtml_%s_tmp.xhtml" % stamp  #$NON-NLS-1$
        tempPath = os.path.join(tempDir, tempName)

        out = codecs.open(tempPath, u"w")  #$NON-NLS-1$
        try:
            # Lead with the utf-8 byte order marker for wintel platforms.
            out.write(codecs.BOM_UTF8)
            out.write(convertToUtf8(html))
            # Close before loading so the complete content is on disk for the control.
            out.close()
            self._loadFile(tempPath)
        finally:
            # Covers the failure paths; closing again after a successful close is harmless.
            out.close()

Example #3

0

Show file

File: trackback.py Project: mpm2050/Raven

    def discover(self, url):
        u"""discover(string) -> ZTrackbackDiscoverResult
        Downloads the content of the given url and discovers (extracts) the trackback
        information from either the RDF or RSS Item constructs. The returned result is
        built from the page title and the list of discovered trackback entries
        (IZTrackbackEntry objects); when nothing could be downloaded the title is an
        empty string and the list is empty.""" #$NON-NLS-1$

        htmlContent = self._downloadHtmlContent(url)
        if not htmlContent:
            # Nothing was downloaded: report an empty title and no entries.
            return ZTrackbackDiscoverResult(u"", [])  #$NON-NLS-1$

        pageTitle = extractTitle(htmlContent)
        entries = self._parseContent(url, pageTitle, htmlContent)
        return ZTrackbackDiscoverResult(pageTitle, entries)

Example #4

0

Show file

File: trackback.py Project: Tidosho/zoundryraven

    def discover(self, url):
        u"""discover(string) -> ZTrackbackDiscoverResult
        Downloads the content of the given url and discovers (extracts) the trackback
        information from either the RDF or RSS Item constructs. The returned result is
        built from the page title and the list of discovered trackback entries
        (IZTrackbackEntry objects); when nothing could be downloaded the title is an
        empty string and the list is empty.""" #$NON-NLS-1$

        htmlContent = self._downloadHtmlContent(url)
        if not htmlContent:
            # Nothing was downloaded: report an empty title and no entries.
            return ZTrackbackDiscoverResult(u"", []) #$NON-NLS-1$

        pageTitle = extractTitle(htmlContent)
        entries = self._parseContent(url, pageTitle, htmlContent)
        return ZTrackbackDiscoverResult(pageTitle, entries)

Example #5

0

Show file

File: mshtmlcontrol.py Project: mpm2050/Raven

    def _loadStringFromFile(self, html):
        u"""Writes the given html to a temporary .html file and loads that file via _loadFile().

        The MSHTML control requires a <head> and <title> element, so content that
        has no non-empty title, or that has no <html element at all, is first
        wrapped with wrapHtmlBody() using the title "ZoundryDocument".""" #$NON-NLS-1$
        docTitle = getNoneString(extractTitle(html))
        # The <html lookup is only evaluated when a title was found (short-circuit).
        if not docTitle or html.find(u"<html") < 0:  #$NON-NLS-1$
            # Only body content was supplied (or head/title were missing or empty), so build
            # <html><head><title>ZoundryDocument</title></head><body> CONTENT </body></html>
            html = wrapHtmlBody(html, u"ZoundryDocument")  #$NON-NLS-1$

        # Normalise CRLF to LF: inside <pre> blocks a leftover \r shows up as an extra line.
        html = html.replace(u"\r\n", u"\n")  #$NON-NLS-1$  #$NON-NLS-2$

        # Use the user profile's temp directory when an application model exists;
        # the hard coded directory keeps the test-harness working without one.
        if getApplicationModel():
            tempDir = getApplicationModel().getUserProfile().getTempDirectory()
        else:
            tempDir = u"c:/temp"  #$NON-NLS-1$

        # The current time stamp is embedded in the temp file name.
        stamp = str(time.time())

        # The temp file deliberately ends in ".html" (it used to be ".xhtml"): for
        # Microsoft Internet Explorer Version 9 (and above?) the ActiveX mshtml
        # IHtmlDocument only loads the blog post successfully from a ".html" file.
        # With the old extension, posts appear mal-formatted during previews and
        # fail to load correctly during editing.
        #
        # Chuah TC    23 December 2013
        tempName = u"_z_raven_mshtml_%s_tmp.html" % stamp  #$NON-NLS-1$
        tempPath = os.path.join(tempDir, tempName)

        out = codecs.open(tempPath, u"w")  #$NON-NLS-1$
        try:
            # Lead with the utf-8 byte order marker for wintel platforms.
            out.write(codecs.BOM_UTF8)
            out.write(convertToUtf8(html))
            # Close before loading so the complete content is on disk for the control.
            out.close()
            self._loadFile(tempPath)
        finally:
            # Covers the failure paths; closing again after a successful close is harmless.
            out.close()