def _loadStringFromFile(self, html):
    u"""_loadStringFromFile(string) -> None
    Writes the given html to a temporary file and then loads that file
    through self._loadFile(). Content that has no non-empty title, or that
    lacks an <html element, is first wrapped in a complete document.""" #$NON-NLS-1$
    # MSHTML control requires a <head> and <title> element
    title = getNoneString(extractTitle(html))
    if not title or html.find(u"<html") == -1: #$NON-NLS-1$
        # case where only the body content is given or the content did not
        # have non-empty <head> and <title> elems. Create a wrapper around the body.
        # Eg: <html><head><title>ZoundryDocument</title></head><body> CONTENT </body> </html>
        html = wrapHtmlBody(html, u"ZoundryDocument") #$NON-NLS-1$
    # note: \r\n must be replaced with \n. Otherwise, in <pre> blocks, the
    # \r will show up as an extra line.
    html = html.replace(u"\r\n", u"\n") #$NON-NLS-1$ #$NON-NLS-2$

    # For the test-harness to work, hard code temp dir. It is only used
    # when no application model is available.
    tmpDir = u"c:/temp" #$NON-NLS-1$
    appModel = getApplicationModel()
    if appModel:
        tmpDir = appModel.getUserProfile().getTempDirectory()

    # The temp file uses a ".html" (not ".xhtml") extension: Internet Explorer 9
    # and later reportedly fail to load ".xhtml" temp files correctly into the
    # MSHTML document (mal-formatted previews, editing failures).
    d = str(time.time())
    fname = os.path.join(tmpDir, u"_z_raven_mshtml_%s_tmp.html" % d) #$NON-NLS-1$
    tmpFile = codecs.open(fname, u"w") #$NON-NLS-1$
    try:
        # write the utf-8 byte order marker for wintel platforms.
        tmpFile.write(codecs.BOM_UTF8)
        tmpFile.write(convertToUtf8(html))
    finally:
        # single close point: runs on success and on write failure alike
        tmpFile.close()
    # the file is closed (flushed) before the control is asked to load it
    self._loadFile(fname)
def _loadStringFromFile(self, html):
    u"""_loadStringFromFile(string) -> None
    Saves the given html into a temporary file and hands the file name to
    self._loadFile(). If the content has no non-empty title or does not
    contain an <html element, it is wrapped in a full document first.""" #$NON-NLS-1$
    # MSHTML control requires a <head> and <title> element
    title = getNoneString(extractTitle(html))
    if not title or html.find(u"<html") == -1: #$NON-NLS-1$
        # Only body content was given (or <head>/<title> were missing or empty):
        # wrap it. Eg: <html><head><title>ZoundryDocument</title></head><body> CONTENT </body> </html>
        html = wrapHtmlBody(html, u"ZoundryDocument") #$NON-NLS-1$
    # note: \r\n must be replaced with \n. Otherwise, in <pre> blocks, the
    # \r will show up as an extra line.
    html = html.replace(u"\r\n", u"\n") #$NON-NLS-1$ #$NON-NLS-2$

    # Hard coded temp dir so the test-harness works; replaced by the user
    # profile's temp directory whenever an application model exists.
    tmpDir = u"c:/temp" #$NON-NLS-1$
    appModel = getApplicationModel()
    if appModel:
        userProfile = appModel.getUserProfile()
        tmpDir = userProfile.getTempDirectory()

    # Use a ".html" extension rather than ".xhtml": Internet Explorer 9 and
    # later reportedly do not load ".xhtml" temp files correctly into the
    # MSHTML document (broken previews, failed loads while editing).
    d = str(time.time())
    fname = os.path.join(tmpDir, u"_z_raven_mshtml_%s_tmp.html" % d) #$NON-NLS-1$
    tmpFile = codecs.open(fname, u"w") #$NON-NLS-1$
    try:
        # write the utf-8 byte order marker for wintel platforms.
        tmpFile.write(codecs.BOM_UTF8)
        tmpFile.write(convertToUtf8(html))
    finally:
        # close exactly once, whether or not the writes succeeded
        tmpFile.close()
    # load only after the file has been closed
    self._loadFile(fname)
def discover(self, url):
    u"""discover(string) -> ZTrackbackDiscoverResult
    Downloads the content found at the given url, extracts its title and
    passes both to self._parseContent() to discover the trackback entries
    (from RDF or RSS item constructs, per the original notes). The title
    and the discovered entries are returned wrapped in a
    ZTrackbackDiscoverResult; when nothing could be downloaded the result
    holds an empty title and an empty entry list.""" #$NON-NLS-1$
    content = self._downloadHtmlContent(url)
    if not content:
        # nothing to parse: empty title, no entries
        return ZTrackbackDiscoverResult(u"", []) #$NON-NLS-1$
    pageTitle = extractTitle(content)
    entries = self._parseContent(url, pageTitle, content)
    return ZTrackbackDiscoverResult(pageTitle, entries)
def _loadStringFromFile(self, html):
    u"""_loadStringFromFile(string) -> None
    Stores the given html in a temporary ".html" file and loads that file
    by calling self._loadFile(). Content without a non-empty title, or
    without an <html element, gets wrapped in a complete document first.""" #$NON-NLS-1$
    # MSHTML control requires a <head> and <title> element
    docTitle = getNoneString(extractTitle(html))
    if not docTitle or u"<html" not in html: #$NON-NLS-1$
        # Body-only content, or content whose <head>/<title> is missing or empty:
        # build a wrapper around it.
        # Eg: <html><head><title>ZoundryDocument</title></head><body> CONTENT </body> </html>
        html = wrapHtmlBody(html, u"ZoundryDocument") #$NON-NLS-1$
    # note: \r\n must be replaced with \n. Otherwise, in <pre> blocks, the
    # \r will show up as an extra line.
    html = html.replace(u"\r\n", u"\n") #$NON-NLS-1$ #$NON-NLS-2$

    if getApplicationModel():
        tmpDir = getApplicationModel().getUserProfile().getTempDirectory()
    else:
        # For the test-harness to work, hard code temp dir
        tmpDir = u"c:/temp" #$NON-NLS-1$

    # For Microsoft Internet Explorer Version 9 (and above?) the temporary file
    # must have a ".html" (previously ".xhtml") extension in order for the blog
    # post to load successfully into the ActiveX mshtml IHtmlDocument. Otherwise,
    # the blog posts will appear to be mal-formatted during previews and fail to
    # load correctly during editing.
    #
    # Chuah TC 23 December 2013
    stamp = str(time.time())
    fname = os.path.join(tmpDir, u"_z_raven_mshtml_%s_tmp.html" % stamp) #$NON-NLS-1$
    out = codecs.open(fname, u"w") #$NON-NLS-1$
    try:
        # write the utf-8 byte order marker for wintel platforms.
        out.write(codecs.BOM_UTF8)
        out.write(convertToUtf8(html))
    finally:
        out.close()
    # the file is already closed when the control loads it
    self._loadFile(fname)