def get_links(self):
    '''Generator for links in the page content

    This method gives the raw links from the content, if you want
    nice L{Link} objects use
    L{index.list_links()<zim.index.Index.list_links()>} instead.

    Link elements are handled first, followed by those image elements
    that have an C{href} attribute. Nothing is yielded when
    C{get_parsetree()} returns an empty result.

    @returns: yields 3-tuples C{(type, href, attrib)} where:
      - C{type} is the link type as returned by C{link_type()}
        (e.g. "page" or "file")
      - C{href} is the link itself
      - C{attrib} is the C{attrib} dict of the element, from which
        the C{href} key has been popped
    '''
    # FIXME optimize with a ParseTree.get_links that does not
    # use Node
    tree = self.get_parsetree()
    if not tree:
        return

    for elt in tree.findall(zim.formats.LINK):
        href = elt.attrib.pop('href')
        yield link_type(href), href, elt.attrib

    for elt in tree.findall(zim.formats.IMAGE):
        # Images are only reported when they link somewhere
        if 'href' in elt.attrib:
            href = elt.attrib.pop('href')
            yield link_type(href), href, elt.attrib
def _link_tree(links, notebook, path):
    '''Convert a list of links (of any type) into a parsetree

    Consecutive links are separated by a single space. File links for
    which C{File.isimage()} is true become image elements, links
    starting with "@" become tag elements, all others link elements.

    @param links: list of link strings
    @param notebook: notebook object used to create page links and
    relative file paths
    @param path: the path relative to which links are created
    @returns: the parsetree obtained from the C{ParseTreeBuilder}
    '''
    builder = ParseTreeBuilder()
    builder.start(FORMATTEDTEXT)
    for index, raw_link in enumerate(links):
        if index > 0:
            builder.text(' ')

        link = raw_link
        kind = link_type(link)
        isimage = False
        if kind == 'interwiki':
            # Strip the interwiki keyword of this notebook itself and
            # re-classify what remains
            prefix = notebook.interwiki + '?'
            if link.startswith(prefix):
                link = link[len(prefix):]
                kind = link_type(link)
        elif kind == 'file':
            try:
                file = File(link)
                isimage = file.isimage()
            except:
                pass

        logger.debug('Pasting link: %s (type: %s, isimage: %s)', link, kind, isimage)

        if isimage:
            src = notebook.relative_filepath(file, path) or file.uri
            builder.append(IMAGE, {'src': src})
        elif link.startswith('@'):
            # FIXME - is this ever used ??
            builder.append(TAG, {'name': raw_link[1:]}, raw_link)
        else:
            name = None
            if kind == 'page':
                anchor = None
                if '#' in link:
                    link, anchor = link.split('#', 1)
                # Assume links are always absolute
                target = Path(Path.makeValidPageName(link))
                href = notebook.pages.create_link(path, target)
                href.anchor = anchor
                link = href.to_wiki_link()
                if notebook.config['Notebook']['short_links']:
                    name = href.parts()[-1]
                    if anchor:
                        name += '#' + anchor
            elif kind == 'file':
                # Assume links are always URIs
                file = File(link)
                link = notebook.relative_filepath(file, path) or file.uri
            builder.append(LINK, {'href': link}, name or link)

    builder.end(FORMATTEDTEXT)
    tree = builder.get_parsetree()
    tree.resolve_images(notebook, path)
    tree.decode_urls()
    return tree
def replacefunc(elt):
    # Callback for a single link element. C{self}, C{page}, C{oldroot}
    # and C{newroot} are free variables taken from the enclosing scope.
    # Returns the result of C{_update_link_tag()} for links that need
    # to be rewritten, raises C{VisitorSkip} for all other elements.
    text = elt.attrib['href']
    if link_type(text) != 'page':
        raise zim.formats.VisitorSkip

    href = HRef.new_from_wiki_link(text)
    target = self.pages.resolve_link(page, href)

    if target == oldroot:
        return self._update_link_tag(elt, page, newroot, href)

    if target.ischild(oldroot):
        moved = newroot.child(target.relname(oldroot))
        return self._update_link_tag(elt, page, moved, href)

    if href.rel == HREF_REL_FLOATING \
    and natural_sort_key(href.parts()[0]) == natural_sort_key(oldroot.basename) \
    and page.ischild(oldroot.parent):
        targetrecord = self.pages.lookup_by_pagename(target)
        if not (target.ischild(oldroot.parent) and targetrecord.exists()):
            # An link that was anchored to the moved page,
            # but now resolves somewhere higher in the tree
            # Or a link that no longer resolves
            if len(href.parts()) == 1:
                return self._update_link_tag(elt, page, newroot, href)
            remainder = ':'.join(href.parts()[1:])
            return self._update_link_tag(elt, page, newroot.child(remainder), href)

    raise zim.formats.VisitorSkip
def _update_links_in_page(self, page, oldpath, newpath):
    '''Rewrite the page links in C{page} that resolve to C{oldpath}
    or to a path for which C{hrefpath > oldpath} holds, such that
    they point to the corresponding location relative to C{newpath}.
    The modified parsetree is stored back with C{page.set_parsetree()}.

    @param page: the page whose links are updated
    @param oldpath: the old target path
    @param newpath: the new target path
    '''
    # Maybe counter intuitive, but pages below oldpath do not need
    # to exist anymore while we still try to resolve links to these
    # pages. The reason is that all pages that could link _upward_
    # to these pages are below and are moved as well.
    logger.debug('Updating links in %s to %s (was: %s)', page, newpath, oldpath)
    tree = page.get_parsetree()
    if not tree:
        logger.warn('Page turned out to be empty: %s', page)
        return

    for tag in tree.getiterator('link'):
        href = tag.attrib['href']
        if link_type(href) != 'page':
            continue

        hrefpath = self.resolve_path(href, source=page)
        if hrefpath == oldpath:
            replacement = newpath
        elif hrefpath > oldpath:
            replacement = newpath + hrefpath.relname(oldpath)
        else:
            continue  # link does not point at or below oldpath

        self._update_link_tag(tag, self.relative_link(page, replacement))

    page.set_parsetree(tree)
def _resolve_links_and_images(notebook, src_path, node):
    '''Annotate a link, image or object node with its resolved target

    Page links get an C{_href} attribute with the name of the resolved
    page, file links an C{_href} attribute with the uri of the resolved
    file, images and "image+" objects a C{_src} attribute with the uri
    of the resolved file.

    @param notebook: notebook used to resolve links and files
    @param src_path: path relative to which links are resolved
    @param node: the node to annotate
    @returns: the (modified) node
    @raises VisitorSkip: for object nodes whose type does not start
    with "image+"
    @raises AssertionError: for any other kind of node
    '''
    tag = node.tag

    if tag == LINK:
        href = node.get('href')
        my_type = link_type(href)
        if my_type == 'page':
            target = notebook.pages.resolve_link(
                src_path, HRef.new_from_wiki_link(href))
            node.set('_href', target.name)
        elif my_type == 'file':
            try:
                target = notebook.resolve_file(href, src_path)
            except:
                # may by e.g. file://host/path URI, not supported as local file
                pass
            else:
                node.set('_href', target.uri)
        return node

    if tag == IMAGE:
        target = notebook.resolve_file(node.get('src'), src_path)
        node.set('_src', target.uri)
        return node

    if tag == OBJECT:
        if not node.get('type').startswith('image+'):
            raise VisitorSkip
        # Objects based on generated images
        target = notebook.resolve_file(node.get('src'), src_path)
        node.set('_src', target.uri)
        return node

    raise AssertionError('unknown tag')
def replacefunc(elt):
    # Callback for a single link element. C{self}, C{page},
    # C{oldtarget} and C{newtarget} are free variables taken from the
    # enclosing scope. Returns the result of C{_update_link_tag()} for
    # links that need to be rewritten, raises C{VisitorSkip} otherwise.
    text = elt.attrib['href']
    if link_type(text) != 'page':
        raise zim.formats.VisitorSkip

    href = HRef.new_from_wiki_link(text)
    target = self.pages.resolve_link(page, href)

    if target == newtarget or target.ischild(newtarget):
        # Already resolves to the new location
        raise zim.formats.VisitorSkip
    elif target == oldtarget:
        return self._update_link_tag(elt, page, newtarget, href)
    elif target.ischild(oldtarget):
        mynewtarget = newtarget.child(target.relname(oldtarget))
        return self._update_link_tag(elt, page, mynewtarget, href)
    elif href.rel == HREF_REL_FLOATING \
    and href.parts()[0] == newtarget.basename \
    and page.ischild(oldtarget.parent) \
    and not target.ischild(oldtarget.parent):
        # Edge case: an link that was anchored to the moved page,
        # and now resolves somewhere higher in the tree
        if href.names == newtarget.basename:
            return self._update_link_tag(elt, page, newtarget, href)
        else:
            # "parts" is a method (see the call in the condition above),
            # so it must be called before it can be sliced
            mynewtarget = newtarget.child(':'.join(href.parts()[1:]))
            return self._update_link_tag(elt, page, mynewtarget, href)
    else:
        raise zim.formats.VisitorSkip
def link(self, link):
    '''Returns the link with a scheme prefix where needed

    @param link: the link text
    @returns: C{"interwiki:" + link} for interwiki links,
    C{"mailto:" + link} for mailto links that lack the "mailto:"
    prefix, and the unmodified link otherwise
    '''
    kind = link_type(link)
    if kind == 'interwiki':
        return 'interwiki:' + link
    if kind == 'mailto' and not link.startswith('mailto:'):
        return 'mailto:' + link
    return link
def dump_link(self, tag, attrib, strings=None):
    '''Dump a link element as a html anchor

    @param tag: the tag of the element (not used here)
    @param attrib: attribute dict of the element, must contain C{href}
    @param strings: list of strings with the link text, when empty
    the C{href} itself is used as text
    @returns: a list with a single string containing the C{<a>} element
    '''
    href = self.linker.link(attrib['href'])
    link_class = link_type(attrib['href'])
    if strings:
        text = u''.join(strings)
    else:
        text = attrib['href']
    # The title ends up in a double quoted attribute value, so double
    # quotes in the text must be written as a character reference
    title = text.replace('"', '&quot;')
    return [
        '<a href="%s" title="%s" class="%s">%s</a>'
            % (href, title, link_class, text)
    ]
def _replace_links_to_page_and_copy_images(notebook, old_folder, new_path, node):
    '''Update a link, image or object node for the location C{new_path}

    Consumes the C{_href} / C{_src} attribute of the node. Page links
    are updated through C{notebook._update_link_tag()}, file links get
    a new C{href} relative to C{new_path}, images and "image+" objects
    are copied when their source is a child of C{old_folder} and
    updated otherwise.

    @param notebook: the notebook object
    @param old_folder: folder used to decide whether an image is copied
    @param new_path: the path the content is moved to
    @param node: the node to update
    @returns: the node, or the result of the helper that handled it
    @raises VisitorSkip: when the node has no C{_href} / C{_src}
    attribute, or the link type is not "page" or "file"
    @raises AssertionError: for any other kind of node
    '''
    tag = node.tag

    if tag == LINK:
        abs_href = node.attrib.pop('_href', None)
        if not abs_href:
            raise VisitorSkip

        my_type = link_type(abs_href)
        if my_type == 'page':
            target = Path(abs_href)
            oldhref = HRef.new_from_wiki_link(node.get('href'))  # *not* abs_href
            return notebook._update_link_tag(node, new_path, target, oldhref)

        if my_type == 'file':
            new_href = notebook.relative_filepath(LocalFile(abs_href), new_path)
            if new_href is not None:
                if node.gettext() == node.get('href'):  # *not* abs_href
                    node[:] = [new_href]
                node.set('href', new_href)
            # Without new href this could be VisitorSkip, but we want
            # to get rid of _href
            return node

        logger.warn('Could not update link of type "%s": %s', my_type, abs_href)
        raise VisitorSkip

    if tag == IMAGE:
        # Only copy direct attachments - else the image already was a link
        # to a file outside of the attachment folder
        abs_src = node.attrib.pop('_src', None)
        if not abs_src:
            raise VisitorSkip
        src_file = LocalFile(abs_src)
        if src_file.ischild(old_folder):
            return _copy_image(notebook, new_path, src_file, node)
        return _update_image(notebook, new_path, src_file, node)

    if tag == OBJECT:
        abs_src = node.attrib.pop('_src', None)
        if abs_src and node.get('type').startswith('image+'):
            src_file = LocalFile(abs_src)
            if src_file.ischild(old_folder):
                return _copy_image_object(notebook, new_path, src_file, node)
            return _update_image(notebook, new_path, src_file, node)
        raise VisitorSkip

    raise AssertionError('unknown tag')
def extract_links(element):
    '''Returns any file links for a parsetree element

    An "img" element gives its C{src}, a "link" element whose C{href}
    is of type "file" gives its C{href}. For any other element the
    "p", "ul", "li", "img" and "link" children are searched
    recursively.

    @param element: the parsetree element
    @returns: a list of strings, can be empty
    '''
    if element.tag == 'img':
        return [element.attrib['src']]

    if element.tag == 'link':
        # Classify on the link target, not on the text of the link:
        # the text can be a free form label or be missing altogether
        href = element.attrib.get('href')
        if href and link_type(href) == 'file':
            return [href]

    links = []
    for child in element.getchildren():
        if child.tag in ('p', 'ul', 'li', 'img', 'link'):
            links.extend(extract_links(child))
    return links
def get_links(self):
    '''Generator for the links in the parsetree

    This gives the raw links, if you want nice Link objects use
    index.list_links() instead. Nothing is yielded when
    get_parsetree() returns an empty result.

    Yields tuples of type, href and attrib, where attrib is a copy of
    the attributes of the link element without the 'href' key.
    '''
    tree = self.get_parsetree()
    if not tree:
        return

    for tag in tree.getiterator('link'):
        # Work on a copy, the parsetree itself is left untouched
        attrib = tag.attrib.copy()
        href = attrib.pop('href')
        yield link_type(href), href, attrib
def replacefunc(elt):
    # Callback for a single link element. C{self}, C{page} and C{path}
    # are free variables taken from the enclosing scope. Page links
    # that resolve to C{path} or a child of C{path} are replaced by a
    # fragment with the content of the link, all other elements are
    # skipped.
    href = elt.attrib['href']
    if link_type(href) != 'page':
        raise zim.formats.VisitorSkip

    hrefpath = self.pages.lookup_from_user_input(href, page)
    #~ print('LINK', hrefpath)
    points_into_path = hrefpath == path or hrefpath.ischild(path)
    if not points_into_path:
        raise zim.formats.VisitorSkip

    # Replace the link by it's text
    return zim.formats.DocumentFragment(*elt)
def _link_tree(links, notebook, path):
    '''Convert a list of links (of any type) into a parsetree

    Consecutive links are separated by a single space. File links for
    which C{File.isimage()} is true become "img" elements, links
    starting with "@" become "tag" elements, all others "link"
    elements with the link itself as text.

    @param links: list of link strings
    @param notebook: notebook object used to make links relative
    @param path: the path relative to which links are created
    @returns: a C{ParseTree} object
    '''
    builder = TreeBuilder()
    builder.start('zim-tree')
    for index, raw_link in enumerate(links):
        if index > 0:
            builder.data(' ')

        link = raw_link
        kind = link_type(link)
        isimage = False
        if kind == 'file':
            try:
                file = File(link)
                isimage = file.isimage()
            except:
                pass

        logger.debug('Pasting link: %s (type: %s, isimage: %s)', link, kind, isimage)

        if isimage:
            src = notebook.relative_filepath(file, path) or file.uri
            builder.start('img', {'src': src})
            builder.end('img')
        elif link.startswith('@'):
            # FIXME - is this ever used ??
            builder.start('tag', {'name': raw_link[1:]})
            builder.data(raw_link)
            builder.end('tag')
        else:
            if kind == 'page':
                # Assume links are always absolute
                href = Path(notebook.cleanup_pathname(link))
                link = notebook.relative_link(path, href) or link
            elif kind == 'file':
                # Assume links are always URIs
                file = File(link)
                link = notebook.relative_filepath(file, path) or file.uri
            builder.start('link', {'href': link})
            builder.data(link)
            builder.end('link')

    builder.end('zim-tree')
    tree = ParseTree(builder.close())
    tree.resolve_images(notebook, path)
    tree.decode_urls()
    return tree
def _link_tree(links, notebook, path):
    '''Convert a list of links (of any type) into a parsetree

    Consecutive links are separated by a single space. File links for
    which C{File.isimage()} is true become image elements, links
    starting with "@" become tag elements, all others link elements
    with the link itself as text.

    @param links: list of link strings
    @param notebook: notebook object used to create page links and
    relative file paths
    @param path: the path relative to which links are created
    @returns: the parsetree obtained from the C{ParseTreeBuilder}
    '''
    builder = ParseTreeBuilder()
    builder.start(FORMATTEDTEXT)
    for index, raw_link in enumerate(links):
        if index > 0:
            builder.text(' ')

        link = raw_link
        kind = link_type(link)
        isimage = False
        if kind == 'file':
            try:
                file = File(link)
                isimage = file.isimage()
            except:
                pass

        logger.debug('Pasting link: %s (type: %s, isimage: %s)', link, kind, isimage)

        if isimage:
            src = notebook.relative_filepath(file, path) or file.uri
            builder.append(IMAGE, {'src': src})
        elif link.startswith('@'):
            # FIXME - is this ever used ??
            builder.append(TAG, {'name': raw_link[1:]}, raw_link)
        else:
            if kind == 'page':
                # Assume links are always absolute
                target = Path(Path.makeValidPageName(link))
                href = notebook.pages.create_link(path, target)
                link = href.to_wiki_link()
            elif kind == 'file':
                # Assume links are always URIs
                file = File(link)
                link = notebook.relative_filepath(file, path) or file.uri
            builder.append(LINK, {'href': link}, link)

    builder.end(FORMATTEDTEXT)
    tree = builder.get_parsetree()
    tree.resolve_images(notebook, path)
    tree.decode_urls()
    return tree
def link(self, link):
    '''Returns a path or url for 'link'

    Page links are handled by self.page(), file links by self.file(),
    mailto links get a "mailto:" prefix when they do not have one and
    any other link is returned as is. Requires self.path to be set.
    '''
    # TODO optimize by hashing links seen (reset per page)
    assert self.path is not None

    kind = link_type(link)
    if kind == "page":
        return self.page(link)
    if kind == "file":
        return self.file(link)
    if kind == "mailto":
        return link if link.startswith("mailto:") else "mailto:" + link
    # I dunno, some url ?
    return link
def link(self, link):
    '''Returns an url for a link in a zim page

    This method is used to translate links of any type. When this
    object has a method named C{_link_<type>} for the type of the
    link, the result of that method is returned, otherwise the link
    is returned unmodified.

    @param link: link to be translated
    @returns: url, uri, or relative path
    context of this linker
    '''
    assert isinstance(link, basestring)

    # Determine the link type and dispatch to the matching method
    methodname = '_link_' + link_type(link)
    if not hasattr(self, methodname):
        return link
    return getattr(self, methodname)(link)
def iter_href(self):
    '''Generator for links in the text

    Looks at the C{href} attribute of all link and image elements.
    Each distinct C{href} is considered once; only those of type
    "page" are yielded, and those for which
    C{HRef.new_from_wiki_link()} raises C{ValueError} are skipped.

    @returns: yields a list of unique L{HRef} objects
    '''
    from zim.notebook.page import HRef  # XXX

    seen = set()
    elements = itertools.chain(
        self._etree.getiterator(LINK),
        self._etree.getiterator(IMAGE)
    )
    for elt in elements:
        href = elt.attrib.get('href')
        if not href or href in seen:
            continue

        seen.add(href)
        if link_type(href) != 'page':
            continue

        try:
            yield HRef.new_from_wiki_link(href)
        except ValueError:
            pass
def link(self, link):
    '''Returns an url for a link in a zim page

    This method is used to translate links of any type. It determines
    the link type and dispatches to L{link_page()}, L{link_file()},
    or other C{link_*} methods. Interwiki links are first translated
    by C{zim.notebook.interwiki_link()} and the result is then fed to
    this method again.

    Results of this method are cached, so the dispatch method is only
    called once for repeated occurrences. Setting a new path with
    L{set_path()} will clear the cache.

    @param link: link to be translated
    @type link: string
    @returns: url, uri or whatever link notation is relevant in the
    context of this linker
    @rtype: string
    '''
    assert self.path is not None

    if link in self._links:
        return self._links[link]

    kind = link_type(link)
    if kind == 'page':
        href = self.link_page(link)
    elif kind == 'file':
        href = self.link_file(link)
    elif kind == 'mailto':
        mailto = link if link.startswith('mailto:') else 'mailto:' + link
        href = self.link_mailto(mailto)
    elif kind == 'interwiki':
        href = zim.notebook.interwiki_link(link)
        if href and href != link:
            href = self.link(href)  # recurs
        else:
            logger.warn('No URL found for interwiki link "%s"', link)
            # NOTE: after this the result is stored under the
            # (possibly empty) href instead of the original link
            link = href
    elif kind == 'notebook':
        href = self.link_notebook(link)
    else:
        # I dunno, some url ?
        method = 'link_' + kind
        if hasattr(self, method):
            href = getattr(self, method)(link)
        else:
            href = link

    self._links[link] = href
    return self._links[link]
def _replace_links_to_interwiki_and_copy_images(src_interwiki, notebook, new_path, node):
    '''Turn a link into an interwiki link and copy images

    Consumes the C{_href} / C{_src} attribute of the node. Page links
    become C{src_interwiki + "?" + <absolute page name>}, followed by
    "#" and the anchor of the old link when it has one. File links
    get the absolute href. Images and "image+" objects are copied.

    @param src_interwiki: the interwiki keyword used as prefix
    @param notebook: the notebook object
    @param new_path: the path the content is moved to
    @param node: the node to update
    @returns: the node, or the result of the copy helper
    @raises VisitorSkip: when the node has no C{_href} / C{_src}
    attribute, or the link type is not "page" or "file"
    @raises AssertionError: for any other kind of node
    '''
    tag = node.tag

    if tag == LINK:
        abs_href = node.attrib.pop('_href', None)
        if not abs_href:
            raise VisitorSkip

        my_type = link_type(abs_href)
        if my_type == 'page':
            oldhref = HRef.new_from_wiki_link(node.get('href'))  # *not* abs_href
            new_href = src_interwiki + '?' + abs_href
            if oldhref.anchor:
                new_href += '#' + oldhref.anchor
        elif my_type == 'file':
            # NOTE: no proper syntax for this type of link - just abs file link
            # should be improved - e.g. path:./file style links like in docuwiki
            new_href = abs_href
        else:
            logger.warn('Could not update link of type "%s": %s', my_type, abs_href)
            raise VisitorSkip

        # Only replace the text when it was the same as the old href
        if node.gettext() == node.get('href'):  # *not* abs_href
            node[:] = [new_href]
        node.set('href', new_href)
        return node

    if tag == IMAGE:
        # Just copy all images - image links to other notebook don't make sense
        abs_src = node.attrib.pop('_src', None)
        if not abs_src:
            raise VisitorSkip
        return _copy_image(notebook, new_path, LocalFile(abs_src), node)

    if tag == OBJECT:
        abs_src = node.attrib.pop('_src', None)
        if abs_src and node.get('type').startswith('image+'):
            return _copy_image_object(notebook, new_path, LocalFile(abs_src), node)
        raise VisitorSkip

    raise AssertionError('unknown tag')
def replacefunc(elt):
    # Callback for a single link element. C{self}, C{page},
    # C{oldpath}, C{oldroot} and C{newroot} are free variables taken
    # from the enclosing scope. Only floating page links are
    # considered: when such a link resolves differently from C{page}
    # than from C{oldpath}, and the targets are not at the same
    # relative position below C{newroot} and C{oldroot}, the link is
    # rewritten to point to the old target again.
    text = elt.attrib['href']
    if link_type(text) != 'page':
        raise zim.formats.VisitorSkip

    href = HRef.new_from_wiki_link(text)
    if href.rel == HREF_REL_FLOATING:
        target_now = self.pages.resolve_link(page, href)
        target_before = self.pages.resolve_link(oldpath, href)
        if target_now != target_before:
            try:
                needs_update = (
                    target_now.relname(newroot) != target_before.relname(oldroot)
                )
            except ValueError:
                needs_update = True

            if needs_update:
                return self._update_link_tag(elt, page, target_before, href)

    raise zim.formats.VisitorSkip
def replacefunc(elt):
    # Callback for a single link element. C{self}, C{page},
    # C{oldpath}, C{oldroot} and C{newroot} are free variables taken
    # from the enclosing scope. Returns the result of
    # C{_update_link_tag()} for links that need to be rewritten,
    # raises C{VisitorSkip} for all other elements.
    text = elt.attrib['href']
    if link_type(text) != 'page':
        raise zim.formats.VisitorSkip

    href = HRef.new_from_wiki_link(text)

    if href.rel == HREF_REL_RELATIVE:
        raise zim.formats.VisitorSkip

    if href.rel == HREF_REL_ABSOLUTE:
        oldtarget = self.pages.resolve_link(page, href)
        if oldtarget == oldroot:
            return self._update_link_tag(elt, page, newroot, href)
        if oldtarget.ischild(oldroot):
            moved = newroot + oldtarget.relname(oldroot)
            return self._update_link_tag(elt, page, moved, href)
        raise zim.formats.VisitorSkip

    assert href.rel == HREF_REL_FLOATING
    newtarget = self.pages.resolve_link(page, href)
    oldtarget = self.pages.resolve_link(oldpath, href)

    if oldtarget == oldroot:
        return self._update_link_tag(elt, page, newroot, href)

    if oldtarget.ischild(oldroot):
        # Resolve just the first part of the link from the old location
        oldanchor = self.pages.resolve_link(
            oldpath, HRef(HREF_REL_FLOATING, href.parts()[0]))
        if oldanchor.ischild(oldroot):
            raise zim.formats.VisitorSkip  # oldtarget cannot be trusted
        moved = newroot + oldtarget.relname(oldroot)
        return self._update_link_tag(elt, page, moved, href)

    if newtarget != oldtarget:
        # Redirect back to old target
        return self._update_link_tag(elt, page, oldtarget, href)

    raise zim.formats.VisitorSkip
def _update_links_from(self, page, oldpath):
    '''Update the page links in C{page} that resolve differently from
    C{page} than they did from C{oldpath}, such that they point to
    the path they resolved to from C{oldpath}. Links for which both
    C{hrefpath >= page} and C{oldhrefpath >= oldpath} hold are left
    alone. The parsetree is stored back with C{page.set_parsetree()}.

    @param page: the page whose links are updated
    @param oldpath: the previous path of the page
    '''
    logger.debug('Updating links in %s (was %s)', page, oldpath)
    tree = page.get_parsetree()
    if not tree:
        return

    for tag in tree.getiterator('link'):
        href = tag.attrib['href']
        if link_type(href) != 'page':
            continue

        hrefpath = self.resolve_path(href, source=page)
        oldhrefpath = self.resolve_path(href, source=oldpath)
        #~ print 'LINK', oldhrefpath, '->', hrefpath
        if hrefpath != oldhrefpath \
        and not (hrefpath >= page and oldhrefpath >= oldpath):
            newhref = self.relative_link(page, oldhrefpath)
            #~ print '\t->', newhref
            self._update_link_tag(tag, newhref)

    page.set_parsetree(tree)
def link(self, link):
    '''Returns the link prefixed by its link type and "://"'''
    kind = link_type(link)
    return '%s://%s' % (kind, link)


def img(self, src):
    '''Returns the image source prefixed by "img://"'''
    prefix = 'img://'
    return prefix + src
def _dump_children(self, list, output, istoplevel=False): for element in list.getchildren(): text = html_encode(element.text) if not element.tag == 'pre': # text that goes into the element # always encode excepts for <pre></pre> text = encode_whitespace(text) if element.tag == 'h': tag = 'h' + str(element.attrib['level']) if self.isrtl(element): output += ['<', tag, ' dir=\'rtl\'>', text, '</', tag, '>'] else: output += ['<', tag, '>', text, '</', tag, '>'] elif element.tag in ('p', 'div'): tag = element.tag if self.isrtl(element): tag += ' dir=\'rtl\'' if 'indent' in element.attrib: level = int(element.attrib['indent']) tag += ' style=\'padding-left: %ipt\'' % (30 * level) output += ['<', tag, '>\n', text] self._dump_children(element, output) # recurs output.append('</%s>\n' % element.tag) elif element.tag == 'pre': tag = 'pre' if self.isrtl(element): tag += ' dir=\'rtl\'' if 'indent' in element.attrib: level = int(element.attrib['indent']) tag += ' style=\'padding-left: %ipt\'' % (30 * level) output += ['<', tag, '>\n', text, '</pre>\n'] elif element.tag in ('ul', 'ol'): # TODO for ol set start and bullet style tag = element.tag if tag == 'ol' and 'start' in element.attrib: start = element.attrib.get('start') if start in string.lowercase: type = 'a' start = string.lowercase.index(start) + 1 elif start in string.uppercase: type = 'A' start = string.uppercase.index(start) + 1 else: type = '1' tag += ' type="%s" start="%s"' % (type, start) if 'indent' in element.attrib: level = int(element.attrib['indent']) tag += ' style=\'padding-left: %ipt\'' % (30 * level) output += ['<%s>\n' % tag, text] self._dump_children(element, output) # recurs output.append('</%s>\n' % element.tag) elif element.tag == 'li': if 'bullet' in element.attrib and element.attrib['bullet'] != '*': icon = self.linker.icon(element.attrib['bullet']) output += ['<li style="list-style-image: url(%s)">' % icon, text] else: output += ['<li>', text] self._dump_children(element, output) # recurs 
output.append('</li>\n') elif element.tag == 'img': src = self.linker.img(element.attrib['src']) opt = '' for o in ('width', 'height'): if o in element.attrib and int(float(element.attrib[o])) > 0: opt = ' %s="%s"' % (o, element.attrib[o]) if 'href' in element.attrib: href = self.linker.link(element.attrib['href']) output.append('<a href="%s"><img src="%s" alt="%s"%s></a>' % (href, src, text, opt)) else: output.append('<img src="%s" alt="%s"%s>' % (src, text, opt)) elif element.tag == 'link': href = self.linker.link(element.attrib['href']) title = text.replace('"', '"') hrefClass = link_type(element.attrib['href']) output.append('<a href="%s" title="%s" class="%s">%s</a>' % (href, title, hrefClass, text)) elif element.tag in ['emphasis', 'strong', 'mark', 'strike', 'code','sub','sup']: if element.tag == 'mark': tag = 'u' elif element.tag == 'emphasis': tag = 'em' else: tag = element.tag if "_class" in element.attrib: # HACK output += ['<', tag, ' class="%s">' % element.attrib['_class'], text, '</', tag, '>'] else: output += ['<', tag, '>', text, '</', tag, '>'] elif element.tag == 'tag': output += ['<span class="zim-tag">', text, '</span>'] else: assert False, 'Unknown node type: %s' % element if not element.tail is None: tail = html_encode(element.tail) if not (istoplevel and tail.isspace()): # text in between elements, skip encoding # for whitespace between headings, paras etc. tail = encode_whitespace(tail) output.append(tail)