def permalink(self, lang=None, absolute=False, extension='.html', query=None):
    """Return permalink for a post."""
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang

    # Let compilers override extension (e.g. the php compiler)
    if self.compiler.extension() != '.html':
        extension = self.compiler.extension()

    pieces = self.translations[lang].split(os.sep)
    pieces += self.folders[lang].split(os.sep)
    if self.has_pretty_url(lang):
        pieces += [self.meta[lang]['slug'], 'index' + extension]
    else:
        pieces += [self.meta[lang]['slug'] + extension]
    pieces = [_f for _f in pieces if _f and _f != '.']
    link = '/' + '/'.join(pieces)
    if absolute:
        link = urljoin(self.base_url, link[1:])
    index_len = len(self.index_file)
    if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file:
        link = link[:-index_len]
    if query:
        link = link + "?" + query
    link = utils.encodelink(link)
    return link

def permalink(self, lang=None, absolute=False, extension='.html', query=None):
    """Return permalink for a post."""
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang

    # Let compilers override extension (e.g. the php compiler)
    if self.compiler.extension() != '.html':
        extension = self.compiler.extension()

    pieces = self.translations[lang].split(os.sep)
    pieces += self.folder.split(os.sep)
    if self._has_pretty_url(lang):
        pieces += [self.meta[lang]['slug'], 'index' + extension]
    else:
        pieces += [self.meta[lang]['slug'] + extension]
    pieces = [_f for _f in pieces if _f and _f != '.']
    link = '/' + '/'.join(pieces)
    if absolute:
        link = urljoin(self.base_url, link[1:])
    index_len = len(self.index_file)
    if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file:
        link = link[:-index_len]
    if query:
        link = link + "?" + query
    link = utils.encodelink(link)
    return link

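# A minimal standalone sketch (not Nikola API) of the permalink assembly in
# the two versions above: empty and '.' pieces are dropped, the remaining
# pieces are joined into an absolute path, and a trailing index file is
# stripped the way ``strip_indexes`` does. ``index_file`` here is a
# hypothetical stand-in for the site's INDEX_FILE setting.
def _permalink_sketch(pieces, index_file='index.html', strip_indexes=True):
    pieces = [p for p in pieces if p and p != '.']
    link = '/' + '/'.join(pieces)
    if strip_indexes and link.endswith('/' + index_file):
        link = link[:-len(index_file)]  # keep the trailing slash
    return link

# _permalink_sketch(['blog', '.', 'my-post', 'index.html']) -> '/blog/my-post/'
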
def section_link(self, lang=None):
    """Return the link to the post's section."""
    slug = self.section_slug(lang)
    if not self.pretty_urls:
        link = urljoin('/' + slug + '/', self.index_file)
    else:
        link = '/' + slug + '/'
    link = utils.encodelink(link)
    return link

def section_link(self, lang=None):
    """Return the link to the post's section."""
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang
    slug = self.section_slug(lang)
    t = os.path.normpath(self.translations[lang])
    if t == '.':
        t = ''
    link = '/' + '/'.join(i for i in (t, slug) if i) + '/'
    if not self.pretty_urls:
        link = urljoin(link, self.index_file)
    link = utils.encodelink(link)
    return link

def section_link(self, lang=None):
    """Return the link to the post's section (deprecated)."""
    utils.LOGGER.warning("Post.section_link is deprecated. Please use " +
                         "site.link('section_index', post.section_slug()) instead.")
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang
    slug = self.section_slug(lang)
    t = os.path.normpath(self.translations[lang])
    if t == '.':
        t = ''
    link = '/' + '/'.join(i for i in (t, slug) if i) + '/'
    if not self.pretty_urls:
        link = urljoin(link, self.index_file)
    link = utils.encodelink(link)
    return link

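# A small illustration (assumed values, not Nikola API) of why section_link
# builds the directory link with a trailing slash before ``urljoin`` appends
# the index file: without the slash, urljoin replaces the last path segment
# instead of descending into the directory.
from urllib.parse import urljoin

assert urljoin('/posts/', 'index.html') == '/posts/index.html'
assert urljoin('/posts', 'index.html') == '/index.html'  # slash matters
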
def scan_locs():
    """Scan site locations."""
    for root, dirs, files in os.walk(output, followlinks=True):
        if not dirs and not files:
            continue  # Totally empty, not on sitemap
        path = os.path.relpath(root, output)
        # ignore the current directory.
        if path == '.':
            path = syspath = ''
        else:
            syspath = path + os.sep
            path = path.replace(os.sep, '/') + '/'
        lastmod = self.get_lastmod(root)
        loc = urljoin(base_url, base_path + path)
        if kw['index_file'] in files and kw['strip_indexes']:  # ignore folders when not stripping urls
            post = self.site.post_per_file.get(syspath + kw['index_file'])
            if post and (post.is_draft or post.is_private or post.publish_later):
                continue
            alternates = []
            if post:
                for lang in post.translated_to:
                    alt_url = post.permalink(lang=lang, absolute=True)
                    if encodelink(loc) == alt_url:
                        continue
                    alternates.append(alternates_format.format(lang, alt_url))
            urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
        for fname in files:
            if kw['strip_indexes'] and fname == kw['index_file']:
                continue  # We already mapped the folder
            if os.path.splitext(fname)[-1] in mapped_exts:
                real_path = os.path.join(root, fname)
                path = os.path.relpath(real_path, output)
                if path.endswith(kw['index_file']) and kw['strip_indexes']:
                    # ignore index files when stripping urls
                    continue
                if not robot_fetch(path):
                    continue

                # read in binary mode to make ancient files work
                fh = open(real_path, 'rb')
                filehead = fh.read(1024)
                fh.close()

                if path.endswith('.html') or path.endswith('.htm') or path.endswith('.php'):
                    # Ignores "html" files without doctype
                    if b'<!doctype html' not in filehead.lower():
                        continue

                    # Ignores "html" files with noindex robot directives
                    robots_directives = [b'<meta content=noindex name=robots',
                                         b'<meta content=none name=robots',
                                         b'<meta name=robots content=noindex',
                                         b'<meta name=robots content=none']
                    lowquothead = filehead.lower().decode('utf-8', 'ignore').replace('"', '').encode('utf-8')
                    if any([robot_directive in lowquothead for robot_directive in robots_directives]):
                        continue

                # put Atom and RSS in sitemapindex[] instead of in urlset[],
                # sitemap_path is included after it is generated
                if path.endswith('.xml') or path.endswith('.atom') or path.endswith('.rss'):
                    known_elm_roots = (b'<feed', b'<rss', b'<urlset')
                    if any([elm_root in filehead.lower() for elm_root in known_elm_roots]) and path != sitemap_path:
                        path = path.replace(os.sep, '/')
                        lastmod = self.get_lastmod(real_path)
                        loc = urljoin(base_url, base_path + path)
                        sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
                        continue
                    else:
                        continue  # ignores all XML files except those presumed to be RSS

                post = self.site.post_per_file.get(syspath + fname)
                if post and (post.is_draft or post.is_private or post.publish_later):
                    continue
                path = path.replace(os.sep, '/')
                lastmod = self.get_lastmod(real_path)
                loc = urljoin(base_url, base_path + path)
                alternates = []
                if post:
                    for lang in post.translated_to:
                        alt_url = post.permalink(lang=lang, absolute=True)
                        if encodelink(loc) == alt_url:
                            continue
                        alternates.append(alternates_format.format(lang, alt_url))
                urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))

def scan_locs():
    """Scan site locations."""
    for root, dirs, files in os.walk(output, followlinks=True):
        if not dirs and not files and not kw['sitemap_include_fileless_dirs']:
            continue  # Totally empty, not on sitemap
        path = os.path.relpath(root, output)
        # ignore the current directory.
        if path == '.':
            path = syspath = ''
        else:
            syspath = path + os.sep
            path = path.replace(os.sep, '/') + '/'
        lastmod = self.get_lastmod(root)
        loc = urljoin(base_url, base_path + path)
        if kw['index_file'] in files and kw['strip_indexes']:  # ignore folders when not stripping urls
            post = self.site.post_per_file.get(syspath + kw['index_file'])
            if post and (post.is_draft or post.is_private or post.publish_later):
                continue
            alternates = []
            if post:
                for lang in post.translated_to:
                    alt_url = post.permalink(lang=lang, absolute=True)
                    if encodelink(loc) == alt_url:
                        continue
                    alternates.append(alternates_format.format(lang, alt_url))
            urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
        for fname in files:
            if kw['strip_indexes'] and fname == kw['index_file']:
                continue  # We already mapped the folder
            if os.path.splitext(fname)[-1] in mapped_exts:
                real_path = os.path.join(root, fname)
                path = os.path.relpath(real_path, output)
                if path.endswith(kw['index_file']) and kw['strip_indexes']:
                    # ignore index files when stripping urls
                    continue
                if not robot_fetch(path):
                    continue

                # read in binary mode to make ancient files work
                fh = open(real_path, 'rb')
                filehead = fh.read(1024)
                fh.close()

                if path.endswith('.html') or path.endswith('.htm') or path.endswith('.php'):
                    # Ignores "html" files without doctype
                    if b'<!doctype html' not in filehead.lower():
                        continue

                    # Ignores "html" files with noindex robot directives
                    robots_directives = [b'<meta content=noindex name=robots',
                                         b'<meta content=none name=robots',
                                         b'<meta name=robots content=noindex',
                                         b'<meta name=robots content=none']
                    lowquothead = filehead.lower().decode('utf-8', 'ignore').replace('"', '').encode('utf-8')
                    if any([robot_directive in lowquothead for robot_directive in robots_directives]):
                        continue

                # put Atom and RSS in sitemapindex[] instead of in urlset[],
                # sitemap_path is included after it is generated
                if path.endswith('.xml') or path.endswith('.atom') or path.endswith('.rss'):
                    known_elm_roots = (b'<feed', b'<rss', b'<urlset')
                    if any([elm_root in filehead.lower() for elm_root in known_elm_roots]) and path != sitemap_path:
                        path = path.replace(os.sep, '/')
                        lastmod = self.get_lastmod(real_path)
                        loc = urljoin(base_url, base_path + path)
                        sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
                        continue
                    else:
                        continue  # ignores all XML files except those presumed to be RSS

                post = self.site.post_per_file.get(syspath + fname)
                if post and (post.is_draft or post.is_private or post.publish_later):
                    continue
                path = path.replace(os.sep, '/')
                lastmod = self.get_lastmod(real_path)
                loc = urljoin(base_url, base_path + path)
                alternates = []
                if post:
                    for lang in post.translated_to:
                        alt_url = post.permalink(lang=lang, absolute=True)
                        if encodelink(loc) == alt_url:
                            continue
                        alternates.append(alternates_format.format(lang, alt_url))
                urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))

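# A standalone sketch of the HTML filtering rules used in scan_locs above:
# given the first 1 KiB of a file, skip it if it lacks an HTML doctype or
# carries a robots noindex/none directive. This mirrors the logic for
# illustration only; it is not part of Nikola's plugin API.
def _should_skip_html(filehead):
    head = filehead.lower()
    if b'<!doctype html' not in head:
        return True  # not recognizably HTML
    # Strip quotes so <meta name="robots" content="noindex"> matches too.
    unquoted = head.decode('utf-8', 'ignore').replace('"', '').encode('utf-8')
    directives = (b'<meta content=noindex name=robots',
                  b'<meta content=none name=robots',
                  b'<meta name=robots content=noindex',
                  b'<meta name=robots content=none')
    return any(d in unquoted for d in directives)

# _should_skip_html(b'<!DOCTYPE html><meta name="robots" content="noindex">')
# -> True
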
def source_link(self, lang=None):
    """Return absolute link to the post's source."""
    ext = self.source_ext(True)
    link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
    link = utils.encodelink(link)
    return link