Example #1
    def permalink(self,
                  lang=None,
                  absolute=False,
                  extension='.html',
                  query=None):
        """Return permalink for a post."""
        if lang is None:
            lang = nikola.utils.LocaleBorg().current_lang

        # Let compilers override extension (e.g. the php compiler)
        if self.compiler.extension() != '.html':
            extension = self.compiler.extension()

        pieces = self.translations[lang].split(os.sep)
        pieces += self.folders[lang].split(os.sep)
        if self.has_pretty_url(lang):
            pieces += [self.meta[lang]['slug'], 'index' + extension]
        else:
            pieces += [self.meta[lang]['slug'] + extension]
        pieces = [_f for _f in pieces if _f and _f != '.']
        link = '/' + '/'.join(pieces)
        if absolute:
            link = urljoin(self.base_url, link[1:])
        index_len = len(self.index_file)
        if self.strip_indexes and link[-(1 +
                                         index_len):] == '/' + self.index_file:
            link = link[:-index_len]
        if query:
            link = link + "?" + query
        link = utils.encodelink(link)
        return link
Example #2
    def permalink(self, lang=None, absolute=False, extension='.html', query=None):
        """Return permalink for a post."""
        if lang is None:
            lang = nikola.utils.LocaleBorg().current_lang

        # Let compilers override extension (e.g. the php compiler)
        if self.compiler.extension() != '.html':
            extension = self.compiler.extension()

        pieces = self.translations[lang].split(os.sep)
        pieces += self.folder.split(os.sep)
        if self._has_pretty_url(lang):
            pieces += [self.meta[lang]['slug'], 'index' + extension]
        else:
            pieces += [self.meta[lang]['slug'] + extension]
        pieces = [_f for _f in pieces if _f and _f != '.']
        link = '/' + '/'.join(pieces)
        if absolute:
            link = urljoin(self.base_url, link[1:])
        index_len = len(self.index_file)
        if self.strip_indexes and link[-(1 + index_len):] == '/' + self.index_file:
            link = link[:-index_len]
        if query:
            link = link + "?" + query
        link = utils.encodelink(link)
        return link
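
Reading the two permalink variants side by side, the core idea is the same: join the translation prefix, the folder, and the slug, drop empty or '.' segments, and optionally strip the trailing index file. Below is a minimal, standalone sketch of that assembly outside Nikola; the helper name and its arguments are hypothetical, only the logic mirrors the examples above.

def build_permalink(translation_path, folder, slug, extension='.html',
                    pretty_url=False, strip_indexes=True, index_file='index.html'):
    # Collect path segments, then drop empty and '.' pieces as the source does.
    pieces = translation_path.split('/') + folder.split('/')
    if pretty_url:
        pieces += [slug, 'index' + extension]
    else:
        pieces += [slug + extension]
    pieces = [p for p in pieces if p and p != '.']
    link = '/' + '/'.join(pieces)
    # Optionally strip a trailing index file, keeping the trailing slash.
    if strip_indexes and link.endswith('/' + index_file):
        link = link[:-len(index_file)]
    return link

print(build_permalink('en', 'posts', 'hello-world'))          # /en/posts/hello-world.html
print(build_permalink('', 'blog', 'hello', pretty_url=True))  # /blog/hello/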
Example #3
    def section_link(self, lang=None):
        """Return the link to the post's section."""
        slug = self.section_slug(lang)
        if not self.pretty_urls:
            link = urljoin('/' + slug + '/', self.index_file)
        else:
            link = '/' + slug + '/'
        link = utils.encodelink(link)
        return link
Example #4
    def section_link(self, lang=None):
        """Return the link to the post's section."""
        if lang is None:
            lang = nikola.utils.LocaleBorg().current_lang

        slug = self.section_slug(lang)
        t = os.path.normpath(self.translations[lang])
        if t == '.':
            t = ''
        link = '/' + '/'.join(i for i in (t, slug) if i) + '/'
        if not self.pretty_urls:
            link = urljoin(link, self.index_file)
        link = utils.encodelink(link)
        return link
Example #5
    def section_link(self, lang=None):
        """Return the link to the post's section (deprecated)."""
        utils.LOGGER.warning("Post.section_link is deprecated. Please use " +
                             "site.link('section_index', post.section_slug()) instead.")
        if lang is None:
            lang = nikola.utils.LocaleBorg().current_lang

        slug = self.section_slug(lang)
        t = os.path.normpath(self.translations[lang])
        if t == '.':
            t = ''
        link = '/' + '/'.join(i for i in (t, slug) if i) + '/'
        if not self.pretty_urls:
            link = urljoin(link, self.index_file)
        link = utils.encodelink(link)
        return link
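
The section_link variants above share one assembly step: prepend the normalized translation prefix, append the slug, and join the index file only when pretty URLs are disabled. A small self-contained sketch of that logic follows; the function name and parameters are hypothetical, not Nikola API.

from urllib.parse import urljoin
import os

def build_section_link(translation_path, slug, pretty_urls=True, index_file='index.html'):
    # Normalize the translation prefix; '.' means "no prefix".
    t = os.path.normpath(translation_path)
    if t == '.':
        t = ''
    link = '/' + '/'.join(i for i in (t, slug) if i) + '/'
    # Without pretty URLs, point at the explicit index file.
    if not pretty_urls:
        link = urljoin(link, index_file)
    return link

print(build_section_link('en', 'tutorials'))                   # /en/tutorials/
print(build_section_link('', 'tutorials', pretty_urls=False))  # /tutorials/index.html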
Example #6
        def scan_locs():
            """Scan site locations."""
            for root, dirs, files in os.walk(output, followlinks=True):
                if not dirs and not files:
                    continue  # Totally empty, not on sitemap
                path = os.path.relpath(root, output)
                # ignore the current directory.
                if path == '.':
                    path = syspath = ''
                else:
                    syspath = path + os.sep
                    path = path.replace(os.sep, '/') + '/'
                lastmod = self.get_lastmod(root)
                loc = urljoin(base_url, base_path + path)
                if kw['index_file'] in files and kw['strip_indexes']:  # ignore folders when not stripping urls
                    post = self.site.post_per_file.get(syspath + kw['index_file'])
                    if post and (post.is_draft or post.is_private or post.publish_later):
                        continue
                    alternates = []
                    if post:
                        for lang in post.translated_to:
                            alt_url = post.permalink(lang=lang, absolute=True)
                            if encodelink(loc) == alt_url:
                                continue
                            alternates.append(alternates_format.format(lang, alt_url))
                    urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
                for fname in files:
                    if kw['strip_indexes'] and fname == kw['index_file']:
                        continue  # We already mapped the folder
                    if os.path.splitext(fname)[-1] in mapped_exts:
                        real_path = os.path.join(root, fname)
                        path = os.path.relpath(real_path, output)
                        if path.endswith(kw['index_file']) and kw['strip_indexes']:
                            # ignore index files when stripping urls
                            continue
                        if not robot_fetch(path):
                            continue

                        # read in binary mode to make ancient files work
                        fh = open(real_path, 'rb')
                        filehead = fh.read(1024)
                        fh.close()

                        if path.endswith('.html') or path.endswith('.htm') or path.endswith('.php'):
                            # Ignores "html" files without doctype
                            if b'<!doctype html' not in filehead.lower():
                                continue

                            # Ignores "html" files with noindex robot directives
                            robots_directives = [b'<meta content=noindex name=robots',
                                                 b'<meta content=none name=robots',
                                                 b'<meta name=robots content=noindex',
                                                 b'<meta name=robots content=none']
                            lowquothead = filehead.lower().decode('utf-8', 'ignore').replace('"', '').encode('utf-8')
                            if any([robot_directive in lowquothead for robot_directive in robots_directives]):
                                continue

                        # put Atom and RSS in sitemapindex[] instead of in urlset[],
                        # sitemap_path is included after it is generated
                        if path.endswith('.xml') or path.endswith('.atom') or path.endswith('.rss'):
                            known_elm_roots = (b'<feed', b'<rss', b'<urlset')
                            if any([elm_root in filehead.lower() for elm_root in known_elm_roots]) and path != sitemap_path:
                                path = path.replace(os.sep, '/')
                                lastmod = self.get_lastmod(real_path)
                                loc = urljoin(base_url, base_path + path)
                                sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
                                continue
                            else:
                                continue  # ignores all XML files except those presumed to be RSS
                        post = self.site.post_per_file.get(syspath)
                        if post and (post.is_draft or post.is_private or post.publish_later):
                            continue
                        path = path.replace(os.sep, '/')
                        lastmod = self.get_lastmod(real_path)
                        loc = urljoin(base_url, base_path + path)
                        alternates = []
                        if post:
                            for lang in post.translated_to:
                                alt_url = post.permalink(lang=lang, absolute=True)
                                if encodelink(loc) == alt_url:
                                    continue
                                alternates.append(alternates_format.format(lang, alt_url))
                        urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
Example #7
        def scan_locs():
            """Scan site locations."""
            for root, dirs, files in os.walk(output, followlinks=True):
                if not dirs and not files and not kw[
                        'sitemap_include_fileless_dirs']:
                    continue  # Totally empty, not on sitemap
                path = os.path.relpath(root, output)
                # ignore the current directory.
                if path == '.':
                    path = syspath = ''
                else:
                    syspath = path + os.sep
                    path = path.replace(os.sep, '/') + '/'
                lastmod = self.get_lastmod(root)
                loc = urljoin(base_url, base_path + path)
                if kw['index_file'] in files and kw[
                        'strip_indexes']:  # ignore folders when not stripping urls
                    post = self.site.post_per_file.get(syspath +
                                                       kw['index_file'])
                    if post and (post.is_draft or post.is_private
                                 or post.publish_later):
                        continue
                    alternates = []
                    if post:
                        for lang in post.translated_to:
                            alt_url = post.permalink(lang=lang, absolute=True)
                            if encodelink(loc) == alt_url:
                                continue
                            alternates.append(
                                alternates_format.format(lang, alt_url))
                    urlset[loc] = loc_format.format(encodelink(loc), lastmod,
                                                    ''.join(alternates))
                for fname in files:
                    if kw['strip_indexes'] and fname == kw['index_file']:
                        continue  # We already mapped the folder
                    if os.path.splitext(fname)[-1] in mapped_exts:
                        real_path = os.path.join(root, fname)
                        path = os.path.relpath(real_path, output)
                        if path.endswith(
                                kw['index_file']) and kw['strip_indexes']:
                            # ignore index files when stripping urls
                            continue
                        if not robot_fetch(path):
                            continue

                        # read in binary mode to make ancient files work
                        fh = open(real_path, 'rb')
                        filehead = fh.read(1024)
                        fh.close()

                        if path.endswith('.html') or path.endswith(
                                '.htm') or path.endswith('.php'):
                            # Ignores "html" files without doctype
                            if b'<!doctype html' not in filehead.lower():
                                continue

                            # Ignores "html" files with noindex robot directives
                            robots_directives = [
                                b'<meta content=noindex name=robots',
                                b'<meta content=none name=robots',
                                b'<meta name=robots content=noindex',
                                b'<meta name=robots content=none'
                            ]
                            lowquothead = filehead.lower().decode(
                                'utf-8', 'ignore').replace('"',
                                                           '').encode('utf-8')
                            if any([
                                    robot_directive in lowquothead
                                    for robot_directive in robots_directives
                            ]):
                                continue

                        # put Atom and RSS in sitemapindex[] instead of in urlset[],
                        # sitemap_path is included after it is generated
                        if path.endswith('.xml') or path.endswith(
                                '.atom') or path.endswith('.rss'):
                            known_elm_roots = (b'<feed', b'<rss', b'<urlset')
                            if any([
                                    elm_root in filehead.lower()
                                    for elm_root in known_elm_roots
                            ]) and path != sitemap_path:
                                path = path.replace(os.sep, '/')
                                lastmod = self.get_lastmod(real_path)
                                loc = urljoin(base_url, base_path + path)
                                sitemapindex[loc] = sitemap_format.format(
                                    encodelink(loc), lastmod)
                                continue
                            else:
                                continue  # ignores all XML files except those presumed to be RSS
                        post = self.site.post_per_file.get(syspath)
                        if post and (post.is_draft or post.is_private
                                     or post.publish_later):
                            continue
                        path = path.replace(os.sep, '/')
                        lastmod = self.get_lastmod(real_path)
                        loc = urljoin(base_url, base_path + path)
                        alternates = []
                        if post:
                            for lang in post.translated_to:
                                alt_url = post.permalink(lang=lang,
                                                         absolute=True)
                                if encodelink(loc) == alt_url:
                                    continue
                                alternates.append(
                                    alternates_format.format(lang, alt_url))
                        urlset[loc] = loc_format.format(
                            encodelink(loc), lastmod, '\n'.join(alternates))
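
Both scan_locs variants walk the rendered output with os.walk, map each directory and file back to a URL via os.path.relpath and urljoin, and sniff the first kilobyte of each file before adding it to the sitemap. The stripped-down sketch below shows only that scanning idea; the function name and the output/base_url parameters are placeholders, not the plugin's actual signature.

import os
from urllib.parse import urljoin

def scan_html_locs(output, base_url):
    """Collect absolute URLs for HTML files under output (base_url must end in '/')."""
    locs = []
    for root, dirs, files in os.walk(output, followlinks=True):
        for fname in files:
            if not fname.endswith(('.html', '.htm')):
                continue
            real_path = os.path.join(root, fname)
            # Read in binary mode and sniff the first kilobyte, as the plugin does.
            with open(real_path, 'rb') as fh:
                filehead = fh.read(1024)
            # Skip "html" files that carry no doctype.
            if b'<!doctype html' not in filehead.lower():
                continue
            path = os.path.relpath(real_path, output).replace(os.sep, '/')
            locs.append(urljoin(base_url, path))
    return locs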
Example #8
    def source_link(self, lang=None):
        """Return absolute link to the post's source."""
        ext = self.source_ext(True)
        link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
        link = utils.encodelink(link)
        return link
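
Every example ends by passing the assembled link through utils.encodelink. As a rough, hypothetical analogue (Nikola's actual implementation may differ), percent-encoding the path while leaving URL delimiters intact looks like this:

from urllib.parse import quote

def encode_link_sketch(link):
    # Percent-encode non-ASCII and space characters, keep '/', ':', '?', '&', '=', '#'.
    return quote(link, safe='/:?&=#')

print(encode_link_sketch('/posts/héllo wörld.html'))
# /posts/h%C3%A9llo%20w%C3%B6rld.html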