Example #1
 def render_template(self, template, path, **ctx):
     """Render a template within the given context ctx"""
     # merge the site-wide context with the call-specific values, but
     # always expose the complete article list
     nctx = self.ctx.copy()
     nctx.update(ctx)
     ctx = nctx
     ctx.update({
         'articles': self.ctx['articles'],
     })
     # narrow the articles-to-render list ("a") to the current language
     if self.lang is not None and "a" in ctx:
         ctx["a"] = filter(lambda a: a.hard_language in (self.lang, None),
                           ctx["a"])
     if ctx.get("filter_langs", False) and "a" in ctx:
         ctx["a"] = filter(
             lambda a: a.headers.language in (self.lang, None), ctx["a"])
     if not isinstance(path, Url):
         path = Url(path).switch_language(self.lang)
     else:
         path.switch_language(self.lang)
     ctx['url'] = path
     tpl = self.lookup.get_template(template)
     template_engine.write_to(ctx['url'].get_path(),
                              tpl.render_unicode(**ctx),
                              ctx.get("date", settings.now))
     # sitemap entry: location, last modification, change frequency, priority
     entry = [
         ctx["url"].copy(),
         ctx.get("date", settings.now), "monthly", 0.5
     ]
     self.sitemap.append(entry)
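A hedged usage sketch: assuming the surrounding object is a site/renderer exposing the ctx, lang, lookup and sitemap attributes used above (the call site, template name and path values are made up):

site.render_template("archive.mako", "/archive/index.html",
                     a=site.ctx["articles"],  # articles to render on the page
                     date=settings.now)       # used as mtime and sitemap lastmod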
Example #2
 def render_paginated(self, template, path, **ctx):
     """Render a template, but break the content into multiple pages"""
     nctx = self.ctx.copy()
     nctx.update(ctx)
     ctx = nctx
     ctx.update({
         'articles': self.ctx['articles'],
     })
     pl = settings.PAGINATE_N
     articles = ctx["a"][:]
     if self.lang is not None:
         articles = filter(lambda a: a.hard_language in (self.lang, None),
                           articles)
     path = path.lstrip("/")
     dirname = os.path.dirname(path)
     baseurl = Url(path).switch_language(self.lang)
     if len(articles) > pl:
         if "pag" not in ctx:
             ctx["pag"] = {}
         if "base" not in ctx["pag"]:
             ctx["pag"]["base"] = dirname + "/" + ctx["_"]("page_%s")
         pages = (len(articles) - 1) // pl + 1  # ceiling division
         ctx["pag"]["first"] = baseurl.get_path()
         ctx["pag"]["pages"] = pages
         # render pages 2..n here; page 1 falls through to the final render
         for p in range(1, pages):
             ctx["pag"]["cur"] = p + 1
             ctx["a"] = articles[p * pl:(p + 1) * pl]
             self.render_template(
                 "_templates/%s.mako" % template,
                 Url(ctx["pag"]["base"] % (p + 1) +
                     ".html").switch_language(self.lang), **ctx)
         ctx["pag"]["cur"] = 1
         ctx["a"] = articles[:pl]
     self.render_template("_templates/%s.mako" % template, baseurl, **ctx)
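The page count uses ceiling division, and pages 2..n are rendered inside the loop while page 1 falls through to the last render_template call. A minimal check of the arithmetic (the PAGINATE_N value and the article count are made up):

pl = 10                                  # hypothetical settings.PAGINATE_N
articles = range(23)                     # 23 dummy articles (Python 2 list)
pages = (len(articles) - 1) // pl + 1
assert pages == 3                        # slices [0:10], [10:20], [20:23]
assert articles[2 * pl:3 * pl] == range(20, 23)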
Example #3
def aa(path):
    """Make a path absolute"""
    if isinstance(path, Url):
        return path.get()
    elif re.match(r"[a-z0-9\-]+:", path) or path.startswith("//"):
        # explicit scheme or protocol-relative URL: leave untouched
        return path
    return Url(path).get()
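How aa presumably behaves for each input shape (assuming Url.get() returns a site-absolute URL; the sample values are made up):

aa(Url("about"))               # already a Url: return its absolute form
aa("mailto:user@example.org")  # explicit scheme: returned unchanged
aa("//cdn.example.org/x.js")   # protocol-relative: returned unchanged
aa("blog/post.html")           # plain path: wrapped in Url and made absolute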
Example #4
 def _laa(path):
     """Resolve path to an absolute URL in lang (a free variable from the enclosing scope)"""
     if isinstance(path, Article):
         if path.headers.translation:
             for t in path.headers.translation:
                 if t.headers.language.startswith(lang):
                     return t.url.get()
         return path.url.copy().switch_language(lang).get()
     elif isinstance(path, Url):
         return path.copy().switch_language(lang).get()
     else:
         return Url(path).switch_language(lang).get()
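_laa reads the free variable lang from its enclosing scope, so it is presumably defined inside a factory or a per-language loop; a minimal sketch of such an enclosing scope (the factory name is an assumption):

def make_laa(lang):
    def _laa(path):
        ...  # body as above, using the captured lang
    return _laa

laa_en = make_laa("en")  # resolves Articles, Urls or strings to English URLs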
Example #5
    def __init__(self, path):
        """Initialize with path to article source"""
        path = path.lstrip("/")
        self.lexers.update(settings.get('LEXERS', {}))
        self.processed = False
        self.category = os.path.dirname(path).strip("/")
        self.url = Url(settings.get("ARTICLE_PATH", "") + path)
        langs = filter(lambda s: s in settings.languages,
                       self.url.get_extensions())
        self.hard_language = None
        if len(langs) == 1:
            self.hard_language = langs[0]
            self.url.fix_language()

        with open("_articles/%s" % path, 'r') as f:
            head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
        self.headers = ArticleHeaders(head)
        self.raw_content = content.decode("utf-8")
        self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")

        self.complete_headers()
        self.process_content()
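The constructor splits the source file on the first blank line into headers and body, so an article presumably looks like RFC 822-style headers followed by XHTML content; a minimal sketch of that split (the sample header values are made up):

SAMPLE = ("Title: Hello world\n"
          "status: draft\n"
          "\n"
          "<p>First paragraph of the article body ...</p>")
head, content = SAMPLE.replace("\r\n", "\n").split("\n\n", 1)
# head -> the two header lines, content -> the XHTML body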
Example #6
class Article(object):
    """A single article or blog post"""

    # Lexer aliases
    lexers = {
        'php-inline': ['php', {
            "startinline": True
        }],
    }
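    # Hedged note: settings.LEXERS, merged in by __init__ below, presumably
    # uses the same shape: a Pygments Lexer instance, or a
    # [lexer_name, option_dict] pair for get_lexer_by_name, e.g.
    # LEXERS = {'shell': ['bash', {}]}  (made-up entry)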

    def __init__(self, path):
        """Initialize with path to article source"""
        path = path.lstrip("/")
        self.lexers.update(settings.get('LEXERS', {}))
        self.processed = False
        self.category = os.path.dirname(path).strip("/")
        self.url = Url(settings.get("ARTICLE_PATH", "") + path)
        langs = filter(lambda s: s in settings.languages,
                       self.url.get_extensions())
        self.hard_language = None
        if len(langs) == 1:
            self.hard_language = langs[0]
            self.url.fix_language()

        with open("_articles/%s" % path, 'r') as f:
            head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
        self.headers = ArticleHeaders(head)
        self.raw_content = content.decode("utf-8")
        self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")

        self.complete_headers()
        self.process_content()

    def is_live(self):
        """Check meta info to see, if this article is live"""
        if self.headers.available and self.headers.available < settings.now:
            return False
        if self.headers.issued and self.headers.issued > settings.now:
            return False
        if self.headers.valid and self.headers.valid < settings.now:
            return False
        if "exclude" in self.headers.status:
            return False
        if "draft" in self.headers.get("status", []) and not settings.DEBUG:
            return False
        return True

    def complete_headers(self):
        """Set default headers, that are missing"""
        defaults = {
            "ID": str(self.url),
            "date": settings.now,
            "type": "Text",
            "format": "application/xhtml+xml",
            "status": [],
            "language": self.hard_language or settings.LANGUAGE,
        }
        self.headers.set_defaults(defaults)
        if "title" not in self.headers:
            if "standalone" in self.headers.status:
                self.process_content()
                self.headers.title = self.soup.html.head.title.string
            else:
                self.headers.title = ""
        if "description" not in self.headers:
            self.process_content()
            if "abstract" in self.headers:
                self.headers.description = self.headers.abstract
            elif "standalone" in self.headers.status:
                self.headers.description = generate_description(
                    str(self.soup.body).decode("UTF-8"))
            else:
                self.headers.description = generate_description(
                    self.__unicode__())

    if pygments is not None:
        if "FORMATTER" in settings:

            class MyHtmlFormatter(settings.FORMATTER):
                pass
        else:

            class MyHtmlFormatter(HtmlFormatter):
                def __init__(self, hl_lines=None):
                    super(Article.MyHtmlFormatter,
                          self).__init__(encoding='UTF-8',
                                         classprefix="s_",
                                         hl_lines=hl_lines or [])

                def wrap(self, inner, outfile):
                    if settings.HIGHLIGHT_OL:
                        yield (0, '<ol class="highlight">')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                # pass non-source chunks through untouched
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<li class="hll"><code>' + l +
                                       '</code></li>')
                            else:
                                yield (c, '<li><code>' + l + '</code></li>')
                        yield (0, '</ol>')
                    else:
                        yield (0, '<pre class="highlight"><code>')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<span class="line hll">' + l +
                                       '</span>')
                            else:
                                yield (c,
                                       '<span class="line">' + l + '</span>')
                        yield (0, '</code></pre>')

                # pass tokens through unchanged: line highlighting is done
                # in wrap() above rather than by HtmlFormatter's default
                # per-line markup
                def _highlight_lines(self, tokensource):
                    for tup in tokensource:
                        yield tup

    def process_content(self):
        """Change the raw content to a renderable state

        This contains syntax highlighting but not URI scheme resolving.
        The latter is done in self.save(). This function works exclusively
        upon self.soup."""
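        # Hedged illustration of the expected input (BeautifulSoup 3 markup):
        # a <pre> carrying data-lang and an optional comma-separated data-hl
        # line list, e.g.
        #
        #   <pre data-lang="python" data-hl="2">def f():
        #       return 42</pre>
        #
        # is replaced below by Pygments output wrapped by MyHtmlFormatter.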
        if self.processed:
            return True
        elif "standalone" in self.headers.status:
            self.processed = True
            return True
        # Markup cleaning: expand minimized attributes (value None) so the
        # output stays XML-parsable, see
        # http://code.davidjanes.com/blog/2009/02/05/turning-garbage-html-into-xml-parsable-xhtml-using-beautiful-soup/
        for item in self.soup.findAll():
            for index, (name, value) in enumerate(item.attrs):
                if value is None:
                    item.attrs[index] = (name, name)
        # Syntax highlighting:
        pres = self.soup.findAll("pre", {"data-lang": re.compile(r".+")})
        if pygments is not None:
            for pre in pres:
                formatter = Article.MyHtmlFormatter(
                    hl_lines=pre.get("data-hl", "").split(","))
                lang = pre["data-lang"]
                text = _unescape(pre.renderContents())
                try:
                    # Lexers can be given in the config: either Pygments
                    # Lexer instances directly, or a list of lexer name and
                    # an optional lexer config dict.
                    if lang in self.lexers:
                        if isinstance(self.lexers[lang], Lexer):
                            lexer = self.lexers[lang]
                        else:
                            lexer = get_lexer_by_name(self.lexers[lang][0],
                                                      stripnl=False,
                                                      **self.lexers[lang][1])
                    else:
                        lexer = get_lexer_by_name(lang, stripnl=False)
                except pygments.util.ClassNotFound:
                    logger.warning("Couldn't find lexer for %s" % lang)
                    lexer = guess_lexer(text)
                result = pygments.highlight(text, lexer, formatter)
                highlighted = BeautifulSoup(result, fromEncoding="utf-8")
                if settings.HIGHLIGHT_OL:
                    for at, val in pre.attrs:
                        if at == "class":
                            highlighted.ol[at] += u" " + val
                        else:
                            highlighted.ol[at] = val
                    pre.replaceWith(highlighted.ol)
                else:
                    for at, val in pre.attrs:
                        if at == "data-lang":
                            highlighted.pre.code[at] = val
                        elif at == "class":
                            highlighted.pre[at] += u" " + val
                        else:
                            highlighted.pre[at] = val
                    pre.replaceWith(highlighted.pre)
        self.processed = True

    def save(self, **ctx):
        """Save the article to a file

        If it's a standalone, save it directly. Else send the
        context to the corresponding template. In order to recognize
        the "id:" URI scheme, the parameter **ctx must contain
        the value "articles", against which's content the URI is
        checked."""
        dr = ""
        if "draft" in self.headers.status:
            dr = "*DRAFT* "
        logger.debug(dr + self.url.get())
        if "draft" in self.headers.status and not settings.DEBUG:
            raise ValueError("Can't save drafts")
        if "noindex" not in self.headers.get("robots", ""):
            x = u""
            x += u" ".join([
                "%s %s" % (x, y) for x, y in self.headers.get_dc().iteritems()
            ])
            template_engine.add_to_index(self.url,
                                         x + u" " + self.__unicode__(),
                                         self.headers.language)
        if "standalone" in self.headers.status:
            template_engine.write_to(self.url.get_path(), self.__unicode__())
        else:
            if "articles" in ctx:
                # resolve the "id:" pseudo-scheme
                ax = self.soup.findAll("a", href=re.compile(r"^id:"))
                for a in ax:
                    a['href'] = _get_by_id(
                        a['href'][3:],
                        ctx['articles']).url.copy().switch_language(
                            self.headers.language).get()
                # resolve links to Requires and IsRequiredBy
                # TODO: Do we need multiple Requires?
                if 'Requires' in self.headers:
                    self.headers.Requires = _get_by_id(self.headers.Requires,
                                                       ctx['articles'])
                if 'IsRequiredBy' in self.headers:
                    self.headers.IsRequiredBy = _get_by_id(
                        self.headers.IsRequiredBy, ctx['articles'])
                if 'translation' in self.headers:
                    self.headers.translation = _get_by_id(
                        self.headers.translation, ctx['articles'])
            for protocol, url_scheme in settings.PROTOCOLS.iteritems():
                # resolve all pseudo-schemes
                ax = self.soup.findAll(href=re.compile(u"^%s:" % protocol))
                ix = self.soup.findAll(src=re.compile(u"^%s:" % protocol))
                dx = self.soup.findAll(data=re.compile(u"^%s:" % protocol))
                for a in ax:
                    if callable(url_scheme):
                        a['href'] = url_scheme(a['href'][len(protocol) + 1:])
                    else:
                        a['href'] = url_scheme % a['href'][len(protocol) + 1:]
                    if a.get('class', False):
                        a['class'] += " protocol_%s" % protocol
                    else:
                        a['class'] = "protocol_%s" % protocol
                for a in ix:
                    if callable(url_scheme):
                        a['src'] = url_scheme(a['src'][len(protocol) + 1:])
                    else:
                        a['src'] = url_scheme % a['src'][len(protocol) + 1:]
                    if a.get('class', False):
                        a['class'] += " protocol_%s" % protocol
                    else:
                        a['class'] = "protocol_%s" % protocol
                for a in dx:
                    if callable(url_scheme):
                        a['data'] = url_scheme(a['data'][len(protocol) + 1:])
                    else:
                        a['data'] = url_scheme % a['data'][len(protocol) + 1:]
                    if a.get('class', False):
                        a['class'] += " protocol_%s" % protocol
                    else:
                        a['class'] = "protocol_%s" % protocol
            template_engine.render_article(self, **ctx)
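        # Hedged sketch of the pseudo-scheme resolution above: with a setting
        # like PROTOCOLS = {"wiki": "https://en.wikipedia.org/wiki/%s"}
        # (hypothetical), <a href="wiki:Python"> becomes
        # <a href="https://en.wikipedia.org/wiki/Python"
        #    class="protocol_wiki">; a callable scheme instead receives the
        # part after the colon as its argument.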

    def __unicode__(self):
        # work around bug in BeautifulSoup
        return str(self.soup).decode('UTF-8')

    def __hash__(self):
        s = hashlib.sha224(
            self.headers.date.strftime("%Y-%m-%dT%H:%M:%S") + "_" +
            self.headers.ID).hexdigest()
        return int(s, 16)

    def __repr__(self):
        return '<Article "%s">' % self.url.get()

    def __cmp__(self, other):
        """Compare articles by date first, ID second (newest first)"""
        s = self.headers.date.strftime(
            "%Y-%m-%dT%H:%M:%S") + "_" + self.headers.ID
        if isinstance(other, basestring):
            o = other
        else:
            o = other.headers.date.strftime(
                "%Y-%m-%dT%H:%M:%S") + "_" + other.headers.ID
        # arguments reversed so that newer articles sort first
        return cmp(o, s)
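A hedged end-to-end sketch of how this class might be driven (the file names, the articles collection and the build loop are assumptions; Url, settings and template_engine come from the surrounding project):

articles = [Article(p) for p in ("blog/hello.html", "blog/world.html.en")]
live = [a for a in articles if a.is_live()]
live.sort()                   # __cmp__ sorts newest first
for art in live:
    art.save(articles=live)   # resolves "id:" links against the full set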