def render_template(self, template, path, **ctx):
    """Render a template within the given context ctx

    template -- template file name, resolved through self.lookup
    path     -- output location; a Url instance or a string (strings
                are wrapped in a Url)
    ctx      -- extra template variables, layered over self.ctx
    """
    # Layer the call-specific context over the renderer's base context
    # without mutating self.ctx itself.
    nctx = self.ctx.copy()
    nctx.update(ctx)
    ctx = nctx
    # Always expose the full article list, even if the caller passed
    # its own "articles" entry.
    ctx.update({
        'articles': self.ctx['articles'],
    })
    # Restrict the article selection "a" to the current language.
    # NOTE: filter() returns a list here (Python 2).
    if self.lang is not None and "a" in ctx:
        ctx["a"] = filter(lambda a: a.hard_language in (self.lang, None),
                          ctx["a"])
    if ctx.get("filter_langs", False) and "a" in ctx:
        ctx["a"] = filter(
            lambda a: a.headers.language in (self.lang, None), ctx["a"])
    if not isinstance(path, Url):
        path = Url(path).switch_language(self.lang)
    else:
        # NOTE(review): the return value is discarded here — this only
        # works if Url.switch_language() also mutates in place; confirm.
        path.switch_language(self.lang)
    ctx['url'] = path
    tpl = self.lookup.get_template(template)
    template_engine.write_to(ctx['url'].get_path(),
                             tpl.render_unicode(**ctx),
                             ctx.get("date", settings.now))
    # Record the page for the sitemap:
    # [url, last-modified date, change frequency, priority]
    sitemap = [
        ctx["url"].copy(),
        ctx.get("date", settings.now), "monthly", 0.5
    ]
    self.sitemap.append(sitemap)
def render_paginated(self, template, path, **ctx):
    """Render a template, but break the content into multiple pages

    The articles in ctx["a"] are split into chunks of
    settings.PAGINATE_N; the first page is written to *path*, further
    pages to ctx["pag"]["base"] % page_number + ".html".
    """
    # Layer the call-specific context over the base context without
    # mutating self.ctx.
    nctx = self.ctx.copy()
    nctx.update(ctx)
    ctx = nctx
    ctx.update({
        'articles': self.ctx['articles'],
    })
    pl = settings.PAGINATE_N
    # Work on a copy: ctx["a"] is overwritten per page below.
    articles = ctx["a"][:]
    if self.lang is not None:
        articles = filter(lambda a: a.hard_language in (self.lang, None),
                          articles)
    path = path.lstrip("/")
    dirname = os.path.dirname(path)
    baseurl = Url(path).switch_language(self.lang)
    if len(articles) > pl:
        if "pag" not in ctx:
            ctx["pag"] = {}
        if "base" not in ctx["pag"]:
            # "page_%s" is run through the template translation helper
            # ctx["_"] and later filled with the page number.
            ctx["pag"]["base"] = dirname + "/" + ctx["_"]("page_%s")
        # Ceiling division: number of pages needed for all articles.
        pages = (len(articles) - 1) // pl + 1
        ctx["pag"]["first"] = baseurl.get_path()
        ctx["pag"]["pages"] = pages
        # Pages 2..n; page 1 is rendered below at the base URL.
        for p in range(1, pages):
            ctx["pag"]["cur"] = p + 1
            ctx["a"] = articles[p * pl:(p + 1) * pl]
            self.render_template(
                "_templates/%s.mako" % template,
                Url(ctx["pag"]["base"] % (p + 1) +
                    ".html").switch_language(self.lang), **ctx)
    # NOTE(review): if len(articles) <= pl and the caller did not
    # supply ctx["pag"], the next line raises KeyError — confirm all
    # callers either paginate or pass a "pag" dict.
    ctx["pag"]["cur"] = 1
    ctx["a"] = articles[:pl]
    self.render_template("_templates/%s.mako" % template, baseurl, **ctx)
def aa(path):
    """Make a path absolute.

    Url instances render via Url.get(); already-absolute references
    (scheme-prefixed like "http:..." or protocol-relative "//...")
    pass through untouched; everything else is wrapped in a Url first.
    """
    if isinstance(path, Url):
        return path.get()
    already_absolute = (path.startswith("//")
                        or re.match(r"[a-z0-9\-]+:", path))
    return path if already_absolute else Url(path).get()
def _laa(path):
    # Resolve *path* to a language-adjusted absolute URL, using the
    # enclosing scope's ``lang``.
    if isinstance(path, Article):
        # Prefer an explicit translation whose language matches.
        for translated in (path.headers.translation or []):
            if translated.headers.language.startswith(lang):
                return translated.url.get()
        return path.url.copy().switch_language(lang).get()
    if isinstance(path, Url):
        return path.copy().switch_language(lang).get()
    return Url(path).switch_language(lang).get()
def __init__(self, path):
    """Initialize with path to article source.

    path -- source file path relative to the "_articles/" directory
            (a leading "/" is stripped).
    """
    path = path.lstrip("/")
    self.lexers.update(settings.get('LEXERS', {}))
    self.processed = False
    self.category = os.path.dirname(path).strip("/")
    self.url = Url(settings.get("ARTICLE_PATH", "") + path)
    # Exactly one recognized language extension (e.g. ".en") pins the
    # article's language; otherwise it stays None.
    l = filter(lambda s: s in settings.languages,
               self.url.get_extensions())
    self.hard_language = None
    if len(l) == 1:
        self.hard_language = l[0]
    self.url.fix_language()
    # "with" closes the handle even when read()/split() raises; the
    # previous open()/close() pair leaked it on error.
    with open("_articles/%s" % path, 'r') as f:
        head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
    self.headers = ArticleHeaders(head)
    self.raw_content = content.decode("utf-8")
    self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")
    self.complete_headers()
    self.process_content()
def __init__(self, path):
    """Initialize with path to article source.

    path -- source file path relative to the "_articles/" directory
            (a leading "/" is stripped).
    """
    path = path.lstrip("/")
    self.lexers.update(settings.get('LEXERS', {}))
    self.processed = False
    self.category = os.path.dirname(path).strip("/")
    self.url = Url(settings.get("ARTICLE_PATH", "") + path)
    # Exactly one recognized language extension (e.g. ".en") pins the
    # article's language; otherwise it stays None.
    l = filter(lambda s: s in settings.languages,
               self.url.get_extensions())
    self.hard_language = None
    if len(l) == 1:
        self.hard_language = l[0]
    self.url.fix_language()
    # "with" closes the handle even when read()/split() raises; the
    # previous open()/close() pair leaked it on error.
    with open("_articles/%s" % path, 'r') as f:
        head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
    self.headers = ArticleHeaders(head)
    self.raw_content = content.decode("utf-8")
    self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")
    self.complete_headers()
    self.process_content()
class Article(object):
    """A single article or blog post.

    NOTE(review): this file appears to contain two near-identical
    definitions of ``Article``; the one defined later wins at import
    time — confirm and drop the duplicate.
    """

    # Lexer aliases: maps a config name to either a Pygments Lexer
    # instance or a [lexer_name, lexer_options_dict] list (the options
    # dict is optional).
    lexers = {
        'php-inline': ['php', {
            "startinline": True
        }],
    }

    def __init__(self, path):
        """Initialize with path to article source.

        path -- source file path relative to "_articles/" (a leading
                "/" is stripped).
        """
        path = path.lstrip("/")
        self.lexers.update(settings.get('LEXERS', {}))
        self.processed = False
        self.category = os.path.dirname(path).strip("/")
        self.url = Url(settings.get("ARTICLE_PATH", "") + path)
        # Exactly one recognized language extension (e.g. ".en") pins
        # the article's language; otherwise it stays None.
        l = filter(lambda s: s in settings.languages,
                   self.url.get_extensions())
        self.hard_language = None
        if len(l) == 1:
            self.hard_language = l[0]
        self.url.fix_language()
        # "with" closes the handle even when read()/split() raises;
        # the previous open()/close() pair leaked it on error.
        with open("_articles/%s" % path, 'r') as f:
            head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
        self.headers = ArticleHeaders(head)
        self.raw_content = content.decode("utf-8")
        self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")
        self.complete_headers()
        self.process_content()

    def is_live(self):
        """Check meta info to see, if this article is live."""
        if self.headers.available and self.headers.available < settings.now:
            return False
        if self.headers.issued and self.headers.issued > settings.now:
            return False
        if self.headers.valid and self.headers.valid < settings.now:
            return False
        if "exclude" in self.headers.status:
            return False
        if "draft" in self.headers.get("status", []) and not settings.DEBUG:
            return False
        return True

    def complete_headers(self):
        """Set default headers, that are missing."""
        defaults = {
            "ID": str(self.url),
            "date": settings.now,
            "type": "Text",
            "format": "application/xhtml+xml",
            "status": [],
            "language": self.hard_language or settings.LANGUAGE,
        }
        self.headers.set_defaults(defaults)
        if "title" not in self.headers:
            if "standalone" in self.headers.status:
                # Standalone documents carry their own <title>.
                self.process_content()
                self.headers.title = self.soup.html.head.title.string
            else:
                self.headers.title = ""
        if "description" not in self.headers:
            self.process_content()
            if "abstract" in self.headers:
                self.headers.description = self.headers.abstract
            elif "standalone" in self.headers.status:
                self.headers.description = generate_description(
                    str(self.soup.body).decode("UTF-8"))
            else:
                self.headers.description = generate_description(
                    self.__unicode__())

    if pygments is not None:
        if "FORMATTER" in settings:
            class MyHtmlFormatter(settings.FORMATTER):
                pass
        else:
            class MyHtmlFormatter(HtmlFormatter):
                """HtmlFormatter that wraps every source line in its
                own <li> (HIGHLIGHT_OL) or <span> element."""

                def __init__(self, hl_lines=None):
                    super(Article.MyHtmlFormatter,
                          self).__init__(encoding='UTF-8',
                                         classprefix="s_",
                                         hl_lines=hl_lines)

                def wrap(self, inner, outfile):
                    if settings.HIGHLIGHT_OL:
                        yield (0, '<ol class="highlight">')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                # Pass non-source chunks through.
                                # BUGFIX: this used to "yield t, value"
                                # — two undefined names (NameError).
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<li class="hll"><code>' + l +
                                       '</code></li>')
                            else:
                                yield (c,
                                       '<li><code>' + l + '</code></li>')
                        yield (0, '</ol>')
                    else:
                        yield (0, '<pre class="highlight"><code>')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                # BUGFIX: was "yield t, value", see above.
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<span class="line hll">' + l +
                                       '</span>')
                            else:
                                yield (c,
                                       '<span class="line">' + l + '</span>')
                        yield (0, '</code></pre>')

                def _highlight_lines(self, tokensource):
                    # Disable the base class' hl_lines post-processing;
                    # wrap() marks highlighted lines itself.
                    for tup in tokensource:
                        yield tup

    def _get_lexer(self, lang, text):
        """Return a Pygments lexer for *lang*, guessing from *text* as
        a fallback.

        Lexers can be given in the config. They are either directly
        Pygments Lexer instances or a list of lexer name and optional
        lexer config dict."""
        try:
            if lang in self.lexers:
                alias = self.lexers[lang]
                if isinstance(alias, Lexer):
                    return alias
                # The options dict is optional (was an IndexError when
                # a plain [lexer_name] alias was configured).
                opts = alias[1] if len(alias) > 1 else {}
                return get_lexer_by_name(alias[0], stripnl=False, **opts)
            return get_lexer_by_name(lang, stripnl=False)
        except pygments.util.ClassNotFound:
            logger.warning("Couldn't find lexer for %s" % lang)
            return guess_lexer(text)

    def process_content(self):
        """Change the raw content to a renderable state

        This contains syntax highlighting but not URI scheme resolving.
        The latter is done in self.save(). This function works
        exclusively upon self.soup."""
        if self.processed:
            return True
        elif "standalone" in self.headers.status:
            # Standalone documents are written out verbatim.
            self.processed = True
            return True
        # Markup cleaning: give value-less attributes their own name as
        # value so the soup serializes as parsable XHTML, see
        # http://code.davidjanes.com/blog/2009/02/05/turning-garbage-html-into-xml-parsable-xhtml-using-beautiful-soup/
        for item in self.soup.findAll():
            for index, (name, value) in enumerate(item.attrs):
                if value is None:  # idiom fix: was "value == None"
                    item.attrs[index] = (name, name)
        # Syntax highlighting of <pre data-lang="..."> blocks:
        pres = self.soup.findAll("pre", {"data-lang": re.compile(r".+")})
        if pygments is not None:
            for pre in pres:
                ArticleFormatter = Article.MyHtmlFormatter(
                    hl_lines=pre.get("data-hl", "").split(","))
                lang = pre["data-lang"]
                text = _unescape(pre.renderContents())
                lexer = self._get_lexer(lang, text)
                result = pygments.highlight(text, lexer, ArticleFormatter)
                highlighted = BeautifulSoup(result, fromEncoding="utf-8")
                if settings.HIGHLIGHT_OL:
                    # Carry the original <pre> attributes over to the
                    # generated <ol>.
                    for at, val in pre.attrs:
                        if at == "class":
                            highlighted.ol[at] += u" " + val
                        else:
                            highlighted.ol[at] = val
                    pre.replaceWith(highlighted.ol)
                else:
                    for at, val in pre.attrs:
                        if at == "data-lang":
                            highlighted.pre.code[at] = val
                        elif at == "class":
                            highlighted.pre[at] += u" " + val
                        else:
                            highlighted.pre[at] = val
                    pre.replaceWith(highlighted.pre)
        self.processed = True

    def save(self, **ctx):
        """Save the article to a file

        If it's a standalone, save it directly. Else send the context
        to the corresponding template. In order to recognize the "id:"
        URI scheme, the parameter **ctx must contain the value
        "articles", against whose content the URI is checked."""
        dr = ""
        if "draft" in self.headers.status:
            dr = "*DRAFT* "
        logger.debug(dr + self.url.get())
        if "draft" in self.headers.status and not settings.DEBUG:
            raise ValueError("Can't save drafts")
        if "noindex" not in self.headers.get("robots", ""):
            # Feed the Dublin-Core headers plus the rendered text into
            # the search index. (The comprehension used to reuse "x"
            # as its loop variable, shadowing the accumulator — a
            # Python 2 scope-leak hazard.)
            dc = u" ".join("%s %s" % (k, v)
                           for k, v in self.headers.get_dc().iteritems())
            template_engine.add_to_index(self.url,
                                         dc + u" " + self.__unicode__(),
                                         self.headers.language)
        if "standalone" in self.headers.status:
            template_engine.write_to(self.url.get_path(),
                                     self.__unicode__())
            return
        if "articles" in ctx:
            # resolve the "id:" pseudo-scheme
            for a in self.soup.findAll("a", href=re.compile(r"^id:")):
                a['href'] = _get_by_id(
                    a['href'][3:],
                    ctx['articles']).url.copy().switch_language(
                        self.headers.language).get()
            # resolve links to Requires and isRequiredBy
            # TODO: Do we need multiple Requires?
            if 'Requires' in self.headers:
                self.headers.Requires = _get_by_id(self.headers.Requires,
                                                   ctx['articles'])
            if 'IsRequiredBy' in self.headers:
                self.headers.IsRequiredBy = _get_by_id(
                    self.headers.IsRequiredBy, ctx['articles'])
            if 'translation' in self.headers:
                self.headers.translation = _get_by_id(
                    self.headers.translation, ctx['articles'])
        for protocol, url_scheme in settings.PROTOCOLS.iteritems():
            # Resolve all pseudo-schemes on href=, src= and data=
            # attributes (previously three copy-pasted loops).
            for attr in ('href', 'src', 'data'):
                matcher = re.compile(u"^%s:" % protocol)
                for tag in self.soup.findAll(**{attr: matcher}):
                    rest = tag[attr][len(protocol) + 1:]
                    if callable(url_scheme):
                        tag[attr] = url_scheme(rest)
                    else:
                        tag[attr] = url_scheme % rest
                    if tag.get('class', False):
                        tag['class'] += " protocol_%s" % protocol
                    else:
                        tag['class'] = "protocol_%s" % protocol
        template_engine.render_article(self, **ctx)

    @staticmethod
    def _sort_key(headers):
        """Sortable "date_ID" key shared by __hash__ and __cmp__."""
        # NOTE(review): "%Y-%m-%dT%H:%m:%s" repeats the month and uses
        # the non-portable "%s" — it was probably meant to be
        # "%H:%M:%S". Kept as-is because externally built keys (see
        # __cmp__'s basestring branch) may rely on this exact format.
        return headers.date.strftime("%Y-%m-%dT%H:%m:%s") + "_" + headers.ID

    def __unicode__(self):
        # work around bug in BeautifulSoup
        return str(self.soup).decode('UTF-8')

    def __hash__(self):
        s = hashlib.sha224(self._sort_key(self.headers)).hexdigest()
        return int(s, 16)

    def __repr__(self):
        return '<Article "%s">' % self.url.get()

    def __cmp__(self, other):
        """Compare articles by date first, ID second.

        Strings compare against the _sort_key() format. The operands
        are swapped in cmp() so articles sort newest-first.
        """
        s = self._sort_key(self.headers)
        if isinstance(other, basestring):
            o = other
        else:
            o = self._sort_key(other.headers)
        return cmp(o, s)
class Article(object):
    """A single article or blog post.

    NOTE(review): this file appears to contain two near-identical
    definitions of ``Article``; the one defined later wins at import
    time — confirm and drop the duplicate.
    """

    # Lexer aliases: maps a config name to either a Pygments Lexer
    # instance or a [lexer_name, lexer_options_dict] list (the options
    # dict is optional).
    lexers = {
        'php-inline': ['php', {
            "startinline": True
        }],
    }

    def __init__(self, path):
        """Initialize with path to article source.

        path -- source file path relative to "_articles/" (a leading
                "/" is stripped).
        """
        path = path.lstrip("/")
        self.lexers.update(settings.get('LEXERS', {}))
        self.processed = False
        self.category = os.path.dirname(path).strip("/")
        self.url = Url(settings.get("ARTICLE_PATH", "") + path)
        # Exactly one recognized language extension (e.g. ".en") pins
        # the article's language; otherwise it stays None.
        l = filter(lambda s: s in settings.languages,
                   self.url.get_extensions())
        self.hard_language = None
        if len(l) == 1:
            self.hard_language = l[0]
        self.url.fix_language()
        # "with" closes the handle even when read()/split() raises;
        # the previous open()/close() pair leaked it on error.
        with open("_articles/%s" % path, 'r') as f:
            head, content = f.read().replace("\r\n", "\n").split("\n\n", 1)
        self.headers = ArticleHeaders(head)
        self.raw_content = content.decode("utf-8")
        self.soup = BeautifulSoup(self.raw_content, fromEncoding="utf-8")
        self.complete_headers()
        self.process_content()

    def is_live(self):
        """Check meta info to see, if this article is live."""
        if self.headers.available and self.headers.available < settings.now:
            return False
        if self.headers.issued and self.headers.issued > settings.now:
            return False
        if self.headers.valid and self.headers.valid < settings.now:
            return False
        if "exclude" in self.headers.status:
            return False
        if "draft" in self.headers.get("status", []) and not settings.DEBUG:
            return False
        return True

    def complete_headers(self):
        """Set default headers, that are missing."""
        defaults = {
            "ID": str(self.url),
            "date": settings.now,
            "type": "Text",
            "format": "application/xhtml+xml",
            "status": [],
            "language": self.hard_language or settings.LANGUAGE,
        }
        self.headers.set_defaults(defaults)
        if "title" not in self.headers:
            if "standalone" in self.headers.status:
                # Standalone documents carry their own <title>.
                self.process_content()
                self.headers.title = self.soup.html.head.title.string
            else:
                self.headers.title = ""
        if "description" not in self.headers:
            self.process_content()
            if "abstract" in self.headers:
                self.headers.description = self.headers.abstract
            elif "standalone" in self.headers.status:
                self.headers.description = generate_description(
                    str(self.soup.body).decode("UTF-8"))
            else:
                self.headers.description = generate_description(
                    self.__unicode__())

    if pygments is not None:
        if "FORMATTER" in settings:
            class MyHtmlFormatter(settings.FORMATTER):
                pass
        else:
            class MyHtmlFormatter(HtmlFormatter):
                """HtmlFormatter that wraps every source line in its
                own <li> (HIGHLIGHT_OL) or <span> element."""

                def __init__(self, hl_lines=None):
                    super(Article.MyHtmlFormatter,
                          self).__init__(encoding='UTF-8',
                                         classprefix="s_",
                                         hl_lines=hl_lines)

                def wrap(self, inner, outfile):
                    if settings.HIGHLIGHT_OL:
                        yield (0, '<ol class="highlight">')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                # Pass non-source chunks through.
                                # BUGFIX: this used to "yield t, value"
                                # — two undefined names (NameError).
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<li class="hll"><code>' + l +
                                       '</code></li>')
                            else:
                                yield (c,
                                       '<li><code>' + l + '</code></li>')
                        yield (0, '</ol>')
                    else:
                        yield (0, '<pre class="highlight"><code>')
                        for i, (c, l) in enumerate(inner):
                            if c != 1:
                                # BUGFIX: was "yield t, value", see above.
                                yield c, l
                                continue
                            if i + 1 in self.hl_lines:
                                yield (c, '<span class="line hll">' + l +
                                       '</span>')
                            else:
                                yield (c,
                                       '<span class="line">' + l + '</span>')
                        yield (0, '</code></pre>')

                def _highlight_lines(self, tokensource):
                    # Disable the base class' hl_lines post-processing;
                    # wrap() marks highlighted lines itself.
                    for tup in tokensource:
                        yield tup

    def _get_lexer(self, lang, text):
        """Return a Pygments lexer for *lang*, guessing from *text* as
        a fallback.

        Lexers can be given in the config. They are either directly
        Pygments Lexer instances or a list of lexer name and optional
        lexer config dict."""
        try:
            if lang in self.lexers:
                alias = self.lexers[lang]
                if isinstance(alias, Lexer):
                    return alias
                # The options dict is optional (was an IndexError when
                # a plain [lexer_name] alias was configured).
                opts = alias[1] if len(alias) > 1 else {}
                return get_lexer_by_name(alias[0], stripnl=False, **opts)
            return get_lexer_by_name(lang, stripnl=False)
        except pygments.util.ClassNotFound:
            logger.warning("Couldn't find lexer for %s" % lang)
            return guess_lexer(text)

    def process_content(self):
        """Change the raw content to a renderable state

        This contains syntax highlighting but not URI scheme resolving.
        The latter is done in self.save(). This function works
        exclusively upon self.soup."""
        if self.processed:
            return True
        elif "standalone" in self.headers.status:
            # Standalone documents are written out verbatim.
            self.processed = True
            return True
        # Markup cleaning: give value-less attributes their own name as
        # value so the soup serializes as parsable XHTML, see
        # http://code.davidjanes.com/blog/2009/02/05/turning-garbage-html-into-xml-parsable-xhtml-using-beautiful-soup/
        for item in self.soup.findAll():
            for index, (name, value) in enumerate(item.attrs):
                if value is None:  # idiom fix: was "value == None"
                    item.attrs[index] = (name, name)
        # Syntax highlighting of <pre data-lang="..."> blocks:
        pres = self.soup.findAll("pre", {"data-lang": re.compile(r".+")})
        if pygments is not None:
            for pre in pres:
                ArticleFormatter = Article.MyHtmlFormatter(
                    hl_lines=pre.get("data-hl", "").split(","))
                lang = pre["data-lang"]
                text = _unescape(pre.renderContents())
                lexer = self._get_lexer(lang, text)
                result = pygments.highlight(text, lexer, ArticleFormatter)
                highlighted = BeautifulSoup(result, fromEncoding="utf-8")
                if settings.HIGHLIGHT_OL:
                    # Carry the original <pre> attributes over to the
                    # generated <ol>.
                    for at, val in pre.attrs:
                        if at == "class":
                            highlighted.ol[at] += u" " + val
                        else:
                            highlighted.ol[at] = val
                    pre.replaceWith(highlighted.ol)
                else:
                    for at, val in pre.attrs:
                        if at == "data-lang":
                            highlighted.pre.code[at] = val
                        elif at == "class":
                            highlighted.pre[at] += u" " + val
                        else:
                            highlighted.pre[at] = val
                    pre.replaceWith(highlighted.pre)
        self.processed = True

    def save(self, **ctx):
        """Save the article to a file

        If it's a standalone, save it directly. Else send the context
        to the corresponding template. In order to recognize the "id:"
        URI scheme, the parameter **ctx must contain the value
        "articles", against whose content the URI is checked."""
        dr = ""
        if "draft" in self.headers.status:
            dr = "*DRAFT* "
        logger.debug(dr + self.url.get())
        if "draft" in self.headers.status and not settings.DEBUG:
            raise ValueError("Can't save drafts")
        if "noindex" not in self.headers.get("robots", ""):
            # Feed the Dublin-Core headers plus the rendered text into
            # the search index. (The comprehension used to reuse "x"
            # as its loop variable, shadowing the accumulator — a
            # Python 2 scope-leak hazard.)
            dc = u" ".join("%s %s" % (k, v)
                           for k, v in self.headers.get_dc().iteritems())
            template_engine.add_to_index(self.url,
                                         dc + u" " + self.__unicode__(),
                                         self.headers.language)
        if "standalone" in self.headers.status:
            template_engine.write_to(self.url.get_path(),
                                     self.__unicode__())
            return
        if "articles" in ctx:
            # resolve the "id:" pseudo-scheme
            for a in self.soup.findAll("a", href=re.compile(r"^id:")):
                a['href'] = _get_by_id(
                    a['href'][3:],
                    ctx['articles']).url.copy().switch_language(
                        self.headers.language).get()
            # resolve links to Requires and isRequiredBy
            # TODO: Do we need multiple Requires?
            if 'Requires' in self.headers:
                self.headers.Requires = _get_by_id(self.headers.Requires,
                                                   ctx['articles'])
            if 'IsRequiredBy' in self.headers:
                self.headers.IsRequiredBy = _get_by_id(
                    self.headers.IsRequiredBy, ctx['articles'])
            if 'translation' in self.headers:
                self.headers.translation = _get_by_id(
                    self.headers.translation, ctx['articles'])
        for protocol, url_scheme in settings.PROTOCOLS.iteritems():
            # Resolve all pseudo-schemes on href=, src= and data=
            # attributes (previously three copy-pasted loops).
            for attr in ('href', 'src', 'data'):
                matcher = re.compile(u"^%s:" % protocol)
                for tag in self.soup.findAll(**{attr: matcher}):
                    rest = tag[attr][len(protocol) + 1:]
                    if callable(url_scheme):
                        tag[attr] = url_scheme(rest)
                    else:
                        tag[attr] = url_scheme % rest
                    if tag.get('class', False):
                        tag['class'] += " protocol_%s" % protocol
                    else:
                        tag['class'] = "protocol_%s" % protocol
        template_engine.render_article(self, **ctx)

    @staticmethod
    def _sort_key(headers):
        """Sortable "date_ID" key shared by __hash__ and __cmp__."""
        # NOTE(review): "%Y-%m-%dT%H:%m:%s" repeats the month and uses
        # the non-portable "%s" — it was probably meant to be
        # "%H:%M:%S". Kept as-is because externally built keys (see
        # __cmp__'s basestring branch) may rely on this exact format.
        return headers.date.strftime("%Y-%m-%dT%H:%m:%s") + "_" + headers.ID

    def __unicode__(self):
        # work around bug in BeautifulSoup
        return str(self.soup).decode('UTF-8')

    def __hash__(self):
        s = hashlib.sha224(self._sort_key(self.headers)).hexdigest()
        return int(s, 16)

    def __repr__(self):
        return '<Article "%s">' % self.url.get()

    def __cmp__(self, other):
        """Compare articles by date first, ID second.

        Strings compare against the _sort_key() format. The operands
        are swapped in cmp() so articles sort newest-first.
        """
        s = self._sort_key(self.headers)
        if isinstance(other, basestring):
            o = other
        else:
            o = self._sort_key(other.headers)
        return cmp(o, s)