def shutdown(self):
    """Write memoized key-value pairs to disk.

    Pickles ``self.memoize`` into the file ``info`` inside
    ``self.cache_dir``.  I/O and pickling errors are logged as warnings
    instead of being raised.
    """
    target = join(self.cache_dir, 'info')
    try:
        with io.open(target, 'wb') as handle:
            pickle.dump(self.memoize, handle, pickle.HIGHEST_PROTOCOL)
    except (IOError, pickle.PickleError) as err:
        log.warn('%s: %s' % (err.__class__.__name__, err))
def transform(self, text, entry, *args):
    """Wrap acronyms found in ``text`` in ``<abbr title="...">`` tags.

    :param text: markup to process
    :param entry: entry being rendered; only ``entry.filename`` is read,
        for the error message
    :param args: optional keys; when given, only items of
        ``self.acronyms`` whose key equals one of them are considered
    :returns: the processed markup, or ``text`` unchanged when the combined
        pattern does not compile or the markup cannot be parsed
    """
    acros = self.acronyms
    if len(args) > 0:
        acros = dict(
            filter(lambda k: any(k[0] == v for v in args), acros.items()))

    try:
        abbr = re.compile(
            r'\b(%s)\b' % '|'.join(pat.pattern for pat in acros))
    except re.error as e:
        log.warn("acronyms: %s", e.args[0])
        # Without a usable pattern there is nothing to substitute; falling
        # through would reference the unbound name ``abbr`` below.
        return text

    def repl(match):
        # look the matched text up directly first, then fall back to
        # testing every pattern key against it
        abbr = match.group(0)
        desc = acros.get(abbr, None)
        if desc is None:
            for pat in acros:
                if pat.match(abbr):
                    desc = acros.get(pat)
                    break
        return '<abbr title="%s">%s</abbr>' % (desc, abbr)

    try:
        return ''.join(Acrynomify(text, abbr, repl).result)
    except HTMLParseError:
        log.exception('could not acronymize ' + entry.filename)
        return text
def transform(self, content, entry, *args):
    """Run ``content`` through ``Summarizer`` and return the joined result.

    The limit passed to ``Summarizer`` is taken from
    ``entry.summarize.maxwords``, then from the first filter argument,
    and finally defaults to 100.  ``self.options`` is set to
    ``entry.summarize`` (or an empty dict when the entry has none).

    :returns: the summarized markup, or ``content`` unchanged when the
        markup cannot be parsed
    """
    self.options = getattr(entry, 'summarize', {})

    maxwords = 100
    try:
        maxwords = int(entry.summarize.maxwords)
    except (AttributeError, KeyError, ValueError):
        try:
            maxwords = int(args[0])
        except (ValueError, IndexError) as err:
            # a missing argument is fine, an unparsable one is worth a hint
            if type(err).__name__ == 'ValueError':
                log.warn('Summarize: invalid maxwords argument %r', args[0])

    try:
        summarizer = Summarizer(content, self.path + entry.permalink,
                                self.link, self.mode, maxwords)
        return ''.join(summarizer.result)
    except HTMLParseError as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.msg,
                                   entry.filename))
        return content
def transform(self, content, entry, *args):
    """Feed ``content`` and the filter arguments to ``Text`` and join its
    result.

    :returns: the joined result, or ``content`` unchanged (with a warning
        naming ``entry.filename``) when the markup cannot be parsed
    """
    try:
        pieces = Text(content, args).result
        rendered = ''.join(pieces)
    except HTMLParseError as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.msg,
                                   entry.filename))
        return content
    return rendered
def pingback(src, dest, dryrun=False):
    """Makes a pingback request to dest on behalf of src, i.e. effectively
    saying to dest that "the page at src is linking to you".

    The pingback server is taken from the ``X-Pingback`` response header
    or, failing that, from a ``<link rel="pingback">`` element within the
    first 512 KiB of the response body.

    :param src: URL of the linking page
    :param dest: URL of the page that is linked to
    :param dryrun: when true, only print what would be pinged
    :raises AcrylamidException: on an XML-RPC protocol error
    """

    def search_link(content):
        match = re.search(b'<link rel="pingback" href="([^"]+)" ?/?>', content)
        return match and match.group(1)

    try:
        r = head(dest)
    except (URLError, HTTPError):
        # destination not reachable: nothing to ping
        return

    try:
        server_url = r.info().get('X-Pingback', '') or search_link(r.read(512 * 1024))
        if server_url:
            # A URL scraped from the body is a byte string; on Python 3 it
            # must be text before it is parsed, printed or handed to
            # ServerProxy (on Python 2 bytes *is* str, so this is a no-op).
            if not isinstance(server_url, str):
                server_url = server_url.decode('utf-8', 'replace')

            print("Pingback", blue(urlparse(server_url).netloc), end='')
            print("from", green(''.join(urlparse(src)[1:3])) + ".")

            if not dryrun:
                server = xmlrpc.client.ServerProxy(server_url)
                server.pingback.ping(src, dest)
    except xmlrpc.client.Fault as e:
        # same namespace as ServerProxy/ProtocolError used in this block
        log.warn("XML-RPC fault: %d (%s)", e.faultCode, e.faultString)
    except xmlrpc.client.ProtocolError as e:
        raise AcrylamidException(e.args[0])
def default(self):
    """Return the hyphenation callable for the configured language.

    ``build`` is called with ``self.conf['lang']`` (underscores replaced
    by dashes).  When no pattern is available the problem is logged and a
    fallback is returned that yields the word as a single-item list.
    """
    language = self.conf['lang'].replace('_', '-')
    try:
        # build default hyphenate_word using conf's lang (if available)
        return build(language)
    except HyphenPatternNotFound as err:
        log.warn(err.args[0])

    def unhyphenated(x):
        return [x]

    return unhyphenated
def transform(self, content, entry, *args):
    """Run ``content`` through ``Introducer`` and return the joined result.

    Options are the union of ``Introduction.defaults`` and the ``intro_``
    settings from the configuration, updated with ``entry.intro`` when the
    entry defines it.  The paragraph limit comes from the
    ``maxparagraphs`` option, then from the first filter argument, and
    defaults to 1.

    :returns: the extracted markup, or ``content`` unchanged when
        extraction fails for any reason (the failure is logged with a
        traceback)
    """
    options = helpers.union(Introduction.defaults, self.conf.fetch('intro_'))

    try:
        options.update(entry.intro)
    except AttributeError:
        pass

    try:
        maxparagraphs = int(options.get('maxparagraphs') or args[0])
    except (IndexError, ValueError) as ex:
        if isinstance(ex, ValueError):
            log.warn('Introduction: invalid maxparagraphs argument %r',
                     options.get('maxparagraphs') or args[0])
        maxparagraphs = 1

    try:
        return ''.join(
            Introducer(content, maxparagraphs,
                       self.env.path + entry.permalink, options).result)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed by a best-effort filter
        log.exception('could not extract intro from ' + entry.filename)
        return content
def discover(directories, index, filterfunc=lambda filename: True):
    """Import and initialize modules from `directories` list.

    :param directories: list of directories
    :param index: index function, called with every module that was
        imported (or already present in ``sys.modules``)
    :param filterfunc: predicate receiving the full path of a ``.py``
        file; only files for which it returns true are imported
    """

    def find(directories, filterfunc):
        """Discover and yield python modules (aka files that endswith .py)
        if `filterfunc` returns True for that filename."""
        for directory in directories:
            for root, dirs, files in os.walk(directory):
                for fname in files:
                    if fname.endswith('.py') and filterfunc(join(root, fname)):
                        yield join(root, fname)

    for filename in find(directories, filterfunc):
        modname, ext = os.path.splitext(
            os.path.basename(rchop(filename, '/__init__.py')))
        fp, path, descr = imp.find_module(modname, directories)

        try:
            # modules below PATH are registered under the package name
            prefix = commonprefix((PATH, filename))
            if prefix:
                modname = 'acrylamid.'
                modname += rchop(filename[len(prefix):].replace('/', '.'), '.py')

            try:
                mod = sys.modules[modname]
            except KeyError:
                try:
                    mod = imp.load_module(modname, fp, path, descr)
                except (ImportError, SyntaxError, ValueError) as e:
                    log.warn('%r %s: %s', modname, e.__class__.__name__, e)
                    continue
        finally:
            # find_module hands back an open file (None for packages);
            # closing it is the caller's job
            if fp is not None:
                fp.close()

        index(mod)
def transform(self, text, entry, *args):
    """Wrap acronyms found in ``text`` in ``<abbr title="...">`` tags.

    :param text: markup to process
    :param entry: entry being rendered; only ``entry.filename`` is read,
        for the error message
    :param args: optional keys; when given, only items of
        ``self.acronyms`` whose key equals one of them are considered
    :returns: the processed markup, or ``text`` unchanged when the combined
        pattern does not compile or the markup cannot be parsed
    """
    acros = self.acronyms
    if len(args) > 0:
        acros = dict(
            filter(lambda k: any(k[0] == v for v in args), iteritems(acros)))

    try:
        abbr = re.compile(
            r'\b(%s)\b' % '|'.join(pat.pattern for pat in acros))
    except re.error as e:
        log.warn("acronyms: %s", e.args[0])
        # No pattern, no substitution -- and ``abbr`` would be unbound in
        # the Acrynomify call below.
        return text

    def repl(match):
        # direct lookup first, then try each pattern key against the match
        abbr = match.group(0)
        desc = acros.get(abbr, None)
        if desc is None:
            for pat in acros:
                if pat.match(abbr):
                    desc = acros.get(pat)
                    break
        return '<abbr title="%s">%s</abbr>' % (desc, abbr)

    try:
        return ''.join(Acrynomify(text, abbr, repl).result)
    except HTMLParseError:
        log.exception('could not acronymize ' + entry.filename)
        return text
def tweet(entry, conf, dryrun=False):
    """Send a tweet with the title, link and tags from an entry. The first
    time you need to authorize Acrylamid but than it works without any
    interaction.

    :param entry: entry providing ``title``, ``permalink`` and ``tags``
    :param conf: configuration; ``conf['www_root']`` is joined with the
        entry's permalink
    :param dryrun: when true, print the tweet but do not send it
    """
    # NOTE(review): the application key/secret are embedded in the source.
    key = "6k00FRe6w4SZfqEzzzyZVA"
    secret = "fzRfQcqQX4gcZziyLeoI5wSbnFb7GGj2oEh10hnjPUo"

    creds = os.path.expanduser('~/.twitter_oauth')
    if not os.path.exists(creds):
        # first run: interactive authorization, token is stored in `creds`
        twitter.oauth_dance("Acrylamid", key, secret, creds)

    oauth_token, oauth_token_secret = twitter.read_token_file(creds)
    t = twitter.Twitter(
        auth=twitter.OAuth(oauth_token, oauth_token_secret, key, secret))

    tweet = u"New Blog Entry: {0} {1} {2}".format(
        entry.title,
        helpers.joinurl(conf['www_root'], entry.permalink),
        ' '.join([u'#' + helpers.safeslug(tag) for tag in entry.tags]))

    print(' ', bold(blue("tweet ")), end='')
    print('\n'.join(wrap(tweet.encode('utf8'), subsequent_indent=' ' * 13)))

    if not dryrun:
        try:
            t.statuses.update(status=tweet.encode('utf8'))
        except twitter.api.TwitterError as e:
            try:
                log.warn("%s" % json.loads(e.response_data)['error'])
            except (ValueError, TypeError, KeyError):
                # KeyError: valid JSON that carries no 'error' member must
                # not turn a failed tweet into a crash
                log.warn("Twitter: something went wrong...")
def date(self):
    """Parse the ``date`` property into a ``Date`` object.

    Without a ``date`` property the file's modification time is used (and
    a warning is logged).  Otherwise the configured ``date_format`` is
    tried first, followed by a list of common fallback formats; runs of
    spaces in the value are collapsed beforehand.

    :raises AcrylamidException: if no format matches
    """
    # alternate formats from pelican.utils, thank you!
    # https://github.com/ametaireau/pelican/blob/master/pelican/utils.py
    fallbacks = (
        '%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M',
        '%Y-%m-%d', '%Y/%m/%d',
        '%d-%m-%Y', '%Y-%d-%m',  # Weird ones
        '%d/%m/%Y', '%d.%m.%Y',
        '%d.%m.%Y %H:%M', '%Y-%m-%d %H:%M:%S',
    )

    if 'date' not in self.props:
        log.warn("using mtime from %r" % self.filename)
        return Date.fromtimestamp(self.mtime)

    string = re.sub(' +', ' ', self.props['date'])
    candidates = [self.props['date_format']] + list(fallbacks)

    for candidate in candidates:
        try:
            return Date.strptime(string, candidate)
        except ValueError:
            continue

    raise AcrylamidException("%r is not a valid date" % string)
def set(self, path, key, value):
    """Save a key, value pair into a blob using pickle and moderate zlib
    compression (level 6). We simply save a dictionary containing all
    different intermediates (from every view) of an entry.

    Failures while writing are logged as warnings; the value is returned
    either way.

    :param path: path of this cache object
    :param key: dictionary key where we store the value
    :param value: a string we compress with zlib and afterwards save
    :returns: ``value``, unchanged
    """
    if exists(path):
        try:
            with io.open(path, 'rb') as fp:
                rv = pickle.load(fp)
        except (pickle.PickleError, IOError, EOFError):
            # EOFError: an empty/truncated blob is just another flavour of
            # a corrupt cache object -- start over
            cache.remove(path)
            rv = {}
        try:
            # compress *before* opening for writing: 'wb' truncates the
            # blob, so a failing compress must not destroy what is stored
            rv[key] = zlib.compress(value, 6)
            with io.open(path, 'wb') as fp:
                pickle.dump(rv, fp, pickle.HIGHEST_PROTOCOL)
        except (IOError, pickle.PickleError, zlib.error) as e:
            log.warn('%s: %s' % (e.__class__.__name__, e))
    else:
        try:
            blob = {key: zlib.compress(value, 6)}
            # write to a temporary file in the cache directory and rename
            # it into place afterwards
            fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
                                       dir=self.cache_dir)
            with io.open(fd, 'wb') as fp:
                pickle.dump(blob, fp, pickle.HIGHEST_PROTOCOL)
            os.rename(tmp, path)
            os.chmod(path, self.mode)
        except (IOError, OSError, pickle.PickleError, zlib.error) as e:
            log.warn('%s: %s' % (e.__class__.__name__, e))

    self.objects[path].add(key)
    return value
def tweet(entry, conf, dryrun=False):
    """Send a tweet with the title, link and tags from an entry. The first
    time you need to authorize Acrylamid but than it works without any
    interaction.

    :param entry: entry providing ``title``, ``permalink`` and ``tags``
    :param conf: configuration; ``conf['www_root']`` is joined with the
        entry's permalink
    :param dryrun: when true, print the tweet but do not send it
    """
    # NOTE(review): the application key/secret are embedded in the source.
    key = "6k00FRe6w4SZfqEzzzyZVA"
    secret = "fzRfQcqQX4gcZziyLeoI5wSbnFb7GGj2oEh10hnjPUo"

    creds = os.path.expanduser('~/.twitter_oauth')
    if not os.path.exists(creds):
        # first run: interactive authorization, token is stored in `creds`
        twitter.oauth_dance("Acrylamid", key, secret, creds)

    oauth_token, oauth_token_secret = twitter.read_token_file(creds)
    t = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_token_secret, key, secret))

    tweet = u"New Blog Entry: {0} {1} {2}".format(
        entry.title,
        helpers.joinurl(conf['www_root'], entry.permalink),
        ' '.join([u'#' + helpers.safeslug(tag) for tag in entry.tags]))

    print(' ', bold(blue("tweet ")), end='')
    print('\n'.join(wrap(tweet.encode('utf8'), subsequent_indent=' '*13)))

    if dryrun:
        return

    try:
        t.statuses.update(status=tweet.encode('utf8'))
    except twitter.api.TwitterError as e:
        try:
            log.warn("%s" % json.loads(e.response_data)['error'])
        except (ValueError, TypeError, KeyError):
            # KeyError: a JSON body without an 'error' member would
            # otherwise escape this handler
            log.warn("Twitter: something went wrong...")
def date(self):
    """Parse the ``date`` property and return it with ``self.tzinfo``
    attached.

    Without a ``date`` property the value of the parent class' ``date``
    is returned; for objects of type ``'entry'`` a warning is logged
    first.  Otherwise the configured ``date_format`` is tried, then a
    list of common fallback formats; runs of spaces are collapsed
    beforehand.

    :raises AcrylamidException: if no format matches
    """
    # alternate formats from pelican.utils, thank you!
    # https://github.com/ametaireau/pelican/blob/master/pelican/utils.py
    fallbacks = (
        '%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M',
        '%Y-%m-%d', '%Y/%m/%d',
        '%d-%m-%Y', '%Y-%d-%m',  # Weird ones
        '%d/%m/%Y', '%d.%m.%Y',
        '%d.%m.%Y %H:%M', '%Y-%m-%d %H:%M:%S',
    )

    if 'date' not in self.props:
        if self.type == 'entry':
            log.warn("using mtime from %r" % self.filename)
        return super(MetadataMixin, self).date

    string = re.sub(' +', ' ', self.props['date'])
    candidates = [self.props['date_format']] + list(fallbacks)

    for candidate in candidates:
        try:
            parsed = Date.strptime(string, candidate)
            return parsed.replace(tzinfo=self.tzinfo)
        except ValueError:
            continue

    raise AcrylamidException("%r is not a valid date" % string)
def normalize(conf):
    """Bring ``conf`` into its canonical shape, in place, and return it.

    * hints that ``metastyle`` is obsolete,
    * reduces a list of static directories to its first item,
    * drops the renamed ``Jinja2``/``Mako`` asset filters,
    * appends a trailing slash to the directory settings,
    * wraps string values of ``views_dir``/``filters_dir`` in a list.
    """
    # metastyle has been removed
    if 'metastyle' in conf:
        log.info('notice METASTYLE is no longer needed to determine the metadata format '
                 'and can be removed.')

    # deprecated since 0.8
    if isinstance(conf['static'], list):
        conf['static'] = conf['static'][0]
        log.warn("multiple static directories has been deprecated, "
                 "Acrylamid continues with '%s'.", conf['static'])

    # deprecated since 0.8
    for renamed in ('Jinja2', 'Mako'):
        try:
            conf['static_filter'].remove(renamed)
        except ValueError:
            continue
        log.warn("%s asset filter has been renamed to `Template` and is "
                 "included by default.", renamed)

    for option in ('content_dir', 'theme', 'static', 'output_dir'):
        if conf[option] is None or conf[option].endswith('/'):
            continue
        conf[option] += '/'

    for option in ('views_dir', 'filters_dir'):
        if isinstance(conf[option], compat.string_types):
            conf[option] = [conf[option]]

    return conf
def date(self):
    """Return the entry's date as a :class:`datetime.datetime`.

    Without a ``date`` property the file's modification time is used (and
    a warning is logged).  Otherwise the configured ``date_format`` is
    tried first, followed by a list of common fallback formats; runs of
    spaces in the value are collapsed beforehand.

    :raises AcrylamidException: if no format matches
    """
    # alternate formats from pelican.utils, thank you!
    # https://github.com/ametaireau/pelican/blob/master/pelican/utils.py
    fallbacks = (
        '%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M',
        '%Y-%m-%d', '%Y/%m/%d',
        '%d-%m-%Y', '%Y-%d-%m',  # Weird ones
        '%d/%m/%Y', '%d.%m.%Y',
        '%d.%m.%Y %H:%M', '%Y-%m-%d %H:%M:%S',
    )

    if 'date' not in self.props:
        log.warn("using mtime from %r" % self.filename)
        return datetime.fromtimestamp(self.mtime)

    string = re.sub(' +', ' ', self.props['date'])
    candidates = [self.props['date_format']] + list(fallbacks)

    for candidate in candidates:
        try:
            return datetime.strptime(string, candidate)
        except ValueError:
            continue

    raise AcrylamidException("%r is not a valid date" % string)
def normalize(conf):
    """Normalize deprecated and loosely typed settings of ``conf`` in place.

    Logs a notice for the obsolete ``metastyle`` key, collapses a list of
    static directories to the first one, removes the renamed ``Jinja2``
    and ``Mako`` asset filters, makes directory settings end with a slash
    and turns string ``views_dir``/``filters_dir`` values into
    one-element lists.

    :returns: the same ``conf`` object
    """
    directory_keys = ('content_dir', 'theme', 'static', 'output_dir')
    list_keys = ('views_dir', 'filters_dir')

    # metastyle has been removed
    if 'metastyle' in conf:
        log.info('notice METASTYLE is no longer needed to determine the metadata format '
                 'and can be removed.')

    # deprecated since 0.8
    if isinstance(conf['static'], list):
        conf['static'] = conf['static'][0]
        log.warn("multiple static directories has been deprecated, "
                 "Acrylamid continues with '%s'.", conf['static'])

    # deprecated since 0.8
    for fx in ('Jinja2', 'Mako'):
        try:
            conf['static_filter'].remove(fx)
        except ValueError:
            pass
        else:
            log.warn("%s asset filter has been renamed to `Template` and is "
                     "included by default.", fx)

    for key in directory_keys:
        current = conf[key]
        if current is not None and not current.endswith('/'):
            conf[key] += '/'

    for key in list_keys:
        if isinstance(conf[key], compat.string_types):
            conf[key] = [conf[key]]

    return conf
def write(self, src, dest, force=False, dryrun=False):
    """Run ``self.cmd`` on ``src`` and store its output at ``dest``.

    ``dest`` has ``self.ext`` replaced by ``self.target``.  If ``dest``
    exists and is newer than ``src`` the file is skipped unless ``force``
    is set.  The command output is buffered in a temporary file inside the
    cache directory and handed to ``mkfile``.  When the command fails, a
    stale ``dest`` is removed and a warning is logged.

    :param src: source file, appended to the command line
    :param dest: destination path
    :param force: passed on to ``mkfile``; also disables the mtime check
    :param dryrun: passed on to ``mkfile``
    """
    dest = dest.replace(self.ext, self.target)
    if not force and isfile(dest) and getmtime(dest) > getmtime(src):
        return event.skip(dest)

    if isinstance(self.cmd, basestring):
        self.cmd = [self.cmd, ]

    tt = time.time()
    fd, path = mkstemp(dir=core.cache.cache_dir)

    try:
        res = helpers.system(self.cmd + [src])
    except (OSError, AcrylamidException) as e:
        # the descriptor is only wrapped (and thereby closed) on success,
        # so release it here to avoid leaking one per failed command
        os.close(fd)
        if isfile(dest):
            os.unlink(dest)
        log.warn('%s: %s' % (e.__class__.__name__, e.args[0]))
    else:
        with os.fdopen(fd, 'w') as fp:
            fp.write(res)

        with io.open(path, 'rb') as fp:
            mkfile(fp, dest, ctime=time.time()-tt, force=force, dryrun=dryrun)
    finally:
        os.unlink(path)
def run(self):
    """Process queued tasks forever.

    Each item fetched from ``self.tasks`` is a ``(func, args, kwargs)``
    triple.  An exception raised by the callable is logged as a warning;
    the task is marked as done afterwards in either case.
    """
    while True:
        job = self.tasks.get()
        func, args, kargs = job
        try:
            func(*args, **kargs)
        except Exception as err:
            log.warn('%s: %s' % (err.__class__.__name__, unicode(err)))
        self.tasks.task_done()
def init(self, conf, env, *args):
    """Load the replacement rules from ``conf.replace_rules`` into
    ``self._db``.

    When the configuration has no such attribute a warning is logged and
    an empty mapping is used.
    """
    try:
        rules = conf.replace_rules
    except AttributeError:
        log.warn(
            'No configuration named REPLACE_RULES found. Replace filter has nothing to do.'
        )
        rules = dict()
    self._db = rules
def transform(self, content, entry):
    """Render ``content`` as a template of ``self.jinja2_env``.

    The template sees ``conf``, ``env`` and ``entry``.  On a template or
    rendering error a warning naming ``entry.filename`` is logged and
    ``content`` is returned unchanged.
    """
    context = dict(conf=self.conf, env=self.env, entry=entry)
    try:
        template = self.jinja2_env.from_string(content)
        return template.render(**context)
    except (TemplateError, AcrylamidException, OSError, TypeError) as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.args[0],
                                   entry.filename))
        return content
def transform(self, content, entry):
    """Render ``content`` as a ``Template`` (caching disabled, UTF-8
    input).

    The template sees ``conf``, ``env``, ``entry`` and everything in
    ``self.filters``.  On error a warning naming ``entry.filename`` is
    logged and ``content`` is returned unchanged.
    """
    try:
        template = Template(content, cache_enabled=False,
                            input_encoding='utf-8')
        rendered = template.render(conf=self.conf, env=self.env,
                                   entry=entry, **self.filters)
    except (MakoException, AcrylamidException) as err:
        log.warn('%s: %s in %r' % (err.__class__.__name__, err.args[0],
                                   entry.filename))
        return content
    return rendered
def transform(self, content, entry):
    """Render ``self.macros`` followed by ``content`` as a template of
    ``self.jinja2_env``.

    The template sees ``conf``, ``env`` and ``entry``.  On error a warning
    naming ``entry.filename`` is logged and ``content`` is returned
    unchanged (without the macros).
    """
    try:
        source = self.macros + content
        template = self.jinja2_env.from_string(source)
        rendered = template.render(conf=self.conf, env=self.env, entry=entry)
    except (TemplateError, AcrylamidException) as err:
        log.warn('%s: %s in %r' % (err.__class__.__name__, err.args[0],
                                   entry.filename))
        return content
    return rendered
def transform(self, content, entry):
    """Render ``content`` as a ``Template`` (caching disabled, UTF-8
    input).

    The template sees ``conf``, ``env`` and ``entry``.  On error a warning
    naming ``entry.filename`` is logged and ``content`` is returned
    unchanged.
    """
    context = dict(conf=self.conf, env=self.env, entry=entry)
    try:
        template = Template(content, cache_enabled=False,
                            input_encoding='utf-8')
        return template.render(**context)
    except (MakoException, AcrylamidException, OSError, TypeError) as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.args[0],
                                   entry.filename))
        return content
def generate(self, conf, env, data):
    """Render every item of ``data[self.type]`` and yield ``(html, path)``.

    The output path comes from the entry's ``permalink`` property if it
    has one, otherwise from ``self.path`` expanded for the entry.  An
    entry is skipped when its output exists and neither configuration,
    environment, template, entry nor its references were modified.
    Resources of entries having a ``copy`` property are written next to
    the output via ``mkfile`` (namespace ``resource``).

    :raises SystemExit: when two entries map to the same output path
    """
    pathes, entrylist = set(), data[self.type]
    unmodified = not env.modified and not conf.modified

    for i, entry in enumerate(entrylist):

        if entry.hasproperty('permalink'):
            path = joinurl(conf['output_dir'], entry.permalink)
        else:
            path = joinurl(conf['output_dir'], expand(self.path, entry))

        if isfile(path) and path in pathes:
            # a second entry produced the same path in this run
            try:
                os.remove(path)
            finally:
                other = [e.filename for e in entrylist
                         if e is not entry and e.permalink == entry.permalink][0]
                log.error("title collision %s caused by %s and %s",
                          entry.permalink, entry.filename, other)
                raise SystemExit

        pathes.add(path)
        next, prev = self.next(entrylist, i), self.prev(entrylist, i)

        # per-entry template
        tt = env.engine.fromfile(env, entry.props.get('layout', self.template))

        if all([isfile(path), unmodified, not tt.modified, not entry.modified,
                not modified(*references(entry))]):
            event.skip(self.name, path)
        else:
            html = tt.render(conf=conf, entry=entry, env=union(
                env, entrylist=[entry], type=self.__class__.__name__.lower(),
                prev=prev, next=next, route=expand(self.path, entry)))
            yield html, path

        # check if any resources need to be moved
        if entry.hasproperty('copy'):
            for res_src in entry.resources:
                res_dest = join(dirname(path), basename(res_src))
                # Note, presence of res_src check in FileReader.getresources
                if isfile(res_dest) and getmtime(res_dest) > getmtime(res_src):
                    event.skip(self.name, res_dest)
                    continue
                try:
                    # ``with`` so the source handle is closed again; the
                    # original left one open file per copied resource
                    with io.open(res_src, 'rb') as fp:
                        # use mkfile rather than yield so different ns can
                        # be specified (and filtered by sitemap)
                        mkfile(fp, res_dest, ns='resource',
                               force=env.options.force,
                               dryrun=env.options.dryrun)
                except IOError as e:
                    log.warn("Failed to copy resource '%s' whilst processing '%s' (%s)"
                             % (res_src, entry.filename, e.strerror))
def transform(self, text, entry, *args):
    """Rewrite link targets in ``text`` relative to the entry's permalink.

    Targets that start with ``/`` or contain a scheme (``://`` directly
    before the first slash) are left alone; everything else is joined
    onto ``entry.permalink``.

    :returns: the rewritten markup, or ``text`` unchanged if processing
        fails (a warning naming ``entry.filename`` is logged)
    """

    def relatively(part):
        if part.startswith('/') or part.find('://') == part.find('/') - 1:
            return part
        return joinurl(entry.permalink, part)

    try:
        return ''.join(Href(text, relatively).result)
    except Exception as e:
        # The bare ``except:`` never bound ``e``, so the handler itself
        # died with a NameError.  Not every exception carries ``msg``.
        log.warn('%s: %s in %s' % (e.__class__.__name__,
                                   getattr(e, 'msg', e), entry.filename))
        return text
def initialize(self, func):
    """Run ``self.init`` once and return the callable to use.

    :param func: returned as is when initialization succeeded or was
        already done
    :returns: ``func``, or -- when ``init`` raises ImportError and
        ``self.env.options.ignore`` is set -- a pass-through callable that
        returns its second argument; in that case ``cls.transform`` is
        replaced by the same kind of pass-through
    :raises AcrylamidException: when ``init`` raises ImportError and
        errors are not ignored (the traceback is printed to stdout first)
    """
    if self.initialized:
        return func

    try:
        self.init(self.conf, self.env)
        self.initialized = True
    except ImportError as err:
        if not self.env.options.ignore:
            traceback.print_exc(file=sys.stdout)
            raise AcrylamidException('ImportError: %s' % err.args[0])

        log.warn(err.args[0])
        # ``cls`` is not defined in this block; presumably it comes from
        # an enclosing scope -- TODO confirm
        setattr(cls, 'transform', lambda cls, x, y, *z: x)
        self.initialized = True
        return lambda cls, x, y, *z: x

    return func
def transform(self, content, entry, *args):
    """Run ``content`` through ``Summarizer`` and return the joined result.

    The limit passed to ``Summarizer`` is the first filter argument, or
    100 when it is missing or not an integer.

    :returns: the summarized markup, or ``content`` unchanged when the
        markup cannot be parsed
    """
    maxwords = 100
    try:
        maxwords = int(args[0])
    except (ValueError, IndexError) as err:
        # a missing argument is fine, an unparsable one is worth a hint
        if type(err).__name__ == 'ValueError':
            log.warn('Summarize: invalid maxwords argument %r', args[0])

    try:
        summarizer = Summarizer(content, self.path+entry.permalink,
                                self.link, self.mode, maxwords)
        return ''.join(summarizer.result)
    except HTMLParseError as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.msg,
                                   entry.filename))
        return content
def transform(self, text, entry, *args):
    """Rewrite link targets in ``text`` to absolute URLs.

    Targets starting with ``/`` get ``self.conf.www_root`` prepended,
    targets with a scheme (``://`` directly before the first slash) are
    left alone, everything else is joined onto ``entry.permalink`` and
    prefixed with ``www_root``.

    :returns: the rewritten markup, or ``text`` unchanged if processing
        fails (a warning naming ``entry.filename`` is logged)
    """

    def absolutify(part):
        if part.startswith('/'):
            return self.conf.www_root + part
        if part.find('://') == part.find('/') - 1:
            return part
        return self.conf.www_root + joinurl(entry.permalink, part)

    try:
        return ''.join(Href(text, absolutify).result)
    except Exception as e:
        # The bare ``except:`` never bound ``e``, so the handler itself
        # died with a NameError.  Not every exception carries ``msg``.
        log.warn('%s: %s in %s' % (e.__class__.__name__,
                                   getattr(e, 'msg', e), entry.filename))
        return text
def transform(self, content, entry, *args):
    """Run ``content`` through ``Introducer`` and return the joined result.

    The paragraph limit is taken from ``entry.intro.maxparagraphs``, then
    from the first filter argument, and defaults to 1.  ``Introducer``
    additionally receives ``self.intro_link`` and the entry's path.

    :returns: the extracted markup, or ``content`` unchanged when the
        markup cannot be parsed
    """
    maxparagraphs = 1
    try:
        maxparagraphs = int(entry.intro.maxparagraphs)
    except (AttributeError, KeyError, ValueError):
        try:
            maxparagraphs = int(args[0])
        except (ValueError, IndexError) as err:
            if type(err).__name__ == "ValueError":
                log.warn("Introduction: invalid maxparagraphs argument %r",
                         args[0])

    try:
        introducer = Introducer(content, maxparagraphs, self.intro_link,
                                self.path + entry.permalink)
        return "".join(introducer.result)
    except HTMLParseError as err:
        log.warn("%s: %s in %s" % (err.__class__.__name__, err.msg,
                                   entry.filename))
        return content
def transform(self, content, entry, *args):
    """Run ``content`` through ``Introducer`` and return the joined result.

    The paragraph limit is taken from ``entry.intro.maxparagraphs``, then
    from the first filter argument, and defaults to 1.

    :returns: the extracted markup, or ``content`` unchanged when the
        markup cannot be parsed
    """
    maxparagraphs = 1
    try:
        maxparagraphs = int(entry.intro.maxparagraphs)
    except (AttributeError, KeyError, ValueError):
        try:
            maxparagraphs = int(args[0])
        except (ValueError, IndexError) as err:
            if type(err).__name__ == 'ValueError':
                log.warn('Introduction: invalid maxparagraphs argument %r',
                         args[0])

    try:
        pieces = Introducer(content, maxparagraphs).result
        return ''.join(pieces)
    except HTMLParseError as err:
        log.warn('%s: %s in %s' % (err.__class__.__name__, err.msg,
                                   entry.filename))
        return content
def run(conf, env, options):
    """Subcommand: deploy -- run the shell command specified in
    DEPLOYMENT[task] using Popen. Each string value from :doc:`conf.py` is
    added to the execution environment. Every argument after ``acrylamid
    deploy task ARG1 ARG2`` is appended to cmd.

    Without a task the names of all configured tasks are listed and the
    process exits with status 0.

    :raises AcrylamidException: if the task is not configured
    """
    if options.task is None:
        for task in conf.get('deployment', {}).keys():
            # explicit write instead of the Python-2-only ``print >>``
            # statement; same output, works on either interpreter
            sys.stdout.write('%s\n' % task)
        sys.exit(0)

    task, args = options.task, options.args
    cmd = conf.get('deployment', {}).get(task, None)

    if not cmd:
        raise AcrylamidException('no tasks named %r in conf.py' % task)

    # apply ARG1 ARG2 ... and -v --long-args to the command, e.g.:
    # $> acrylamid deploy task arg1 -b --foo
    cmd += ' ' + ' '.join(args)

    if '%s' in cmd:
        log.warn("'%s' syntax is deprecated, use $OUTPUT_DIR variable.")
        cmd = cmd.replace('%s', '$OUTPUT_DIR')

    # os.environ itself is updated so that the child inherits the settings
    env = os.environ
    env.update(dict([(k.upper(), v) for k, v in conf.items()
                     if isinstance(v, basestring)]))

    log.info('execute %s', cmd)
    # NOTE: the command comes from the user's own conf.py and is meant to
    # be interpreted by the shell
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)

    # relay the child's output unbuffered until it has exited and the pipe
    # is drained
    while True:
        output = p.stdout.read(1)
        if not output and p.poll() is not None:
            break
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
def shutdown(self):
    """Persist the memoize store and prune the cache directory.

    1. ``self.memoize`` is pickled to ``<cache_dir>/info`` and registered
       in ``self.tracked`` so it survives the cleanup.
    2. Every file reported by ``self._list_dir()`` that is not tracked is
       deleted.
    3. Every tracked cache file is rewritten without the keys that are
       not tracked for it; a file that cannot be re-pickled is removed.
    """
    # save memoized items to disk
    try:
        info = join(self.cache_dir, 'info')
        self.tracked[info] = set(self.memoize.keys())
        with io.open(info, 'wb') as handle:
            pickle.dump(self.memoize, handle, pickle.HIGHEST_PROTOCOL)
    except (IOError, pickle.PickleError) as err:
        log.warn('%s: %s' % (err.__class__.__name__, err))

    # first we search for cache files from entries that have vanished
    abandoned = set(self._list_dir()) - set(self.tracked.keys())
    for stale in abandoned:
        os.remove(stale)

    # next we clean the cache files itself
    for path, keys in self.tracked.iteritems():
        try:
            with io.open(path, 'rb') as handle:
                obj = pickle.load(handle)
                found = set(obj.keys())
        except (IOError, pickle.PickleError):
            obj, found = {}, set()

        try:
            for key in found - set(keys):
                obj.pop(key)
            with io.open(path, 'wb') as handle:
                pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
        except pickle.PickleError:
            try:
                os.remove(path)
            except OSError as err:
                log.warn('OSError: %s' % err)
        except IOError:
            pass
def init(self, conf, env):
    """Prepare the reStructuredText filter and load its extensions.

    Modules returned by ``discover`` for ``conf['filters_dir']`` plus this
    file's directory are imported and their ``register(roles,
    directives)`` hook is called.  A failing extension is reported
    (traceback on stdout plus a warning) and skipped.

    :raises ImportError: if docutils is not available
    """
    self.extensions = {}
    self.ignore = env.options.ignore

    if not publish_parts or not directives:
        raise ImportError('reStructuredText: No module named docutils')

    # -- discover reStructuredText extensions --
    directories = conf['filters_dir'] + [os.path.dirname(__file__)]
    for filename in discover(directories, lambda path: path.startswith('rstx_')):
        modname, ext = os.path.splitext(os.path.basename(filename))
        fp, path, descr = imp.find_module(modname, directories)

        try:
            mod = imp.load_module(modname, fp, path, descr)
            mod.register(roles, directives)
        except Exception as e:
            # ``Exception`` already covers ImportError
            traceback.print_exc(file=sys.stdout)
            log.warn('%r %s: %s' % (filename, e.__class__.__name__, e))
        finally:
            # find_module returns an open file (None for packages) that
            # the caller has to close
            if fp is not None:
                fp.close()
def transform(self, content, entry, *args):
    """Run ``content`` through ``Summarizer`` and return the joined result.

    Options are the union of ``Summarize.defaults`` and the
    ``summarize_`` settings from the configuration, updated with
    ``entry.summarize`` when the entry defines it.  The limit comes from
    the ``maxwords`` option, then from the first filter argument, and
    defaults to 100.

    :returns: the summarized markup, or ``content`` unchanged when the
        markup cannot be parsed (logged with a traceback)
    """
    options = helpers.union(Summarize.defaults,
                            self.conf.fetch("summarize_"))
    try:
        options.update(entry.summarize)
    except AttributeError:
        pass

    maxwords = 100
    try:
        maxwords = int(options.get("maxwords") or args[0])
    except (IndexError, ValueError) as ex:
        if isinstance(ex, ValueError):
            log.warn("Summarize: invalid maxwords argument %r",
                     options.get("maxwords") or args[0])

    try:
        summarizer = Summarizer(content, maxwords,
                                self.env.path + entry.permalink, options)
        return "".join(summarizer.result)
    except HTMLParseError:
        log.exception("could not summarize " + entry.filename)
        return content
def shutdown(self):
    """Save the memoize store, then remove abandoned cache data.

    ``self.memoize`` is pickled to ``<cache_dir>/info`` (and tracked, so
    the cleanup leaves it alone).  Files listed by ``self._list_dir()``
    but not tracked are deleted; tracked files are rewritten with only
    the keys tracked for them.  A file that cannot be re-pickled is
    removed instead.
    """
    # save memoized items to disk
    try:
        info_path = join(self.cache_dir, 'info')
        self.tracked[info_path] = set(self.memoize.keys())
        with io.open(info_path, 'wb') as out:
            pickle.dump(self.memoize, out, pickle.HIGHEST_PROTOCOL)
    except (IOError, pickle.PickleError) as exc:
        log.warn('%s: %s' % (exc.__class__.__name__, exc))

    # first we search for cache files from entries that have vanished
    known = set(self.tracked.keys())
    for leftover in set(self._list_dir()).difference(known):
        os.remove(leftover)

    # next we clean the cache files itself
    for path, keys in self.tracked.iteritems():
        try:
            with io.open(path, 'rb') as src:
                obj = pickle.load(src)
                found = set(obj.keys())
        except (IOError, pickle.PickleError):
            obj = {}
            found = set()

        try:
            for key in found.difference(set(keys)):
                obj.pop(key)
            with io.open(path, 'wb') as out:
                pickle.dump(obj, out, pickle.HIGHEST_PROTOCOL)
        except pickle.PickleError:
            try:
                os.remove(path)
            except OSError as exc:
                log.warn('OSError: %s' % exc)
        except IOError:
            pass
def init(self, conf, env):
    """Prepare the reStructuredText filter and register its extensions.

    Every ``rstx_*.py`` file next to this module is imported; the object
    returned by its ``makeExtension()`` is registered as a docutils
    directive under each name in the module's ``match``.  A failing
    extension is reported (traceback on stdout plus a warning) and
    skipped.

    :raises ImportError: if docutils is not available
    """
    self.extensions = {}
    self.ignore = env.options.ignore

    if not publish_parts or not directives:
        raise ImportError('reStructuredText: No module named docutils')

    # -- discover reStructuredText extensions --
    here = os.path.dirname(__file__)
    for mem in os.listdir(here):
        if not (mem.startswith('rstx_') and mem.endswith('.py')):
            continue
        try:
            mod = __import__(mem.replace('.py', ''))
            rstx = mod.makeExtension()
            # NOTE(review): a string ``match`` is replaced by the module
            # name rather than wrapped as ``[mod.match]`` -- looks
            # unintended, confirm before changing.
            if isinstance(mod.match, basestring):
                mod.match = [mod.__name__]
            for name in mod.match:
                directives.register_directive(name, rstx)
        except Exception as err:
            traceback.print_exc(file=sys.stdout)
            log.warn('%r %s: %s' % (mem, err.__class__.__name__, err))
def initialize(directories, conf, env):
    """Discover view modules and wire them up with the configured routes.

    ``conf.views`` maps a rule to a view description.  Each description is
    expanded into one ``(rule, item)`` pair per view name, with the name
    stored under ``item['name']``.  The pairs are handed to
    ``index_views`` through ``helpers.discover``; whatever is left in the
    list afterwards is reported as a view that could not be located.

    :param directories: directories to search; this module's own
        directory is appended to the list in place
    """
    global __views_list
    __views_list = []
    urlmap = []

    for rule, view in iteritems(conf.views):
        # accept the singular 'view' key as shorthand for a one-item list
        if 'views' not in view:
            view['views'] = [view.pop('view')]
        for name in view['views']:
            item = view.copy()
            item.pop('views')
            item['name'] = name
            urlmap.append((rule, item))

    # in-place extension: the caller's list grows as well
    directories += [os.path.dirname(__file__)]

    def not_this_module(path):
        return path.rpartition('.')[0] != __file__.rpartition('.')[0]

    indexer = partial(index_views, conf, env, urlmap)
    helpers.discover(directories, indexer, not_this_module)

    for rule, item in urlmap:
        log.warn("unable to locate '%s' view", item['name'])
def convert(data, fmt='markdown', pandoc=False):
    """Reconversion of HTML to Markdown or reStructuredText. Defaults to
    Markdown, but can be in fact every format pandoc supports. If pandoc
    is not available, try some specific conversion tools like html2text
    and html2rest.

    :param data: raw HTML content to convert
    :param fmt: format to reconvert to
    :param pandoc: try pandoc before the format specific tool
    :returns: tuple ``(content, format)``; ``(data, 'html')`` when no
        converter could be run
    """
    if fmt in ('Markdown', 'markdown', 'mkdown', 'md', 'mkd'):
        cmds = ['html2text']
        fmt = 'markdown'
    elif fmt in ('rst', 'restructuredtext', 'rest', 'reStructuredText'):
        cmds = ['html2rest']
        fmt = 'rst'
    else:
        cmds = []

    p = ['pandoc', '--normalize', '-f', 'html', '-t', fmt,
         '--strict', '--no-wrap', '--parse-raw']
    # pandoc goes first when requested (and always for rst), otherwise it
    # is the fallback behind the format specific tool
    if pandoc or fmt == 'rst':
        cmds.insert(0, p)
    else:
        cmds.append(p)

    if fmt == 'html':
        return data, 'html'

    # - item.find(foo).text returns None if no CDATA
    # - pandoc waits for input if a zero-length string is given
    # (equality, not identity: ``is ''`` depends on string interning)
    if data is None or data == '':
        return '', fmt

    for cmd in cmds:
        try:
            return system(cmd, stdin=data), fmt.lower()
        except AcrylamidException as e:
            log.warn(e.args[0])
        except OSError:
            # tool not installed: try the next one
            pass

    return data, 'html'
def transform(self, content, entry, *args):
    """Run ``content`` through ``Separator`` and return the joined result.

    The hyphenation callable is built for ``entry.lang`` when that differs
    from the configured language (falling back to a no-op that returns the
    word as a single-item list if no pattern exists); otherwise
    ``self.default`` is used.  The ``length`` passed on is the first
    filter argument, or 10 when it is missing or not an integer.

    :returns: the processed markup, or ``content`` unchanged when the
        markup cannot be parsed
    """
    if entry.lang != self.conf['lang']:
        try:
            hyphenate_word = build(entry.lang.replace('_', '-'))
        except HyphenPatternNotFound as err:
            log.warn(err.args[0])
            hyphenate_word = lambda x: [x]
    else:
        hyphenate_word = self.default

    length = 10
    try:
        length = int(args[0])
    except (ValueError, IndexError) as err:
        if type(err).__name__ == 'ValueError':
            log.warn('Hyphenate: invalid length argument %r', args[0])

    try:
        separator = Separator(content, hyphenate_word, length=length)
        return ''.join(separator.result)
    except HTMLParseError:
        log.exception('could not hyphenate ' + entry.filename)
        return content
def transform(self, content, entry, *args):
    """Run ``content`` through ``Separator`` and return the joined result.

    The hyphenation callable is built for ``entry.lang`` when that differs
    from the configured language (falling back to a no-op that returns the
    word as a single-item list if no pattern exists); otherwise
    ``self.default`` is used.  The ``length`` passed on is the first
    filter argument, or 10 when it is missing or not an integer.

    :returns: the processed markup, or ``content`` unchanged when
        processing fails for any reason (logged with a traceback)
    """
    if entry.lang != self.conf['lang']:
        try:
            hyphenate_word = build(entry.lang.replace('_', '-'))
        except HyphenPatternNotFound as e:
            log.warn(e.args[0])
            hyphenate_word = lambda x: [x]
    else:
        hyphenate_word = self.default

    try:
        length = int(args[0])
    except (ValueError, IndexError) as e:
        if isinstance(e, ValueError):
            log.warn('Hyphenate: invalid length argument %r', args[0])
        length = 10

    try:
        return ''.join(
            Separator(content, hyphenate_word, length=length).result)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still abort the build
        log.exception('could not hyphenate ' + entry.filename)
        return content
def set(self, path, key, value):
    """Save a key, value pair into a blob using pickle and moderate zlib
    compression (level 6). We simply save a dictionary containing all
    different intermediates (from every view) of an entry.

    The value is encoded as UTF-8 before compression.  Failures while
    writing are logged as warnings; the value is returned either way.

    :param path: path of this cache object, relative to ``self.cache_dir``
    :param key: dictionary key where we store the value
    :param value: a string we compress with zlib and afterwards save
    :returns: ``value``, unchanged
    """
    path = join(self.cache_dir, path)

    if exists(path):
        try:
            with io.open(path, 'rb') as fp:
                rv = pickle.load(fp)
        except (pickle.PickleError, IOError, EOFError):
            # EOFError: an empty/truncated blob is just another flavour of
            # a corrupt cache object -- start over
            self.remove(path)
            rv = {}
        try:
            # compress *before* opening for writing: 'wb' truncates the
            # blob, so a failing compress must not destroy what is stored
            rv[key] = zlib.compress(value.encode('utf-8'), 6)
            with io.open(path, 'wb') as fp:
                pickle.dump(rv, fp, pickle.HIGHEST_PROTOCOL)
        except (IOError, pickle.PickleError, zlib.error) as e:
            log.warn('%s: %s' % (e.__class__.__name__, e))
    else:
        try:
            blob = {key: zlib.compress(value.encode('utf-8'), 6)}
            # write to a temporary file in the cache directory and rename
            # it into place afterwards
            fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
                                       dir=self.cache_dir)
            with io.open(fd, 'wb') as fp:
                pickle.dump(blob, fp, pickle.HIGHEST_PROTOCOL)
            os.rename(tmp, path)
            os.chmod(path, self.mode)
        except (IOError, OSError, pickle.PickleError, zlib.error) as e:
            log.warn('%s: %s' % (e.__class__.__name__, e))

    return value
def init(self, conf, env):
    """Prepare the reStructuredText filter and load its extensions.

    Modules returned by ``discover`` for ``conf['filters_dir']`` plus this
    file's directory are imported and their ``register(roles,
    directives)`` hook is called.  A failing extension is reported
    (traceback on stdout plus a warning) and skipped.

    :raises ImportError: if docutils is older than 0.9 or not available
    """
    self.extensions = {}
    self.ignore = env.options.ignore

    # ``not v > (0, 9)`` also rejected 0.9 itself, contradicting the
    # message below; only versions below 0.9 are refused now
    if tuple(LooseVersion(version).version) < (0, 9):
        raise ImportError(u'docutils ≥ 0.9 required.')

    if not publish_parts or not directives:
        raise ImportError(u'reStructuredText: No module named docutils')

    # -- discover reStructuredText extensions --
    directories = conf['filters_dir'] + [os.path.dirname(__file__)]
    for filename in discover(directories, lambda path: path.startswith('rstx_')):
        modname, ext = os.path.splitext(os.path.basename(filename))
        fp, path, descr = imp.find_module(modname, directories)

        try:
            mod = imp.load_module(modname, fp, path, descr)
            mod.register(roles, directives)
        except Exception as e:
            # ``Exception`` already covers ImportError
            traceback.print_exc(file=sys.stdout)
            log.warn('%r %s: %s' % (filename, e.__class__.__name__, e))
        finally:
            # find_module returns an open file (None for packages) that
            # the caller has to close
            if fp is not None:
                fp.close()
def transform(self, content, entry, *args):
    """Run ``content`` through ``Summarizer`` and return the joined result.

    Options are the union of ``Summarize.defaults`` and the
    ``summarize_`` settings from the configuration, updated with
    ``entry.summarize`` when the entry defines it.  The limit comes from
    the ``maxwords`` option, then from the first filter argument, and
    defaults to 100.

    :returns: the summarized markup, or ``content`` unchanged when
        summarizing fails for any reason (logged with a traceback)
    """
    options = helpers.union(Summarize.defaults, self.conf.fetch('summarize_'))

    try:
        options.update(entry.summarize)
    except AttributeError:
        pass

    try:
        maxwords = int(options.get('maxwords') or args[0])
    except (IndexError, ValueError) as ex:
        if isinstance(ex, ValueError):
            log.warn('Summarize: invalid maxwords argument %r',
                     options.get('maxwords') or args[0])
        maxwords = 100

    try:
        return ''.join(
            Summarizer(content, maxwords,
                       self.env.path + entry.permalink, options).result)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still abort the build
        log.exception('could not summarize ' + entry.filename)
        return content
def initialize(conf, env):
    """Prepare the templating engine, locale, timezone and URL settings,
    then initialize filters and views with `conf` and `env`.

    Returns a request dict of the form ``{'conf': conf, 'env': env}``.
    """
    # the cache directory is optional
    cache.init(conf.get('cache_dir', None))

    # templating environment
    engine_cls = utils.import_object(conf['engine'])
    env.engine = engine_cls()
    env.engine.init(conf['layout_dir'], cache.cache_dir)
    env.engine.register('safeslug', helpers.safeslug)
    env.engine.register('tagify', lambda x: x)

    # locale: configured language first, then its alias, then system default
    requested = conf.get('lang', '')
    try:
        locale.setlocale(locale.LC_ALL, str(requested))
    except (locale.Error, TypeError):
        try:
            alias = locale.locale_alias[str(requested).lower()]
            locale.setlocale(locale.LC_ALL, alias)
        except (locale.Error, KeyError):
            try:
                locale.setlocale(locale.LC_ALL, '')
            except locale.Error:
                pass  # hope this makes Travis happy
            log.info('notice your OS does not support %s, fallback to %s',
                     requested, locale.getlocale()[0])

    active = locale.getlocale()[0]
    # getlocale() is (None, None) aka 'C' when nothing could be set
    conf['lang'] = 'en' if active is None else active[:2]

    if 'www_root' not in conf:
        log.warn('no `www_root` specified, using localhost:8000')
        conf['www_root'] = 'http://localhost:8000/'

    # local UTC offset in hours, spelled out for 2.6 compatibility
    delta = datetime.now() - datetime.utcnow()
    whole_seconds = delta.seconds + delta.days * 24 * 3600
    total_seconds = (delta.microseconds + whole_seconds * 10**6) / 10**6
    conf['tzinfo'] = readers.Timezone(round(total_seconds / 3600.0))

    # split www_root into http(s), host and path
    scheme, netloc, path, _query, _fragment = urlsplit(conf['www_root'])
    env['protocol'] = scheme
    env['netloc'] = netloc
    env['path'] = path

    # drop the trailing slash of www_root and path
    conf['www_root'] = conf['www_root'].rstrip('/')
    env['path'] = env['path'].rstrip('/')

    # the configured encoding must be known to Python
    try:
        codecs.lookup(conf['encoding'])
    except LookupError:
        raise AcrylamidException('no such encoding available: %r' % conf['encoding'])

    # a single directory given as string becomes a one-item list
    for option in ('filters_dir', 'views_dir'):
        if isinstance(conf[option], basestring):
            conf[option] = [conf[option]]

    lazy.enable()
    filters.initialize(conf["filters_dir"], conf, env,
                       exclude=conf["filters_ignore"],
                       include=conf["filters_include"])
    # this has weird side effects with jinja2, so disabled after filters
    lazy.disable()

    views.initialize(conf["views_dir"], conf, env)
    env['views'] = dict((v.view, v) for v in views.get_views())

    # default permalink formats, overridden by the entry/page views' paths
    entry_format, page_format = '/:year/:slug/', '/:slug/'
    for each in views.get_views():
        if each.view == 'entry':
            entry_format = each.path
        if each.view == 'page':
            page_format = each.path

    conf.setdefault('entry_permalink', entry_format)
    conf.setdefault('page_permalink', page_format)

    return {'conf': conf, 'env': env}
def pandocstyle(fileobj):
    """Parse the so called 'Title block' out of Pandoc-formatted documents.

    Provides very simple parsing so that Acrylamid won't choke on plain
    Pandoc documents.  ``%`` lines are assigned to title, author and date
    in that order; any other non-blank line is appended to the most recent
    item.  Parsing stops at the first blank line.

    See http://johnmacfarlane.net/pandoc/README.html#title-block

    Currently not implemented:

    - Formatting within title blocks
    - Man-page writer title block extensions

    :param fileobj: file-like object positioned at the start of the block;
                    ``fileobj.name`` is only used in messages
    :returns: tuple of (number of lines read, metadata dict)
    :raises AcrylamidException: if no title is given, or if another
        non-blank line follows after title, author and date were consumed
    """
    meta_pan_re = re.compile(r'^[ ]{0,3}%+\s*(?P<value>.*)')
    meta_pan_more_re = re.compile(r'^\s*(?P<value>.*)')
    meta_pan_authsplit = re.compile(r';+\s*')

    i, j = 0, 0
    meta, key = {}, None
    poss_keys = ['title', 'author', 'date']

    while True:
        line = fileobj.readline()
        i += 1

        if line.strip() == '':
            break  # blank line - done

        if j + 1 > len(poss_keys):
            raise AcrylamidException(
                "%r has too many items in the Pandoc title block." % fileobj.name)

        m1 = meta_pan_re.match(line)
        if m1:
            key = poss_keys[j]
            j += 1
            valstrip = m1.group('value').strip()
            if not valstrip:
                continue  # empty item, a following line may still fill it
            value = distinguish(valstrip)
            if key == 'author':
                value = value.strip(';')
                value = meta_pan_authsplit.split(value)
            meta.setdefault(key, []).append(value)
        else:
            m2 = meta_pan_more_re.match(line)
            if m2 and key:
                # Add another line to existing key
                value = m2.group('value').strip()
                if key == 'author':
                    value = value.strip(';')
                    value = meta_pan_authsplit.split(value)
                # the item may have been left empty on its '%' line, in
                # which case there is no list for it yet
                meta.setdefault(key, []).append(value)
            else:
                break  # no meta data - done

    if 'title' not in meta:
        raise AcrylamidException('No title given in %r' % fileobj.name)

    if len(meta['title']) > 1:
        meta['title'] = ' '.join(meta['title'])

    if 'author' in meta:
        # every author line was split into a list; flatten them
        meta['author'] = sum(meta['author'], [])
    else:
        log.warn('%s does not have an Author in the Pandoc title block.' % fileobj.name)

    # unwrap single values; iterate over a snapshot of the keys since the
    # dict is modified in the loop
    for name in list(meta):
        values = meta[name]
        if len(values) == 1:
            meta[name] = values[0]

    return i, meta
def system(cmd, stdin=None):
    """Run `cmd` via ``defaultsystem`` with ``shell=True`` and return its
    stripped output.

    If the call raises OSError or AcrylamidException, a warning is logged
    and the first argument of the exception is returned instead.
    """
    try:
        output = defaultsystem(cmd, stdin, shell=True)
        return output.strip()
    except (OSError, AcrylamidException) as err:
        kind = err.__class__.__name__
        reason = err.args[0]
        log.warn('%s: %s' % (kind, reason))
        return reason
def initialize(conf, env):
    """Set up cache, version info, templating engine, locale, timezone and
    URL settings, then initialize filters, views and webassets with `conf`
    and `env`.

    Returns a data dict of the form ``{'conf': conf, 'env': env}``.  Exits
    early via SystemExit when ``history.breaks`` reports a breaking change.
    """
    # the cache directory is optional
    cache.init(conf.get('cache_dir'))

    # a str subclass that additionally exposes .major and .minor
    numbers = LooseVersion(dist.version).version[:2]
    version_cls = type('Version', (str, ), dict(zip(['major', 'minor'], numbers)))
    env['version'] = version_cls(dist.version)

    # crawl through CHANGES.md and stop on breaking changes
    if history.breaks(env, cache.emptyrun):
        cache.shutdown()
        print("Detected version upgrade that might break your configuration. Run")
        print("Acrylamid a second time to get rid of this message and premature exit.")
        raise SystemExit

    # templating environment
    engine_cls = import_object(conf['engine'])
    env.engine = engine_cls(conf['theme'], cache.cache_dir)
    env.engine.register('safeslug', helpers.safeslug)
    env.engine.register('tagify', lambda x: x)

    # locale: configured language first, then its alias, then system default
    requested = conf.get('lang', '')
    try:
        locale.setlocale(locale.LC_ALL, str(requested))
    except (locale.Error, TypeError):
        try:
            alias = locale.locale_alias[str(requested).lower()]
            locale.setlocale(locale.LC_ALL, alias)
        except (locale.Error, KeyError):
            try:
                locale.setlocale(locale.LC_ALL, '')
            except locale.Error:
                pass  # hope this makes Travis happy
            log.info('notice your OS does not support %s, fallback to %s',
                     requested, locale.getlocale()[0])

    active = locale.getlocale()[0]
    # getlocale() is (None, None) aka 'C' when nothing could be set
    conf['lang'] = 'en' if active is None else active[:2]

    if 'www_root' not in conf:
        log.warn('no `www_root` specified, using localhost:8000')
        conf['www_root'] = 'http://localhost:8000/'

    # local UTC offset in hours
    delta = datetime.now() - datetime.utcnow()
    conf['tzinfo'] = readers.Timezone(round(total_seconds(delta) / 3600.0))

    # split www_root into http(s), host and path
    scheme, netloc, path, _query, _fragment = urlsplit(conf['www_root'])
    env['protocol'] = scheme
    env['netloc'] = netloc
    env['path'] = path

    # drop the trailing slash of www_root and path
    conf['www_root'] = conf['www_root'].rstrip('/')
    env['path'] = env['path'].rstrip('/')

    # a non-empty URL path is appended to the output directory
    if env['path']:
        conf['output_dir'] = conf['output_dir'] + env['path']

    lazy.enable()
    filters.initialize(conf["filters_dir"][:], conf, env)
    # this has weird side effects with jinja2, so disabled after filters
    lazy.disable()

    views.initialize(conf["views_dir"][:], conf, env)
    env.views = views.Views(each for each in views.get_views())

    # default permalink formats, overridden by the entry/page views' paths
    entry_format, page_format = '/:year/:slug/', '/:slug/'
    for each in views.get_views():
        if each.name == 'entry':
            entry_format = each.path
        if each.name == 'page':
            page_format = each.path

    conf.setdefault('entry_permalink', entry_format)
    conf.setdefault('page_permalink', page_format)

    # register webassets to theme engine, make webassets available as env.webassets
    assets.initialize(conf, env)

    return {'conf': conf, 'env': env}