def directory(cmgs, env, title=DIR_TITLE, formats=None):
    """
    Render an HTML index page listing several CMGroups and save it.

    Args:
        cmgs: iterable of CMGroup objects; ``cmg_id`` and ``name`` are read
            from each one, and each is passed to ``get_notes``.
        env: object whose ``results_path`` attribute names the directory
            that receives ``index.html``.
        title: page title handed to the template.
        formats: value exposed to the template as ``formats``; any falsy
            value is replaced by an empty list.
    """
    # One template row per CMGroup.
    rows = []
    for cmg in cmgs:
        rows.append({'cmg_id': cmg.cmg_id,
                     'name': cmg.name,
                     'notes': get_notes(cmg)})

    if not formats:
        formats = []

    template_vars = {'title': title, 'items': rows, 'formats': formats}
    out_path = pjoin(env.results_path, 'index.html')

    page = AshesEnv([TEMPLATES_DIR]).render('directory.html', template_vars)
    with open(out_path, 'w') as out_file:
        out_file.write(page)
def cids_to_html(cids, path, title='PubChem images', info=None, size=500):
    """
    Write an HTML page showing PubChem structure images plus CMGroup info.

    Args:
        cids: iterable of compound IDs; each is passed to ``pc_img`` to
            obtain the value stored under ``image``.
        path: destination file for the rendered HTML.
        title: page title handed to the template.
        info: optional CMGroup info; when truthy it is converted with
            ``info_to_context``, otherwise an empty list is used.
        size: image size, forwarded to ``pc_img`` and to the template.
    """
    # TODO: Options to add links to JSON, CSV, Excel files.
    # Something like: formats=['csv', 'json', 'excel']
    info_rows = info_to_context(info) if info else []

    image_rows = []
    for cid in cids:
        image_rows.append({'cid': cid, 'image': pc_img(cid, size=size)})

    template_vars = {
        'size': size,
        'title': title,
        'info': info_rows,
        'items': image_rows,
    }

    page = AshesEnv([TEMPLATES_DIR]).render('display_cids.html', template_vars)
    with open(path, 'w') as out_file:
        out_file.write(page)
class Site(object):
    """A site built from a directory of entries, themes and a config file.

    The usual lifecycle is ``process()``, which runs ``load`` ->
    ``validate`` -> ``render`` -> ``audit`` -> ``export``.  ``serve``
    regenerates and serves the output locally whenever watched files
    change, and ``publish`` pushes the output directory out with rsync.

    Optional per-site hooks live in ``custom.py`` under the input path as
    functions named ``chert_<hook_name>`` (e.g. ``chert_pre_load``); each
    is called with the ``Site`` instance.
    """
    _entry_type = Entry
    _entry_list_type = EntryList

    def __init__(self, input_path, **kw):
        """Set up paths, load the config and reset processing state.

        Args:
            input_path: root directory of the site sources.
            **kw: optional ``config_path``, ``entries_path``,
                ``themes_path``, ``uploads_path``, ``output_path`` and
                ``dev_mode``.  Unset paths default to a location under
                ``input_path``.

        Raises:
            TypeError: if any unrecognized keyword argument is passed.
        """
        # setting up paths
        self.paths = OMD()
        self._paths = OMD()  # for the raw input paths
        self.fal = ChertFAL(chlog)
        set_path = self._set_path
        set_path('input_path', input_path)
        set_path('config_path', kw.pop('config_path', None),
                 DEFAULT_CONFIG_FILENAME)
        set_path('entries_path', kw.pop('entries_path', None), 'entries')
        set_path('themes_path', kw.pop('themes_path', None), 'themes')
        set_path('uploads_path', kw.pop('uploads_path', None),
                 'uploads', required=False)
        set_path('output_path', kw.pop('output_path', None),
                 'site', required=False)
        self.reload_config()
        self.reset()
        self.dev_mode = kw.pop('dev_mode', False)
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw)
        chlog.debug('init site').success()
        return

    def reload_config(self, **kw):
        """(Re)read the config file and point ``theme_path`` at its theme."""
        # TODO: take optional kwarg
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects; consider yaml.safe_load if the config may come
        # from an untrusted source.
        self.config = yaml.load(self.fal.read(self.paths['config_path']))
        # set theme
        with chlog.debug('setting theme'):
            theme_name = self.get_config('theme', 'name')
            theme_path = pjoin(self.themes_path, theme_name)
            self._set_path('theme_path', theme_path)

    def reset(self):
        """Called on __init__ and on reload before processing. Does not
        reset paths, etc., just state mutated during processing"""
        self.entries = self._entry_list_type()
        self.draft_entries = self._entry_list_type()
        self.special_entries = self._entry_list_type()
        self._rebuild_tag_map()
        self.last_load = None
        self.md_converter = Markdown(extensions=MD_EXTENSIONS)
        self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS)
        self._load_feed_templates()
        return

    def _set_path(self, name, path, default_suffix=None, required=True):
        """Set a path.

        Args:
            name: name of attribute (e.g., input_path)
            path: the path or None
            default_suffix: if path is None, self.input_path +
                default_suffix is used. The input_path should already
                be set.
            required: raise an error if path does not exist

        Raises:
            ValueError: if neither ``path`` nor ``default_suffix`` is given.
            RuntimeError: if ``required`` and the resolved path is missing.
        """
        with chlog.debug('set {path_name} path to {path_val}',
                         path_name=name, path_val=path) as rec:
            self._paths[name] = path
            if path:
                self.paths[name] = abspath(path)
            elif default_suffix:
                self.paths[name] = pjoin(self.input_path, default_suffix)
            else:
                raise ValueError('no path or default set for %r' % name)
            if required:
                if not os.path.exists(self.paths[name]):
                    raise RuntimeError('expected existent %s path, not %r'
                                       % (name, self.paths[name]))
            rec.success('set {path_name} path to {path_val}',
                        path_val=self.paths[name])
        return

    def _load_feed_templates(self):
        """Load the Atom and RSS templates, preferring the theme's copies.

        If the theme directory has no file with the feed's filename, the
        copy under ``CUR_PATH`` is used instead.
        """
        default_atom_tmpl_path = pjoin(CUR_PATH, ATOM_FEED_FILENAME)
        atom_tmpl_path = pjoin(self.theme_path, ATOM_FEED_FILENAME)
        if not os.path.exists(atom_tmpl_path):
            atom_tmpl_path = default_atom_tmpl_path
        # TODO: defer opening to loading?
        self.atom_template = Template.from_path(atom_tmpl_path,
                                                name=ATOM_FEED_FILENAME)

        default_rss_tmpl_path = pjoin(CUR_PATH, RSS_FEED_FILENAME)
        rss_tmpl_path = pjoin(self.theme_path, RSS_FEED_FILENAME)
        if not os.path.exists(rss_tmpl_path):
            rss_tmpl_path = default_rss_tmpl_path
        # TODO: defer opening to loading?
        self.rss_template = Template.from_path(rss_tmpl_path,
                                               name=RSS_FEED_FILENAME)

    def get_config(self, section, key=None, default=_UNSET):
        """Look up a config section, or one key within a section.

        Args:
            section: top-level key of the config mapping.
            key: key inside the section; if None the whole section is
                returned.
            default: returned when the section or key is missing.

        Raises:
            KeyError: if the section or key is missing and no ``default``
                was supplied.
        """
        try:
            section_map = self.config[section]
        except KeyError:
            if default is _UNSET:
                raise
            return default
        if key is None:
            return section_map
        try:
            return section_map[key]
        except KeyError:
            if default is _UNSET:
                raise
            return default

    def get_site_info(self):
        """Build the ``site`` dict handed to templates at render time.

        Values come from the ``site``, ``dev`` and ``prod`` config sections
        with module-level defaults; the generation timestamps are taken
        from the current local time.
        """
        ret = {}
        ret['dev_mode'] = self.dev_mode
        refresh_secs = self.get_config('dev', 'autorefresh',
                                       DEFAULT_AUTOREFRESH) or False
        ret['dev_mode_refresh_seconds'] = refresh_secs

        site_config = self.get_config('site')
        ret['title'] = site_config.get('title', SITE_TITLE)
        # Previously this re-read 'title', so head_title could never be
        # set independently; it now falls back to the site title.
        ret['head_title'] = site_config.get('head_title', ret['title'])
        ret['tagline'] = site_config.get('tagline', '')
        ret['primary_links'] = self._get_links('site', 'primary_links')
        ret['secondary_links'] = self._get_links('site', 'secondary_links')
        ret['charset'] = 'UTF-8'  # not really overridable
        ret['lang_code'] = site_config.get('lang_code', 'en')
        ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT)
        ret['author_name'] = site_config.get('author', SITE_AUTHOR)
        ret['enable_analytics'] = site_config.get('enable_analytics', True)
        ret['analytics_code'] = self._get_analytics_code()

        prod_config = self.get_config('prod')
        ret['canonical_domain'] = prod_config.get(
            'canonical_domain', CANONICAL_DOMAIN).rstrip('/')
        ret['canonical_base_path'] = prod_config.get('canonical_base_path',
                                                     CANONICAL_BASE_PATH)
        # The base path always ends in '/', so filenames can be appended.
        if not ret['canonical_base_path'].endswith('/'):
            ret['canonical_base_path'] += '/'
        ret['canonical_url'] = (ret['canonical_domain']
                                + ret['canonical_base_path'])
        ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME
        ret['canonical_rss_feed_url'] = (ret['canonical_url']
                                         + RSS_FEED_FILENAME)
        ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME
        ret['canonical_atom_feed_url'] = (ret['canonical_url']
                                          + ATOM_FEED_FILENAME)

        now = datetime.now(LocalTZ)
        ret['last_generated'] = to_timestamp(now)
        ret['last_generated_utc'] = to_timestamp(now, to_utc=True)
        ret['export_html_ext'] = EXPORT_HTML_EXT
        ret['export_src_ext'] = EXPORT_SRC_EXT
        return ret

    def _get_analytics_code(self):
        """Return the validated ``site.analytics_code``, or '' if unusable.

        The code is unusable when it is unset, does not match
        ``_analytics_re``, or the matched ``code`` group is shorter than
        six characters.
        """
        with chlog.debug('set analytics code') as rec:
            code = self.get_config('site', 'analytics_code', None)
            if code is None:
                rec.failure('site.analytics_code not set in config.yaml')
                return ''
            match = _analytics_re.search(unicode(code))
            if not match:
                rec.failure('analytics code blank or invalid: {!r}', code)
                return ''
            code = match.group('code')
            if len(code) < 6:
                rec.failure('analytics code too short: {!r}', code)
                return ''
            rec.success('analytics code set to {!r}', code)
        return code

    def _get_links(self, group, name):
        """Return the configured link list with ``is_external`` set on each.

        A link is external when its ``href`` is non-empty and parses to a
        URL with a host.  The link dicts from the config are annotated in
        place; only the outer list is copied.
        """
        link_list = list(self.get_config(group, name, []))
        for link in link_list:
            if link['href'] and URL(link['href']).host:
                link['is_external'] = True
            else:
                link['is_external'] = False
        return link_list

    @property
    def input_path(self):
        return self.paths['input_path']

    @property
    def entries_path(self):
        return self.paths['entries_path']

    @property
    def themes_path(self):
        return self.paths['themes_path']

    @property
    def theme_path(self):
        return self.paths['theme_path']

    @property
    def uploads_path(self):
        return self.paths['uploads_path']

    @property
    def output_path(self):
        return self.paths['output_path']

    @property
    def all_entries(self):
        """Special, published and draft entries, concatenated in that order."""
        return (self.special_entries.entries
                + self.entries.entries
                + self.draft_entries.entries)

    def process(self):
        """Run the full load/validate/render/audit/export pipeline.

        If the site has been loaded before, the config is re-read and the
        processing state is reset first.
        """
        if self.last_load:
            self.reload_config()
            self.reset()
        self.load()
        self.validate()
        self.render()
        self.audit()
        self.export()

    def _load_custom_mod(self):
        """Import ``custom.py`` from the input path into ``self.custom_mod``.

        ``self.custom_mod`` is set to None when the file does not exist.
        """
        input_path = self.paths['input_path']
        custom_mod_path = pjoin(input_path, 'custom.py')
        if not os.path.exists(custom_mod_path):
            self.custom_mod = None
            return
        # site_name = os.path.split(input_path)[1]
        with chlog.debug('import site custom module'):
            mod_name = 'custom'
            self.custom_mod = imp.load_source(mod_name, custom_mod_path)

    def _call_custom_hook(self, hook_name):
        """Call ``chert_<hook_name>(self)`` from the custom module, if any.

        A missing module or a missing hook is recorded as a failure on the
        log record; it is not raised.
        """
        with chlog.debug('call custom {hook_name} hook',
                         hook_name=hook_name, reraise=False) as rec:
            if not self.custom_mod:
                # TODO: success or failure?
                rec.failure('no custom module loaded')
                # Stop here: falling through would look the hook up on
                # None and re-record this as a missing-hook failure.
                return
            try:
                hook_func = getattr(self.custom_mod, 'chert_' + hook_name)
            except AttributeError:
                rec.failure('no {} hook defined', hook_name)
                return
            hook_func(self)
        return

    @chlog.wrap('critical', 'load site')
    def load(self):
        """Load templates and every entry file found under the entries path.

        Entries that fail to load are logged and skipped.  Loaded entries
        are sorted into ``draft_entries``, ``special_entries`` or
        ``entries``, the tag map is rebuilt, and each published entry gets
        its ``next_entries`` / ``prev_entries`` neighbours.
        """
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        entry_paths = sorted(iter_find_files(entries_path, ENTRY_PATS))
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except Exception:
                    # Deliberately not a bare except: KeyboardInterrupt
                    # and SystemExit must still abort the load.
                    rec.exception('entry load error: {exc_message}')
                    continue
                else:
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        # i is the 1-based position of the entry in the sorted list.
        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')

    def _rebuild_tag_map(self):
        """Rebuild ``self.tag_map``: tag -> sorted entry list of that tag."""
        self.tag_map = {}
        for entry in self.entries:
            for tag in entry.tags:
                try:
                    self.tag_map[tag].append(entry)
                except KeyError:
                    self.tag_map[tag] = self._entry_list_type([entry],
                                                              tag=tag)
        for entry_list in self.tag_map.values():
            entry_list.sort()

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        """Check that no two published entries share an ``entry_root``.

        Raises:
            ValueError: if duplicate entry IDs are found.
        """
        self._call_custom_hook('pre_validate')
        dup_id_map = {}
        eid_map = OMD([(e.entry_root, e) for e in self.entries])
        for eid in eid_map:
            elist = eid_map.getlist(eid)
            if len(elist) > 1:
                dup_id_map[eid] = elist
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')
        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        """Slugify a header using the ``site.anchor_delim`` setting ('-')."""
        return slugify(header_text,
                       delim=self.get_config('site', 'anchor_delim', '-'))

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        """Render the content and full HTML of every entry, then the lists.

        Each entry gets ``content_md``, ``content_html``, ``content_ihtml``
        and ``entry_html``.  Afterwards the main entry list and every tag's
        entry list are rendered.
        """
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            if not string:
                return ''
            ret = mdc.convert(string)
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            if not string:
                return ''
            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain,
                                               entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            # Convert each part's Markdown into both HTML variants.
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=False),
                          'site': site_info}
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)

            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree,
                                  make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            # Second pass over the same template with 'inline' set.
            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(
                    content_ihtml, canonical_domain, entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)
            entry.content_ihtml = content_ihtml
            return

        def render_html(entry, with_links=False):
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=with_links),
                          'site': site_info}
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        with chlog.info('render entry html'):
            # Only published entries are rendered with links.
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for entry_list in self.tag_map.values():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Validation of rendered content, to be used for link checking.

        Currently this only calls the pre/post audit hooks.
        """
        # TODO: check for named HTML entities (e.g. &nbsp;) in feed xml
        # (these entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        self._call_custom_hook('pre_audit')
        self._call_custom_hook('post_audit')

    @chlog.wrap('critical', 'export site')
    def export(self):
        """Write the rendered site to the output path.

        Writes per-entry generated Markdown and JSON, the index and
        archive pages, the site-wide and per-tag feeds, copies the theme's
        subdirectories, and symlinks the uploads directory if it exists.
        """
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            # Entries whose root contains a directory part need it created.
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            # NOTE(review): the per-entry HTML write is commented out, so
            # html_output_path is currently unused -- confirm intended.
            # fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)

            # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        for entry_list in self.tag_map.values():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory. this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')

    def serve(self):
        """Regenerate and serve the site whenever watched files change.

        Host, port and base path come from the ``dev`` config section.
        Each batch yielded by ``_iter_changed_files`` triggers a full
        ``process()`` and a fresh serving thread.
        """
        dev_config = self.get_config('dev')
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        port = dev_config.get('server_port', int(DEV_SERVER_PORT))
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                # Serve only under base_url, stripping it before lookup.
                if not self.path.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                self.path = self.path[len(base_url):]
                if not self.path.startswith('/'):
                    self.path = '/' + self.path
                return SimpleHTTPRequestHandler.send_head(self)

        Handler.extensions_map.update({'.md': 'text/plain',
                                       '.json': 'application/json'})

        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        config_path = self.paths['config_path']
        entries_path = self.paths['entries_path']
        theme_path = self.paths['theme_path']
        output_path = self.paths['output_path']
        for changed in _iter_changed_files(entries_path, theme_path,
                                           config_path):
            if serving:
                print('Changed %s files, regenerating...' % len(changed))
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print('Serving from %s' % output_path)
            os.chdir(abspath(output_path))
            print('Serving at http://%s:%s%s' % (host, port, base_url))

            thread = Thread(target=server.serve_forever)
            thread.daemon = True
            thread.start()
            serving = True
            # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        """Push the output directory to the configured remote with rsync.

        Reads ``remote_host``, ``remote_user`` and ``remote_path`` (all
        required) plus optional ``rsync_cmd`` and ``rsync_flags`` from the
        ``prod`` config section.

        Returns:
            True if rsync exited successfully, False otherwise.
        """
        #self._load_custom_mod()
        #self._call_custom_hook('pre_publish')
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')

        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        # NOTE(review): assert is stripped under -O; consider raising.
        assert os.path.exists(local_site_path + 'index.html')

        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        remote_slug = "%s@%s:'%s'" % (remote_user,
                                      remote_host,
                                      shell_quote(remote_path))

        # NOTE(review): local_site_path and rsync_flags are interpolated
        # unquoted into a shell=True command -- confirm they are trusted.
        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd,
                                           rsync_flags,
                                           local_site_path,
                                           remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print('Executing %s' % full_rsync_cmd)
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            rsync_output = cpe.output
            print(rsync_output)
            log_rec.failure('publish failed: rsync got exit code'
                            ' {rsync_exit_code}')
            return False
        else:
            print(rsync_output)
            log_rec.success()
        return True
class Site(object): _entry_type = Entry _entry_list_type = EntryList def __init__(self, input_path, **kw): # setting up paths self.paths = OMD() self._paths = OMD() # for the raw input paths self.fal = ChertFAL(chlog) set_path = self._set_path set_path('input_path', input_path) set_path('config_path', kw.pop('config_path', None), DEFAULT_CONFIG_FILENAME) set_path('entries_path', kw.pop('entries_path', None), 'entries') set_path('themes_path', kw.pop('themes_path', None), 'themes') set_path('uploads_path', kw.pop('uploads_path', None), 'uploads', required=False) set_path('output_path', kw.pop('output_path', None), 'site', required=False) self.reload_config() self.reset() self.dev_mode = kw.pop('dev_mode', False) if kw: raise TypeError('unexpected keyword arguments: %r' % kw) chlog.debug('init site').success() return def reload_config(self, **kw): # TODO: take optional kwarg self.config = yaml.load(self.fal.read(self.paths['config_path'])) # set theme with chlog.debug('setting theme'): theme_name = self.get_config('theme', 'name') theme_path = pjoin(self.themes_path, theme_name) self._set_path('theme_path', theme_path) def reset(self): """Called on __init__ and on reload before processing. Does not reset paths, etc., just state mutated during processing""" self.entries = self._entry_list_type() self.draft_entries = self._entry_list_type() self.special_entries = self._entry_list_type() self._rebuild_tag_map() self.last_load = None self.md_converter = Markdown(extensions=MD_EXTENSIONS) self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS) self._load_feed_templates() return def _set_path(self, name, path, default_suffix=None, required=True): """Set a path. Args: name: name of attribute (e.g., input_path) path: the path or None default_suffix: if path is None, self.input_path + default_suffix is used. The input_path should already be set. 
required: raise an error if path does not exist """ with chlog.debug('set {path_name} path to {path_val}', path_name=name, path_val=path) as rec: self._paths[name] = path if path: self.paths[name] = abspath(path) elif default_suffix: self.paths[name] = pjoin(self.input_path, default_suffix) else: raise ValueError('no path or default set for %r' % name) if required: if not os.path.exists(self.paths[name]): raise RuntimeError('expected existent %s path, not %r' % (name, self.paths[name])) rec.success('set {path_name} path to {path_val}', path_val=self.paths[name]) return def _load_feed_templates(self): default_atom_tmpl_path = pjoin(CUR_PATH, ATOM_FEED_FILENAME) atom_tmpl_path = pjoin(self.theme_path, ATOM_FEED_FILENAME) if not os.path.exists(atom_tmpl_path): atom_tmpl_path = default_atom_tmpl_path # TODO: defer opening to loading? self.atom_template = Template.from_path(atom_tmpl_path, name=ATOM_FEED_FILENAME) default_rss_tmpl_path = pjoin(CUR_PATH, RSS_FEED_FILENAME) rss_tmpl_path = pjoin(self.theme_path, RSS_FEED_FILENAME) if not os.path.exists(rss_tmpl_path): rss_tmpl_path = default_rss_tmpl_path # TODO: defer opening to loading? 
self.rss_template = Template.from_path(rss_tmpl_path, name=RSS_FEED_FILENAME) def get_config(self, section, key=None, default=_UNSET): try: section_map = self.config[section] except KeyError: if default is _UNSET: raise return default if key is None: return section_map try: return section_map[key] except KeyError: if default is _UNSET: raise return default def get_site_info(self): ret = {} ret['dev_mode'] = self.dev_mode refresh_secs = self.get_config('dev', 'autorefresh', DEFAULT_AUTOREFRESH) or False ret['dev_mode_refresh_seconds'] = refresh_secs site_config = self.get_config('site') ret['title'] = site_config.get('title', SITE_TITLE) ret['head_title'] = site_config.get('title', ret['title']) ret['tagline'] = site_config.get('tagline', '') ret['primary_links'] = self._get_links('site', 'primary_links') ret['secondary_links'] = self._get_links('site', 'secondary_links') ret['charset'] = 'UTF-8' # not really overridable ret['lang_code'] = site_config.get('lang_code', 'en') ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT) ret['author_name'] = site_config.get('author', SITE_AUTHOR) ret['enable_analytics'] = site_config.get('enable_analytics', True) ret['analytics_code'] = self._get_analytics_code() prod_config = self.get_config('prod') ret['canonical_domain'] = prod_config.get('canonical_domain', CANONICAL_DOMAIN).rstrip('/') ret['canonical_base_path'] = prod_config.get('canonical_base_path', CANONICAL_BASE_PATH) if not ret['canonical_base_path'].endswith('/'): ret['canonical_base_path'] += '/' ret['canonical_url'] = ret['canonical_domain'] + ret[ 'canonical_base_path'] ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME ret['canonical_rss_feed_url'] = ret['canonical_url'] + RSS_FEED_FILENAME ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME ret['canonical_atom_feed_url'] = ret[ 'canonical_url'] + ATOM_FEED_FILENAME now = datetime.now(LocalTZ) ret['last_generated'] = to_timestamp(now) 
ret['last_generated_utc'] = to_timestamp(now, to_utc=True) ret['export_html_ext'] = EXPORT_HTML_EXT ret['export_src_ext'] = EXPORT_SRC_EXT return ret def _get_analytics_code(self): with chlog.debug('set analytics code') as rec: code = self.get_config('site', 'analytics_code', None) if code is None: rec.failure('site.analytics_code not set in config.yaml') return '' match = _analytics_re.search(unicode(code)) if not match: rec.failure('analytics code blank or invalid: {!r}', code) return '' code = match.group('code') if len(code) < 6: rec.failure('analytics code too short: {!r}', code) return '' rec.success('analytics code set to {!r}', code) return code def _get_links(self, group, name): link_list = list(self.get_config(group, name, [])) for link in link_list: if link['href'] and URL(link['href']).host: link['is_external'] = True else: link['is_external'] = False return link_list @property def input_path(self): return self.paths['input_path'] @property def entries_path(self): return self.paths['entries_path'] @property def themes_path(self): return self.paths['themes_path'] @property def theme_path(self): return self.paths['theme_path'] @property def uploads_path(self): return self.paths['uploads_path'] @property def output_path(self): return self.paths['output_path'] @property def all_entries(self): return (self.special_entries.entries + self.entries.entries + self.draft_entries.entries) def process(self): if self.last_load: self.reload_config() self.reset() self.load() self.validate() self.render() self.audit() self.export() def _load_custom_mod(self): input_path = self.paths['input_path'] custom_mod_path = pjoin(input_path, 'custom.py') if not os.path.exists(custom_mod_path): self.custom_mod = None return # site_name = os.path.split(input_path)[1] with chlog.debug('import site custom module'): mod_name = 'custom' self.custom_mod = imp.load_source(mod_name, custom_mod_path) def _call_custom_hook(self, hook_name): with chlog.debug('call custom {hook_name} hook', 
hook_name=hook_name, reraise=False) as rec: if not self.custom_mod: # TODO: success or failure? rec.failure('no custom module loaded') try: hook_func = getattr(self.custom_mod, 'chert_' + hook_name) except AttributeError: rec.failure('no {} hook defined', hook_name) return hook_func(self) return @chlog.wrap('critical', 'load site') def load(self): self.last_load = time.time() self._load_custom_mod() self._call_custom_hook('pre_load') self.html_renderer = AshesEnv(paths=[self.theme_path]) self.html_renderer.load_all() self.md_renderer = AshesEnv(paths=[self.theme_path], exts=['md'], keep_whitespace=False) self.md_renderer.autoescape_filter = '' self.md_renderer.load_all() entries_path = self.paths['entries_path'] entry_paths = [] for entry_path in iter_find_files(entries_path, ENTRY_PATS): entry_paths.append(entry_path) entry_paths.sort() for ep in entry_paths: with chlog.info('entry load') as rec: try: entry = self._entry_type.from_path(ep) rec['entry_title'] = entry.title rec['entry_length'] = round(entry.get_reading_time(), 1) except IOError: rec.exception('unopenable entry path: {}', ep) continue except: rec['entry_path'] = ep rec.exception( 'entry {entry_path} load error: {exc_message}') continue else: rec.success('entry loaded:' ' {entry_title} ({entry_length}m)') if entry.is_draft: self.draft_entries.append(entry) elif entry.is_special: self.special_entries.append(entry) else: self.entries.append(entry) # Sorting the EntryLists self.entries.sort() # sorting drafts/special pages doesn't do much self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path)) self.special_entries.sort() self._rebuild_tag_map() for i, entry in enumerate(self.entries, start=1): start_next = max(0, i - NEXT_ENTRY_COUNT) entry.next_entries = self.entries[start_next:i - 1][::-1] entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT] self._call_custom_hook('post_load') def _rebuild_tag_map(self): self.tag_map = {} for entry in self.entries: for tag in entry.tags: try: 
self.tag_map[tag].append(entry) except KeyError: self.tag_map[tag] = self._entry_list_type([entry], tag=tag) for tag, entry_list in self.tag_map.items(): entry_list.sort()
    # NOTE(review): the statement run above is the tail of a method whose
    # start lies before this chunk; it is left exactly as found.

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        """Check the loaded entries for duplicate IDs.

        Runs the 'pre_validate' custom hook, groups ``self.entries`` by
        ``entry_root``, and raises ``ValueError`` if any ``entry_root`` is
        shared by more than one entry. Runs the 'post_validate' custom
        hook only when no duplicates were found.
        """
        self._call_custom_hook('pre_validate')
        dup_id_map = {}
        # OMD is presumably an ordered multidict: getlist(eid) is used
        # below to fetch every entry stored under the same entry_root.
        eid_map = OMD([(e.entry_root, e) for e in self.entries])
        for eid in eid_map:
            elist = eid_map.getlist(eid)
            if len(elist) > 1:
                dup_id_map[eid] = elist
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')

        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        """Return a slug for *header_text*.

        The delimiter is read with get_config('site', 'anchor_delim', '-')
        (presumably '-' is the fallback when the option is unset). This
        method is passed to hypertext.add_toc() as ``make_anchor_id`` in
        render().
        """
        return slugify(header_text, delim=self.get_config('site', 'anchor_delim', '-'))

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        """Render every entry, then the feed and tag lists.

        Order of work: 'pre_render' hook; per-entry content (published,
        draft, then special entries); per-entry full HTML; the main entry
        list and each tag's entry list; 'post_render' hook. Results are
        stored on the entry objects (content_md, content_html,
        content_ihtml, entry_html) and, for each loaded part, under the
        'content_html' / 'content_ihtml' keys.
        """
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            # Falsy input short-circuits to an empty string.
            if not string:
                return ''
            ret = mdc.convert(string)
            # The converter is shared across calls, so reset it after
            # every conversion.
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            # Same as markdown2html, but uses the inline converter and
            # passes the result through hypertext.canonicalize_links()
            # together with the canonical domain and the entry filename.
            if not string:
                return ''
            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain,
                                               entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            # Renders one entry's content in three flavors: generated
            # markdown (content_md), page HTML (content_html) and inline
            # HTML (content_ihtml).
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                # reraise=False: an error while autosummarizing is handed
                # to the logger rather than propagated (presumably; this
                # depends on chlog's semantics).
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=False),
                'site': site_info
            }
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            # From here on tmpl_name refers to the content layout; it is
            # reused for the inline rendering further down.
            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree,
                                  make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            # Second pass over the same template with the 'inline' flag
            # set in the context.
            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(
                    content_ihtml, canonical_domain, entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                # NOTE(review): unlike the content_html pass, no
                # make_anchor_id is supplied here -- confirm intentional.
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)
            entry.content_ihtml = content_ihtml

            return

        def render_html(entry, with_links=False):
            # Renders the whole entry page with the entry layout and
            # stores it on entry.entry_html.
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=with_links),
                'site': site_info
            }
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        with chlog.info('render entry html'):
            # Only published entries are rendered with links.
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for tag, entry_list in self.tag_map.items():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Validation of rendered content, to be used for link checking.

        Currently this only calls the 'pre_audit' and 'post_audit'
        custom hooks.
        """
        # TODO: check for common HTML entities in feed xml (these
        # entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        self._call_custom_hook('pre_audit')
        self._call_custom_hook('post_audit')

    @chlog.wrap('critical', 'export site')
    def export(self):
        """Write the rendered site under ``self.paths['output_path']``.

        Steps, in order: 'pre_export' hook; create the output directory;
        write each entry (published, draft, special) as HTML, generated
        markdown and JSON; write the index and archive pages and the
        site-wide RSS/Atom feeds; write per-tag feeds and archive pages;
        copy every subdirectory of the theme path; refresh the 'uploads'
        symlink when an uploads directory exists; 'post_export' hook.
        """
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            # entry_root may contain a directory component; make sure
            # that directory exists under the output path first.
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)

            # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        # Each tag gets its own directory holding feeds and an archive
        # page; note the archive here is always named 'index.html'.
        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory. this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                # An existing symlink is removed and recreated so that it
                # points at the current uploads path.
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')

    def serve(self):
        """Serve the output directory over HTTP, regenerating on change.

        Host, port and base path are read from the 'dev' config section,
        falling back to DEV_SERVER_HOST, DEV_SERVER_PORT and
        DEV_SERVER_BASE_PATH. For every batch yielded by
        _iter_changed_files() the site is processed again and the server
        is (re)started in a daemon thread.
        """
        dev_config = self.get_config('dev')
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        port = dev_config.get('server_port', int(DEV_SERVER_PORT))
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                # Requests outside base_url get a 404; otherwise the
                # base_url prefix is stripped before the stock handler
                # resolves the path against the working directory.
                if not self.path.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                self.path = self.path[len(base_url):]
                if not self.path.startswith('/'):
                    self.path = '/' + self.path
                return SimpleHTTPRequestHandler.send_head(self)

        Handler.extensions_map.update({
            '.md': 'text/plain',
            '.json': 'application/json'
        })

        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        config_path = self.paths['config_path']
        entries_path = self.paths['entries_path']
        theme_path = self.paths['theme_path']
        output_path = self.paths['output_path']

        # NOTE(review): presumably _iter_changed_files yields once up
        # front and then once per detected change -- confirm against its
        # definition; `serving` is False only on the first pass.
        for changed in _iter_changed_files(entries_path, theme_path, config_path):
            if serving:
                print 'Changed %s files, regenerating...' % len(changed)
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print 'Serving from %s' % output_path
            # The handler serves files relative to the current directory.
            os.chdir(abspath(output_path))
            print 'Serving at http://%s:%s%s' % (host, port, base_url)

            thread = Thread(target=server.serve_forever)
            thread.daemon = True
            thread.start()
            if not serving:
                serving = True
        # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        """Upload the exported site with rsync.

        Reads 'rsync_cmd' and 'rsync_flags' (optional) and 'remote_host',
        'remote_user', 'remote_path' (required; a missing key raises
        KeyError) from the 'prod' config section, builds one shell
        command string and runs it with ``shell=True``.

        Args:
            log_rec: log record supplied by the chlog.wrap decorator
                (see ``inject_as``); the command and, on failure, the
                exit code are stored on it.

        Returns:
            True when rsync exits successfully, False when it raises
            CalledProcessError.
        """
        #self._load_custom_mod()
        #self._call_custom_hook('pre_publish')
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        # Purely alphabetic command names are used as-is; anything else
        # is shell-quoted.
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')
        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        # Sanity check that an export has produced an index page.
        assert os.path.exists(local_site_path + 'index.html')
        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        remote_slug = "%s@%s:'%s'" % (remote_user,
                                      remote_host,
                                      shell_quote(remote_path))

        # NOTE(review): local_site_path, remote_user and remote_host are
        # interpolated unquoted into a shell=True command.
        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd,
                                           rsync_flags,
                                           local_site_path,
                                           remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print 'Executing', full_rsync_cmd
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            rsync_output = cpe.output
            print rsync_output
            log_rec.failure(
                'publish failed: rsync got exit code {rsync_exit_code}')
            return False
        else:
            print rsync_output
            log_rec.success()
        return True
def render(plist, pdir, pfile):
    """Generate the list markdown (and Atom feed) from the yaml listing.

    Every ``*.tmpl.md`` file found under ``<pdir>/templates/`` is filled
    in with ``str.format`` and saved to ``pdir`` with the ``.tmpl`` part
    of its name removed. If ``<pdir>/templates/atom.xml`` exists, it is
    rendered through Ashes and saved as ``<pdir>/atom.xml``.

    Args:
        plist: project listing; must provide ``get_projects_by_type()``
            and ``project_list``.
        pdir: directory that holds the ``templates`` folder and receives
            the generated files.
        pfile: passed through to ``normalize()`` together with *plist*.

    Raises:
        APACLIError: if ``<pdir>/templates/`` is not a directory.
    """
    import sys  # used only by the Ashes log callback below

    normalize(pfile=pfile, plist=plist)

    topic_map = plist.get_projects_by_type('topic')
    topic_toc_text = format_tag_toc(topic_map)
    projects_by_topic = format_all_categories(topic_map)

    plat_map = plist.get_projects_by_type('platform')
    plat_toc_text = format_tag_toc(plat_map)
    projects_by_plat = format_all_categories(plat_map)

    # Placeholders available to the *.tmpl.md templates.
    context = {
        'TOPIC_TOC': topic_toc_text,
        'TOPIC_TEXT': projects_by_topic,
        'PLATFORM_TOC': plat_toc_text,
        'PLATFORM_TEXT': projects_by_plat,
        'TOTAL_COUNT': len(plist.project_list),
    }

    templates_path = pdir + '/templates/'
    if not os.path.isdir(templates_path):
        raise APACLIError('expected "templates" directory at %r'
                          % templates_path)

    for filename in iter_find_files(templates_path, '*.tmpl.md'):
        # Read inside a context manager so the template handle is closed
        # promptly instead of being left to the garbage collector.
        with open(filename) as tmpl_file:
            tmpl_text = tmpl_file.read()
        target_filename = os.path.split(filename)[1].replace('.tmpl', '')
        output_text = tmpl_text.format(**context)
        with atomic_save(pdir + '/' + target_filename) as f:
            f.write(output_text.encode('utf8'))

    # templates_path already ends with a slash; join avoids doubling it.
    feed_tmpl_path = os.path.join(templates_path, 'atom.xml')
    if os.path.exists(feed_tmpl_path):
        def _stderr_log_func(level, name, message):
            # Forward Ashes log messages to stderr.
            sys.stderr.write('%s - %s - %s\n' % (level.upper(), name, message))
            sys.stderr.flush()

        ashes_env = AshesEnv([templates_path], log_func=_stderr_log_func)

        proj_dict_list = []
        for proj in plist.project_list:
            cur = proj.to_dict()
            cur['name_slug'] = proj.name_slug
            cur['date_added_utc'] = proj.date_added.isoformat() + 'Z'
            cur['urls'] = get_url_list(proj)
            proj_dict_list.append(cur)

        cur_dt = datetime.datetime.utcnow().replace(
            microsecond=0).isoformat() + 'Z'
        # Projects are ordered by 'date_added', newest first.
        feed_context = {
            'projects': sorted(proj_dict_list,
                               key=lambda x: x['date_added'],
                               reverse=True),
            'last_generated_utc': cur_dt,
        }
        res = ashes_env.render('atom.xml', feed_context)
        with atomic_save(pdir + '/atom.xml') as f:
            f.write(res.encode('utf8'))

    return