コード例 #1
0
def directory(cmgs, env, title=DIR_TITLE, formats=None):
    """
    Render an HTML index page for several CMGroups and save it to disk.

    The page is rendered from the ``directory.html`` template and written to
    ``index.html`` inside ``env.results_path``.

    Parameters:
        cmgs: iterable of objects exposing ``cmg_id`` and ``name``; each one
            is also passed to ``get_notes``.
        env: object whose ``results_path`` attribute is the output directory.
        title: page title handed to the template.
        formats: optional value handed to the template as ``formats``; any
            falsy value is replaced by an empty list.
    """
    entries = []
    for group in cmgs:
        entries.append({'cmg_id': group.cmg_id,
                        'name': group.name,
                        'notes': get_notes(group)})
    context = {'title': title,
               'items': entries,
               'formats': formats if formats else []}
    out_path = pjoin(env.results_path, 'index.html')
    page = AshesEnv([TEMPLATES_DIR]).render('directory.html', context)
    with open(out_path, 'w') as html_file:
        html_file.write(page)
コード例 #2
0
def cids_to_html(cids, path, title='PubChem images', info=None, size=500):
    """
    Write an HTML page showing PubChem structure images and CMGroup info.

    The page is rendered from the ``display_cids.html`` template.

    Parameters:
        cids: iterable of CIDs; each one gets an image via ``pc_img``.
        path: file path the rendered HTML is written to.
        title: page title handed to the template.
        info: optional object converted with ``info_to_context``; when falsy
            the template receives an empty list instead.
        size: image size, passed both to ``pc_img`` and to the template.
    """
    # TODO: Options to add links to JSON, CSV, Excel files.
    #       Something like: formats=['csv', 'json', 'excel']
    info_list = info_to_context(info) if info else []
    images = []
    for cid in cids:
        images.append({'cid': cid, 'image': pc_img(cid, size=size)})
    context = {'size': size,
               'title': title,
               'info': info_list,
               'items': images}
    page = AshesEnv([TEMPLATES_DIR]).render('display_cids.html', context)
    with open(path, 'w') as html_file:
        html_file.write(page)
コード例 #3
0
ファイル: core.py プロジェクト: mahmoud/chert
class Site(object):
    _entry_type = Entry
    _entry_list_type = EntryList

    def __init__(self, input_path, **kw):
        """Register the site's paths, load its config and reset its state.

        Args:
            input_path: root directory of the site source.
            **kw: optional ``config_path``, ``entries_path``, ``themes_path``,
                ``uploads_path`` and ``output_path`` overrides, plus
                ``dev_mode`` (defaults to False).

        Raises:
            TypeError: if any other keyword argument is left over.
        """
        self.paths = OMD()   # resolved paths
        self._paths = OMD()  # raw paths, exactly as they were passed in
        self.fal = ChertFAL(chlog)

        self._set_path('input_path', input_path)
        # (path name, default location under input_path, must already exist)
        path_specs = [('config_path', DEFAULT_CONFIG_FILENAME, True),
                      ('entries_path', 'entries', True),
                      ('themes_path', 'themes', True),
                      ('uploads_path', 'uploads', False),
                      ('output_path', 'site', False)]
        for path_name, suffix, must_exist in path_specs:
            self._set_path(path_name, kw.pop(path_name, None), suffix,
                           required=must_exist)

        self.reload_config()
        self.reset()
        self.dev_mode = kw.pop('dev_mode', False)
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw)
        chlog.debug('init site').success()
        return

    def reload_config(self, **kw):
        """(Re)read the YAML config file and register the theme path.

        Sets ``self.config`` from the file at ``self.paths['config_path']``
        and then sets ``theme_path`` to ``<themes_path>/<theme.name>``.

        Args:
            **kw: currently unused (see TODO).

        Raises:
            KeyError: if the config has no ``theme`` section or no ``name``
                key inside it.
        """
        # TODO: take optional kwarg
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary Python objects and is deprecated in newer PyYAML
        # releases. If the config is only ever plain YAML, consider
        # yaml.safe_load() -- confirm no site relies on Python-specific tags.
        raw_config = self.fal.read(self.paths['config_path'])
        # An empty or comment-only file parses to None; fall back to an
        # empty mapping so lookups fail with a clear KeyError rather than a
        # TypeError on None.
        self.config = yaml.load(raw_config) or {}

        # set theme
        with chlog.debug('setting theme'):
            theme_name = self.get_config('theme', 'name')
            theme_path = pjoin(self.themes_path, theme_name)
            self._set_path('theme_path', theme_path)

    def reset(self):
        """Clear the state that processing mutates.

        Runs from __init__ and again before each reload. Paths and config
        are left alone; entry lists, the tag map, the last-load marker, the
        Markdown converters and the feed templates are recreated.
        """
        for list_attr in ('entries', 'draft_entries', 'special_entries'):
            setattr(self, list_attr, self._entry_list_type())
        self._rebuild_tag_map()

        self.last_load = None

        self.md_converter = Markdown(extensions=MD_EXTENSIONS)
        self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS)
        self._load_feed_templates()
        return

    def _set_path(self, name, path, default_suffix=None, required=True):
        """Resolve one named path and store it.

        The raw value goes into ``self._paths`` and the resolved value into
        ``self.paths``.

        Args:
            name: key to store the path under (e.g., input_path)
            path: an explicit path, or a falsy value to use the default
            default_suffix: joined onto self.input_path when no explicit
                path is given, so input_path must already be set
            required: when true, the resolved path must exist on disk

        Raises:
            ValueError: neither path nor default_suffix was given
            RuntimeError: required is true and the resolved path is missing
        """
        with chlog.debug('set {path_name} path to {path_val}',
                         path_name=name, path_val=path) as rec:
            self._paths[name] = path
            if path:
                resolved = abspath(path)
            elif default_suffix:
                resolved = pjoin(self.input_path, default_suffix)
            else:
                raise ValueError('no path or default set for %r' % name)
            # stored before the existence check, so it stays set on failure
            self.paths[name] = resolved
            if required and not os.path.exists(resolved):
                raise RuntimeError('expected existent %s path, not %r'
                                   % (name, resolved))
            rec.success('set {path_name} path to {path_val}',
                        path_val=resolved)
        return

    def _load_feed_templates(self):
        """Load the Atom and RSS feed templates.

        For each feed, the file of that name in the theme directory is used
        if it exists; otherwise the copy under CUR_PATH is used. Results are
        stored on ``self.atom_template`` and ``self.rss_template``.
        """
        feeds = (('atom_template', ATOM_FEED_FILENAME),
                 ('rss_template', RSS_FEED_FILENAME))
        for attr_name, filename in feeds:
            fallback_path = pjoin(CUR_PATH, filename)
            tmpl_path = pjoin(self.theme_path, filename)
            if not os.path.exists(tmpl_path):
                tmpl_path = fallback_path
            # TODO: defer opening to loading?
            setattr(self, attr_name,
                    Template.from_path(tmpl_path, name=filename))

    def get_config(self, section, key=None, default=_UNSET):
        """Look up a config section, or a single key inside a section.

        Args:
            section: top-level key in ``self.config``.
            key: key inside the section; when None the whole section is
                returned.
            default: returned when the section or key is missing. If it is
                left unset, the KeyError propagates instead.
        """
        try:
            value = self.config[section]
            if key is not None:
                value = value[key]
        except KeyError:
            if default is _UNSET:
                raise
            return default
        return value

    def get_site_info(self):
        """Build the site-wide context dict handed to templates.

        Values come from the ``site``, ``prod`` and ``dev`` config sections,
        falling back to module-level defaults. The ``site`` and ``prod``
        sections must exist; a missing one raises KeyError.

        Returns:
            dict with the site title/tagline/links, canonical URLs, feed
            URLs, generation timestamps and export extensions.
        """
        ret = {}
        ret['dev_mode'] = self.dev_mode
        refresh_secs = self.get_config('dev', 'autorefresh',
                                       DEFAULT_AUTOREFRESH) or False

        ret['dev_mode_refresh_seconds'] = refresh_secs
        site_config = self.get_config('site')
        ret['title'] = site_config.get('title', SITE_TITLE)
        # Read the dedicated 'head_title' key and fall back to the title.
        # The previous lookup read 'title' again, so head_title could never
        # differ from title.
        ret['head_title'] = site_config.get('head_title', ret['title'])
        ret['tagline'] = site_config.get('tagline', '')
        ret['primary_links'] = self._get_links('site', 'primary_links')
        ret['secondary_links'] = self._get_links('site', 'secondary_links')
        ret['charset'] = 'UTF-8'  # not really overridable
        ret['lang_code'] = site_config.get('lang_code', 'en')
        ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT)
        ret['author_name'] = site_config.get('author', SITE_AUTHOR)
        ret['enable_analytics'] = site_config.get('enable_analytics', True)
        ret['analytics_code'] = self._get_analytics_code()

        prod_config = self.get_config('prod')
        # domain without a trailing slash, base path with one, so the two
        # concatenate cleanly into canonical_url
        ret['canonical_domain'] = prod_config.get('canonical_domain',
                                                  CANONICAL_DOMAIN).rstrip('/')
        ret['canonical_base_path'] = prod_config.get('canonical_base_path',
                                                     CANONICAL_BASE_PATH)
        if not ret['canonical_base_path'].endswith('/'):
            ret['canonical_base_path'] += '/'
        ret['canonical_url'] = (ret['canonical_domain']
                                + ret['canonical_base_path'])
        ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME
        ret['canonical_rss_feed_url'] = (ret['canonical_url']
                                         + RSS_FEED_FILENAME)
        ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME
        ret['canonical_atom_feed_url'] = (ret['canonical_url']
                                          + ATOM_FEED_FILENAME)

        now = datetime.now(LocalTZ)
        ret['last_generated'] = to_timestamp(now)
        ret['last_generated_utc'] = to_timestamp(now, to_utc=True)
        ret['export_html_ext'] = EXPORT_HTML_EXT
        ret['export_src_ext'] = EXPORT_SRC_EXT
        return ret

    def _get_analytics_code(self):
        """Return the analytics code from the ``site`` config section.

        The configured value is converted to text and searched with
        ``_analytics_re``; the ``code`` group of the match is returned.

        Returns:
            The extracted code, or '' when the setting is missing, does not
            match, or the extracted code is shorter than 6 characters. Each
            of those cases is recorded as a failure on the log record.
        """
        with chlog.debug('set analytics code') as rec:
            code = self.get_config('site', 'analytics_code', None)
            if code is None:
                rec.failure('site.analytics_code not set in config.yaml')
                return ''
            # unicode() so a non-string YAML value can still be searched
            match = _analytics_re.search(unicode(code))
            if not match:
                rec.failure('analytics code blank or invalid: {!r}', code)
                return ''
            # from here on `code` is the extracted group, not the raw value
            code = match.group('code')
            if len(code) < 6:
                rec.failure('analytics code too short: {!r}', code)
                return ''
            rec.success('analytics code set to {!r}', code)
        return code

    def _get_links(self, group, name):
        """Return the configured list of links, each tagged as external or not.

        A link is external when its ``href`` is truthy and parses to a URL
        with a host. The flag is written onto the link mappings themselves
        (the list is copied, its items are not).
        """
        links = list(self.get_config(group, name, []))
        for link in links:
            href = link['href']
            link['is_external'] = bool(href and URL(href).host)
        return links

    @property
    def input_path(self):
        """Resolved ``input_path`` entry of ``self.paths``."""
        return self.paths['input_path']

    @property
    def entries_path(self):
        """Resolved ``entries_path`` entry of ``self.paths``."""
        return self.paths['entries_path']

    @property
    def themes_path(self):
        """Resolved ``themes_path`` entry of ``self.paths``."""
        return self.paths['themes_path']

    @property
    def theme_path(self):
        """Resolved ``theme_path`` entry of ``self.paths``."""
        return self.paths['theme_path']

    @property
    def uploads_path(self):
        """Resolved ``uploads_path`` entry of ``self.paths``."""
        return self.paths['uploads_path']

    @property
    def output_path(self):
        """Resolved ``output_path`` entry of ``self.paths``."""
        return self.paths['output_path']

    @property
    def all_entries(self):
        """Special, published and draft entries concatenated, in that order."""
        combined = self.special_entries.entries + self.entries.entries
        return combined + self.draft_entries.entries

    def process(self):
        """Run the full pipeline: load, validate, render, audit, export.

        If the site has been loaded before, the config is re-read and the
        processing state is reset first.
        """
        if self.last_load:
            self.reload_config()
            self.reset()
        for stage in ('load', 'validate', 'render', 'audit', 'export'):
            getattr(self, stage)()

    def _load_custom_mod(self):
        """Import ``custom.py`` from the input directory, if there is one.

        Sets ``self.custom_mod`` to the loaded module, or to None when the
        file does not exist.
        """
        module_path = pjoin(self.paths['input_path'], 'custom.py')
        if os.path.exists(module_path):
            with chlog.debug('import site custom module'):
                self.custom_mod = imp.load_source('custom', module_path)
        else:
            self.custom_mod = None

    def _call_custom_hook(self, hook_name):
        """Call ``chert_<hook_name>(site)`` from the custom module, if defined.

        A missing custom module or a missing hook is recorded as a failure
        on the log record and otherwise ignored. The log action is opened
        with ``reraise=False``.

        Args:
            hook_name: hook suffix, e.g. ``'pre_load'``.
        """
        with chlog.debug('call custom {hook_name} hook',
                         hook_name=hook_name,
                         reraise=False) as rec:
            # custom_mod is only assigned by _load_custom_mod(), which may
            # not have run yet
            custom_mod = getattr(self, 'custom_mod', None)
            if not custom_mod:
                # TODO: success or failure?
                rec.failure('no custom module loaded')
                # Stop here. This used to fall through, look the hook up on
                # None and record a second failure.
                return
            try:
                hook_func = getattr(custom_mod, 'chert_' + hook_name)
            except AttributeError:
                rec.failure('no {} hook defined', hook_name)
                return
            hook_func(self)
        return

    @chlog.wrap('critical', 'load site')
    def load(self):
        """Load the custom module, the theme templates and all entries.

        Entries found under ``entries_path`` are parsed in sorted path
        order and sorted into ``draft_entries``, ``special_entries`` or
        ``entries``. An entry that fails to load is logged and skipped.
        After sorting, the tag map is rebuilt and each published entry gets
        its ``next_entries`` / ``prev_entries`` neighbours. The ``pre_load``
        and ``post_load`` custom hooks run at the start and end.
        """
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entry_paths = sorted(iter_find_files(self.paths['entries_path'],
                                             ENTRY_PATS))
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except Exception:
                    # Exception rather than a bare except, so Ctrl-C and
                    # SystemExit still abort the load.
                    rec.exception('entry load error: {exc_message}')
                    continue
                else:
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        for i, entry in enumerate(self.entries, start=1):
            # The entry sits at index i - 1, so the window of up to
            # NEXT_ENTRY_COUNT entries before it starts at
            # i - 1 - NEXT_ENTRY_COUNT. Starting at i - NEXT_ENTRY_COUNT
            # gave one entry too few (none at all for a count of 1).
            start_next = max(0, i - 1 - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')

    def _rebuild_tag_map(self):
        """Rebuild ``self.tag_map`` from the published entries.

        Each tag maps to an entry list (created with ``tag=tag``) holding
        every entry that carries the tag; every list is sorted at the end.
        """
        tag_map = self.tag_map = {}
        for entry in self.entries:
            for tag in entry.tags:
                if tag in tag_map:
                    tag_map[tag].append(entry)
                else:
                    tag_map[tag] = self._entry_list_type([entry], tag=tag)
        for tagged_entries in tag_map.values():
            tagged_entries.sort()

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        """Check that no two published entries share an ``entry_root``.

        The ``pre_validate`` hook runs before the check and ``post_validate``
        after it.

        Raises:
            ValueError: listing every duplicated entry ID with its entries.
        """
        self._call_custom_hook('pre_validate')
        by_root = OMD([(e.entry_root, e) for e in self.entries])
        groups = ((root, by_root.getlist(root)) for root in by_root)
        dup_id_map = dict((root, group) for root, group in groups
                          if len(group) > 1)
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')

        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        """Slugify a header's text, using the ``site.anchor_delim`` config
        value (default ``'-'``) as the delimiter."""
        delimiter = self.get_config('site', 'anchor_delim', '-')
        return slugify(header_text, delim=delimiter)

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        """Render every loaded entry, then the feeds and tag lists.

        In order: each entry's parts and content are rendered (published,
        then draft, then special entries), each entry's full HTML page is
        rendered, and finally the main entry list and every tag list render
        themselves. The ``pre_render`` and ``post_render`` custom hooks run
        at the start and end. Results are stored on the entry objects
        (``content_md``, ``content_html``, ``content_ihtml``, ``entry_html``).
        """
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            """Convert Markdown to HTML; falsy input yields ''."""
            if not string:
                return ''
            ret = mdc.convert(string)
            # the converter is shared, so reset it after each conversion
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            """Convert Markdown with the inline converter and pass the result
            through ``hypertext.canonicalize_links``; falsy input yields ''."""
            if not string:
                return ''

            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain,
                                               entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            """Render an entry's parts and content; sets ``content_md``,
            ``content_html`` and ``content_ihtml`` on the entry."""
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=False),
                          'site': site_info}
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            # standalone HTML content: render, add TOC, retarget links
            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree, make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            # inline variant: same template rendered with inline=True, then
            # links canonicalized and a TOC added
            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(content_ihtml,
                                                             canonical_domain,
                                                             entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                # NOTE(review): unlike the standalone path above, this call
                # does not pass make_anchor_id, so the site.anchor_delim
                # setting may not apply here -- confirm this is intended.
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)

            entry.content_ihtml = content_ihtml
            return

        def render_html(entry, with_links=False):
            """Render the entry's full page with its entry layout and store
            it on ``entry.entry_html``."""
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=with_links),
                          'site': site_info}
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        # only published entries are rendered with with_links=True
        with chlog.info('render entry html'):
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for tag, entry_list in self.tag_map.items():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Post-render validation step, intended for things like link checking.

        Currently it only runs the ``pre_audit`` and ``post_audit`` custom
        hooks.
        """
        # TODO: check for &nbsp; and other common HTML entities in
        # feed xml (these entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        for hook_name in ('pre_audit', 'post_audit'):
            self._call_custom_hook(hook_name)

    @chlog.wrap('critical', 'export site')
    def export(self):
        """Write the rendered site into the output directory.

        In order: create the output directory; write each entry's HTML,
        generated Markdown and JSON parts; write the index and archive
        pages; write the RSS and Atom feeds; write per-tag feeds and
        archives; copy the theme's subdirectories; and symlink the uploads
        directory if it exists. The ``pre_export`` and ``post_export``
        custom hooks run at the start and end.
        """
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            """Write one entry's .html, .gen.md and .json files."""
            # entry_root may contain a directory part; create it first
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        # each tag gets its own directory with feeds and an archive page
        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            # NOTE(review): hard-coded 'index.html', whereas the top-level
            # index uses EXPORT_HTML_EXT -- confirm these are meant to differ
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                # only an existing symlink is replaced; any other existing
                # file at that path is left in place for os.symlink to hit
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')

    def serve(self):
        """Regenerate and serve the site each time a source file changes.

        Host, port and base path come from the ``dev`` config section, with
        module-level defaults for anything missing (including the whole
        section). For each batch reported by ``_iter_changed_files`` the
        site is processed, the working directory is switched to the output
        directory, and an HTTP server thread is (re)started.
        """
        # A missing or empty 'dev' section simply means "use the defaults".
        dev_config = self.get_config('dev', default=None) or {}
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        # Coerce after the lookup so a configured port is converted too, not
        # only the default.
        port = int(dev_config.get('server_port', DEV_SERVER_PORT))
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                # serve only URLs under base_url, with that prefix stripped
                if not self.path.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                self.path = self.path[len(base_url):]
                if not self.path.startswith('/'):
                    self.path = '/' + self.path
                return SimpleHTTPRequestHandler.send_head(self)
        Handler.extensions_map.update({'.md': 'text/plain',
                                       '.json': 'application/json'})

        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        config_path = self.paths['config_path']
        entries_path = self.paths['entries_path']
        theme_path = self.paths['theme_path']
        output_path = self.paths['output_path']
        for changed in _iter_changed_files(entries_path, theme_path,
                                           config_path):
            if serving:
                print('Changed %s files, regenerating...' % len(changed))
                # stop the running serve_forever loop before regenerating
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print('Serving from %s' % output_path)
            os.chdir(abspath(output_path))
            print('Serving at http://%s:%s%s' % (host, port, base_url))

            thread = Thread(target=server.serve_forever)
            thread.daemon = True
            thread.start()
            serving = True
        # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        """Upload the generated site to the production host with rsync.

        The ``prod`` config section must provide ``remote_host``,
        ``remote_user`` and ``remote_path``; ``rsync_cmd`` and
        ``rsync_flags`` are optional. The command line and, on failure, the
        exit code are stored on the log record.

        Args:
            log_rec: log record injected by the ``chlog.wrap`` decorator.

        Returns:
            True if rsync exited successfully, False otherwise.
        """
        #self._load_custom_mod()
        #self._call_custom_hook('pre_publish')
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')
        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        # NOTE(review): assert is stripped under `python -O`; consider an
        # explicit check if this guard must always run.
        assert os.path.exists(local_site_path + 'index.html')
        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        remote_slug = "%s@%s:'%s'" % (remote_user,
                                      remote_host,
                                      shell_quote(remote_path))

        # The command runs through a shell, so the local path is quoted
        # like rsync_cmd and remote_path are; otherwise an output path
        # containing spaces would be split into several arguments.
        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd,
                                           rsync_flags,
                                           shell_quote(local_site_path),
                                           remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print('Executing %s' % full_rsync_cmd)
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            rsync_output = cpe.output
            print(rsync_output)
            log_rec.failure('publish failed: rsync got exit code {rsync_exit_code}')
            return False
        else:
            print(rsync_output)
            log_rec.success()
        return True
コード例 #4
0
ファイル: core.py プロジェクト: ra2003/chert
class Site(object):
    _entry_type = Entry
    _entry_list_type = EntryList

    def __init__(self, input_path, **kw):
        """Register the site's paths, load its config and reset state.

        Args:
            input_path: base directory; every other path defaults to a
                fixed sub-path of it when not given explicitly.
            **kw: optional ``config_path``, ``entries_path``,
                ``themes_path``, ``uploads_path`` and ``output_path``
                overrides, plus ``dev_mode`` (default False).

        Raises:
            TypeError: if any unrecognized keyword argument remains.
        """
        self.paths = OMD()   # resolved paths
        self._paths = OMD()  # for the raw input paths
        self.fal = ChertFAL(chlog)

        # input_path goes first: the defaults below are derived from it.
        self._set_path('input_path', input_path)
        # (attribute name, default suffix under input_path, must exist?)
        path_specs = (('config_path', DEFAULT_CONFIG_FILENAME, True),
                      ('entries_path', 'entries', True),
                      ('themes_path', 'themes', True),
                      ('uploads_path', 'uploads', False),
                      ('output_path', 'site', False))
        for path_name, suffix, must_exist in path_specs:
            self._set_path(path_name,
                           kw.pop(path_name, None),
                           suffix,
                           required=must_exist)

        self.reload_config()
        self.reset()
        self.dev_mode = kw.pop('dev_mode', False)
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw)
        chlog.debug('init site').success()
        return

    def reload_config(self, **kw):
        """Read the YAML config file and register the active theme path.

        Stores the parsed config on ``self.config`` and then sets
        ``theme_path`` to ``<themes_path>/<theme.name>``.

        Args:
            **kw: currently unused.

        Raises:
            KeyError: if the config has no ``theme`` section or no
                ``name`` key inside it.
        """
        # TODO: take optional kwarg
        # safe_load rather than bare yaml.load: without an explicit Loader
        # yaml.load can construct arbitrary Python objects, and PyYAML >= 6
        # refuses to run it at all.  A site config only needs plain data.
        self.config = yaml.safe_load(self.fal.read(self.paths['config_path']))

        # set theme
        with chlog.debug('setting theme'):
            theme_name = self.get_config('theme', 'name')
            theme_path = pjoin(self.themes_path, theme_name)
            self._set_path('theme_path', theme_path)

    def reset(self):
        """Reinitialize the state that processing mutates.

        Called from __init__ and before reprocessing. Paths are left
        alone; entry lists, the tag map, the last-load timestamp, the
        Markdown converters and the feed templates are rebuilt.
        """
        for list_attr in ('entries', 'draft_entries', 'special_entries'):
            setattr(self, list_attr, self._entry_list_type())
        self._rebuild_tag_map()

        self.last_load = None

        self.md_converter = Markdown(extensions=MD_EXTENSIONS)
        self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS)
        self._load_feed_templates()
        return

    def _set_path(self, name, path, default_suffix=None, required=True):
        """Record a named path in the raw and the resolved path maps.

        Args:
            name: key to store the path under (e.g., input_path)
            path: explicit path, or a falsy value to use the default
            default_suffix: joined onto self.input_path when no path
                is given; input_path must already be set in that case
            required: when true, the resolved path must exist

        Raises:
            ValueError: neither path nor default_suffix was provided
            RuntimeError: required is set and the path does not exist
        """
        with chlog.debug('set {path_name} path to {path_val}',
                         path_name=name,
                         path_val=path) as rec:
            self._paths[name] = path
            if path:
                resolved = abspath(path)
            elif default_suffix:
                resolved = pjoin(self.input_path, default_suffix)
            else:
                raise ValueError('no path or default set for %r' % name)
            self.paths[name] = resolved
            if required and not os.path.exists(resolved):
                raise RuntimeError('expected existent %s path, not %r' %
                                   (name, resolved))
            rec.success('set {path_name} path to {path_val}',
                        path_val=resolved)
        return

    def _load_feed_templates(self):
        """Load the Atom and RSS feed templates.

        For each feed the file inside the active theme directory is
        used when it exists; otherwise the copy under CUR_PATH is
        loaded. Results land on ``atom_template`` and ``rss_template``.
        """
        def load_feed_template(filename):
            # theme-provided template wins over the bundled default
            tmpl_path = pjoin(self.theme_path, filename)
            if not os.path.exists(tmpl_path):
                tmpl_path = pjoin(CUR_PATH, filename)
            # TODO: defer opening to loading?
            return Template.from_path(tmpl_path, name=filename)

        self.atom_template = load_feed_template(ATOM_FEED_FILENAME)
        self.rss_template = load_feed_template(RSS_FEED_FILENAME)

    def get_config(self, section, key=None, default=_UNSET):
        """Look up a config section, or a single key inside a section.

        Args:
            section: top-level key of ``self.config``.
            key: key within the section; when None the whole section
                mapping is returned.
            default: returned when the section or key is missing.

        Raises:
            KeyError: the section or key is missing and no default
                was supplied.
        """
        lookup_keys = (section,) if key is None else (section, key)
        node = self.config
        for lookup_key in lookup_keys:
            try:
                node = node[lookup_key]
            except KeyError:
                if default is _UNSET:
                    raise
                return default
        return node

    def get_site_info(self):
        """Build the site-wide dict handed to templates as ``site``.

        Combines the ``site``, ``dev`` and ``prod`` config sections with
        module-level defaults, derives canonical and feed URLs, and
        stamps the current generation time.

        Returns:
            dict of site-level template values.

        Raises:
            KeyError: if the ``site`` or ``prod`` config section is
                missing.
        """
        ret = {}
        ret['dev_mode'] = self.dev_mode
        # a falsy autorefresh setting is normalized to False
        refresh_secs = self.get_config('dev', 'autorefresh',
                                       DEFAULT_AUTOREFRESH) or False

        ret['dev_mode_refresh_seconds'] = refresh_secs
        site_config = self.get_config('site')
        ret['title'] = site_config.get('title', SITE_TITLE)
        # head_title falls back to the page title.  This used to look up
        # 'title' again, which made head_title impossible to override and
        # the fallback dead code.
        ret['head_title'] = site_config.get('head_title', ret['title'])
        ret['tagline'] = site_config.get('tagline', '')
        ret['primary_links'] = self._get_links('site', 'primary_links')
        ret['secondary_links'] = self._get_links('site', 'secondary_links')
        ret['charset'] = 'UTF-8'  # not really overridable
        ret['lang_code'] = site_config.get('lang_code', 'en')
        ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT)
        ret['author_name'] = site_config.get('author', SITE_AUTHOR)
        ret['enable_analytics'] = site_config.get('enable_analytics', True)
        ret['analytics_code'] = self._get_analytics_code()

        prod_config = self.get_config('prod')
        # domain without a trailing slash, base path with one, so the two
        # concatenate cleanly into canonical_url
        ret['canonical_domain'] = prod_config.get('canonical_domain',
                                                  CANONICAL_DOMAIN).rstrip('/')
        ret['canonical_base_path'] = prod_config.get('canonical_base_path',
                                                     CANONICAL_BASE_PATH)
        if not ret['canonical_base_path'].endswith('/'):
            ret['canonical_base_path'] += '/'
        ret['canonical_url'] = ret['canonical_domain'] + ret[
            'canonical_base_path']
        ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME
        ret['canonical_rss_feed_url'] = ret['canonical_url'] + RSS_FEED_FILENAME
        ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME
        ret['canonical_atom_feed_url'] = ret[
            'canonical_url'] + ATOM_FEED_FILENAME

        now = datetime.now(LocalTZ)
        ret['last_generated'] = to_timestamp(now)
        ret['last_generated_utc'] = to_timestamp(now, to_utc=True)
        ret['export_html_ext'] = EXPORT_HTML_EXT
        ret['export_src_ext'] = EXPORT_SRC_EXT
        return ret

    def _get_analytics_code(self):
        """Return the validated analytics code from the site config.

        Reads ``site.analytics_code``, extracts the ``code`` group
        matched by ``_analytics_re`` and requires it to be at least six
        characters long. Each rejection is reported on the log record.

        Returns:
            The extracted code, or '' when the setting is missing,
            does not match, or is too short.
        """
        with chlog.debug('set analytics code') as rec:
            code = self.get_config('site', 'analytics_code', None)
            if code is None:
                rec.failure('site.analytics_code not set in config.yaml')
                return ''
            # the config value is coerced to text before matching
            match = _analytics_re.search(unicode(code))
            if not match:
                rec.failure('analytics code blank or invalid: {!r}', code)
                return ''
            code = match.group('code')
            if len(code) < 6:
                rec.failure('analytics code too short: {!r}', code)
                return ''
            rec.success('analytics code set to {!r}', code)
        return code

    def _get_links(self, group, name):
        """Return the configured link list with ``is_external`` flags.

        Each link dict gets ``is_external`` set to True when its
        ``href`` is non-empty and parses to a URL with a host, False
        otherwise. The list is a copy, but the link dicts themselves
        are updated in place.
        """
        links = list(self.get_config(group, name, []))
        for link in links:
            href = link['href']
            link['is_external'] = bool(href and URL(href).host)
        return links

    @property
    def input_path(self):
        """The resolved ``input_path`` entry of ``self.paths``."""
        return self.paths['input_path']

    @property
    def entries_path(self):
        """The resolved ``entries_path`` entry of ``self.paths``."""
        return self.paths['entries_path']

    @property
    def themes_path(self):
        """The resolved ``themes_path`` entry of ``self.paths``."""
        return self.paths['themes_path']

    @property
    def theme_path(self):
        """The resolved ``theme_path`` entry of ``self.paths``.

        Only available after ``reload_config`` has registered it.
        """
        return self.paths['theme_path']

    @property
    def uploads_path(self):
        """The resolved ``uploads_path`` entry of ``self.paths``."""
        return self.paths['uploads_path']

    @property
    def output_path(self):
        """The resolved ``output_path`` entry of ``self.paths``."""
        return self.paths['output_path']

    @property
    def all_entries(self):
        """Special, published and draft entries concatenated, in that order."""
        combined = self.special_entries.entries + self.entries.entries
        return combined + self.draft_entries.entries

    def process(self):
        """Run the full pipeline: load, validate, render, audit, export.

        When the site has been loaded before, the config is re-read and
        processing state is reset first.
        """
        if self.last_load:
            self.reload_config()
            self.reset()
        # stages are looked up one at a time, right before each call
        for stage_name in ('load', 'validate', 'render', 'audit', 'export'):
            getattr(self, stage_name)()

    def _load_custom_mod(self):
        """Import ``custom.py`` from the input directory, if present.

        Sets ``self.custom_mod`` to the loaded module, or to None when
        the file does not exist.
        """
        custom_mod_path = pjoin(self.paths['input_path'], 'custom.py')
        if os.path.exists(custom_mod_path):
            with chlog.debug('import site custom module'):
                self.custom_mod = imp.load_source('custom', custom_mod_path)
        else:
            self.custom_mod = None

    def _call_custom_hook(self, hook_name):
        """Call ``chert_<hook_name>(site)`` from the custom module, if any.

        A missing custom module or a missing hook function is recorded
        as a failure on the log record and otherwise ignored.

        Args:
            hook_name: hook suffix, e.g. ``pre_load``; the function
                looked up is named ``chert_`` + hook_name.
        """
        with chlog.debug('call custom {hook_name} hook',
                         hook_name=hook_name,
                         reraise=False) as rec:
            if not self.custom_mod:
                # TODO: success or failure?
                rec.failure('no custom module loaded')
                # Stop here; otherwise the getattr below runs against
                # None and replaces this message with a misleading
                # "no hook defined" failure.
                return
            try:
                hook_func = getattr(self.custom_mod, 'chert_' + hook_name)
            except AttributeError:
                rec.failure('no {} hook defined', hook_name)
                return
            hook_func(self)
        return

    @chlog.wrap('critical', 'load site')
    def load(self):
        """Load templates and entries from disk and link neighbours.

        Steps, in order: record the load time, import the custom module
        and run the ``pre_load`` hook, set up the HTML and Markdown
        template environments for the active theme, load every entry
        file matching ENTRY_PATS, sort the entry lists, rebuild the tag
        map, attach next/previous entries to each published entry, and
        run the ``post_load`` hook.

        An entry that fails to load is logged and skipped; it does not
        abort the load.
        """
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        # sorted so entries load in a stable order
        entry_paths = sorted(iter_find_files(entries_path, ENTRY_PATS))
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except Exception:
                    # Exception, not a bare except: Ctrl-C and SystemExit
                    # must propagate rather than be logged as a bad
                    # entry and skipped.
                    rec['entry_path'] = ep
                    rec.exception(
                        'entry {entry_path} load error: {exc_message}')
                    continue
                else:
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        # i is the 1-based position of each entry in the sorted list.
        # NOTE(review): the next_entries slice holds at most
        # NEXT_ENTRY_COUNT - 1 entries while prev_entries holds up to
        # PREV_ENTRY_COUNT -- confirm the asymmetry is intended.
        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')

    def _rebuild_tag_map(self):
        """Rebuild ``self.tag_map`` from the published entries.

        Maps each tag to an entry list (created with ``tag=<tag>``)
        holding the entries that carry it; every list is sorted once
        all entries have been added.
        """
        tag_map = self.tag_map = {}
        for entry in self.entries:
            for tag in entry.tags:
                if tag in tag_map:
                    tag_map[tag].append(entry)
                else:
                    tag_map[tag] = self._entry_list_type([entry], tag=tag)
        for tagged_entries in tag_map.values():
            tagged_entries.sort()

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        """Reject published entries that share an ``entry_root``.

        Runs the ``pre_validate`` hook, checks for duplicates, then
        runs the ``post_validate`` hook.

        Raises:
            ValueError: if two or more published entries have the same
                entry_root; the message maps each duplicated root to
                the entries sharing it.
        """
        self._call_custom_hook('pre_validate')
        entries_by_root = OMD([(e.entry_root, e) for e in self.entries])
        grouped = ((root, entries_by_root.getlist(root))
                   for root in entries_by_root)
        dup_id_map = {root: sharing for root, sharing in grouped
                      if len(sharing) > 1}
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')

        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        """Slugify header text using the ``site.anchor_delim`` setting.

        The delimiter defaults to '-' when the setting is absent.
        """
        anchor_delim = self.get_config('site', 'anchor_delim', '-')
        return slugify(header_text, delim=anchor_delim)

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        """Render every loaded entry, then the feeds and tag lists.

        Runs in two passes over published, draft and special entries:
        first each entry's content (Markdown, standalone HTML and an
        "inline" HTML variant), then the full entry page. Finally the
        main entry list and each per-tag list render themselves. The
        ``pre_render`` and ``post_render`` hooks wrap the whole step.
        Results are stored on the entry objects (``content_md``,
        ``content_html``, ``content_ihtml``, ``entry_html``).
        """
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            """Convert Markdown text to HTML; empty input yields ''."""
            if not string:
                return ''
            ret = mdc.convert(string)
            # the converter is shared, so it is reset after every use
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            """Convert Markdown with the inline converter, then pass the
            result through hypertext.canonicalize_links."""
            if not string:
                return ''

            # presumably rewrites links relative to canonical_domain and
            # the entry's filename -- confirm against hypertext
            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain, entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            """Render one entry's parts and its content_md,
            content_html and content_ihtml attributes."""
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                # reraise=False: a failing autosummary is not fatal
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=False),
                'site': site_info
            }
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            # standalone HTML: render, parse, add TOC, retarget links,
            # serialize back to text
            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree,
                                  make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            # inline HTML: same template, rendered with 'inline' set
            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(
                    content_ihtml, canonical_domain, entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                # NOTE(review): unlike the standalone variant, no
                # make_anchor_id is passed here -- confirm intended
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)

            entry.content_ihtml = content_ihtml
            return

        def render_html(entry, with_links=False):
            """Render the full entry page into entry.entry_html."""
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=with_links),
                'site': site_info
            }
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        # pass 1: content of every entry
        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        # pass 2: full pages; only published entries get with_links=True
        with chlog.info('render entry html'):
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for tag, entry_list in self.tag_map.items():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Placeholder for validating rendered content (e.g. link
        checking). Currently only runs the pre_audit and post_audit
        custom hooks.
        """
        # TODO: check for &nbsp; and other common HTML entities in
        # feed xml (these entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        for hook_name in ('pre_audit', 'post_audit'):
            self._call_custom_hook(hook_name)

    @chlog.wrap('critical', 'export site')
    def export(self):
        """Write the rendered site into the output directory.

        Per entry (published, draft, special) this writes the HTML
        page, the generated Markdown and a JSON dump of its parts.
        It then writes the index (the first published entry), the
        archive page, the RSS/Atom feeds, one directory of feeds plus
        index per tag, copies the theme's subdirectories and links
        the uploads directory. The ``pre_export`` and ``post_export``
        hooks wrap the whole step.
        """
        writer = self.fal
        self._call_custom_hook('pre_export')
        out_dir = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(out_dir)

        def export_entry(entry):
            # entries whose root contains a directory part need that
            # directory created under the output path first
            sub_dir = os.path.dirname(entry.entry_root)
            if sub_dir:
                mkdir_p(pjoin(out_dir, sub_dir))
            root = entry.entry_root

            writer.write(pjoin(out_dir, root + EXPORT_HTML_EXT),
                         entry.entry_html)
            writer.write(pjoin(out_dir, root + '.gen.md'),
                         entry.content_md)  # TODO
            parts_json = json.dumps(entry.loaded_parts,
                                    indent=2,
                                    sort_keys=True)
            writer.write(pjoin(out_dir, root + '.json'), parts_json)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry_group in (self.entries, self.draft_entries,
                            self.special_entries):
            for entry in entry_group:
                export_entry(entry)

        # index is just the most recent entry for now
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        writer.write(pjoin(out_dir, 'index' + EXPORT_HTML_EXT), index_content)
        writer.write(pjoin(out_dir, 'archive' + EXPORT_HTML_EXT),
                     self.entries.rendered_html)

        # output feeds
        writer.write(pjoin(out_dir, RSS_FEED_FILENAME),
                     self.entries.rendered_rss_feed)
        writer.write(pjoin(out_dir, ATOM_FEED_FILENAME),
                     self.entries.rendered_atom_feed)

        # one directory per tag with its own feeds and index page
        for tagged in self.tag_map.values():
            tag_dir = pjoin(out_dir, tagged.path_part)
            mkdir_p(tag_dir)
            writer.write(pjoin(tag_dir, RSS_FEED_FILENAME),
                         tagged.rendered_rss_feed)
            writer.write(pjoin(tag_dir, ATOM_FEED_FILENAME),
                         tagged.rendered_atom_feed)
            writer.write(pjoin(tag_dir, 'index.html'), tagged.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for asset_dir in get_subdirectories(self.theme_path):
            asset_src = pjoin(self.theme_path, asset_dir)
            asset_dest = pjoin(out_dir, asset_dir)
            with chlog.critical('copy assets', src=asset_src,
                                dest=asset_dest):
                copytree(asset_src, asset_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            link_path = pjoin(out_dir, 'uploads')
            if os.path.isdir(self.uploads_path):
                note = None
                if os.path.islink(link_path):
                    # replace a link left over from a previous export
                    os.unlink(link_path)
                    note = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, link_path)
                rec.success(note)
            else:
                rec.failure('no uploads directory at {}', self.uploads_path)

        self._call_custom_hook('post_export')

    def serve(self):
        """Serve the output directory over HTTP, regenerating on change.

        Host, port and base path come from the ``dev`` config section
        with module-level defaults. Each batch yielded by
        ``_iter_changed_files`` (entries, theme and config paths)
        stops a running server, reprocesses the site and starts the
        server again in a daemon thread.
        """
        dev_config = self.get_config('dev')
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        port = dev_config.get('server_port', int(DEV_SERVER_PORT))

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                # only paths under base_url are served; the prefix is
                # stripped before the stock handler maps it to a file
                requested = self.path
                if not requested.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                relative = requested[len(base_url):]
                if not relative.startswith('/'):
                    relative = '/' + relative
                self.path = relative
                return SimpleHTTPRequestHandler.send_head(self)

        Handler.extensions_map.update({
            '.md': 'text/plain',
            '.json': 'application/json'
        })

        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        output_path = self.paths['output_path']
        watched = (self.paths['entries_path'],
                   self.paths['theme_path'],
                   self.paths['config_path'])
        for changed in _iter_changed_files(*watched):
            if serving:
                print('Changed %s files, regenerating...' % len(changed))
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print('Serving from %s' % output_path)
            # the stock handler serves files relative to the cwd
            os.chdir(abspath(output_path))
            print('Serving at http://%s:%s%s' % (host, port, base_url))

            server_thread = Thread(target=server.serve_forever)
            server_thread.daemon = True
            server_thread.start()
            serving = True
        # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        """Upload the generated site to the production host using rsync.

        Reads the ``prod`` config section. ``remote_host``,
        ``remote_user`` and ``remote_path`` are required (``KeyError``
        when missing); ``rsync_cmd`` (default ``rsync``) and
        ``rsync_flags`` (default ``avzPk``) are optional.

        Args:
            log_rec: log record passed in through the decorator's
                ``inject_as`` argument; receives the ``rsync_cmd`` and,
                on failure, ``rsync_exit_code`` fields.

        Returns:
            True if rsync exited with status 0, False otherwise.

        Raises:
            AssertionError: if the output directory has no index.html.
        """
        # TODO: load the custom module and call a 'pre_publish' hook here
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')
        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        assert os.path.exists(local_site_path + 'index.html')
        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        # NOTE(review): the remote path is quoted by shell_quote and then
        # wrapped in single quotes again; presumably to survive both the
        # local and the remote shell -- confirm for paths with spaces.
        remote_slug = "%s@%s:'%s'" % (remote_user, remote_host,
                                      shell_quote(remote_path))

        # The command runs through a shell, so the local path has to be
        # quoted as well or an output directory containing spaces or
        # shell metacharacters breaks the command line.
        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd, rsync_flags,
                                           shell_quote(local_site_path),
                                           remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print('Executing %s' % full_rsync_cmd)
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            print(cpe.output)
            log_rec.failure(
                'publish failed: rsync got exit code {rsync_exit_code}')
            return False
        print(rsync_output)
        log_rec.success()
        return True
コード例 #5
0
ファイル: cli.py プロジェクト: mcgyver5/apatite
def render(plist, pdir, pfile):
    """Generate the list markdown (and Atom feed) from the yaml listing.

    Normalizes the listing, then fills every ``*.tmpl.md`` file under
    ``<pdir>/templates/`` with the topic/platform tables of contents,
    the per-category project text and the total project count, writing
    each result to ``<pdir>/<name>.md``. When the templates directory
    also holds an ``atom.xml`` template, a feed of all projects, newest
    first, is written to ``<pdir>/atom.xml``.

    Args:
        plist: the project list object.
        pdir: directory holding the ``templates`` directory; output
            files are written directly into it.
        pfile: listing file, passed through to ``normalize``.

    Raises:
        APACLIError: if ``<pdir>/templates/`` is not a directory.
    """
    normalize(pfile=pfile, plist=plist)
    topic_map = plist.get_projects_by_type('topic')
    topic_toc_text = format_tag_toc(topic_map)
    projects_by_topic = format_all_categories(topic_map)

    plat_map = plist.get_projects_by_type('platform')
    plat_toc_text = format_tag_toc(plat_map)
    projects_by_plat = format_all_categories(plat_map)

    context = {
        'TOPIC_TOC': topic_toc_text,
        'TOPIC_TEXT': projects_by_topic,
        'PLATFORM_TOC': plat_toc_text,
        'PLATFORM_TEXT': projects_by_plat,
        'TOTAL_COUNT': len(plist.project_list)
    }

    templates_path = pdir + '/templates/'
    if not os.path.isdir(templates_path):
        raise APACLIError('expected "templates" directory at %r' %
                          templates_path)

    for filename in iter_find_files(templates_path, '*.tmpl.md'):
        # read inside a context manager so the handle is closed promptly
        with open(filename) as tmpl_file:
            tmpl_text = tmpl_file.read()
        # foo.tmpl.md -> foo.md
        target_filename = os.path.split(filename)[1].replace('.tmpl', '')
        output_text = tmpl_text.format(**context)
        with atomic_save(pdir + '/' + target_filename) as f:
            f.write(output_text.encode('utf8'))

    # templates_path already ends with a slash; join avoids doubling it
    feed_tmpl_path = os.path.join(templates_path, 'atom.xml')
    if os.path.exists(feed_tmpl_path):

        def _stderr_log_func(level, name, message):
            import sys
            sys.stderr.write('%s - %s - %s\n' % (level.upper(), name, message))
            sys.stderr.flush()

        ashes_env = AshesEnv([templates_path], log_func=_stderr_log_func)
        proj_dict_list = []
        for proj in plist.project_list:
            cur = proj.to_dict()
            cur['name_slug'] = proj.name_slug
            cur['date_added_utc'] = proj.date_added.isoformat() + 'Z'
            cur['urls'] = get_url_list(proj)
            proj_dict_list.append(cur)
        cur_dt = datetime.datetime.utcnow().replace(
            microsecond=0).isoformat() + 'Z'
        # newest projects first
        newest_first = sorted(proj_dict_list,
                              key=lambda x: x['date_added'],
                              reverse=True)
        res = ashes_env.render('atom.xml', {
            'projects': newest_first,
            'last_generated_utc': cur_dt
        })
        with atomic_save(pdir + '/atom.xml') as f:
            f.write(res.encode('utf8'))

    return