Beispiel #1
0
class Site(object):
    _entry_type = Entry
    _entry_list_type = EntryList

    def __init__(self, input_path, **kw):
        # setting up paths
        self.paths = OMD()
        self._paths = OMD()  # for the raw input paths
        self.fal = ChertFAL(chlog)

        set_path = self._set_path
        set_path('input_path', input_path)
        set_path('config_path', kw.pop('config_path', None),
                 DEFAULT_CONFIG_FILENAME)
        set_path('entries_path', kw.pop('entries_path', None), 'entries')
        set_path('themes_path', kw.pop('themes_path', None), 'themes')
        set_path('uploads_path',
                 kw.pop('uploads_path', None),
                 'uploads',
                 required=False)
        set_path('output_path',
                 kw.pop('output_path', None),
                 'site',
                 required=False)
        self.reload_config()
        self.reset()
        self.dev_mode = kw.pop('dev_mode', False)
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw)
        chlog.debug('init site').success()
        return

    def reload_config(self, **kw):
        # TODO: take optional kwarg
        self.config = yaml.load(self.fal.read(self.paths['config_path']))

        # set theme
        with chlog.debug('setting theme'):
            theme_name = self.get_config('theme', 'name')
            theme_path = pjoin(self.themes_path, theme_name)
            self._set_path('theme_path', theme_path)

    def reset(self):
        """Called on __init__ and on reload before processing. Does not reset
        paths, etc., just state mutated during processing"""
        self.entries = self._entry_list_type()
        self.draft_entries = self._entry_list_type()
        self.special_entries = self._entry_list_type()
        self._rebuild_tag_map()

        self.last_load = None

        self.md_converter = Markdown(extensions=MD_EXTENSIONS)
        self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS)
        self._load_feed_templates()
        return

    def _set_path(self, name, path, default_suffix=None, required=True):
        """Set a path.

        Args:
            name: name of attribute (e.g., input_path)
            path: the path or None
            default_suffix: if path is None, self.input_path +
                default_suffix is used. The input_path should already
                be set.
            required: raise an error if path does not exist
        """
        with chlog.debug('set {path_name} path to {path_val}',
                         path_name=name,
                         path_val=path) as rec:
            self._paths[name] = path
            if path:
                self.paths[name] = abspath(path)
            elif default_suffix:
                self.paths[name] = pjoin(self.input_path, default_suffix)
            else:
                raise ValueError('no path or default set for %r' % name)
            if required:
                if not os.path.exists(self.paths[name]):
                    raise RuntimeError('expected existent %s path, not %r' %
                                       (name, self.paths[name]))
            rec.success('set {path_name} path to {path_val}',
                        path_val=self.paths[name])
        return

    def _load_feed_templates(self):
        default_atom_tmpl_path = pjoin(CUR_PATH, ATOM_FEED_FILENAME)
        atom_tmpl_path = pjoin(self.theme_path, ATOM_FEED_FILENAME)
        if not os.path.exists(atom_tmpl_path):
            atom_tmpl_path = default_atom_tmpl_path
        # TODO: defer opening to loading?
        self.atom_template = Template.from_path(atom_tmpl_path,
                                                name=ATOM_FEED_FILENAME)

        default_rss_tmpl_path = pjoin(CUR_PATH, RSS_FEED_FILENAME)
        rss_tmpl_path = pjoin(self.theme_path, RSS_FEED_FILENAME)
        if not os.path.exists(rss_tmpl_path):
            rss_tmpl_path = default_rss_tmpl_path
        # TODO: defer opening to loading?
        self.rss_template = Template.from_path(rss_tmpl_path,
                                               name=RSS_FEED_FILENAME)

    def get_config(self, section, key=None, default=_UNSET):
        try:
            section_map = self.config[section]
        except KeyError:
            if default is _UNSET:
                raise
            return default
        if key is None:
            return section_map
        try:
            return section_map[key]
        except KeyError:
            if default is _UNSET:
                raise
            return default

    def get_site_info(self):
        ret = {}
        ret['dev_mode'] = self.dev_mode
        refresh_secs = self.get_config('dev', 'autorefresh',
                                       DEFAULT_AUTOREFRESH) or False

        ret['dev_mode_refresh_seconds'] = refresh_secs
        site_config = self.get_config('site')
        ret['title'] = site_config.get('title', SITE_TITLE)
        ret['head_title'] = site_config.get('title', ret['title'])
        ret['tagline'] = site_config.get('tagline', '')
        ret['primary_links'] = self._get_links('site', 'primary_links')
        ret['secondary_links'] = self._get_links('site', 'secondary_links')
        ret['charset'] = 'UTF-8'  # not really overridable
        ret['lang_code'] = site_config.get('lang_code', 'en')
        ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT)
        ret['author_name'] = site_config.get('author', SITE_AUTHOR)
        ret['enable_analytics'] = site_config.get('enable_analytics', True)
        ret['analytics_code'] = self._get_analytics_code()

        prod_config = self.get_config('prod')
        ret['canonical_domain'] = prod_config.get('canonical_domain',
                                                  CANONICAL_DOMAIN).rstrip('/')
        ret['canonical_base_path'] = prod_config.get('canonical_base_path',
                                                     CANONICAL_BASE_PATH)
        if not ret['canonical_base_path'].endswith('/'):
            ret['canonical_base_path'] += '/'
        ret['canonical_url'] = ret['canonical_domain'] + ret[
            'canonical_base_path']
        ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME
        ret['canonical_rss_feed_url'] = ret['canonical_url'] + RSS_FEED_FILENAME
        ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME
        ret['canonical_atom_feed_url'] = ret[
            'canonical_url'] + ATOM_FEED_FILENAME

        now = datetime.now(LocalTZ)
        ret['last_generated'] = to_timestamp(now)
        ret['last_generated_utc'] = to_timestamp(now, to_utc=True)
        ret['export_html_ext'] = EXPORT_HTML_EXT
        ret['export_src_ext'] = EXPORT_SRC_EXT
        return ret

    def _get_analytics_code(self):
        with chlog.debug('set analytics code') as rec:
            code = self.get_config('site', 'analytics_code', None)
            if code is None:
                rec.failure('site.analytics_code not set in config.yaml')
                return ''
            match = _analytics_re.search(unicode(code))
            if not match:
                rec.failure('analytics code blank or invalid: {!r}', code)
                return ''
            code = match.group('code')
            if len(code) < 6:
                rec.failure('analytics code too short: {!r}', code)
                return ''
            rec.success('analytics code set to {!r}', code)
        return code

    def _get_links(self, group, name):
        link_list = list(self.get_config(group, name, []))
        for link in link_list:
            if link['href'] and URL(link['href']).host:
                link['is_external'] = True
            else:
                link['is_external'] = False
        return link_list

    @property
    def input_path(self):
        return self.paths['input_path']

    @property
    def entries_path(self):
        return self.paths['entries_path']

    @property
    def themes_path(self):
        return self.paths['themes_path']

    @property
    def theme_path(self):
        return self.paths['theme_path']

    @property
    def uploads_path(self):
        return self.paths['uploads_path']

    @property
    def output_path(self):
        return self.paths['output_path']

    @property
    def all_entries(self):
        return (self.special_entries.entries + self.entries.entries +
                self.draft_entries.entries)

    def process(self):
        if self.last_load:
            self.reload_config()
            self.reset()
        self.load()
        self.validate()
        self.render()
        self.audit()
        self.export()

    def _load_custom_mod(self):
        input_path = self.paths['input_path']
        custom_mod_path = pjoin(input_path, 'custom.py')
        if not os.path.exists(custom_mod_path):
            self.custom_mod = None
            return
        # site_name = os.path.split(input_path)[1]
        with chlog.debug('import site custom module'):
            mod_name = 'custom'
            self.custom_mod = imp.load_source(mod_name, custom_mod_path)

    def _call_custom_hook(self, hook_name):
        with chlog.debug('call custom {hook_name} hook',
                         hook_name=hook_name,
                         reraise=False) as rec:
            if not self.custom_mod:
                # TODO: success or failure?
                rec.failure('no custom module loaded')
            try:
                hook_func = getattr(self.custom_mod, 'chert_' + hook_name)
            except AttributeError:
                rec.failure('no {} hook defined', hook_name)
                return
            hook_func(self)
        return

    @chlog.wrap('critical', 'load site')
    def load(self):
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        entry_paths = []
        for entry_path in iter_find_files(entries_path, ENTRY_PATS):
            entry_paths.append(entry_path)
        entry_paths.sort()
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except:
                    rec['entry_path'] = ep
                    rec.exception(
                        'entry {entry_path} load error: {exc_message}')
                    continue
                else:
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')

    def _rebuild_tag_map(self):
        self.tag_map = {}
        for entry in self.entries:
            for tag in entry.tags:
                try:
                    self.tag_map[tag].append(entry)
                except KeyError:
                    self.tag_map[tag] = self._entry_list_type([entry], tag=tag)
        for tag, entry_list in self.tag_map.items():
            entry_list.sort()

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        self._call_custom_hook('pre_validate')
        dup_id_map = {}
        eid_map = OMD([(e.entry_root, e) for e in self.entries])
        for eid in eid_map:
            elist = eid_map.getlist(eid)
            if len(elist) > 1:
                dup_id_map[eid] = elist
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')

        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        return slugify(header_text,
                       delim=self.get_config('site', 'anchor_delim', '-'))

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            if not string:
                return ''
            ret = mdc.convert(string)
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            if not string:
                return ''

            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain, entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=False),
                'site': site_info
            }
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree,
                                  make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(
                    content_ihtml, canonical_domain, entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)

            entry.content_ihtml = content_ihtml
            return

        def render_html(entry, with_links=False):
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {
                'entry': entry.to_dict(with_links=with_links),
                'site': site_info
            }
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        with chlog.info('render entry html'):
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for tag, entry_list in self.tag_map.items():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Validation of rendered content, to be used for link checking.
        """
        # TODO: check for &nbsp; and other common HTML entities in
        # feed xml (these entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        self._call_custom_hook('pre_audit')
        self._call_custom_hook('post_audit')

    @chlog.wrap('critical', 'export site')
    def export(self):
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')

    def serve(self):
        dev_config = self.get_config('dev')
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        port = dev_config.get('server_port', int(DEV_SERVER_PORT))
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                if not self.path.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                self.path = self.path[len(base_url):]
                if not self.path.startswith('/'):
                    self.path = '/' + self.path
                return SimpleHTTPRequestHandler.send_head(self)

        Handler.extensions_map.update({
            '.md': 'text/plain',
            '.json': 'application/json'
        })

        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        config_path = self.paths['config_path']
        entries_path = self.paths['entries_path']
        theme_path = self.paths['theme_path']
        output_path = self.paths['output_path']
        for changed in _iter_changed_files(entries_path, theme_path,
                                           config_path):
            if serving:
                print 'Changed %s files, regenerating...' % len(changed)
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print 'Serving from %s' % output_path
            os.chdir(abspath(output_path))
            print 'Serving at http://%s:%s%s' % (host, port, base_url)

            thread = Thread(target=server.serve_forever)
            thread.daemon = True
            thread.start()
            if not serving:
                serving = True
        # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        #self._load_custom_mod()
        #self._call_custom_hook('pre_publish')
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')
        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        assert os.path.exists(local_site_path + 'index.html')
        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        remote_slug = "%s@%s:'%s'" % (remote_user, remote_host,
                                      shell_quote(remote_path))

        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd, rsync_flags,
                                           local_site_path, remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print 'Executing', full_rsync_cmd
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            rsync_output = cpe.output
            print rsync_output
            log_rec.failure(
                'publish failed: rsync got exit code {rsync_exit_code}')
            return False
        else:
            print rsync_output
            log_rec.success()
        return True
Beispiel #2
0
class Site(object):
    _entry_type = Entry
    _entry_list_type = EntryList

    def __init__(self, input_path, **kw):
        # setting up paths
        self.paths = OMD()
        self._paths = OMD()  # for the raw input paths
        self.fal = ChertFAL(chlog)

        set_path = self._set_path
        set_path('input_path', input_path)
        set_path('config_path', kw.pop('config_path', None),
                 DEFAULT_CONFIG_FILENAME)
        set_path('entries_path', kw.pop('entries_path', None), 'entries')
        set_path('themes_path', kw.pop('themes_path', None), 'themes')
        set_path('uploads_path', kw.pop('uploads_path', None), 'uploads',
                 required=False)
        set_path('output_path', kw.pop('output_path', None), 'site',
                 required=False)
        self.reload_config()
        self.reset()
        self.dev_mode = kw.pop('dev_mode', False)
        if kw:
            raise TypeError('unexpected keyword arguments: %r' % kw)
        chlog.debug('init site').success()
        return

    def reload_config(self, **kw):
        # TODO: take optional kwarg
        self.config = yaml.load(self.fal.read(self.paths['config_path']))

        # set theme
        with chlog.debug('setting theme'):
            theme_name = self.get_config('theme', 'name')
            theme_path = pjoin(self.themes_path, theme_name)
            self._set_path('theme_path', theme_path)

    def reset(self):
        """Called on __init__ and on reload before processing. Does not reset
        paths, etc., just state mutated during processing"""
        self.entries = self._entry_list_type()
        self.draft_entries = self._entry_list_type()
        self.special_entries = self._entry_list_type()
        self._rebuild_tag_map()

        self.last_load = None

        self.md_converter = Markdown(extensions=MD_EXTENSIONS)
        self.inline_md_converter = Markdown(extensions=INLINE_MD_EXTENSIONS)
        self._load_feed_templates()
        return

    def _set_path(self, name, path, default_suffix=None, required=True):
        """Set a path.

        Args:
            name: name of attribute (e.g., input_path)
            path: the path or None
            default_suffix: if path is None, self.input_path +
                default_suffix is used. The input_path should already
                be set.
            required: raise an error if path does not exist
        """
        with chlog.debug('set {path_name} path to {path_val}',
                         path_name=name, path_val=path) as rec:
            self._paths[name] = path
            if path:
                self.paths[name] = abspath(path)
            elif default_suffix:
                self.paths[name] = pjoin(self.input_path, default_suffix)
            else:
                raise ValueError('no path or default set for %r' % name)
            if required:
                if not os.path.exists(self.paths[name]):
                    raise RuntimeError('expected existent %s path, not %r'
                                       % (name, self.paths[name]))
            rec.success('set {path_name} path to {path_val}',
                        path_val=self.paths[name])
        return

    def _load_feed_templates(self):
        default_atom_tmpl_path = pjoin(CUR_PATH, ATOM_FEED_FILENAME)
        atom_tmpl_path = pjoin(self.theme_path, ATOM_FEED_FILENAME)
        if not os.path.exists(atom_tmpl_path):
            atom_tmpl_path = default_atom_tmpl_path
        # TODO: defer opening to loading?
        self.atom_template = Template.from_path(atom_tmpl_path,
                                                name=ATOM_FEED_FILENAME)

        default_rss_tmpl_path = pjoin(CUR_PATH, RSS_FEED_FILENAME)
        rss_tmpl_path = pjoin(self.theme_path, RSS_FEED_FILENAME)
        if not os.path.exists(rss_tmpl_path):
            rss_tmpl_path = default_rss_tmpl_path
        # TODO: defer opening to loading?
        self.rss_template = Template.from_path(rss_tmpl_path,
                                               name=RSS_FEED_FILENAME)

    def get_config(self, section, key=None, default=_UNSET):
        try:
            section_map = self.config[section]
        except KeyError:
            if default is _UNSET:
                raise
            return default
        if key is None:
            return section_map
        try:
            return section_map[key]
        except KeyError:
            if default is _UNSET:
                raise
            return default

    def get_site_info(self):
        ret = {}
        ret['dev_mode'] = self.dev_mode
        refresh_secs = self.get_config('dev', 'autorefresh', DEFAULT_AUTOREFRESH) or False

        ret['dev_mode_refresh_seconds'] = refresh_secs
        site_config = self.get_config('site')
        ret['title'] = site_config.get('title', SITE_TITLE)
        ret['head_title'] = site_config.get('title', ret['title'])
        ret['tagline'] = site_config.get('tagline', '')
        ret['primary_links'] = self._get_links('site', 'primary_links')
        ret['secondary_links'] = self._get_links('site', 'secondary_links')
        ret['charset'] = 'UTF-8'  # not really overridable
        ret['lang_code'] = site_config.get('lang_code', 'en')
        ret['copyright_notice'] = site_config.get('copyright', SITE_COPYRIGHT)
        ret['author_name'] = site_config.get('author', SITE_AUTHOR)
        ret['enable_analytics'] = site_config.get('enable_analytics', True)
        ret['analytics_code'] = self._get_analytics_code()

        prod_config = self.get_config('prod')
        ret['canonical_domain'] = prod_config.get('canonical_domain',
                                                  CANONICAL_DOMAIN).rstrip('/')
        ret['canonical_base_path'] = prod_config.get('canonical_base_path',
                                                     CANONICAL_BASE_PATH)
        if not ret['canonical_base_path'].endswith('/'):
            ret['canonical_base_path'] += '/'
        ret['canonical_url'] = ret['canonical_domain'] + ret['canonical_base_path']
        ret['rss_feed_url'] = ret['canonical_base_path'] + RSS_FEED_FILENAME
        ret['canonical_rss_feed_url'] = ret['canonical_url'] + RSS_FEED_FILENAME
        ret['atom_feed_url'] = ret['canonical_base_path'] + ATOM_FEED_FILENAME
        ret['canonical_atom_feed_url'] = ret['canonical_url'] + ATOM_FEED_FILENAME

        now = datetime.now(LocalTZ)
        ret['last_generated'] = to_timestamp(now)
        ret['last_generated_utc'] = to_timestamp(now, to_utc=True)
        ret['export_html_ext'] = EXPORT_HTML_EXT
        ret['export_src_ext'] = EXPORT_SRC_EXT
        return ret

    def _get_analytics_code(self):
        with chlog.debug('set analytics code') as rec:
            code = self.get_config('site', 'analytics_code', None)
            if code is None:
                rec.failure('site.analytics_code not set in config.yaml')
                return ''
            match = _analytics_re.search(unicode(code))
            if not match:
                rec.failure('analytics code blank or invalid: {!r}', code)
                return ''
            code = match.group('code')
            if len(code) < 6:
                rec.failure('analytics code too short: {!r}', code)
                return ''
            rec.success('analytics code set to {!r}', code)
        return code

    def _get_links(self, group, name):
        link_list = list(self.get_config(group, name, []))
        for link in link_list:
            if link['href'] and URL(link['href']).host:
                link['is_external'] = True
            else:
                link['is_external'] = False
        return link_list

    @property
    def input_path(self):
        return self.paths['input_path']

    @property
    def entries_path(self):
        return self.paths['entries_path']

    @property
    def themes_path(self):
        return self.paths['themes_path']

    @property
    def theme_path(self):
        return self.paths['theme_path']

    @property
    def uploads_path(self):
        return self.paths['uploads_path']

    @property
    def output_path(self):
        return self.paths['output_path']

    @property
    def all_entries(self):
        return (self.special_entries.entries
                + self.entries.entries
                + self.draft_entries.entries)

    def process(self):
        if self.last_load:
            self.reload_config()
            self.reset()
        self.load()
        self.validate()
        self.render()
        self.audit()
        self.export()

    def _load_custom_mod(self):
        input_path = self.paths['input_path']
        custom_mod_path = pjoin(input_path, 'custom.py')
        if not os.path.exists(custom_mod_path):
            self.custom_mod = None
            return
        # site_name = os.path.split(input_path)[1]
        with chlog.debug('import site custom module'):
            mod_name = 'custom'
            self.custom_mod = imp.load_source(mod_name, custom_mod_path)

    def _call_custom_hook(self, hook_name):
        with chlog.debug('call custom {hook_name} hook',
                         hook_name=hook_name,
                         reraise=False) as rec:
            if not self.custom_mod:
                # TODO: success or failure?
                rec.failure('no custom module loaded')
            try:
                hook_func = getattr(self.custom_mod, 'chert_' + hook_name)
            except AttributeError:
                rec.failure('no {} hook defined', hook_name)
                return
            hook_func(self)
        return

    @chlog.wrap('critical', 'load site')
    def load(self):
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        entry_paths = []
        for entry_path in iter_find_files(entries_path, ENTRY_PATS):
            entry_paths.append(entry_path)
        entry_paths.sort()
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except:
                    rec.exception('entry load error: {exc_message}')
                    continue
                else:
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')

    def _rebuild_tag_map(self):
        self.tag_map = {}
        for entry in self.entries:
            for tag in entry.tags:
                try:
                    self.tag_map[tag].append(entry)
                except KeyError:
                    self.tag_map[tag] = self._entry_list_type([entry], tag=tag)
        for tag, entry_list in self.tag_map.items():
            entry_list.sort()

    @chlog.wrap('critical', 'validate site')
    def validate(self):
        self._call_custom_hook('pre_validate')
        dup_id_map = {}
        eid_map = OMD([(e.entry_root, e) for e in self.entries])
        for eid in eid_map:
            elist = eid_map.getlist(eid)
            if len(elist) > 1:
                dup_id_map[eid] = elist
        if dup_id_map:
            raise ValueError('duplicate entry IDs detected: %r' % dup_id_map)
        self._call_custom_hook('post_validate')

        # TODO: assert necessary templates are present (entry.html, etc.)

    def _make_anchor_id(self, header_text):
        return slugify(header_text,
                       delim=self.get_config('site', 'anchor_delim', '-'))

    @chlog.wrap('critical', 'render site', verbose=True)
    def render(self):
        self._call_custom_hook('pre_render')
        entries = self.entries
        mdc, imdc = self.md_converter, self.inline_md_converter
        site_info = self.get_site_info()
        canonical_domain = site_info['canonical_domain']

        def markdown2html(string):
            if not string:
                return ''
            ret = mdc.convert(string)
            mdc.reset()
            return ret

        def markdown2ihtml(string, entry_fn):
            if not string:
                return ''

            ret = hypertext.canonicalize_links(imdc.convert(string),
                                               canonical_domain,
                                               entry_fn)
            imdc.reset()
            return ret

        def render_parts(entry):
            for part in entry.loaded_parts:
                part['content_html'] = markdown2html(part['content'])
                part['content_ihtml'] = markdown2ihtml(part['content'],
                                                       entry.output_filename)
            if not entry.summary:
                with chlog.debug('autosummarizing', reraise=False):
                    entry.summary = entry._autosummarize()

            tmpl_name = entry.entry_layout + MD_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=False),
                          'site': site_info}
            entry.content_md = self.md_renderer.render(tmpl_name, render_ctx)

            tmpl_name = entry.content_layout + HTML_LAYOUT_EXT
            content_html = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('parse_content_html'):
                content_html_tree = hypertext.html_text_to_tree(content_html)
            with chlog.debug('add_toc_content_html'):
                hypertext.add_toc(content_html_tree, make_anchor_id=self._make_anchor_id)
            with chlog.debug('retarget_links_content_html'):
                _mode = self.get_config('site', 'retarget_links', 'external')
                hypertext.retarget_links(content_html_tree, mode=_mode)
            with chlog.debug('reserialize_content_html'):
                content_html = hypertext.html_tree_to_text(content_html_tree)
            entry.content_html = content_html

            render_ctx['inline'] = True
            content_ihtml = self.html_renderer.render(tmpl_name, render_ctx)
            with chlog.debug('canonicalize_ihtml_links'):
                # TODO: use tree (and move slightly down)
                content_ihtml = hypertext.canonicalize_links(content_ihtml,
                                                             canonical_domain,
                                                             entry.output_filename)
            with chlog.debug('parse_content_ihtml'):
                content_ihtml_tree = hypertext.html_text_to_tree(content_ihtml)
            with chlog.debug('add_toc_content_ihtml'):
                hypertext.add_toc(content_ihtml_tree)
            with chlog.debug('reserialize_content_ihtml'):
                content_ihtml = hypertext.html_tree_to_text(content_ihtml_tree)

            entry.content_ihtml = content_ihtml
            return

        def render_html(entry, with_links=False):
            tmpl_name = entry.entry_layout + HTML_LAYOUT_EXT
            render_ctx = {'entry': entry.to_dict(with_links=with_links),
                          'site': site_info}
            entry_html = self.html_renderer.render(tmpl_name, render_ctx)
            entry.entry_html = entry_html
            return

        with chlog.info('render published entry content', verbose=True):
            for entry in entries:
                render_parts(entry)
        with chlog.info('render draft entry content', verbose=True):
            for entry in self.draft_entries:
                render_parts(entry)
        with chlog.info('render special entry content', verbose=True):
            for entry in self.special_entries:
                render_parts(entry)

        with chlog.info('render entry html'):
            for entry in entries:
                render_html(entry, with_links=True)
            for entry in self.draft_entries:
                render_html(entry)
            for entry in self.special_entries:
                render_html(entry)

        # render feeds
        with chlog.info('render feed and tag lists'):
            self.entries.render(site_obj=self)
            for tag, entry_list in self.tag_map.items():
                entry_list.render(site_obj=self)

        self._call_custom_hook('post_render')

    @chlog.wrap('critical', 'audit site')
    def audit(self):
        """
        Validation of rendered content, to be used for link checking.
        """
        # TODO: check for &nbsp; and other common HTML entities in
        # feed xml (these entities aren't supported in XML/Atom/RSS)
        # the only ok ones are here: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references#Predefined_entities_in_XML
        self._call_custom_hook('pre_audit')
        self._call_custom_hook('post_audit')

    @chlog.wrap('critical', 'export site')
    def export(self):
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')

    def serve(self):
        dev_config = self.get_config('dev')
        host = dev_config.get('server_host', DEV_SERVER_HOST)
        port = dev_config.get('server_port', int(DEV_SERVER_PORT))
        base_url = dev_config.get('base_path', DEV_SERVER_BASE_PATH)

        class Handler(SimpleHTTPRequestHandler):
            def send_head(self):
                if not self.path.startswith(base_url):
                    self.send_error(404, 'File not found')
                    return None
                self.path = self.path[len(base_url):]
                if not self.path.startswith('/'):
                    self.path = '/' + self.path
                return SimpleHTTPRequestHandler.send_head(self)
        Handler.extensions_map.update({'.md': 'text/plain',
                                       '.json': 'application/json'})


        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            """Handle requests in a separate thread."""

        server = ThreadedHTTPServer((host, port), Handler)
        serving = False

        config_path = self.paths['config_path']
        entries_path = self.paths['entries_path']
        theme_path = self.paths['theme_path']
        output_path = self.paths['output_path']
        for changed in _iter_changed_files(entries_path, theme_path, config_path):
            if serving:
                print 'Changed %s files, regenerating...' % len(changed)
                server.shutdown()
            with chlog.critical('site generation', reraise=True):
                self.process()
            print 'Serving from %s' % output_path
            os.chdir(abspath(output_path))
            print 'Serving at http://%s:%s%s' % (host, port, base_url)

            thread = Thread(target=server.serve_forever)
            thread.daemon = True
            thread.start()
            if not serving:
                serving = True
        # TODO: hook(s)?
        return

    @chlog.wrap('critical', 'publish site', inject_as='log_rec')
    def publish(self, log_rec):  # deploy?
        #self._load_custom_mod()
        #self._call_custom_hook('pre_publish')
        prod_config = self.get_config('prod')
        rsync_cmd = prod_config.get('rsync_cmd', 'rsync')
        if not rsync_cmd.isalpha():
            rsync_cmd = shell_quote(rsync_cmd)
        # TODO: add -e 'ssh -o "NumberOfPasswordPrompts 0"' to fail if
        # ssh keys haven't been set up.
        rsync_flags = prod_config.get('rsync_flags', 'avzPk')
        local_site_path = self.output_path
        if not local_site_path.endswith('/'):
            local_site_path += '/'  # not just cosmetic; rsync needs this
        assert os.path.exists(local_site_path + 'index.html')
        remote_host = prod_config['remote_host']
        remote_user = prod_config['remote_user']
        remote_path = prod_config['remote_path']
        remote_slug = "%s@%s:'%s'" % (remote_user,
                                      remote_host,
                                      shell_quote(remote_path))

        full_rsync_cmd = '%s -%s %s %s' % (rsync_cmd,
                                           rsync_flags,
                                           local_site_path,
                                           remote_slug)
        log_rec['rsync_cmd'] = full_rsync_cmd
        print 'Executing', full_rsync_cmd
        try:
            rsync_output = subprocess.check_output(full_rsync_cmd, shell=True)
        except subprocess.CalledProcessError as cpe:
            log_rec['rsync_exit_code'] = cpe.returncode
            rsync_output = cpe.output
            print rsync_output
            log_rec.failure('publish failed: rsync got exit code {rsync_exit_code}')
            return False
        else:
            print rsync_output
            log_rec.success()
        return True
Beispiel #3
0
RUN_UUID = uuid.uuid4()
UPDATED_DT_FORMAT = '%Y-%m-%d %H:%M:%S'

DEBUG = False

DEFAULT_CARD = 'https://upload.wikimedia.org/wikipedia/commons/8/81/WikiSplat.png'

# these paths are relative to the campaign directory
STATE_FULL_PATH_TMPL = '/data/%Y%m/state_full_%Y%m%d_%H%M%S.json.gz'
STATE_PATH_TMPL = '/data/%Y%m/state_%Y%m%d_%H%M%S.json'
STATE_FULL_FN_GLOB = 'state_full_*.json.gz'
STATE_FN_GLOB = 'state_*.json'

ASHES_ENV = AshesEnv(TEMPLATE_PATH,
                     filters={'percentage': lambda n: round(n * 100, 2)})
ASHES_ENV.load_all()


def to_unicode(obj):
    try:
        return unicode(obj)
    except UnicodeDecodeError:
        return unicode(obj, encoding='utf8')


@attr.s
class PTArticle(object):
    lang = attr.ib()
    title = attr.ib()
    timestamp = attr.ib()