def _import_page(self, url):
    """Fetch ``url`` and save its main content as ``<slug>.html``.

    The text of the page's ``<title>`` is used as the document title and,
    slugified, as the output file name.  Of the candidate nodes returned by
    ``libextract.api.extract`` the one with the most text is kept and
    rendered through ``doc_template``.

    Nothing is written, and an error is logged through ``LOGGER``, when the
    response status is not 2xx, when the page has no usable title, or when
    no candidate node is found.

    :param url: address of the page to import.
    """
    r = requests.get(url)
    if not 199 < r.status_code < 300:
        LOGGER.error('Error fetching URL: {}'.format(url))
        return

    # Got it: use the page's title
    doc = lxml.html.fromstring(r.content)
    title_node = doc.find('*//title')
    title = title_node.text if title_node is not None else None
    if not title:
        # Without a title there is nothing to derive the file name from.
        LOGGER.error('No title found in URL: {}'.format(url))
        return
    if sys.version_info[0] == 2 and isinstance(title, str):
        title = title.decode('utf-8')

    try:
        slug = utils.slugify(title, lang='')
    except TypeError:
        # Retry without the ``lang`` keyword (presumably an older
        # slugify() signature -- confirm against utils).
        slug = utils.slugify(title)

    nodes = list(libextract.api.extract(r.content))
    if not nodes:
        LOGGER.error('No content found in URL: {}'.format(url))
        return
    # Let's assume the node with more text is the good one
    # (max() keeps the first node on ties).
    node = max(nodes, key=lambda n: len(n.text_content()))

    document = doc_template.format(
        title=title,
        slug=slug,
        content=lxml.html.tostring(
            node, encoding='utf8', method='html',
            pretty_print=True).decode('utf8'),
    )
    with codecs.open(slug + '.html', 'w+', encoding='utf-8') as outf:
        outf.write(document)
def _import_page(self, url):
    """Fetch ``url`` and save its main content as ``<slug>.html``.

    The text of the page's ``<title>`` is used as the document title and,
    slugified, as the output file name.  Of the candidate nodes returned by
    ``libextract.api.extract`` the one with the most text is kept and
    rendered through ``doc_template``.

    Nothing is written, and an error is logged through ``LOGGER``, when the
    response status is not 2xx, when the page has no usable title, or when
    no candidate node is found.

    :param url: address of the page to import.
    """
    r = requests.get(url)
    if not 199 < r.status_code < 300:
        LOGGER.error('Error fetching URL: {}'.format(url))
        return

    # Got it: use the page's title
    doc = lxml.html.fromstring(r.content)
    title_node = doc.find('*//title')
    title = title_node.text if title_node is not None else None
    if not title:
        # Without a title there is nothing to derive the file name from.
        LOGGER.error('No title found in URL: {}'.format(url))
        return
    if sys.version_info[0] == 2 and isinstance(title, str):
        title = title.decode('utf-8')

    try:
        slug = utils.slugify(title, lang='')
    except TypeError:
        # Retry without the ``lang`` keyword (presumably an older
        # slugify() signature -- confirm against utils).
        slug = utils.slugify(title)

    nodes = list(libextract.api.extract(r.content))
    if not nodes:
        LOGGER.error('No content found in URL: {}'.format(url))
        return
    # Let's assume the node with more text is the good one
    # (max() keeps the first node on ties).
    node = max(nodes, key=lambda n: len(n.text_content()))

    document = doc_template.format(
        title=title,
        slug=slug,
        content=lxml.html.tostring(
            node, encoding='utf8', method='html',
            pretty_print=True).decode('utf8'),
    )
    with codecs.open(slug + '.html', 'w+', encoding='utf-8') as outf:
        outf.write(document)
def series_path(self, name, lang):
    """Return the path components for the page of series ``name``.

    The components are the ``TRANSLATIONS`` entry for ``lang``, the literal
    ``'series'`` and the slugified name.  With ``PRETTY_URLS`` enabled the
    slug is followed by ``INDEX_FILE``; otherwise ``".html"`` is appended
    to the slug.  Falsy components are dropped from the result.
    """
    config = self.site.config
    pretty = config['PRETTY_URLS']
    components = [config['TRANSLATIONS'][lang], 'series']
    if pretty:
        components.append(slugify(name))
        components.append(config['INDEX_FILE'])
    else:
        components.append(slugify(name) + ".html")
    return [part for part in components if part]
def series_path(self, name, lang):
    """Return the path components for the page of series ``name``.

    The components are the ``TRANSLATIONS`` entry for ``lang``, the literal
    ``'series'`` and the slugified name.  With ``PRETTY_URLS`` enabled the
    slug is followed by ``INDEX_FILE``; otherwise ``".html"`` is appended
    to the slug.  Falsy components are dropped from the result.
    """
    config = self.site.config
    pretty = config['PRETTY_URLS']
    components = [config['TRANSLATIONS'][lang], 'series']
    if pretty:
        components.append(slugify(name))
        components.append(config['INDEX_FILE'])
    else:
        components.append(slugify(name) + ".html")
    return [part for part in components if part]
def _add_imports(self, symbols, htmlpath):
    """Build the imports section of the module documentation.

    Reads ``symbols['imports']``, which holds two mappings: ``'imports'``
    and ``'fromImports'``.  Emits the ``templates.IMPORTS`` heading with a
    dash underline when either mapping is non-empty, then one
    ``templates.LIST_LINK_ITEM`` line per entry, sorted by name.

    :param symbols: symbol table containing the ``'imports'`` entry.
    :param htmlpath: path of the HTML listing the links point into.
    :returns: the generated text ('' when there is nothing to list).
    """
    found = symbols['imports']
    plain_imports = found['imports']
    from_imports = found['fromImports']

    pieces = []
    if plain_imports or from_imports:
        pieces.append(
            templates.IMPORTS + ('-' * len(templates.IMPORTS)) + '\n')

    plain_names = sorted(plain_imports.keys())
    # Anchors of plain imports are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(os.path.splitext(htmlpath)[0])
    for imp in plain_names:
        pieces.append(templates.LIST_LINK_ITEM % {
            'name': imp,
            'link': '%s#%s-%s' % (htmlpath, slugy,
                                  plain_imports[imp]['lineno'])
        } + '\n')

    for imp in sorted(from_imports.keys()):
        # FIXME (kept from the original).  NOTE(review): this link omits
        # the slug prefix used for plain imports above -- confirm intended.
        try:
            pieces.append(templates.LIST_LINK_ITEM % {
                'name': from_imports[imp]['module'] + ".%s" % imp,
                'link': '%s#%s' % (htmlpath, from_imports[imp]['lineno'])
            } + '\n')
        except Exception as exc:
            # An entry that cannot be rendered is reported and skipped.
            print(exc)
            continue
    return ''.join(pieces)
def generate(self, item, feed):
    """Create a post for one feed ``item`` with the feed's compiler.

    The output path is ``feed['output_folder']`` joined with the slugified
    title plus the compiler's extension.  The body is rendered from
    ``feed['template']``.  Every key of ``feed['metadata']`` is resolved
    through ``self.get_data`` and passed to ``compiler.create_post`` as a
    keyword argument; ``tags`` falls back to ``feed['tags']``.
    """
    compiler = self.site.compilers[feed['format']]
    title = self.get_data(item, feed['metadata']['title'])
    post_path = os.path.join(
        feed['output_folder'], slugify(title, feed['lang']))
    post_path += compiler.extension()

    template_context = {'item': item, 'feed': feed, 'lang': feed['lang']}
    content = self.site.render_template(
        feed['template'], None, template_context)

    metadata = {
        key: self.get_data(item, spec)
        for key, spec in feed['metadata'].items()
    }
    if 'tags' not in metadata:
        metadata['tags'] = feed['tags']

    compiler.create_post(
        path=post_path,
        content=content,
        onefile=True,
        is_page=False,
        **metadata
    )
def import_item(self, item, out_folder=None):
    """Take an item from the feed and create a post file.

    The output path mirrors the path of the item's link (a trailing
    ``.html`` and leading slashes removed) below
    ``self.output_folder/out_folder``; the slug is the slugified last path
    segment.  The old-link -> new-URL mapping is always recorded in
    ``self.url_map``.  Files are written only for items with non-blank
    HTML content that are not excluded drafts.

    :param item: feed entry; ``link``, ``title``, ``published_parsed``,
        ``content``, ``tags`` and ``get("app_draft")`` are read.
    :param out_folder: sub-folder for the post, ``"posts"`` by default.
    """
    if out_folder is None:
        out_folder = "posts"

    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    link = item.link
    link_path = urlparse(link).path

    title = item.title
    # blogger supports empty titles, which Nikola doesn't
    if not title:
        LOGGER.warn("Empty title in post with URL {0}. Using NO_TITLE "
                    "as placeholder, please fix.".format(link))
        title = "NO_TITLE"

    if link_path.lower().endswith(".html"):
        link_path = link_path[:-5]
    link_path = link_path.lstrip("/")

    out_path = os.path.join(self.output_folder, out_folder, link_path)
    slug = utils.slugify(link_path.split("/")[-1])

    if not slug:  # should never happen
        # Format the title into the message: passing it as a separate
        # argument left it out of the logged text.
        LOGGER.error("Error converting post: {0}".format(title))
        return

    description = ""
    post_date = datetime.datetime.fromtimestamp(
        time.mktime(item.published_parsed))

    # Default to empty so an item without a text/html body reaches the
    # "no content" branch below instead of raising UnboundLocalError.
    content = ""
    for candidate in item.content:
        if candidate.type == "text/html":
            content = candidate.value
            break
    # FIXME: handle attachments

    tags = [tag.term for tag in item.tags
            if tag.scheme == "http://www.blogger.com/atom/ns#"]

    is_draft = bool(item.get("app_draft"))
    if is_draft:
        tags.append("draft")

    self.url_map[link] = (self.context["SITE_URL"] + out_folder + "/" +
                          link_path + ".html")

    if is_draft and self.exclude_drafts:
        LOGGER.notice('Draft "{0}" will not be imported.'.format(title))
    elif content.strip():
        # If no content is found, no files are written.
        content = self.transform_content(content)
        self.write_metadata(out_path + ".meta", title, slug, post_date,
                            description, tags)
        self.write_content(out_path + ".html", content)
    else:
        LOGGER.warn('Not going to import "{0}" because it seems to contain'
                    " no content.".format(title))
def _add_function(self, symbol, htmlpath, docpath):
    """Add the function with the function content and style.

    Produces a ``templates.FUNCTION`` heading underlined with ``~``, a
    ``templates.CODE`` snippet, the escaped docstring (or
    ``templates.NO_DESCRIPTION``), an optional decorator list and a closing
    ``----`` rule.  The function is also recorded in ``self.__functions``.

    :param symbol: mapping with ``name``, ``lineno``, ``docstring`` and
        ``decorators``.
    :param htmlpath: path of the HTML listing the heading links into.
    :param docpath: stored alongside the function name and line number.
    :returns: the generated text.
    """
    # Link anchors are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(os.path.splitext(htmlpath)[0])
    function_name = templates.FUNCTION % {
        'name': "%s [at ln:%d]" % (symbol['name'], symbol['lineno']),
        'link': '%s#%s-%s' % (htmlpath, slugy, symbol['lineno'])
    }
    content = function_name + ('~' * len(function_name)) + '\n'
    content += templates.CODE % {'code': "def %s:" % symbol['name']}

    docstring = symbol['docstring']
    if docstring:
        # Backslash-escape *, ` and _ in the docstring.  '\\_' is the same
        # two-character string the invalid escape '\_' used to produce.
        escaped = '| %s' % docstring.replace(
            '*', '\\*').replace('`', '\\`').replace('_', '\\_')
        doc = '| '.join(
            [line + '\n' for line in escaped.split('\n')]) + '\n'
    else:
        doc = templates.NO_DESCRIPTION
    content += doc

    if symbol['decorators']:
        content += templates.DECORATORS
        for decorator in symbol['decorators']:
            content += '- *%s*\n' % decorator

    self.__functions.append((symbol['name'], docpath, symbol['lineno']))

    content += '\n----\n'
    return content
def get_path(self, author, lang, dest_type='page'):
    """Return the path for the given author classification.

    The result is a ``(components, 'auto')`` tuple.  ``components`` holds
    the value of ``AUTHOR_PATH(lang)`` followed by the author name, which
    is slugified only when ``SLUG_AUTHOR_PATH`` is set.  ``dest_type`` is
    accepted but not used here.
    """
    config = self.site.config
    slug = utils.slugify(author, lang) if config['SLUG_AUTHOR_PATH'] else author
    components = [config['AUTHOR_PATH'](lang), slug]
    return components, 'auto'
def import_item(self, item):
    """Take an item from the feed and create a post file.

    Writes ``<slug>.meta`` and ``<slug>.html`` into ``self.output_folder``.
    The title is "Goodreads review: <title> (<author>)".  The body holds
    the user's review (if any), the rating and a link back to the original.

    :param item: feed entry; ``link``, ``title``, ``author_name``,
        ``user_read_at``, ``user_rating`` and ``get('user_review')`` are
        read.
    """
    link = item.link
    tracking_suffix = '?utm_medium=api&utm_source=rss'
    if link.endswith(tracking_suffix):
        link = link[:-len(tracking_suffix)]

    title = "Goodreads review: %s (%s)" % (item.title, item.author_name)
    slug = utils.slugify(title)

    # Needed because user_read_at can have a different locale
    saved = locale.getlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, (None, None))
    try:
        # The last six characters are dropped before parsing (presumably a
        # " +ZZZZ" UTC offset -- confirm against the feed).
        post_date = datetime.datetime.strptime(
            item.user_read_at[:-6], "%a, %d %b %Y %H:%M:%S")
    finally:
        # Restore the caller's locale even when the date fails to parse.
        locale.setlocale(locale.LC_ALL, saved)

    content = item.get('user_review') or ''
    content += ("<br/><br/>" if content else "") + \
        "Rating: %s/5" % item.user_rating
    content += "<br/><br/>Original: <a href=\"%s\">%s</a>" % (link, link)

    tags = [item.author_name, item.title.replace(", ", " - "),
            "Goodreads review"]

    content = self.transform_content(content)

    self.write_metadata(
        os.path.join(self.output_folder, slug + '.meta'),
        title, slug,
        # %M is minutes; the previous %m repeated the month here.
        post_date.strftime(r'%Y/%m/%d %H:%M:%S'),
        '', tags)
    self.write_content(
        os.path.join(self.output_folder, slug + '.html'),
        content)
def generate(self, item, feed):
    """Create a post for one feed ``item`` unless it predates ``start_at``.

    The output path is ``feed['output_folder']`` joined with the slugified
    title plus ``feed['source_extension']`` (``'.html'`` by default).  The
    body is rendered from ``feed['template']``.  Every key of
    ``feed['metadata']`` is resolved through ``self.get_data``; ``tags``
    falls back to ``feed['tags']``.  Items whose ``date`` metadata is
    earlier than ``feed['start_at']`` (default ``'1970-1-1'``) are skipped.
    Both dates are parsed with ``ignoretz=True``.
    """
    compiler = self.site.compilers[feed['format']]
    extension = feed.get('source_extension', '.html')
    title = self.get_data(item, feed['metadata']['title'])
    post_path = os.path.join(
        feed['output_folder'], slugify(title, feed['lang'])) + extension
    cutoff = dateutil.parser.parse(
        feed.get('start_at', '1970-1-1'), ignoretz=True)

    template_context = {'item': item, 'feed': feed, 'lang': feed['lang']}
    content = self.site.render_template(
        feed['template'], None, template_context)

    metadata = {
        key: self.get_data(item, spec)
        for key, spec in feed['metadata'].items()
    }
    if 'tags' not in metadata:
        metadata['tags'] = feed['tags']

    post_date = dateutil.parser.parse(metadata['date'], ignoretz=True)
    if post_date < cutoff:
        # skip old post
        return

    compiler.create_post(
        path=post_path,
        content=content,
        onefile=True,
        is_page=False,
        **metadata
    )
def import_posts(self, zipfile, names):
    """Import all posts.

    Each name is read from ``zipfile`` as JSON.  The post's title, publish
    date and ``object.content`` are used; every attachment URL is passed
    through ``micawber.parse_text`` and appended in its own ``<div>``.
    ``<slug>.meta`` and ``<slug>.html`` are written to
    ``self.output_folder/posts``.

    :param zipfile: archive object providing ``open(name, 'r')``.
    :param names: archive member names, one JSON post each.
    """
    out_folder = 'posts'
    providers = micawber.bootstrap_basic()
    for name in names:
        with zipfile.open(name, 'r') as post_f:
            data = json.load(post_f)

        title = data['title']
        slug = utils.slugify(title)
        if not slug:  # should never happen
            # Report the title in the message and move on to the next
            # post; returning here dropped every remaining post.
            LOGGER.error("Error converting post: {0}".format(title))
            continue

        description = ''
        post_date = dateutil.parser.parse(data["published"])
        content = data["object"]["content"]
        for obj in data["object"].get("attachments", []):
            content += '\n<div> {} </div>\n'.format(
                micawber.parse_text(obj["url"], providers))
        tags = []

        target = os.path.join(self.output_folder, out_folder, slug)
        self.write_metadata(target + '.meta', title, slug, post_date,
                            description, tags)
        self.write_content(target + '.html', content)
def render_listing(in_name, out_name):
    """Render the source file ``in_name`` as a highlighted listing page.

    The file is highlighted with the Pygments lexer matching its name
    (``TextLexer`` when none can be determined) and rendered to
    ``out_name`` through the ``listing.tmpl`` template.

    ``self``, ``kw`` and ``f`` are not defined in this function; they are
    presumably closed over from the enclosing scope -- confirm there.
    """
    with open(in_name, 'r') as fd:
        try:
            lexer = get_lexer_for_filename(in_name)
        except Exception:
            # Fall back to plain text, without trapping
            # KeyboardInterrupt/SystemExit as a bare except did.
            lexer = TextLexer()
        code = highlight(fd.read(), lexer,
                         HtmlFormatter(cssclass='code',
                                       linenos="table",
                                       nowrap=False,
                                       lineanchors=utils.slugify(f),
                                       anchorlinenos=True))
    title = os.path.basename(in_name)
    # Breadcrumbs: the path segments of out_name without the first and
    # the last one, followed by the file's base name.
    crumbs = out_name.split(os.sep)[1:-1] + [title]
    # TODO: write this in human
    # Every crumb but the last two gets a chain of '..' that shrinks by
    # one per crumb; the last two get '.' and '#'.
    paths = ['/'.join(['..'] * (len(crumbs) - 2 - i))
             for i in range(len(crumbs[:-2]))] + ['.', '#']
    context = {
        'code': code,
        'title': title,
        # Materialised so the pairs can be iterated more than once
        # (zip() is a one-shot iterator on Python 3).
        'crumbs': list(zip(paths, crumbs)),
        'lang': kw['default_lang'],
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def import_posts(self, zipfile, names):
    """Import all posts.

    Each name is read from ``zipfile`` as JSON.  The post's title, publish
    date and ``object.content`` are used; every attachment URL is passed
    through ``micawber.parse_text`` and appended in its own ``<div>``.
    ``<slug>.meta`` and ``<slug>.html`` are written to
    ``self.output_folder/posts``.

    :param zipfile: archive object providing ``open(name, 'r')``.
    :param names: archive member names, one JSON post each.
    """
    out_folder = 'posts'
    providers = micawber.bootstrap_basic()
    for name in names:
        with zipfile.open(name, 'r') as post_f:
            data = json.load(post_f)

        title = data['title']
        slug = utils.slugify(title)
        if not slug:  # should never happen
            # Report the title in the message and move on to the next
            # post; returning here dropped every remaining post.
            LOGGER.error("Error converting post: {0}".format(title))
            continue

        description = ''
        post_date = dateutil.parser.parse(data["published"])
        content = data["object"]["content"]
        for obj in data["object"].get("attachments", []):
            content += '\n<div> {} </div>\n'.format(
                micawber.parse_text(obj["url"], providers))
        tags = []

        target = os.path.join(self.output_folder, out_folder, slug)
        self.write_metadata(target + '.meta', title, slug, post_date,
                            description, tags)
        self.write_content(target + '.html', content)
def render_listing(in_name, out_name):
    """Render the source file ``in_name`` as a highlighted listing page.

    The file is highlighted with the Pygments lexer matching its name
    (``TextLexer`` when none can be determined) and rendered to
    ``out_name`` through the ``listing.tmpl`` template.

    ``self``, ``kw`` and ``f`` are not defined in this function; they are
    presumably closed over from the enclosing scope -- confirm there.
    """
    with open(in_name, 'r') as fd:
        try:
            lexer = get_lexer_for_filename(in_name)
        except Exception:
            # Fall back to plain text, without trapping
            # KeyboardInterrupt/SystemExit as a bare except did.
            lexer = TextLexer()
        code = highlight(fd.read(), lexer,
                         HtmlFormatter(cssclass='code',
                                       linenos="table",
                                       nowrap=False,
                                       lineanchors=utils.slugify(f),
                                       anchorlinenos=True))
    title = os.path.basename(in_name)
    # Breadcrumbs: the path segments of out_name without the first and
    # the last one, followed by the file's base name.
    crumbs = out_name.split(os.sep)[1:-1] + [title]
    # TODO: write this in human
    # Every crumb but the last two gets a chain of '..' that shrinks by
    # one per crumb; the last two get '.' and '#'.
    paths = ['/'.join(['..'] * (len(crumbs) - 2 - i))
             for i in range(len(crumbs[:-2]))] + ['.', '#']
    context = {
        'code': code,
        'title': title,
        # Materialised so the pairs can be iterated more than once
        # (zip() is a one-shot iterator on Python 3).
        'crumbs': list(zip(paths, crumbs)),
        'lang': kw['default_lang'],
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def _add_function(self, symbol, htmlpath, docpath):
    """Add the function with the function content and style.

    Produces a ``templates.FUNCTION`` heading underlined with ``~``, a
    ``templates.CODE`` snippet, the escaped docstring (or
    ``templates.NO_DESCRIPTION``), an optional decorator list and a closing
    ``----`` rule.  The function is also recorded in ``self.__functions``.

    :param symbol: mapping with ``name``, ``lineno``, ``docstring`` and
        ``decorators``.
    :param htmlpath: path of the HTML listing the heading links into.
    :param docpath: stored alongside the function name and line number.
    :returns: the generated text.
    """
    name_to_slugy = os.path.splitext(htmlpath)[0]
    if isinstance(name_to_slugy, bytes):
        # Only byte strings need decoding; text strings have no
        # .decode() on Python 3.
        name_to_slugy = name_to_slugy.decode('utf-8')
    # Link anchors are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(name_to_slugy)
    function_name = templates.FUNCTION % {
        'name': "%s [at ln:%d]" % (symbol['name'], symbol['lineno']),
        'link': '%s#%s-%s' % (htmlpath, slugy, symbol['lineno'])
    }
    content = function_name + ('~' * len(function_name)) + '\n'
    content += templates.CODE % {'code': "def %s:" % symbol['name']}

    docstring = symbol['docstring']
    if docstring:
        # Backslash-escape *, ` and _ in the docstring.  '\\_' is the same
        # two-character string the invalid escape '\_' used to produce.
        escaped = '| %s' % docstring.replace(
            '*', '\\*').replace('`', '\\`').replace('_', '\\_')
        doc = '| '.join(
            [line + '\n' for line in escaped.split('\n')]) + '\n'
    else:
        doc = templates.NO_DESCRIPTION
    content += doc

    if symbol['decorators']:
        content += templates.DECORATORS
        for decorator in symbol['decorators']:
            content += '- *%s*\n' % decorator

    self.__functions.append((symbol['name'], docpath, symbol['lineno']))

    content += '\n----\n'
    return content
def render_listing(in_name, out_name, folders=[], files=[]):
    """Render a listing page to ``out_name``.

    When ``in_name`` is given, that file is highlighted with the Pygments
    lexer matching its name (``TextLexer`` when none can be determined)
    and its base name becomes the title.  Otherwise code and title are
    empty.  ``folders`` and ``files`` are handed to the ``listing.tmpl``
    template unchanged; the shared default lists are never mutated here.

    ``self``, ``kw`` and ``f`` are not defined in this function; they are
    presumably closed over from the enclosing scope -- confirm there.
    """
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # Fall back to plain text, without trapping
                # KeyboardInterrupt/SystemExit as a bare except did.
                lexer = TextLexer()
            code = highlight(fd.read(), lexer,
                             HtmlFormatter(cssclass='code',
                                           linenos="table",
                                           nowrap=False,
                                           lineanchors=utils.slugify(f),
                                           anchorlinenos=True))
        title = os.path.basename(in_name)
    else:
        code = ''
        title = ''
    crumbs = utils.get_crumbs(
        os.path.relpath(out_name, kw['output_folder']), is_file=True)
    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'lang': kw['default_lang'],
        'folders': folders,
        'files': files,
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def get_path(self, classification, lang, dest_type='page'):
    """Return a path for the given classification.

    The result is a ``(components, 'auto')`` tuple.  ``components`` holds
    the value of ``AUTHOR_PATH(lang)`` followed by the classification,
    which is slugified only when ``SLUG_AUTHOR_PATH`` is set.
    ``dest_type`` is accepted but not used here.
    """
    config = self.site.config
    if config['SLUG_AUTHOR_PATH']:
        last_component = utils.slugify(classification, lang)
    else:
        last_component = classification
    components = [config['AUTHOR_PATH'](lang), last_component]
    return components, 'auto'
def _doc_link(rawtext, text, options={}, content=[]):
    """Handle the doc role.

    Splits ``text`` into an optional explicit title and a target slug (an
    optional ``#fragment`` is carried over to the permalink), slugifies the
    target and looks it up in ``doc_role.site.timeline``.

    :returns: ``(success, twin_slugs, title, permalink, slug)``.  On
        failure this is ``(False, False, None, None, slug)``.
        ``twin_slugs`` is True when a second post has the same slug.
    """
    # split link's text and post's slug in role content
    has_explicit_title, title, slug = split_explicit_title(text)
    # An absent and an empty fragment are both ignored further down.
    slug, _, fragment = slug.partition('#')
    slug = slugify(slug)

    # check if the slug given is part of our blog posts/pages
    post = None
    twin_slugs = False
    for candidate in doc_role.site.timeline:
        if candidate.meta('slug') != slug:
            continue
        if post is not None:
            # a second post with the same slug: flag it and stop looking
            twin_slugs = True
            break
        post = candidate

    if post is None:
        return False, False, None, None, slug

    if not has_explicit_title:
        # use post's title as link's text
        title = post.title()
    permalink = post.permalink()
    if fragment:
        permalink += '#' + fragment

    return True, twin_slugs, title, permalink, slug
def _add_imports(self, symbols, htmlpath):
    """Add the imports to the Module Documentation.

    Reads ``symbols['imports']``, which holds two mappings: ``'imports'``
    and ``'fromImports'``.  Emits the ``templates.IMPORTS`` heading with a
    dash underline when either mapping is non-empty, then one
    ``templates.LIST_LINK_ITEM`` line per entry, sorted by name.

    :param symbols: symbol table containing the ``'imports'`` entry.
    :param htmlpath: path of the HTML listing the links point into.
    :returns: the generated text ('' when there is nothing to list).
    """
    results = symbols['imports']
    imports = results['imports']
    fromImports = results['fromImports']

    pieces = []
    if imports or fromImports:
        pieces.append(
            templates.IMPORTS + ('-' * len(templates.IMPORTS)) + '\n')

    name_to_slugy = os.path.splitext(htmlpath)[0]
    if isinstance(name_to_slugy, bytes):
        # Only byte strings need decoding; text strings have no
        # .decode() on Python 3.
        name_to_slugy = name_to_slugy.decode('utf-8')
    # Anchors of plain imports are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(name_to_slugy)

    for imp in sorted(imports):
        pieces.append(templates.LIST_LINK_ITEM % {
            'name': imp,
            'link': '%s#%s-%s' % (htmlpath, slugy, imports[imp]['lineno'])
        } + '\n')

    for imp in sorted(fromImports):
        # NOTE(review): this link omits the slug prefix used for plain
        # imports above -- confirm that is intended.
        pieces.append(templates.LIST_LINK_ITEM % {
            'name': fromImports[imp]['module'] + ".%s" % imp,
            'link': '%s#%s' % (htmlpath, fromImports[imp]['lineno'])
        } + '\n')

    return ''.join(pieces)
def _doc_link(rawtext, text, options={}, content=[]):
    """Handle the doc role.

    Splits ``text`` into an optional explicit title and a target slug (an
    optional ``#fragment`` is carried over to the permalink) and resolves
    the target with ``_find_post``: first as given, then slugified.

    :returns: ``(success, twin_slugs, title, permalink, slug)``.  On
        failure this is ``(False, False, None, None, slug)``.
    """
    # split link's text and post's slug in role content
    has_explicit_title, title, slug = split_explicit_title(text)
    # An absent and an empty fragment are both ignored further down.
    slug, _, fragment = slug.partition('#')

    # Look for the unslugified input first, then try to slugify (Issue #3450)
    post, twin_slugs = _find_post(slug)
    if post is None:
        slug = slugify(slug)
        post, twin_slugs = _find_post(slug)

    if post is None:
        return False, False, None, None, slug

    if not has_explicit_title:
        # use post's title as link's text
        title = post.title()
    permalink = post.permalink()
    if fragment:
        permalink += '#' + fragment

    return True, twin_slugs, title, permalink, slug
def import_item(self, item, wordpress_namespace, out_folder=None):
    """Take an item from the feed and create a post file.

    The slug comes from the path of the item's link, falling back to the
    WordPress ``post_name`` and then ``post_id`` tags.  The old-link ->
    new-URL mapping is always recorded in ``self.url_map``.  ``.meta`` and
    ``.wp`` files are written only for items with non-blank content that
    are not excluded drafts.

    :param item: XML element of the feed entry.
    :param wordpress_namespace: namespace used for the WordPress tags.
    :param out_folder: sub-folder for the post, ``'posts'`` by default.
    """
    if out_folder is None:
        out_folder = 'posts'

    def wp_tag(tag_name, default):
        # Read a tag living in the WordPress export namespace.
        return get_text_tag(
            item, '{%s}%s' % (wordpress_namespace, tag_name), default)

    title = get_text_tag(item, 'title', 'NO TITLE')
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    link = get_text_tag(item, 'link', None)
    slug = utils.slugify(urlparse(link).path)
    # post_name: used if the post has no "nice" URL;
    # post_id: the last resort, it *may* happen
    for fallback in ('post_name', 'post_id'):
        if slug:
            break
        slug = wp_tag(fallback, None)
    if not slug:  # should never happen
        print("Error converting post:", title)
        return

    description = get_text_tag(item, 'description', '')
    post_date = wp_tag('post_date', None)
    status = wp_tag('status', 'publish')
    content = get_text_tag(
        item, '{http://purl.org/rss/1.0/modules/content/}encoded', '')

    is_draft = status != 'publish'
    tags = ['draft'] if is_draft else []
    tags.extend(
        category.text for category in item.findall('category')
        if category.text != 'Uncategorized')

    self.url_map[link] = '/'.join(
        [self.context['BLOG_URL'], out_folder, slug]) + '.html'

    if is_draft and self.exclude_drafts:
        print('Draft "%s" will not be imported.' % (title, ))
        return
    if not content.strip():
        # If no content is found, no files are written.
        print('Not going to import "%s" because it seems to contain'
              ' no content.' % (title, ))
        return

    content = self.transform_content(content)
    self.write_metadata(
        os.path.join(self.output_folder, out_folder, slug + '.meta'),
        title, slug, post_date, description, tags)
    self.write_content(
        os.path.join(self.output_folder, out_folder, slug + '.wp'),
        content)
def slugify_tag_name(self, name, lang):
    """Return ``name`` as used in tag paths.

    The name is slugified for ``lang`` when ``SLUG_TAG_PATH`` is set and
    returned unchanged otherwise.  A ``lang`` of ``None`` is logged as a
    warning and replaced by ``''``.
    """
    if lang is None:  # TODO: remove in v8
        utils.LOGGER.warn("RenderTags.slugify_tag_name() called without language!")
        lang = ''
    if not self.site.config['SLUG_TAG_PATH']:
        return name
    return utils.slugify(name, lang)
def slugify_author_name(self, name, lang=None):
    """Return ``name`` as used in author paths.

    The name is slugified for ``lang`` when ``SLUG_AUTHOR_PATH`` is set
    and returned unchanged otherwise.  A ``lang`` of ``None`` is logged as
    a warning and replaced by ``''``.
    """
    if lang is None:  # TODO: remove in v8
        utils.LOGGER.warn("RenderAuthors.slugify_author_name() called without language!")
        lang = ''
    if not self.site.config['SLUG_AUTHOR_PATH']:
        return name
    return utils.slugify(name, lang)
def author_jsonfeed_path(self, name, lang, **kwargs):
    """Return path to author JSON Feed.

    The file name is the author name (slugified when ``SLUG_AUTHOR_PATH``
    is set) followed by ``-feed.json``.  It is preceded by the
    ``TRANSLATIONS`` entry for ``lang`` and by ``AUTHOR_PATH(lang)``;
    falsy components are dropped.  Extra keyword arguments are ignored.
    """
    config = self.site.config
    stem = utils.slugify(name, lang) if config['SLUG_AUTHOR_PATH'] else name
    filename = stem + '-feed.json'
    components = (
        config['TRANSLATIONS'][lang],
        config['AUTHOR_PATH'](lang),
        filename,
    )
    return [component for component in components if component]
def slugify_file(filename):
    """Return a slug derived from the base name of ``filename``.

    The extension is removed.  A leading ``<digits>-<digits>-<digits>-``
    prefix is removed as well (presumably a date such as ``2012-09-01-``
    -- the pattern accepts any digit groups).  The remainder is passed to
    ``utils.slugify``.
    """
    name, _ = os.path.splitext(os.path.basename(filename))
    # Raw string: '\d' and '\-' are regex escapes, not string escapes.
    m = re.match(r'\d+\-\d+\-\d+\-(?P<name>.*)', name)
    if m:
        name = m.group('name')
    if not isinstance(name, utils.unicode_str):
        # Non-text names are decoded before slugifying.
        name = name.decode('unicode-escape')
    return utils.slugify(name)
def import_item(self, item, out_folder=None):
    """Take an item from the feed and create a post file.

    The slug is the slugified path of the item's link (a trailing
    ``.html`` removed).  The old-link -> new-URL mapping is always
    recorded in ``self.url_map``.  ``.meta`` and ``.html`` files are
    written only for items with non-blank HTML content that are not
    excluded drafts.

    :param item: feed entry; ``title``, ``link``, ``published_parsed``,
        ``content``, ``tags`` and ``get('app_draft')`` are read.
    :param out_folder: sub-folder for the post, ``'posts'`` by default.
    """
    if out_folder is None:
        out_folder = 'posts'

    title = item.title
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    link = item.link
    link_path = urlparse(link).path

    if link_path.lower().endswith('.html'):
        link_path = link_path[:-5]

    slug = utils.slugify(link_path)

    if not slug:  # should never happen
        print("Error converting post:", title)
        return

    description = ''
    post_date = datetime.datetime.fromtimestamp(
        time.mktime(item.published_parsed))

    # Default to empty so an item without a text/html body reaches the
    # "no content" branch below instead of raising UnboundLocalError.
    content = ''
    for candidate in item.content:
        if candidate.type == 'text/html':
            content = candidate.value
            break
    # FIXME: handle attachments

    tags = [tag.term for tag in item.tags
            if tag.scheme == 'http://www.blogger.com/atom/ns#']

    is_draft = bool(item.get('app_draft'))
    if is_draft:
        tags.append('draft')

    self.url_map[link] = self.context['BLOG_URL'] + '/' + \
        out_folder + '/' + slug + '.html'

    if is_draft and self.exclude_drafts:
        print('Draft "%s" will not be imported.' % (title, ))
    elif content.strip():
        # If no content is found, no files are written.
        content = self.transform_content(content)
        self.write_metadata(
            os.path.join(self.output_folder, out_folder, slug + '.meta'),
            title, slug, post_date, description, tags)
        self.write_content(
            os.path.join(self.output_folder, out_folder, slug + '.html'),
            content)
    else:
        print('Not going to import "%s" because it seems to contain'
              ' no content.' % (title, ))
def slugify_author_name(self, name, lang=None):
    """Return ``name`` as used in author paths.

    The name is slugified for ``lang`` when ``SLUG_AUTHOR_PATH`` is set
    and returned unchanged otherwise.  A ``lang`` of ``None`` is logged as
    a warning and replaced by ``''``.
    """
    if lang is None:  # TODO: remove in v8
        utils.LOGGER.warn("RenderAuthors.slugify_author_name() called without language!")
        lang = ''
    if not self.site.config['SLUG_AUTHOR_PATH']:
        return name
    return utils.slugify(name, lang)
def render_listing(in_name, out_name, input_folder, output_folder,
                   folders=[], files=[]):
    """Render a listing page to ``out_name``.

    When ``in_name`` is given, that file is highlighted with the Pygments
    lexer matching its name (``TextLexer`` when none can be determined)
    and its base name becomes the title.  Otherwise the code is empty and
    the title is the name of the directory containing ``out_name``.

    ``folders`` and ``files`` are natsorted into new lists for the
    template; the shared default lists are never mutated here.  ``self``
    is not a parameter -- presumably closed over from the enclosing
    method; confirm there.
    """
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # Fall back to plain text, without trapping
                # KeyboardInterrupt/SystemExit as a bare except did.
                lexer = TextLexer()
            code = highlight(
                fd.read(), lexer,
                HtmlFormatter(cssclass='code',
                              linenos="table",
                              nowrap=False,
                              lineanchors=utils.slugify(in_name, force=True),
                              anchorlinenos=True))
        # the pygments highlighter uses <div class="codehilite"><pre>
        # for code.  We switch it to reST's <pre class="code">.
        code = CODERE.sub('<pre class="code literal-block">\\1</pre>', code)
        title = os.path.basename(in_name)
    else:
        code = ''
        title = os.path.split(os.path.dirname(out_name))[1]

    crumbs = utils.get_crumbs(
        os.path.relpath(out_name, self.kw['output_folder']), is_file=True)
    permalink = self.site.link(
        'listing',
        os.path.join(
            input_folder,
            os.path.relpath(
                out_name[:-5],  # remove '.html'
                os.path.join(self.kw['output_folder'], output_folder))))
    if self.site.config['COPY_SOURCES']:
        source_link = permalink[:-5]  # remove '.html'
    else:
        source_link = None

    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': self.kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
        'source_link': source_link,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def get_author_title(author_name):
    """Return the ``author_title`` declared in the author's ``.rst`` file.

    Reads ``authors/<slug>.rst``, where the slug comes from
    ``slugify(author_name)``, and returns the stripped text after the
    colon of the first line starting with ``.. author_title:``.

    :returns: the title, or ``''`` when the file cannot be read or has no
        such line.
    """
    slug = slugify(author_name)
    try:
        with open(os.path.join('authors', slug + '.rst')) as fd:
            for line in fd:
                if line.startswith('.. author_title:'):
                    return line.split(':', 1)[1].strip()
    except (IOError, OSError, UnicodeError):
        # Best effort: a missing or unreadable author file means
        # "no title".  Other errors are no longer swallowed.
        pass
    return ''
def nikola_stub_folder(self):
    """Return the stub folder path for this content in Nikola.

    For posts the path is ``<YYYY>/<MM>/<slugified title>``, with the date
    taken from ``self.get_date()``.  For anything else it is the
    slugified title alone.
    """
    title_path = slugify(self.frontmatter["title"])
    if not self.is_post:
        return title_path
    # The value is unused, but the lookup is kept so a post without a
    # "date" entry fails here exactly as before.
    self.frontmatter["date"]
    return os.path.join(self.get_date().strftime("%Y/%m"), title_path)
def get_pdf_dest(site, post, lang):
    """Return the output path of the PDF for ``post`` in ``lang``.

    The base path is ``OUTPUT_FOLDER/posts/pdf/<slugified post title>``.
    For the site's default language ``.pdf`` is appended.  For any other
    language the path is built from ``TRANSLATIONS_PATTERN`` with ``path``,
    ``lang`` and ``ext="pdf"``.
    """
    base_path = os.path.join(
        site.config["OUTPUT_FOLDER"],
        "posts",
        "pdf",
        utils.slugify(post.title()),
    )
    if lang == site.default_lang:
        return base_path + ".pdf"
    pattern = site.config["TRANSLATIONS_PATTERN"]
    return pattern.format(path=base_path, lang=lang, ext="pdf")
def import_item(item):
    """Take an item from the feed and create a post file.

    Writes ``<slug>.meta`` (title, slug, date, comma-separated tags, a
    blank line, description) and ``<slug>.wp`` (the content with its links
    rewritten by ``replacer``) below ``new_site/posts`` for posts and
    ``new_site/stories`` for other types.  Attachments are skipped.

    :param item: XML element of the feed entry.
    """
    title = get_text_tag(item, 'title', 'NO TITLE')
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    slug = utils.slugify(urlparse(get_text_tag(item, 'link', None)).path)
    description = get_text_tag(item, 'description', '')
    post_date = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}post_date', None)
    post_type = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}post_type', 'post')
    status = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}status', 'publish')
    content = get_text_tag(
        item, '{http://purl.org/rss/1.0/modules/content/}encoded', '')

    tags = []
    if status != 'publish':
        tags.append('draft')
    for tag in item.findall('category'):
        text = tag.text
        if text == 'Uncategorized':
            continue
        tags.append(text)

    if post_type == 'attachment':
        return
    elif post_type == 'post':
        out_folder = 'posts'
    else:
        out_folder = 'stories'

    # Write metadata
    with codecs.open(os.path.join('new_site', out_folder, slug + '.meta'),
                     "w+", "utf8") as fd:
        fd.write(u'%s\n' % title)
        fd.write(u'%s\n' % slug)
        fd.write(u'%s\n' % post_date)
        fd.write(u'%s\n' % ','.join(tags))
        fd.write(u'\n')
        fd.write(u'%s\n' % description)

    # The .wp file is created even when there is no content to write.
    with open(os.path.join('new_site', out_folder, slug + '.wp'),
              "wb+") as fd:
        if content.strip():
            try:
                doc = html.document_fromstring(content)
                doc.rewrite_links(replacer)
                fd.write(html.tostring(doc, encoding='utf8'))
            except Exception as exc:
                # Report the failure and carry on, instead of dropping
                # into an interactive debugger, which blocks unattended
                # runs.
                print('Error converting content of "%s": %s' % (title, exc))
def _add_classes(self, symbols, htmlpath, docpath):
    """Add the class with the class content and style.

    For every entry of ``symbols['classes']`` this emits a
    ``templates.CLASS`` heading underlined with ``-``, a
    ``templates.CODE`` snippet, the escaped docstring (or
    ``templates.NO_DESCRIPTION``), the sorted attribute list and the
    output of ``_add_function`` for each method.  A class without methods
    gets a closing ``----`` rule instead.  Each class is also recorded in
    ``self.__classes``.

    :param symbols: symbol table; ``'classes'`` maps class names to
        mappings with ``lineno``, ``docstring``, ``attributes`` and
        ``functions``.
    :param htmlpath: path of the HTML listing the links point into.
    :param docpath: stored alongside the class name and line number.
    :returns: the generated text ('' when there are no classes).
    """
    content = ''
    clazzes = symbols.get('classes', [])
    # Link anchors are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(os.path.splitext(htmlpath)[0])
    for clazz in clazzes:
        info = clazzes[clazz]
        clazz_name = templates.CLASS % {
            'name': clazz,
            'link': '%s#%s-%s' % (htmlpath, slugy, info['lineno'])
        }
        content += clazz_name + ('-' * len(clazz_name)) + '\n'
        content += templates.CODE % {'code': "class %s:" % clazz}

        docstring = info['docstring']
        if docstring:
            # Backslash-escape *, ` and _ in the docstring.  '\\_' is the
            # same two-character string the invalid escape '\_' produced.
            escaped = '| %s' % docstring.replace(
                '*', '\\*').replace('`', '\\`').replace('_', '\\_')
            doc = '| '.join(
                [line + '\n' for line in escaped.split('\n')]) + '\n'
        else:
            doc = templates.NO_DESCRIPTION
        content += doc

        attrs = info['attributes']
        if attrs:
            content += templates.ATTRIBUTES + (
                '~' * len(templates.ATTRIBUTES)) + '\n'
            for attr in sorted(attrs):
                # attrs maps an attribute name to its line number.
                content += templates.LIST_LINK_ITEM % {
                    'name': "%s [at ln:%d]" % (attr, attrs[attr]),
                    'link': '%s#%s-%s' % (htmlpath, slugy, attrs[attr])
                } + '\n'

        funcs = info['functions']
        if funcs:
            for func in sorted(funcs):
                content += self._add_function(funcs[func], htmlpath, docpath)
        else:
            content += '\n----\n'

        self.__classes.append((clazz, docpath, info['lineno']))
    return content
def import_item(self, item, wordpress_namespace, out_folder=None):
    """Take an item from the feed and create a post file.

    The slug comes from the path of the item's link, falling back to the
    WordPress ``post_name`` and then ``post_id`` tags.  The old-link ->
    new-URL mapping is always recorded in ``self.url_map``.  ``.meta`` and
    ``.wp`` files are written only for items with non-blank content that
    are not excluded drafts.

    :param item: XML element of the feed entry.
    :param wordpress_namespace: namespace used for the WordPress tags.
    :param out_folder: sub-folder for the post, ``"posts"`` by default.
    """
    if out_folder is None:
        out_folder = "posts"

    def wp_tag(tag_name, default):
        # Read a tag living in the WordPress export namespace.
        return get_text_tag(
            item, "{%s}%s" % (wordpress_namespace, tag_name), default)

    title = get_text_tag(item, "title", "NO TITLE")
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    link = get_text_tag(item, "link", None)
    slug = utils.slugify(urlparse(link).path)
    # post_name: used if the post has no "nice" URL;
    # post_id: the last resort, it *may* happen
    for fallback in ("post_name", "post_id"):
        if slug:
            break
        slug = wp_tag(fallback, None)
    if not slug:  # should never happen
        print("Error converting post:", title)
        return

    description = get_text_tag(item, "description", "")
    post_date = wp_tag("post_date", None)
    status = wp_tag("status", "publish")
    content = get_text_tag(
        item, "{http://purl.org/rss/1.0/modules/content/}encoded", "")

    is_draft = status != "publish"
    tags = ["draft"] if is_draft else []
    tags.extend(
        category.text for category in item.findall("category")
        if category.text != "Uncategorized")

    self.url_map[link] = "/".join(
        [self.context["BLOG_URL"], out_folder, slug]) + ".html"

    if is_draft and self.exclude_drafts:
        print('Draft "%s" will not be imported.' % (title,))
        return
    if not content.strip():
        # If no content is found, no files are written.
        print('Not going to import "%s" because it seems to contain'
              " no content." % (title,))
        return

    content = self.transform_content(content)
    self.write_metadata(
        os.path.join(self.output_folder, out_folder, slug + ".meta"),
        title, slug, post_date, description, tags)
    self.write_content(
        os.path.join(self.output_folder, out_folder, slug + ".wp"),
        content)
def import_item(item):
    """Take an item from the feed and create a post file.

    Writes ``<slug>.meta`` (title, slug, date, comma-separated tags, a
    blank line, description) and ``<slug>.wp`` (the content with its links
    rewritten by ``replacer``) below ``new_site/posts`` for posts and
    ``new_site/stories`` for other types.  Attachments are skipped.

    :param item: XML element of the feed entry.
    """
    title = get_text_tag(item, 'title', 'NO TITLE')
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    slug = utils.slugify(urlparse(get_text_tag(item, 'link', None)).path)
    description = get_text_tag(item, 'description', '')
    post_date = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}post_date', None)
    post_type = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}post_type', 'post')
    status = get_text_tag(
        item, '{http://wordpress.org/export/1.2/}status', 'publish')
    content = get_text_tag(
        item, '{http://purl.org/rss/1.0/modules/content/}encoded', '')

    tags = []
    if status != 'publish':
        tags.append('draft')
    for tag in item.findall('category'):
        text = tag.text
        if text == 'Uncategorized':
            continue
        tags.append(text)

    if post_type == 'attachment':
        return
    elif post_type == 'post':
        out_folder = 'posts'
    else:
        out_folder = 'stories'

    # Write metadata
    with codecs.open(os.path.join('new_site', out_folder, slug + '.meta'),
                     "w+", "utf8") as fd:
        fd.write(u'%s\n' % title)
        fd.write(u'%s\n' % slug)
        fd.write(u'%s\n' % post_date)
        fd.write(u'%s\n' % ','.join(tags))
        fd.write(u'\n')
        fd.write(u'%s\n' % description)

    # The .wp file is created even when there is no content to write.
    with open(os.path.join('new_site', out_folder, slug + '.wp'),
              "wb+") as fd:
        if content.strip():
            try:
                doc = html.document_fromstring(content)
                doc.rewrite_links(replacer)
                fd.write(html.tostring(doc, encoding='utf8'))
            except Exception as exc:
                # Report the failure and carry on, instead of dropping
                # into an interactive debugger, which blocks unattended
                # runs.
                print('Error converting content of "%s": %s' % (title, exc))
def _add_classes(self, symbols, htmlpath, docpath):
    """Add the class with the class content and style.

    For every entry of ``symbols['classes']`` this emits a
    ``templates.CLASS`` heading underlined with ``-``, a
    ``templates.CODE`` snippet, the escaped docstring (or
    ``templates.NO_DESCRIPTION``), the sorted attribute list and the
    output of ``_add_function`` for each method.  A class without methods
    gets a closing ``----`` rule instead.  Each class is also recorded in
    ``self.__classes``.

    :param symbols: symbol table; ``'classes'`` maps class names to
        mappings with ``lineno``, ``docstring``, ``attributes`` and
        ``functions``.
    :param htmlpath: path of the HTML listing the links point into.
    :param docpath: stored alongside the class name and line number.
    :returns: the generated text ('' when there are no classes).
    """
    content = ''
    clazzes = symbols.get('classes', [])
    name_to_slugy = os.path.splitext(htmlpath)[0]
    if isinstance(name_to_slugy, bytes):
        # Only byte strings need decoding; text strings have no
        # .decode() on Python 3.
        name_to_slugy = name_to_slugy.decode('utf-8')
    # Link anchors are "<slug of htmlpath sans extension>-<line>".
    slugy = utils.slugify(name_to_slugy)
    for clazz in clazzes:
        info = clazzes[clazz]
        clazz_name = templates.CLASS % {
            'name': clazz,
            'link': '%s#%s-%s' % (htmlpath, slugy, info['lineno'])
        }
        content += clazz_name + ('-' * len(clazz_name)) + '\n'
        content += templates.CODE % {'code': "class %s:" % clazz}

        docstring = info['docstring']
        if docstring:
            # Backslash-escape *, ` and _ in the docstring.  '\\_' is the
            # same two-character string the invalid escape '\_' produced.
            escaped = '| %s' % docstring.replace(
                '*', '\\*').replace('`', '\\`').replace('_', '\\_')
            doc = '| '.join(
                [line + '\n' for line in escaped.split('\n')]) + '\n'
        else:
            doc = templates.NO_DESCRIPTION
        content += doc

        attrs = info['attributes']
        if attrs:
            content += templates.ATTRIBUTES + (
                '~' * len(templates.ATTRIBUTES)) + '\n'
            for attr in sorted(attrs):
                # attrs maps an attribute name to its line number.
                content += templates.LIST_LINK_ITEM % {
                    'name': "%s [at ln:%d]" % (attr, attrs[attr]),
                    'link': '%s#%s-%s' % (htmlpath, slugy, attrs[attr])
                } + '\n'

        funcs = info['functions']
        if funcs:
            for func in sorted(funcs):
                content += self._add_function(funcs[func], htmlpath, docpath)
        else:
            content += '\n----\n'

        self.__classes.append((clazz, docpath, info['lineno']))
    return content
def render_listing(in_name, out_name, input_folder, output_folder,
                   folders=[], files=[]):
    """Render a listing page to ``out_name``.

    When ``in_name`` is given, that file is highlighted with the Pygments
    lexer matching its name (``TextLexer`` when none can be determined)
    and its base name becomes the title.  Otherwise the code is empty and
    the title is the name of the directory containing ``out_name``.

    ``folders`` and ``files`` are natsorted into new lists for the
    template; the shared default lists are never mutated here.  ``self``
    is not a parameter -- presumably closed over from the enclosing
    method; confirm there.
    """
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # Fall back to plain text, without trapping
                # KeyboardInterrupt/SystemExit as a bare except did.
                lexer = TextLexer()
            code = highlight(
                fd.read(), lexer,
                HtmlFormatter(cssclass='code',
                              linenos="table",
                              nowrap=False,
                              lineanchors=utils.slugify(in_name, force=True),
                              anchorlinenos=True))
        # the pygments highlighter uses <div class="codehilite"><pre>
        # for code.  We switch it to reST's <pre class="code">.
        code = CODERE.sub('<pre class="code literal-block">\\1</pre>', code)
        title = os.path.basename(in_name)
    else:
        code = ''
        title = os.path.split(os.path.dirname(out_name))[1]

    crumbs = utils.get_crumbs(
        os.path.relpath(out_name, self.kw['output_folder']), is_file=True)
    permalink = self.site.link(
        'listing',
        os.path.join(
            input_folder,
            os.path.relpath(
                out_name[:-5],  # remove '.html'
                os.path.join(self.kw['output_folder'], output_folder))))
    if self.site.config['COPY_SOURCES']:
        source_link = permalink[:-5]  # remove '.html'
    else:
        source_link = None

    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': self.kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
        'source_link': source_link,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def _execute(self, options, args):
    """Import from Twitpic.

    Reads ``tweets.txt`` from the folder given as the first positional
    argument, splits it into blank-line separated chunks and hands them
    to ``self.import_pics``.  Prints the command help and returns when no
    argument is given.

    :param options: mapping providing ``'output_folder'`` and a
        comma-separated ``'tags'`` string; ``'filename'`` is set on it
        from ``args[0]``.
    :param args: positional arguments; ``args[0]`` is the export folder.
    """
    # Local import so the file's import block does not have to change.
    import io

    if not args:
        print(self.help())
        return

    options['filename'] = args[0]
    self.path = options['filename']
    self.output_folder = options['output_folder']
    self.extra_tags = [t.strip() for t in options['tags'].split(",")]
    self.import_into_existing_site = True

    # Decode while reading: calling ``.decode`` on the result of a
    # text-mode ``read()`` only works on Python 2, where it is bytes.
    tweets_path = os.path.join(self.path, 'tweets.txt')
    with io.open(tweets_path, encoding='utf-8') as f:
        chunks = [p.strip() for p in f.read().split("\n\n") if p != '\n']

    self.site.scan_posts()
    # Map each slugified tag back to the tag as it is spelled on the site.
    self.site_tags = {utils.slugify(t): t for t in self.site.posts_per_tag}
    self.import_pics(chunks)
def import_item(item):
    """Takes an item from the feed and creates a post file.

    Writes ``<slug>.meta`` (title, slug, date, tags, blank line,
    description) and ``<slug>.wp`` (the item's HTML with its links
    rewritten through ``replacer``) below ``new_site/posts`` for posts or
    ``new_site/stories`` for every other post type.  Attachments are
    skipped and nothing is written for them.

    :param item: feed element supporting ``findall("category")`` and
        readable through ``get_text_tag``.
    """
    title = get_text_tag(item, "title", "NO TITLE")
    # link is something like http://foo.com/2012/09/01/hello-world/
    # So, take the path, utils.slugify it, and that's our slug
    slug = utils.slugify(urlparse(get_text_tag(item, "link", None)).path)
    description = get_text_tag(item, "description", "")
    post_date = get_text_tag(
        item, "{http://wordpress.org/export/1.2/}post_date", None)
    post_type = get_text_tag(
        item, "{http://wordpress.org/export/1.2/}post_type", "post")
    status = get_text_tag(
        item, "{http://wordpress.org/export/1.2/}status", "publish")
    content = get_text_tag(
        item, "{http://purl.org/rss/1.0/modules/content/}encoded", "")

    tags = []
    if status != "publish":
        tags.append("draft")
    for tag in item.findall("category"):
        text = tag.text
        # Categories without text would break the ",".join below.
        if not text or text == "Uncategorized":
            continue
        tags.append(text)

    if post_type == "attachment":
        return
    elif post_type == "post":
        out_folder = "posts"
    else:
        out_folder = "stories"

    # Write metadata
    meta_path = os.path.join("new_site", out_folder, slug + ".meta")
    with codecs.open(meta_path, "w+", "utf8") as fd:
        fd.write(u"%s\n" % title)
        fd.write(u"%s\n" % slug)
        fd.write(u"%s\n" % post_date)
        fd.write(u"%s\n" % ",".join(tags))
        fd.write(u"\n")
        fd.write(u"%s\n" % description)

    # The file is created even when there is no content to write.
    wp_path = os.path.join("new_site", out_folder, slug + ".wp")
    with open(wp_path, "wb+") as fd:
        if content.strip():
            # Parsing/rewriting errors propagate to the caller instead of
            # dropping into an interactive debugger.
            doc = html.document_fromstring(content)
            doc.rewrite_links(replacer)
            fd.write(html.tostring(doc, encoding="utf8"))
def render_listing(in_name, out_name, folders=[], files=[]):
    """Render the listing page for one source file or one folder index.

    When ``in_name`` is given, that file is highlighted with pygments and
    its base name becomes the page title; otherwise both the code and the
    title are empty.  The page is rendered with ``listing.tmpl``.

    :param in_name: path of the file to highlight, or a falsy value.
    :param out_name: path of the HTML page to render.
    :param folders: folder names put in the template context, sorted.
    :param files: file names put in the template context, sorted.
    """
    # NOTE: the list defaults are only handed to natsort.natsorted and are
    # never mutated in this function.
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # No lexer could be picked for this file name: fall back
                # to plain text. ``Exception`` rather than a bare except
                # so KeyboardInterrupt/SystemExit still propagate.
                lexer = TextLexer()
            code = highlight(
                fd.read(), lexer,
                HtmlFormatter(cssclass='code',
                              linenos="table",
                              nowrap=False,
                              lineanchors=utils.slugify(in_name),
                              anchorlinenos=True))
        title = os.path.basename(in_name)
    else:
        code = ''
        title = ''

    crumbs = utils.get_crumbs(
        os.path.relpath(out_name, kw['output_folder']),
        is_file=True)
    permalink = self.site.link(
        'listing',
        os.path.relpath(
            out_name,
            os.path.join(kw['output_folder'], kw['listings_folder'])))
    if self.site.config['COPY_SOURCES']:
        # Drop the last five characters of the permalink.
        source_link = permalink[:-5]
    else:
        source_link = None

    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
        'source_link': source_link,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def render_listing(in_name, out_name, folders=[], files=[]):
    """Render one listing page (a highlighted file or a folder index).

    A truthy ``in_name`` is read, highlighted with pygments and used
    (base name only) as the page title; a falsy one yields a page with
    empty code and an empty title.  Output goes through the
    ``listing.tmpl`` template to ``out_name``.

    :param in_name: path of the file to highlight, or a falsy value.
    :param out_name: path of the HTML page to render.
    :param folders: folder names put in the template context, sorted.
    :param files: file names put in the template context, sorted.
    """
    # NOTE: the list defaults are only handed to natsort.natsorted and are
    # never mutated in this function.
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except Exception:
                # Fall back to plain text when no lexer can be picked.
                # Catching ``Exception`` instead of everything lets
                # KeyboardInterrupt/SystemExit through.
                lexer = TextLexer()
            formatter = HtmlFormatter(cssclass='code',
                                      linenos="table",
                                      nowrap=False,
                                      lineanchors=utils.slugify(in_name),
                                      anchorlinenos=True)
            code = highlight(fd.read(), lexer, formatter)
        title = os.path.basename(in_name)
    else:
        code = ''
        title = ''

    crumbs = utils.get_crumbs(
        os.path.relpath(out_name, kw['output_folder']),
        is_file=True)
    listings_root = os.path.join(kw['output_folder'], kw['listings_folder'])
    permalink = self.site.link(
        'listing', os.path.relpath(out_name, listings_root))
    if self.site.config['COPY_SOURCES']:
        # Drop the last five characters of the permalink.
        source_link = permalink[:-5]
    else:
        source_link = None

    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
        'source_link': source_link,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def section_slug(self, lang=None):
    """Return the slug for the post's section.

    With ``POSTS_SECTION_FROM_META`` enabled the section is the first
    comma-separated entry of the post's ``section`` metadata; otherwise
    it is derived from the leading component of the destination path.
    The localized "Uncategorized" message is the fallback in both modes.
    The result is slugified for ``lang``.

    :param lang: language code; defaults to the current locale language.
    """
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang

    if self.config['POSTS_SECTION_FROM_META']:
        if 'section' in self.meta[lang]:
            section = self.meta[lang]['section'].split(',')[0]
        else:
            section = self.messages[lang]["Uncategorized"]
    else:
        path = self.destination_path(lang)
        # Strip a trailing "<sep><index file>" from the destination.
        index_suffix = os.sep + self.index_file
        if path.endswith(index_suffix):
            path = path[:-len(index_suffix)]
        parent = os.path.dirname(path)
        pieces = path.split(os.sep)
        if not pieces or parent == '.':
            section = self.messages[lang]["Uncategorized"]
        elif lang == pieces[0]:
            # The first component is the language: use the next one.
            section = pieces[1]
        else:
            section = pieces[0]

    return utils.slugify(section, lang)
def _add_global_attributes(self, symbols, htmlpath):
    """Add the global attributes to the Module Documentation.

    Produces a dash-underlined heading, one link item per attribute
    (sorted by name, pointing at its line in ``htmlpath``) and a closing
    separator.  Returns an empty string when ``symbols`` holds no
    attributes.

    :param symbols: mapping that may contain an ``'attributes'`` mapping
        of attribute name -> line number.
    :param htmlpath: path of the HTML listing that the links point into.
    """
    attrs = symbols.get('attributes')
    if not attrs:
        return ''

    heading = templates.GLOBAL_ATTRIBUTES
    pieces = [heading + ('-' * len(heading)) + '\n']
    names = sorted(attrs.keys())
    # The anchor prefix is the slug of the HTML path minus its extension.
    slugy = utils.slugify(os.path.splitext(htmlpath)[0])
    for name in names:
        lineno = attrs[name]
        item = templates.LIST_LINK_ITEM % {
            'name': "%s [at ln:%d]" % (name, lineno),
            'link': '%s#%s-%s' % (htmlpath, slugy, lineno),
        }
        pieces.append(item + '\n')
    pieces.append('\n----\n')
    return ''.join(pieces)
def _add_global_attributes(self, symbols, htmlpath):
    """Add the global attributes to the Module Documentation.

    Emits a dash-underlined heading, one link item per attribute (sorted
    by name, pointing at its line in ``htmlpath``) and a closing
    separator.  Nothing is emitted when ``symbols`` has no attributes.

    :param symbols: mapping that may contain an ``'attributes'`` mapping
        of attribute name -> line number.
    :param htmlpath: path of the HTML listing that the links point into.
    :returns: the generated markup, or ``''`` without attributes.
    """
    attrs = symbols.get('attributes')
    if not attrs:
        return ''

    parts = [templates.GLOBAL_ATTRIBUTES + (
        '-' * len(templates.GLOBAL_ATTRIBUTES)) + '\n']
    attrs_key = sorted(attrs.keys())
    name_to_slugy = os.path.splitext(htmlpath)[0]
    # Only byte strings need decoding; a text path has no ``decode`` on
    # Python 3 and can be slugified directly.
    if isinstance(name_to_slugy, bytes):
        name_to_slugy = name_to_slugy.decode('utf-8')
    slugy = utils.slugify(name_to_slugy)
    for attr in attrs_key:
        parts.append(templates.LIST_LINK_ITEM % {
            'name': "%s [at ln:%d]" % (attr, attrs[attr]),
            'link': '%s#%s-%s' % (htmlpath, slugy, attrs[attr]),
        } + '\n')
    parts.append('\n----\n')
    return ''.join(parts)
def section_slug(self, lang=None):
    """Return the slug for the post's section.

    The section comes from the post's ``section`` metadata (first
    comma-separated entry) when ``POSTS_SECTION_FROM_META`` is set, and
    from the leading component of the destination path otherwise.  The
    localized "Uncategorized" message is used when neither yields one.

    :param lang: language code; defaults to the current locale language.
    """
    if lang is None:
        lang = nikola.utils.LocaleBorg().current_lang

    if self.config['POSTS_SECTION_FROM_META']:
        if 'section' in self.meta[lang]:
            section = self.meta[lang]['section'].split(',')[0]
        else:
            section = self.messages[lang]["Uncategorized"]
        return utils.slugify(section)

    path = self.destination_path(lang)
    # Strip a trailing "<sep><index file>" from the destination.
    index_suffix = os.sep + self.index_file
    if path.endswith(index_suffix):
        path = path[:-len(index_suffix)]
    parent = os.path.dirname(path)
    pieces = path.split(os.sep)
    if not pieces or parent == '.':
        section = self.messages[lang]["Uncategorized"]
    elif lang == pieces[0]:
        # The first component is the language: use the next one.
        section = pieces[1]
    else:
        section = pieces[0]
    return utils.slugify(section)
def lancelot_link(site, slug, title):
    """Resolve a slug (optionally ``slug#fragment``) to a post permalink.

    The part before the first ``#`` is slugified and compared with the
    ``slug`` metadata of the posts in ``site.timeline``; the search stops
    once a second match proves the slug is duplicated.  When ``title`` is
    None the title of the first matching post is used.

    Args:
        site: the Nikola object
        slug: the text between the shortcode tags
        title: the title passed in by the user (if any)

    Returns:
        tuple (success, has duplicate slugs, title, permalink, slug)
    """
    base, _, fragment = slug.partition('#')
    slug = slugify(base)

    # Keep at most two matches: the post to link and one witness of a
    # duplicated slug.
    matches = []
    for candidate in site.timeline:
        if candidate.meta('slug') != slug:
            continue
        matches.append(candidate)
        if len(matches) == 2:
            break

    if not matches:
        return False, False, title, None, slug

    post = matches[0]
    if title is None:
        title = post.title()
    permalink = post.permalink()
    if fragment:
        permalink += '#' + fragment
    return True, len(matches) > 1, title, permalink, slug
def import_item(self, pic, date, text):
    """Create a post file.

    Writes ``<slug>.meta`` and ``<slug>.rst`` into ``self.output_folder``
    for one picture and copies the picture to
    ``images/<output_folder>/<slug>/``.  The post is titled
    ``Twitpic: dd/mm/YYYY`` and tagged ``Twitpic`` plus
    ``self.extra_tags``.

    :param pic: picture file name, relative to ``self.path``.
    :param date: picture date formatted as ``mm/dd/YYYY``.
    :param text: tweet text, passed through ``self.expand``.
    """
    post_date = datetime.datetime.strptime(date, "%m/%d/%Y")
    title = "Twitpic: %s" % post_date.strftime("%d/%m/%Y")
    slug = utils.slugify(title)
    self.tags = ["Twitpic"] + self.extra_tags
    content = self.expand(text)
    # Split on the last dot only, so names with several dots still unpack.
    base, ext = pic.rsplit('.', 1)
    # NOTE(review): the line layout of this literal was reconstructed;
    # confirm it against the intended figure markup.
    content += """

.. figure:: %s.thumbnail.%s
   :target: %s

""" % (base, ext, pic)
    self.write_metadata(
        os.path.join(self.output_folder, slug + '.meta'),
        title,
        slug,
        # %M is minutes; %m would repeat the month in the time part.
        post_date.strftime(r'%Y/%m/%d %H:%M:%S'),
        '',
        self.tags)
    self.write_content(
        os.path.join(self.output_folder, slug + '.rst'),
        content,
        False)
    utils.copy_file(
        os.path.join(self.path, pic),
        os.path.join("images", self.output_folder, slug, pic))
def import_item(self, pic, date, text):
    """Create a post file.

    One picture becomes one post: ``<slug>.meta`` and ``<slug>.rst`` are
    written into ``self.output_folder`` and the picture is copied to
    ``images/<output_folder>/<slug>/``.  The title is
    ``Twitpic: dd/mm/YYYY`` and the tags are ``Twitpic`` plus
    ``self.extra_tags``.

    :param pic: picture file name, relative to ``self.path``.
    :param date: picture date formatted as ``mm/dd/YYYY``.
    :param text: tweet text, passed through ``self.expand``.
    """
    post_date = datetime.datetime.strptime(date, "%m/%d/%Y")
    title = "Twitpic: %s" % post_date.strftime("%d/%m/%Y")
    slug = utils.slugify(title)
    self.tags = ["Twitpic"] + self.extra_tags
    content = self.expand(text)
    # Split on the last dot only, so names with several dots still unpack.
    base, ext = pic.rsplit('.', 1)
    # NOTE(review): the line layout of this literal was reconstructed;
    # confirm it against the intended figure markup.
    content += """

.. figure:: %s.thumbnail.%s
   :target: %s

""" % (base, ext, pic)
    meta_path = os.path.join(self.output_folder, slug + '.meta')
    # %M is minutes; %m would repeat the month in the time part.
    timestamp = post_date.strftime(r'%Y/%m/%d %H:%M:%S')
    self.write_metadata(meta_path, title, slug, timestamp, '', self.tags)
    self.write_content(os.path.join(self.output_folder, slug + '.rst'),
                       content, False)
    utils.copy_file(os.path.join(self.path, pic),
                    os.path.join("images", self.output_folder, slug, pic))
def _execute(self, options, args):
    """Import from Twitpic.

    Loads ``tweets.txt`` from the export folder named by ``args[0]``,
    splits it on blank lines and passes the stripped chunks to
    ``self.import_pics``.  Without arguments it prints the command help
    and returns.

    :param options: mapping providing ``'output_folder'`` and a
        comma-separated ``'tags'`` string; ``'filename'`` is set on it
        from ``args[0]``.
    :param args: positional arguments; ``args[0]`` is the export folder.
    """
    # Local import so the file's import block does not have to change.
    import io

    if not args:
        print(self.help())
        return

    options['filename'] = args[0]
    self.path = options['filename']
    self.output_folder = options['output_folder']
    self.extra_tags = [t.strip() for t in options['tags'].split(",")]
    self.import_into_existing_site = True

    # Let the file object decode UTF-8: ``read().decode(...)`` on a
    # text-mode file only works on Python 2.
    with io.open(os.path.join(self.path, 'tweets.txt'),
                 encoding='utf-8') as f:
        chunks = [
            p.strip() for p in f.read().split("\n\n") if p != '\n'
        ]

    self.site.scan_posts()
    # Map each slugified tag back to the tag as it is spelled on the site.
    self.site_tags = {
        utils.slugify(t): t
        for t in self.site.posts_per_tag
    }
    self.import_pics(chunks)
def slugify_name(self, name):
    """Return ``name`` slugified when ``SLUG_TAG_PATH`` is enabled.

    With the setting disabled the name is returned unchanged.
    """
    if not self.site.config['SLUG_TAG_PATH']:
        return name
    return utils.slugify(name)
def slugify_author_name(self, name):
    """Slugify an author name.

    The name is only slugified when ``SLUG_AUTHOR_PATH`` is enabled in
    the site configuration; otherwise it is returned unchanged.
    """
    if not self.site.config['SLUG_AUTHOR_PATH']:
        return name
    return utils.slugify(name)