def create_articles(self, num):
    """Populate ``self.articles`` with *num* synthetic articles.

    Articles are dated one day apart, counting backwards from now, and
    each carries a title, category, tags and an author so that listing
    and feed behaviour can be exercised.
    """
    now = datetime.now()
    for offset in range(num, 0, -1):
        published = now - timedelta(days=offset)
        metadata = {
            "Title": "Title " + "{:02n}".format(offset),
            "Date": published,
            "Category": Category("Cat", self.context),
            "Tags": [
                "TagBecomesCategoryInFeed",
                "OtherTag",
                "Tag " + "{:02n}".format(offset),
            ],
            # Every ten consecutive articles share one author.
            "Author": Author("Author " + str(offset // 10), self.context),
        }
        self.articles.append(Article("Some content", metadata=metadata))
def test_slugify_with_author_substitutions(self):
    """AUTHOR_REGEX_SUBSTITUTIONS must drive the {author} URL segment."""
    settings = get_settings()
    substitutions = [
        ('Alexander Todorov', 'atodorov'),
        ('Krasimir Tsonev', 'krasimir'),
        (r'[^\w\s-]', ''),
        (r'(?u)\A\s*', ''),
        (r'(?u)\s*\Z', ''),
        (r'[-\s]+', '-'),
    ]
    settings['AUTHOR_REGEX_SUBSTITUTIONS'] = substitutions
    settings['ARTICLE_URL'] = 'blog/{author}/{slug}/'
    settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html'

    kwargs = self._copy_page_kwargs()
    kwargs['metadata']['author'] = Author('Alexander Todorov', settings)
    kwargs['metadata']['title'] = 'fnord'
    kwargs['settings'] = settings

    article = Article(**kwargs)
    # The author name should be replaced by its configured short form.
    self.assertEqual(article.url, 'blog/atodorov/fnord/')
    self.assertEqual(article.save_as, 'blog/atodorov/fnord/index.html')
def test_slugify_category_author(self):
    """SLUG_REGEX_SUBSTITUTIONS must apply to author and category URLs."""
    settings = get_settings()
    settings['SLUG_REGEX_SUBSTITUTIONS'] = [
        (r'C#', 'csharp'),
        (r'[^\w\s-]', ''),
        (r'(?u)\A\s*', ''),
        (r'(?u)\s*\Z', ''),
        (r'[-\s]+', '-'),
    ]
    settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
    settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'

    kwargs = self._copy_page_kwargs()
    kwargs['metadata']['author'] = Author("O'Brien", settings)
    kwargs['metadata']['category'] = Category('C# & stuff', settings)
    kwargs['metadata']['title'] = 'fnord'
    kwargs['settings'] = settings

    article = Article(**kwargs)
    # "O'Brien" loses its apostrophe; "C# & stuff" maps through csharp.
    self.assertEqual(article.url, 'obrien/csharp-stuff/fnord/')
    self.assertEqual(article.save_as, 'obrien/csharp-stuff/fnord/index.html')
def test_intrasite_link_absolute(self):
    """Test that absolute URLs are merged properly.

    NOTE(review): the intrasite-link placeholders in this test had been
    corrupted to the literal text "(unknown)"; they are restored here to
    Pelican's documented ``{filename}`` syntax, matching the surviving
    ``{index}`` placeholder below.
    """
    args = self.page_kwargs.copy()
    args['settings'] = get_settings(
        STATIC_URL='http://static.cool.site/{path}',
        ARTICLE_URL='http://blog.cool.site/{slug}.html')
    args['source_path'] = 'content'
    args['context']['filenames'] = {
        'images/poster.jpg': Static(
            '', settings=args['settings'], source_path='images/poster.jpg'),
        'article.rst': Article(
            '', settings=args['settings'],
            metadata={'slug': 'article', 'title': 'Article'}),
    }

    # Article link will go to blog
    args['content'] = '<a href="{filename}article.rst">Article</a>'
    content = Page(**args).get_content('http://cool.site')
    self.assertEqual(
        content,
        '<a href="http://blog.cool.site/article.html">Article</a>')

    # Page link will go to the main site
    args['content'] = '<a href="{index}">Index</a>'
    content = Page(**args).get_content('http://cool.site')
    self.assertEqual(
        content,
        '<a href="http://cool.site/index.html">Index</a>')

    # Image link will go to static
    args['content'] = '<img src="{filename}/images/poster.jpg"/>'
    content = Page(**args).get_content('http://cool.site')
    self.assertEqual(
        content,
        '<img src="http://static.cool.site/images/poster.jpg"/>')
def gen_article_and_html_from_rst(self, rstPath):
    """Run a full ArticlesGenerator pass over *rstPath*.

    Returns a ``(article, soup)`` tuple: the generated Article whose slug
    matches the source file, and a BeautifulSoup parse of the HTML that
    was written for it.
    """
    content, metadata = self.reader.read(rstPath)
    article = Article(content=content, metadata=metadata)

    context = self.settings.copy()
    context['generated_content'] = {}
    context['static_links'] = set()
    context['static_content'] = {}
    context['localsiteurl'] = self.settings['SITEURL']

    generator = ArticlesGenerator(
        context=context, settings=self.settings,
        path=CONTENT_DIR, theme=self.settings['THEME'],
        output_path=OUTPUT_DIR)
    generator.generate_context()

    # Select the generated article matching our source file's slug.
    # (IndexError here means generation silently dropped the article.)
    result = [a for a in generator.context["articles"]
              if a.slug == article.slug][0]
    self.writer.write_file(result.save_as,
                           generator.get_template('article'),
                           generator.context,
                           article=result)

    # FIX: the original left the output file handle open (resource
    # leak); a context manager closes it deterministically.
    output_file = "./" + self.writer.output_path + '/' + result.save_as
    with open(output_file) as fp:
        soup = BeautifulSoup(fp, "html.parser")
    return (result, soup)
def generate_context(self):
    """change the context

    Reads every article file (excluding ``pages``), fills in missing
    category/date metadata, groups articles by tag and category, sorts
    them by date and builds the tag cloud.
    """
    # return the list of files to use
    files = self.get_files(self.path, exclude=['pages', ])
    all_articles = []
    for f in files:
        content, metadatas = read_file(f)

        # if no category is set, use the name of the path as a category
        if 'category' not in metadatas.keys():
            if os.path.dirname(f) == self.path:
                category = self.settings['DEFAULT_CATEGORY']
            else:
                category = os.path.basename(os.path.dirname(f))

            if category != '':
                metadatas['category'] = unicode(category)

        # fall back to the file's ctime when no date metadata is present
        if 'date' not in metadatas.keys()\
                and self.settings['FALLBACK_ON_FS_DATE']:
            metadatas['date'] = datetime.fromtimestamp(os.stat(f).st_ctime)

        article = Article(content, metadatas, settings=self.settings,
                          filename=f)
        if not is_valid_content(article, f):
            continue

        if hasattr(article, 'tags'):
            for tag in article.tags:
                self.tags[tag].append(article)
        all_articles.append(article)

    self.articles, self.translations = process_translations(all_articles)

    for article in self.articles:
        # only main articles are listed in categories, not translations
        self.categories[article.category].append(article)

    # sort the articles by date
    self.articles.sort(key=attrgetter('date'), reverse=True)
    self.dates = list(self.articles)
    self.dates.sort(key=attrgetter('date'),
                    reverse=self.context['REVERSE_ARCHIVE_ORDER'])

    # create tag cloud
    tag_cloud = defaultdict(int)
    for article in self.articles:
        for tag in getattr(article, 'tags', []):
            tag_cloud[tag] += 1

    tag_cloud = sorted(tag_cloud.items(), key=itemgetter(1), reverse=True)
    tag_cloud = tag_cloud[:self.settings.get('TAG_CLOUD_MAX_ITEMS')]

    tags = map(itemgetter(1), tag_cloud)
    if tags:
        max_count = max(tags)
        steps = self.settings.get('TAG_CLOUD_STEPS')

        # calculate word sizes.
        # FIX: guard the divisor with ``or 1`` — when every tag has
        # count 1, math.log(max_count) is 0 and the original raised
        # ZeroDivisionError (the other generate_context variants in this
        # codebase already carry this guard).
        self.tag_cloud = [
            (tag,
             int(math.floor(steps - (steps - 1) * math.log(count)
                            / (math.log(max_count) or 1))))
            for tag, count in tag_cloud
        ]
        # put words in chaos
        random.shuffle(self.tag_cloud)

    # and generate the output :)
    self._update_context(('articles', 'dates', 'tags', 'categories',
                          'tag_cloud'))
def generate_context(self):
    """Add the articles into the shared context"""
    # NOTE(review): this function mixes Python 2 and 3 code paths
    # (``str.decode`` on a path, ``unicode.encode`` below) — presumably
    # written for py2 with partial py3 support; verify before porting.
    article_path = os.path.normpath(  # we have to remove trailing slashes
        os.path.join(self.path, self.settings['ARTICLE_DIR'])
    )
    all_articles = []
    for f in self.get_files(
            article_path, exclude=self.settings['ARTICLE_EXCLUDES']):
        try:
            # let plugins react before the file is parsed
            signals.article_generate_preread.send(self)
            content, metadata = read_file(f, settings=self.settings)
        except Exception as e:
            # best-effort: an unreadable article is skipped, not fatal
            logger.warning(u'Could not process %s\n%s' % (f, str(e)))
            continue

        # if no category is set, use the name of the path as a category
        if 'category' not in metadata:
            if (self.settings['USE_FOLDER_AS_CATEGORY']
                    and os.path.dirname(f) != article_path):
                # if the article is in a subdirectory
                category = os.path.basename(os.path.dirname(f))\
                    .decode('utf-8')
            else:
                # if the article is not in a subdirectory
                category = self.settings['DEFAULT_CATEGORY']

            if category != '':
                metadata['category'] = Category(category, self.settings)

        # DEFAULT_DATE is either the literal 'fs' (use the file's ctime)
        # or a tuple of datetime constructor arguments
        if 'date' not in metadata and self.settings.get('DEFAULT_DATE'):
            if self.settings['DEFAULT_DATE'] == 'fs':
                metadata['date'] = datetime.datetime.fromtimestamp(
                    os.stat(f).st_ctime)
            else:
                metadata['date'] = datetime.datetime(
                    *self.settings['DEFAULT_DATE'])

        signals.article_generate_context.send(self, metadata=metadata)
        article = Article(content, metadata, settings=self.settings,
                          filename=f, context=self.context)
        if not is_valid_content(article, f):
            continue

        self.add_filename(article)

        # route the article by its status; anything other than
        # published/draft is reported and dropped
        if article.status == "published":
            if hasattr(article, 'tags'):
                for tag in article.tags:
                    self.tags[tag].append(article)
            all_articles.append(article)
        elif article.status == "draft":
            self.drafts.append(article)
        else:
            logger.warning(u"Unknown status %s for file %s, skipping it."
                           % (repr(unicode.encode(article.status, 'utf-8')),
                              repr(f)))

    self.articles, self.translations = process_translations(all_articles)

    for article in self.articles:
        # only main articles are listed in categories, not translations
        self.categories[article.category].append(article)
        # ignore blank authors as well as undefined
        if hasattr(article, 'author') and article.author.name != '':
            self.authors[article.author].append(article)

    # sort the articles by date
    self.articles.sort(key=attrgetter('date'), reverse=True)
    self.dates = list(self.articles)
    self.dates.sort(key=attrgetter('date'),
                    reverse=self.context['NEWEST_FIRST_ARCHIVES'])

    # create tag cloud
    tag_cloud = defaultdict(int)
    for article in self.articles:
        for tag in getattr(article, 'tags', []):
            tag_cloud[tag] += 1

    tag_cloud = sorted(tag_cloud.items(), key=itemgetter(1), reverse=True)
    tag_cloud = tag_cloud[:self.settings.get('TAG_CLOUD_MAX_ITEMS')]

    # NOTE(review): ``map`` returns an iterator on py3, so ``if tags:``
    # is always truthy there — harmless here but py2-shaped; confirm.
    tags = map(itemgetter(1), tag_cloud)
    if tags:
        max_count = max(tags)
        steps = self.settings.get('TAG_CLOUD_STEPS')

        # calculate word sizes; ``or 1`` guards against log(1) == 0
        self.tag_cloud = [
            (
                tag,
                int(math.floor(steps - (steps - 1) * math.log(count)
                               / (math.log(max_count) or 1)))
            )
            for tag, count in tag_cloud
        ]
        # put words in chaos
        random.shuffle(self.tag_cloud)

    # and generate the output :)

    # order the categories per name
    self.categories = list(self.categories.items())
    self.categories.sort(
        key=lambda item: item[0].name,
        reverse=self.settings['REVERSE_CATEGORY_ORDER'])

    self.authors = list(self.authors.items())
    self.authors.sort(key=lambda item: item[0].name)

    self._update_context(('articles', 'dates', 'tags', 'categories',
                          'tag_cloud', 'authors', 'related_posts'))
    signals.article_generator_finalized.send(self)
def get_article(title, slug, content, lang, extra_metadata=None):
    """Build an Article whose metadata carries title, slug and lang.

    *extra_metadata*, when provided, is merged over the base metadata.
    """
    meta = dict(slug=slug, title=title, lang=lang)
    if extra_metadata is not None:
        meta.update(extra_metadata)
    return Article(content, metadata=meta)
def generate_context(self): """change the context""" # return the list of files to use files = self.get_files(self.path, exclude=[ 'pages', ]) all_articles = [] for f in files: try: content, metadata = read_file(f, settings=self.settings) except Exception, e: warning(u'Could not process %s\n%s' % (f, str(e))) continue # if no category is set, use the name of the path as a category if 'category' not in metadata.keys(): if os.path.dirname(f) == self.path: category = self.settings['DEFAULT_CATEGORY'] else: category = os.path.basename( os.path.dirname(f)).decode('utf-8') if category != '': metadata['category'] = unicode(category) if 'date' not in metadata.keys()\ and self.settings['FALLBACK_ON_FS_DATE']: metadata['date'] = datetime.fromtimestamp(os.stat(f).st_ctime) article = Article(content, metadata, settings=self.settings, filename=f) if not is_valid_content(article, f): continue add_to_url = u'' if 'ARTICLE_PERMALINK_STRUCTURE' in self.settings: article_permalink_structure = self.settings[ 'ARTICLE_PERMALINK_STRUCTURE'] article_permalink_structure = article_permalink_structure.lstrip( '/').replace('%(', "%%(") # try to substitute any python datetime directive add_to_url = article.date.strftime(article_permalink_structure) # try to substitute any article metadata in rest file add_to_url = add_to_url % article.__dict__ add_to_url = [slugify(i) for i in add_to_url.split('/')] add_to_url = os.path.join(*add_to_url) article.url = urlparse.urljoin(add_to_url, article.url) article.save_as = urlparse.urljoin(add_to_url, article.save_as) if article.status == "published": if hasattr(article, 'tags'): for tag in article.tags: self.tags[tag].append(article) all_articles.append(article) elif article.status == "draft": self.drafts.append(article)
def get_article(title, content, **extra_metadata):
    """Return an Article seeded from the default config metadata.

    Keyword arguments override the defaults; *title* always wins over
    any default title.
    """
    meta = default_metadata(settings=DEFAULT_CONFIG)
    meta['title'] = title
    if extra_metadata:
        meta.update(extra_metadata)
    return Article(content, metadata=meta)
def article2draft(article):
    """Return a copy of *article* whose status is forced to 'draft'."""
    draft_copy = Article(article._content,
                         article.metadata,
                         article.settings,
                         article.source_path,
                         article._context)
    draft_copy.status = 'draft'
    return draft_copy
def _generate_mbox_articles(self, mboxPath, mboxCategory):
    """Parse the mbox at *mboxPath* into a list of Article objects.

    Every message becomes one article filed under *mboxCategory*; the
    slug embeds category/month-year/subject, deduplicated with a numeric
    suffix when subjects collide. Returns the list, or None if the mbox
    cannot be opened.
    """
    baseReader = BaseReader(self.settings)
    category = baseReader.process_metadata('category', mboxCategory)

    # Complain if the mbox path does not exist and is not readable.
    try:
        if not os.path.exists(mboxPath):
            raise RuntimeError
        mbox = mailbox.mbox(mboxPath)
    # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    except Exception:
        logger.error('Could not process mbox file %s', mboxPath)
        return

    # Retrieve some fields from the settings.
    authorString = self.settings.get('MBOX_AUTHOR_STRING')
    markdownify = self.settings.get('MBOX_MARKDOWNIFY')

    # Loop over all messages, turn them into article objects.
    all_articles = []
    slugs = []

    for message in mbox.itervalues():
        # Get author name.
        author = message['from']
        if author is None:
            author = 'Unknown'
        else:
            # FIX: the original condition was ``'<' and '>' in author``,
            # which only tested for '>' because the literal '<' is always
            # truthy. Strip the "Name <addr>" form only when both
            # delimiters are actually present.
            if '<' in author and '>' in author:
                author = author[:author.find(' <')]
            author = author.replace('"', '').replace("'", '')
        # As a hack to avoid dealing with the fact that names can collide.
        if authorString is not None and authorString != '':
            author += ' ' + authorString
        authorObject = baseReader.process_metadata('author', author)

        # Get date object, using python-dateutil as an easy hack.
        # If there is no date in the message, abort, we shouldn't bother.
        if message['date'] is None:
            continue
        if parser:
            date = parser.parse(message['date'])
        else:
            logger.error('No python-dateutil, we cannot continue as ' +
                         'date formats cannot be parsed. ')
            continue
        monthYear = date.strftime('%B-%Y').lower()

        # Get title and slug; build year + month into slug.
        subject = message['subject']
        slugSubject = slugify(subject)
        slug = os.path.join(slugify(mboxCategory), monthYear, slugSubject)

        # Hack to handle multiple messages with the same subject.
        if slug in slugs:
            slug += "_%d"
            count = 2
            testSlug = slug % count
            while testSlug in slugs:
                count += 1
                testSlug = slug % count
            slug = testSlug
        slugs.append(slug)

        # Code adapted from Stackoverflow for parsing email messages.
        # https://stackoverflow.com/questions/4824376/parse-multi-part-email-with-sub-parts-using-python
        # Code is clumsy, should be refactored.
        if message.is_multipart():
            plaintext = None
            html = None
            for part in message.get_payload():
                payload = part.get_payload(decode=True)
                if payload is not None:
                    for charset in message.get_charsets():
                        if charset is not None and charset != 'x-unknown':
                            # These probably shouldn't be 'ignore'.
                            if sys.version_info.major >= 3 and not isinstance(
                                    payload, str):
                                payload = payload.decode(charset, "ignore")
                            elif sys.version_info.major <= 2:
                                payload = unicode(
                                    payload, charset, "ignore").encode(
                                        "ascii", "replace")
                if part.get_content_type() == 'text/plain':
                    plaintext = payload
                if part.get_content_type() == 'text/html':
                    html = payload
            if plaintext is None and html is None:
                continue
            elif plaintext is None:
                content = html
            else:
                if sys.version_info.major >= 3 and isinstance(
                        plaintext, bytes):
                    plaintext = plaintext.decode("utf-8", "ignore")
                content = plaintext_to_html(plaintext, markdownify)
        else:
            payload = message.get_payload(decode=True)
            for charset in message.get_charsets():
                if charset is not None and charset != 'x-unknown':
                    if sys.version_info.major < 3:
                        payload = unicode(payload, charset, "ignore").encode(
                            "ascii", "replace")
                    else:
                        payload = payload.decode(charset)
            if sys.version_info.major >= 3 and isinstance(payload, bytes):
                payload = payload.decode("utf-8", "ignore")
            content = plaintext_to_html(payload, markdownify)

        # On python 2, it seems that we need to do this final check of content.
        if sys.version_info.major <= 2:
            content = unicode(content, "us-ascii", "ignore").encode(
                "ascii", "replace")

        metadata = {
            'title': subject,
            'date': date,
            'category': category,
            'authors': [authorObject],
            'slug': slug,
        }
        article = Article(content=content,
                          metadata=metadata,
                          settings=self.settings,
                          source_path=mboxPath,
                          context=self.context)
        # This seems like it cannot happen... but it does without fail.
        article.author = article.authors[0]
        all_articles.append(article)

    return all_articles
def generate_cave_pages(generator, writer):
    """Write one page per cave plus an index of all caves.

    Trip keys in the 'cavepeep_cave' context may describe through-trips
    ("CaveA > CaveB"); they are split on '>' so each cave accumulates
    every article that visited it. Bio content from 'cavebios' is
    attached to a cave's page when available.
    """
    cave_bios = generator.context['cavebios']
    caves = generator.context['cavepeep_cave']
    caves_dict = {}
    # Split the through trips into individual caves.
    # Make unique list (set) of cave names and
    for trip in caves:
        for cave in trip.split('>'):
            create_or_add(caves_dict, cave.strip(), caves[trip])
    dictionary = caves_dict
    content_dictionary = cave_bios
    output_path = "caves"
    template = "cavepages"

    # One row per cave page: output path, bio content/metadata (may be
    # empty strings when no bio exists), and the articles to list.
    row = namedtuple('row', 'path content metadata articles')
    initialised_pages = {}
    for key in dictionary.keys():
        if key not in initialised_pages.keys():
            logging.debug(
                "Cavebios: Adding {} to list of pages to write".format(key))
            content = ''
            metadata = ''
            data = {}
            if key in content_dictionary:
                logging.debug("Cavebios: Content added to " + key)
                content = content_dictionary[key].content
                metadata = content_dictionary[key].metadata
                metadata['data'] = get_data_from_metadata(metadata)
            path = os.path.join(output_path, str(key) + '.html')
            initialised_pages[key] = (row(path, content, metadata,
                                          dictionary[key]))
        else:
            # Cave already registered: just extend its article list.
            initialised_pages[key].articles.extend(dictionary[key])

    for page_name, page_data in initialised_pages.items():
        #logging.debug("Cavebios: Writing {}".format(page_name))
        article = Article(page_data.content, page_data.metadata)
        writer.write_file(page_data.path,
                          template=generator.get_template(template),
                          context=generator.context,
                          pagename=page_name,
                          articles=sorted(page_data.articles,
                                          key=lambda x: x.date,
                                          reverse=True),
                          article=article)

    # ==========Write the index of caves================
    pages = initialised_pages
    # NOTE: 'row' is deliberately rebound to a different shape for the
    # index table (name, article count, most recent trip date, bio meta).
    row = namedtuple('row', 'name number recentdate meta')
    rows = []
    for page_name in pages.keys():
        name = page_name
        number = len(pages[page_name].articles)
        recentdate = max(
            [article.date for article in pages[page_name].articles])
        meta = content_dictionary[
            page_name].metadata if page_name in content_dictionary.keys(
        ) else None
        rows.append(row(name, number, recentdate, meta))
    filename = os.path.join(output_path, 'index.html')
    writer.write_file(filename,
                      template=generator.get_template(template + "_index"),
                      context=generator.context,
                      rows=sorted(rows, key=lambda x: x.name))
def generate_person_pages(generator, writer):
    # For each person generate a page listing the caves they have been in and the article that
    # describes that trip
    author_list = {}
    caver_bios = generator.context['caverbios']
    cavers = generator.context['cavepeep_person']
    dictionary = cavers
    content_dictionary = caver_bios
    output_path = "cavers"
    template = "caverpages"

    # Map author name -> that author's articles, so a caver's page can
    # also list the trip reports they personally wrote.
    for item in generator.authors:
        author_list[item[0].name] = item[1]

    # One row per caver page: output path, bio content/metadata, trip
    # articles mentioning them, and articles they authored.
    row = namedtuple('row', 'path content metadata articles authored')
    initialised_pages = {}
    for key in dictionary.keys():
        if key not in initialised_pages.keys():
            logging.debug(
                "Cavebios: Adding {} to list of pages to write".format(key))
            content = ''
            metadata = ''
            authored = []
            #print(key)
            #print(author_list.keys())
            if key in author_list.keys():
                authored = author_list[key]
            path = os.path.join(output_path, str(key) + '.html')
            initialised_pages[key] = (row(path, content, metadata,
                                          dictionary[key], authored))
        else:
            # Caver already registered: just extend their article list.
            initialised_pages[key].articles.extend(dictionary[key])

    for page_name, page_data in initialised_pages.items():
        #logging.debug("Cavebios: Writing {}".format(page_name))
        article = Article(page_data.content, page_data.metadata)
        writer.write_file(page_data.path,
                          template=generator.get_template(template),
                          context=generator.context,
                          personname=page_name,
                          articles=sorted(page_data.articles,
                                          key=lambda x: x.date,
                                          reverse=True),
                          article=article,
                          authored=page_data.authored)
    pages = initialised_pages

    # ==========Write the index of cavers================
    # NOTE: 'row' is rebound to the index-table shape here
    # (name, article count, most recent trip date, bio metadata).
    row = namedtuple('row', 'name number recentdate meta')
    rows = []
    for page_name in pages.keys():
        name = page_name
        number = len(pages[page_name].articles)
        recentdate = max(
            [article.date for article in pages[page_name].articles])
        meta = content_dictionary[
            page_name].metadata if page_name in content_dictionary.keys(
        ) else None
        rows.append(row(name, number, recentdate, meta))
    filename = os.path.join(output_path, 'index.html')
    # Sort alphabetically first, then by most recent trip date, so ties
    # on date keep a stable alphabetical order.
    writer.write_file(filename,
                      template=generator.get_template(template + "_index"),
                      context=generator.context,
                      rows=sorted(sorted(rows, key=lambda x: x.name),
                                  key=lambda x: x.recentdate,
                                  reverse=True))
def _handle_article_generation(self, path):
    """Read *path* with the markdown reader and wrap it in an Article."""
    body, meta = self.md_reader.read(path)
    return Article(content=body, metadata=meta)
def generate_context(self): """change the context""" # return the list of files to use files = self.get_files(self.path, exclude=[ 'pages', ]) all_articles = [] for f in files: content, metadata = read_file(f) # if no category is set, use the name of the path as a category if 'category' not in metadata.keys(): if os.path.dirname(f) == self.path: category = self.settings['DEFAULT_CATEGORY'] else: category = os.path.basename(os.path.dirname(f)) if category != '': metadata['category'] = unicode(category) if 'date' not in metadata.keys()\ and self.settings['FALLBACK_ON_FS_DATE']: metadata['date'] = datetime.fromtimestamp(os.stat(f).st_ctime) article = Article(content, metadata, settings=self.settings, filename=f) if not is_valid_content(article, f): continue add_to_url = u'' if 'ARTICLE_PERMALINK_STRUCTURE' in self.settings: article_permalink_structure = self.settings[ 'ARTICLE_PERMALINK_STRUCTURE'] article_permalink_structure = article_permalink_structure.lstrip( '/') # try to substitute any python datetime directive add_to_url = article.date.strftime(article_permalink_structure) # try to substitute any article metadata in rest file add_to_url = add_to_url % article.__dict__ add_to_url = [slugify(i) for i in add_to_url.split('/')] add_to_url = os.path.join(*add_to_url) article.url = urlparse.urljoin(add_to_url, article.url) article.save_as = urlparse.urljoin(add_to_url, article.save_as) if article.status == "published": if hasattr(article, 'tags'): for tag in article.tags: self.tags[tag].append(article) all_articles.append(article) elif article.status == "draft": self.drafts.append(article) self.articles, self.translations = process_translations(all_articles) for article in self.articles: # only main articles are listed in categories, not translations self.categories[article.category].append(article) self.authors[article.author].append(article) # sort the articles by date self.articles.sort(key=attrgetter('date'), reverse=True) self.dates = list(self.articles) 
self.dates.sort(key=attrgetter('date'), reverse=self.context['REVERSE_ARCHIVE_ORDER']) # create tag cloud tag_cloud = defaultdict(int) for article in self.articles: for tag in getattr(article, 'tags', []): tag_cloud[tag] += 1 tag_cloud = sorted(tag_cloud.items(), key=itemgetter(1), reverse=True) tag_cloud = tag_cloud[:self.settings.get('TAG_CLOUD_MAX_ITEMS')] tags = map(itemgetter(1), tag_cloud) if tags: max_count = max(tags) steps = self.settings.get('TAG_CLOUD_STEPS') # calculate word sizes self.tag_cloud = [ (tag, int( math.floor(steps - (steps - 1) * math.log(count) / (math.log(max_count) or 1)))) for tag, count in tag_cloud ] # put words in chaos random.shuffle(self.tag_cloud) # and generate the output :) # order the categories per name self.categories = list(self.categories.items()) self.categories.sort( reverse=self.settings.get('REVERSE_CATEGORY_ORDER')) self.authors = list(self.authors.items()) self.authors.sort() self._update_context(('articles', 'dates', 'tags', 'categories', 'tag_cloud', 'authors'))