def breadcrumbs(path):
    """Render a breadcrumb trail of HTML anchors for *path*.

    Top-level pages (one crumb past the content directory) get no
    trail; otherwise the configured open/close tags wrap a
    " > "-joined chain of links starting at the configured home link.
    """
    settings = ini('breadcrumbs')['general']
    parts = path.split('/')[1:]  # first segment is the content directory
    if len(parts) == 1:
        # top-level page: no breadcrumb needed
        return ''
    trail = ['<a href="%s">%s</a>' % (settings['home_href'],
                                      settings['home_text'])]
    directory = ''
    for part in parts:
        if part == 'index.html':
            # preceding crumb was a directory; its index needs no entry
            break
        # assume it's a file first...
        href = directory + part
        if '.' not in part:
            # ...no extension means this crumb is a directory
            directory += part + '/'
            href = directory + 'index.html'
        label = part.rsplit('.', 1)[0]
        label = label.replace('-', ' ').replace('_', ' ').title()
        trail.append('<a href="%s">%s</a>' % (href, label))
    return (settings['open_tag'] + '\n<p>' + ' > '.join(trail) +
            '\n</p>\n' + settings['close_tag'])
def seo(document_path, document):
    """Insert a canonical <link> into the document's header.

    The canonical href is the configured base href joined with
    *document_path*; *document* is returned unchanged when it contains
    no ``</head>`` marker.  (Removed a commented-out debug ``raise``.)
    """
    settings = common.ini('settings')
    basehref = settings['httpd']['basehref']
    href = basehref + document_path
    canonical = '<link rel="canonical" href="%s" />' % href
    return document.replace('</head>', canonical + '\n</head>')
def page_meta(path):
    """Document statistics HTML: last-modified and created timestamps
    for the file at *path*, wrapped in the configured tags.

    NOTE(review): on Unix ``getctime`` is the inode-change time, not a
    true creation time, so the "Created" label is only accurate on
    platforms that track birth time -- confirm acceptable.
    """
    modified = time.ctime(os.path.getmtime(path))
    created = time.ctime(os.path.getctime(path))
    stats = ('<strong>Last modified:</strong> %s<br>'
             '\n<strong>Created:</strong> %s' % (modified, created))
    general = ini('page_meta')['general']
    return general['open_tag'] + stats + general['close_tag']
def tag(document_path, document, *args):
    """Record *args as tags for the article at *document_path* and
    return an HTML ``<ul>`` of links to each tag's cached index page.

    Ensures the schema exists, upserts the article row (keyed by its
    href), inserts any missing tag names, and links the article to
    each tag through the article_tag join table.

    Raises Exception (carrying the parsed soup, for debugging) when the
    configured title element is missing or empty.
    """
    conn = sqlite.connect('database/tag.db')
    cursor = conn.cursor()
    # because I like natural joins!
    # FIX: the REFERENCES clauses previously pointed at non-existent
    # columns article(id)/tag(id); they now name the real primary keys.
    sql = '''
    CREATE TABLE IF NOT EXISTS article (
        article_id INTEGER PRIMARY KEY,
        title TEXT NOT NULL,
        href TEXT NOT NULL UNIQUE
    );
    CREATE TABLE IF NOT EXISTS tag (
        tag_id INTEGER PRIMARY KEY,
        name TEXT NOT NULL UNIQUE
    );
    CREATE TABLE IF NOT EXISTS article_tag (
        article_id INTEGER REFERENCES article(article_id),
        tag_id INTEGER REFERENCES tag(tag_id),
        PRIMARY KEY (article_id, tag_id)
    );
    '''
    cursor.executescript(sql)
    settings = ini('blog_index')
    soup = BeautifulSoup(document)
    # article table: article_id, title, href
    title_element = soup.find(id=(settings['title']['id'], ))
    try:
        title = title_element.contents[0]
    except (AttributeError, IndexError):
        # title element missing (find returned None) or empty; surface
        # the parsed document for debugging
        raise Exception(soup)
    href = document_path.split(os.path.sep, 1)[-1]
    sql = 'INSERT OR IGNORE INTO article (title, href) VALUES (?, ?)'
    cursor.execute(sql, (title, href))
    # assure tags in db
    # yes, this is typically faster than attempting to convert args to a
    # list of tuples and using executemany
    for name in args:
        cursor.execute('INSERT OR IGNORE INTO tag (name) VALUES (?)',
                       (name, ))
    # link tags to article in db
    sql = 'SELECT article_id FROM article WHERE href=?'
    cursor.execute(sql, (href, ))
    article_id = cursor.fetchone()[0]
    tag_list = ['<ul class="tag-list">']
    for name in args:
        sql = '''
        INSERT OR IGNORE INTO article_tag (article_id, tag_id)
        VALUES (?, (SELECT tag_id FROM tag WHERE name=?))
        '''
        cursor.execute(sql, (article_id, name))
        entry = '<li><a href="/cache/index_%s.html">%s</a></li>' % (name, name)
        tag_list.append(entry)
    conn.commit()
    conn.close()
    tag_list.append('</ul>')
    return '\n'.join(tag_list)
def table_of_contents(document_path, document):
    """Build a nested <ol> table of contents from the h3-h6 headings in
    *document*, linking each entry to its id anchor in *document_path*.

    Returns '' when there are fewer than two headings, or when the
    heading levels never step back up (see ``has_nested`` below).
    Maybe instead of StringIO I should build with lxml?
    """
    table_settings = ini('table-of-contents')
    root = etree.HTML(document)  # can define base_url!
    headings = ('h3', 'h4', 'h5', 'h6')
    # index into `headings` of the previously-seen entry; -1 forces the
    # first heading to open the outer <ol>
    current_level = -1
    current_level_text = None  # NOTE(review): assigned but never read
    html = StringIO()
    # heading shown above the list
    open_tag = table_settings['heading']['open_tag']
    close_tag = table_settings['heading']['close_tag']
    text = table_settings['heading']['text']
    html.write(open_tag + text + close_tag)
    # start table of contents... table
    html.write(table_settings['container']['open_tag'])
    try:
        # drop the leading content directory from the link target;
        # use os.path instead.
        __, document_path = document_path.split('/', 1)
    except ValueError:
        raise Exception(document_path)
    number_of_entries = 0
    has_nested = False
    for element in root.iter():
        tag = element.tag
        if tag not in headings:
            continue
        number_of_entries += 1
        nest_level = headings.index(tag)
        level_id = element.get('id')
        level_text = element.text
        if level_id:
            # heading carries an anchor id: link the entry to it
            subs = (document_path, level_id, level_text)
            entry = '<a href="%s#%s">%s</a>' % subs
        else:
            # nothing to link to; plain-text entry
            entry = level_text
        if nest_level == current_level:
            # sibling heading at the same depth
            html.write('<li>%s' % entry)
        elif nest_level > current_level:
            # deeper heading: open a nested list
            # NOTE(review): only one <ol> is opened even when the level
            # jumps by more than one (e.g. h3 -> h5) -- confirm intended
            html.write('\n<ol>\n <li>%s' % entry)
        elif nest_level < current_level:
            # stepped back up: close the nested list
            # NOTE(review): a single </ol> is written no matter how many
            # levels we came back up, so deep nestings may be left
            # unclosed -- confirm intended
            has_nested = True
            html.write('</ol>\n <li>%s' % entry)
        current_level = nest_level
    # suppress trivial tables: fewer than two entries, or a strictly
    # flat/descending heading sequence (has_nested only becomes True
    # when some heading stepped back to a shallower level)
    if number_of_entries < 2 or not has_nested:
        return ''
    html.write('\n</ol>')
    html.write(table_settings['container']['close_tag'])
    return html.getvalue()
def config(document, replace, section, key):
    """Substitute the placeholder *replace* in *document* with the
    configured value at settings[section][key]."""
    value = common.ini('settings')[section][key]
    return document.replace(replace, value)
def blog_index(document_path):
    """Assemble blog index HTML: one container per article holding a
    permalink header, page meta, and summary paragraphs.

    Walks every file indexed under the document's directory (skipping
    index.html files) and pulls each article's title from its
    ``##code##`` include element.  (Removed the unused ``truncate``
    local.)
    """
    settings = ini('blog_index')
    document_directory, __ = document_path.rsplit('/', 1)
    contents = StringIO()  # being replaced
    index_d = common.index(document_directory)
    # title/permalink
    permalink = (settings['title']['open'] +
                 '<a href="%s">%s</a>' +
                 settings['title']['close'])
    # container
    container_open = settings['container']['open']
    container_close = settings['container']['close']
    # header
    header_open = settings['header']['open']
    header_close = settings['header']['close']
    # summary
    summary_open = settings['summary']['open']
    summary_close = settings['summary']['close']
    for directory, files in index_d.items():
        paths = [directory + '/' + fname for fname in files]
        # determine category somehow?
        for path in paths:
            if path.endswith('index.html'):
                continue
            with open(path) as f:
                article = f.read()
            # get the article title, permalink
            # get title from octothorpe ##code##
            # NOTE(review): `tag` here must resolve to the tag *module*;
            # the sibling tag() function in this file shadows that name
            # at module scope -- confirm import/definition order.
            element = tag.TagDoc(article).first('include', 'title')
            title = element['title']
            # assemble the obtained data
            contents.write(container_open)
            contents.write(header_open)
            link = path.split('/', 1)[-1]
            contents.write(' ' + permalink % (link, title))
            # get meta
            contents.write(page_meta(path))
            contents.write(header_close)
            # summaries: every paragraph except the title include itself
            for paragraph in BeautifulSoup(article).find_all('p'):
                text = paragraph.string
                # FIX: .string is None for <p> with nested markup; skip
                # those instead of crashing on `x in None`
                if text is None or element.full in text:
                    continue
                contents.write(summary_open + text + summary_close)
            contents.write(container_close)
    return contents.getvalue()