def generate_manpage_sitemaps(self):
    """Write a sitemap.xml into each man section directory.

    Pages that exist in more than one package get a package-prefixed
    filename, mirroring the ``prefix`` handling in write_page.

    Returns:
        list: absolute URLs of the per-section sitemaps, fed to
        generate_manpage_sitemap_index.
    """
    query = """SELECT name, section, count(package) as amount, group_concat(package) as packages, file FROM manpages GROUP by name, section ORDER by section ASC, name ASC"""
    pages_in_section = defaultdict(set)
    for name, section, amount, packages, file in self.conn.execute(query):
        if amount > 1:
            # Duplicated page: one HTML file per owning package.
            for package in packages.split(','):
                page = "%s-%s.%s.html" % (package, name, section)
                pages_in_section["man%s" % section[0]].add(page)
        else:
            page = "%s.%s.html" % (name, section)
            pages_in_section["man%s" % section[0]].add(page)
    sm_item_tpl = load_template('sitemap-url-nolastmod')
    sitemap_urls = []
    for section in SECTIONS:
        urls = [
            sm_item_tpl.substitute(
                url="%s/%s/%s" % (self.manpages_url, section, page))
            for page in pages_in_section[section]
        ]
        # Include the section's own index page in the sitemap.
        urls.append(
            sm_item_tpl.substitute(url="%s/%s/" % (self.manpages_url, section)))
        sitemap = load_template('sitemap').substitute(urlset="\n".join(urls))
        rel_sitemap_path = pjoin(section, "sitemap.xml")
        # 'with' guarantees the handle is closed even if the write fails
        # (the original open/close pair leaked it on error).
        with open(pjoin(self.manpages_dir, rel_sitemap_path), 'w') as f:
            f.write(sitemap)
        sitemap_urls.append("%s/%s" % (self.manpages_url, rel_sitemap_path))
    return sitemap_urls
def parse_directory(self, source_dir):
    """Parse every man page below source_dir into the manpages table.

    The table is wiped first. Redirection pages are followed by queueing
    the target file on the iterator, remembering the original filename so
    the page is indexed under its original name/section.
    """
    self.conn.execute("DELETE FROM manpages")
    iterator = DirectoryIterator(glob.iglob("%s/*/*.*" % source_dir))
    for page_file, redirected_from in iterator:
        logging.debug("Processing man page %s ...", page_file)
        try:
            parser = ManpageParser(page_file)
            manpage = parser.process()
        except NotSupportedFormat:
            logging.info("Skipping %s, not supported format...", page_file)
            continue
        except RedirectedPage as e:
            redirect_to = e.redirect
            logging.info("Page %s, has a redirection to %s...", page_file,
                         redirect_to)
            # The redirect target is relative: climb one directory level
            # per '/' in the target path before joining.
            parent_dirs = redirect_to.count('/')
            base_dir = dname(page_file)
            while parent_dirs:
                base_dir = dname(base_dir)
                parent_dirs -= 1
            redirection_full_path = pjoin(base_dir, redirect_to)
            # Preserve the first filename of a redirect chain.
            original_file = redirected_from if redirected_from else page_file
            iterator.add_item(redirection_full_path, original_file)
            continue
        except IOError:
            logging.info("Skipping %s, file (%s) does not exist",
                         redirected_from, page_file)
            continue
        except UnexpectedMacro as e:
            macro = str(e).split('(', 1)[1].split(')', 1)[0]
            logging.info("Skipping %s, missing macro (%s)", page_file, macro)
            self.missing_parsers[macro] += 1
            continue
        except Exception:
            # Was a bare 'except:' with a print statement, which also
            # trapped KeyboardInterrupt/SystemExit. Keep the best-effort
            # skip, but record the full traceback through logging.
            logging.exception("Error in %s", page_file)
            continue
        if not redirected_from:
            name, section = manpage.name, manpage.section
        else:
            # A followed redirect keeps the ORIGINAL name/section.
            name, ext = os.path.splitext(bname(redirected_from))
            section = ext[1:]
        package = bname(dname(page_file))
        self.conn.execute(
            "INSERT INTO manpages (package, name, section, subtitle, file) VALUES (?, ?, ?, ?, ?)",
            (package, name, section, manpage.title, page_file))
        logging.debug("Man page %s processed correctly...", page_file)
def generate_images(self, output_dir):
    """Recreate output_dir/images and render one PNG per man page."""
    images_dir = pjoin(output_dir, "images")
    # Start from a clean slate; ignore_errors also covers "not there yet".
    shutil.rmtree(images_dir, ignore_errors=True)
    ManDirectoryParser.makedirs(images_dir)
    for (package, name, section), description in self.subtitles.iteritems():
        image_name = "%s-%s-%s.png" % (package, name, section)
        ManDirectoryParser.write_image(
            pjoin(images_dir, image_name), name, section, description)
def write_page(self, package, name, section, parent_dir, file, prefix=None, prev_page=None, next_page=None):
    """Render one man page to HTML under manpages_dir/parent_dir.

    Args:
        package: owning package, stored on the parsed page object.
        name, section: page identity; the output file is name.section.html.
        parent_dir: directory (relative to manpages_dir) to write into.
        file: path of the roff source file to parse.
        prefix: optional package prefix for pages duplicated across packages.
        prev_page, next_page: navigation links embedded in the page.
    """
    filename = "%s.%s.html" % (name, section)
    logging.info("Creating manpage %s.%s", name, section)
    if prefix:
        # Disambiguate pages that exist in several packages.
        filename = "%s-%s" % (prefix, filename)
    full_path = pjoin(self.manpages_dir, parent_dir, filename)
    mp = ManpageParser(file).process()
    mp.package = package
    mp.prev_page = prev_page
    mp.next_page = next_page
    mp.url = "https://www.carta.tech/man-pages/man%s/%s" % (section, filename)
    AvailablePages.pages = self.available_pages
    # Lazy %-args instead of eager string formatting.
    logging.debug("Writing %s", full_path)
    # 'with' closes the handle even if mp.html() or the write raises.
    with open(full_path, 'w') as f:
        f.write(mp.html())
def __init__(self, database):
    """Open the man-page database and initialize parsing state."""
    db_path = pjoin(package_directory, "..", database)
    # isolation_level=None puts sqlite3 into autocommit mode.
    self.conn = sqlite3.connect(db_path, isolation_level=None)
    # Return raw byte strings rather than unicode for TEXT columns.
    self.conn.text_factory = str
    self.cursor = self.conn.cursor()
    self.pages = {}
    self.missing_parsers = Counter()
def generate_manpage_sitemap_index(self, urls):
    """Write the top-level sitemap index referencing per-section sitemaps.

    Args:
        urls: iterable of absolute sitemap URLs (one per man section).
    """
    sitemap_index_url_tpl = load_template('sitemap-index-url')
    # Use a fresh name instead of rebinding the 'urls' parameter.
    entries = [sitemap_index_url_tpl.substitute(url=url) for url in urls]
    content = load_template('sitemap-index').substitute(
        sitemaps=''.join(entries))
    # 'with' guarantees the handle is closed even on a failed write.
    with open(pjoin(self.manpages_dir, "sitemap.xml"), 'w') as f:
        f.write(content)
def write_aliases_page(self, name, section, parent_dir, packages):
    """Create the un-prefixed placeholder file for an aliased page.

    Pages owned by several packages get package-prefixed HTML files; this
    writes the bare name.section.html path and returns it.
    NOTE(review): the body is a single space -- presumably overwritten or
    redirected elsewhere; confirm before changing.
    """
    filename = "%s.%s.html" % (name, section)
    full_path = pjoin(self.manpages_dir, parent_dir, filename)
    # 'with' closes the handle even if the write fails.
    with open(full_path, 'w') as f:
        f.write(' ')
    return full_path
def generate_output(self, output_dir, base_url):
    """Record output locations, rebuild the tree, and render all pages."""
    # Directory layout and matching public URLs.
    self.root_html = output_dir
    self.manpages_dir_name = "man-pages"
    self.packages_dir_name = "packages"
    self.manpages_dir = pjoin(output_dir, self.manpages_dir_name)
    self.packages_dir = pjoin(output_dir, self.packages_dir_name)
    self.manpages_url = base_url + "man-pages"
    self.packages_url = base_url + "packages"
    # Wipe stale output, recreate placeholders, then write every page.
    self.empty_output_directories()
    self.create_output_directories()
    self.create_manpages()
def generate_indexes(self, output_dir, base_url):
    """Generate all sitemaps and index pages for an already-built tree."""
    # Same layout bookkeeping as generate_output, so this can run alone.
    self.root_html = output_dir
    self.manpages_dir_name = "man-pages"
    self.packages_dir_name = "packages"
    self.manpages_dir = pjoin(output_dir, self.manpages_dir_name)
    self.packages_dir = pjoin(output_dir, self.packages_dir_name)
    self.manpages_url = base_url + "man-pages"
    self.packages_url = base_url + "packages"
    # Man-page sitemaps and their index, then the per-section indexes.
    self.generate_manpage_sitemap_index(self.generate_manpage_sitemaps())
    self.generate_manpage_indexes()
    self.generate_manpage_index()
    # Site root index.html.
    self.generate_base_index()
    # Per-package indexes plus the package sitemap.
    self.generate_package_indexes()
def generate_sitemap_indexes(sm_urls):
    """Write a sitemap index file listing sm_urls.

    NOTE(review): takes no ``self`` and writes to a ``base_manpage_dir``
    name that is not defined in this file -- looks vestigial next to
    generate_manpage_sitemap_index; confirm it is still called anywhere.
    """
    sitemap_index_url_tpl = load_template('sitemap-index-url')
    # join() instead of quadratic '+=' string accumulation.
    entries = [sitemap_index_url_tpl.substitute(url=u) for u in sm_urls]
    sitemap_index_content = load_template('sitemap-index').substitute(
        sitemaps=''.join(entries))
    # 'with' guarantees the handle is closed even on a failed write.
    with open(pjoin(base_manpage_dir, "sitemap.xml"), 'w') as f:
        f.write(sitemap_index_content)
def generate_base_index(self):
    """Write the site root index.html from the base + index templates."""
    base_tpl = load_template('base')
    index_tpl = load_template('index-contents')
    index = base_tpl.substitute(
        metadescription="Carta.tech: The home for open documentation",
        title="Carta.tech: The home for open documentation",
        canonical="",
        extraheaders="",
        header="",
        breadcrumb="",
        content=index_tpl.substitute(),
    )
    # 'with' guarantees the handle is closed even on a failed write.
    with open(pjoin(self.root_html, "index.html"), 'w') as f:
        f.write(index)
def generate_manpage_index(self):
    """Write the man-pages landing index.html."""
    base_tpl = load_template('base')
    index_tpl = load_template('index-manpage')
    index = base_tpl.substitute(
        metadescription="Linux Man Pages",
        title="Linux Man Pages",
        canonical="",
        extraheaders="",
        header="",
        breadcrumb="",
        content=index_tpl.substitute(),
    )
    # 'with' guarantees the handle is closed even on a failed write.
    with open(pjoin(self.manpages_dir, "index.html"), 'w') as f:
        f.write(index)
def generate_manpage_indexes(self):
    """Write an index.html for every man section.

    Pages shared by several packages are listed once per package; the
    canonical 'man-pages' package uses its own item template.
    """
    query = """SELECT name, section, count(package) as amount, group_concat(package) as packages FROM manpages GROUP by name, section ORDER by section ASC, name ASC"""
    section_item_tpl = load_template('section-index-item')
    section_item_manpage_tpl = load_template('section-index-item-manpage')
    items = defaultdict(list)

    def render_item(package, name, section, link):
        # One place for the template choice; both branches below were
        # duplicating this verbatim.
        subtitle = self.subtitles[(package, name, section)]
        tpl = (section_item_manpage_tpl if package == "man-pages"
               else section_item_tpl)
        return tpl.substitute(link=link, name=name, section=section,
                              description=subtitle, package=package)

    for name, section, amount, packages in self.conn.execute(query):
        page = "%s.%s" % (name, section)
        # 'packages' is a comma-joined list when the page is shared,
        # otherwise a single package name.
        owners = packages.split(',') if amount > 1 else [packages]
        for package in owners:
            items[section].append(render_item(package, name, section, page))

    for section in items:
        # BUG FIX: was items[section[0]] -- looked items up by the FIRST
        # CHARACTER of the section, which breaks any multi-char section.
        section_content = load_template('section-index').substitute(
            items=''.join(items[section]))
        section_description = SECTIONS["man%s" % section[0]]
        breadcrumb = [
            ("/man-pages/", "Man Pages"),
            ("/man-pages/man%s/" % section[0], section_description),
        ]
        out = load_template('base').substitute(
            title="Linux Man Pages - %s" % section_description,
            canonical="",
            extraheaders="",
            header=load_template('header').substitute(
                title=section_description, section=section, subtitle=""),
            breadcrumb=get_breadcrumb(breadcrumb),
            content=section_content,
            metadescription=section_description.replace("\"", "\'"),
        )
        # 'with' closes the handle even on a failed write.
        with open(pjoin(self.manpages_dir, "man%s" % section[0],
                        'index.html'), 'w') as f:
            f.write(out)
def generate_package_indexes(self):
    """Write per-package index pages, the package list, and its sitemap.

    For every package: one index.html grouping its pages by section.
    Then a packages/sitemap.xml and a packages/index.html listing all
    packages.
    """
    item_tpl = load_template('package-index-item')
    package_index_tpl = load_template('package-index')
    package_index_section_tpl = load_template('package-index-section')
    package_index_contents_tpl = load_template('package-index-contents')
    package_list_item_tpl = load_template('package-list-item')
    sm_item_tpl = load_template('sitemap-url-nolastmod')
    query = """SELECT name, section, count(package) as amount, group_concat(package) as packages FROM manpages GROUP by name, section ORDER by package ASC, section ASC, name ASC"""
    # package -> section -> [(page, subtitle, aliased)]
    package_container = defaultdict(lambda: defaultdict(list))
    for name, section, amount, packages, in self.conn.execute(query):
        page = "%s.%s" % (name, section)
        if amount > 1:
            # Shared page: one prefixed entry per owning package.
            for package in packages.split(','):
                subtitle = self.subtitles[(package, name, section)]
                package_container[package][section].append(
                    (page, subtitle, True))
        else:
            subtitle = self.subtitles[(packages, name, section)]
            package_container[packages][section].append(
                (page, subtitle, False))
    package_list_items = []
    sitemap_urls = []
    for package, sections in package_container.items():
        # NOTE: shadows the module-level 'package_directory' used in
        # __init__; renamed here to avoid the foot-gun.
        pkg_dir = pjoin(self.packages_dir, package)
        self.makedirs(pkg_dir)
        package_index = []
        for section, pages in sorted(sections.items()):
            full_section = "man%s" % (section[0],)
            section_description = SECTIONS[full_section]
            section_directory = pjoin(pkg_dir, full_section)
            section_relative_url = pjoin(self.manpages_dir_name,
                                         full_section)
            section_url = self.manpages_url + "/" + full_section + "/"
            self.makedirs(section_directory)
            items = []
            for name, subtitle, aliased in pages:
                filename = "%s.html" % (name,)
                if aliased:
                    # Aliased pages were written with a package prefix.
                    filename = "%s-%s" % (package, filename)
                relative_url = "/" + pjoin(section_relative_url, filename)
                items.append(
                    item_tpl.substitute(name=name, description=subtitle,
                                        link=relative_url))
            package_index.append(
                package_index_section_tpl.substitute(
                    amount=len(pages),
                    numeric_section=section,
                    section=section_description,
                    section_url=section_url,
                    content=package_index_tpl.substitute(
                        items='\n'.join(items))))
        contents = package_index_contents_tpl.substitute(
            contents="\n".join(package_index))
        breadcrumb = [
            ("/packages/", "Packages"),
            ("/packages/%s/" % package, package),
        ]
        out = load_template('base').substitute(
            title="Man Pages in %s" % package,
            canonical="",
            extraheaders="",
            header=load_template('header-package').substitute(
                title=package),
            breadcrumb=get_breadcrumb(breadcrumb),
            content=contents,
            metadescription="Man Pages in %s" % package,
        )
        # 'with' closes the handle even on a failed write (three of these
        # open/close pairs below leaked on error in the original).
        with open(pjoin(pkg_dir, "index.html"), 'w') as f:
            f.write(out)
        package_list_items.append(
            package_list_item_tpl.substitute(url="%s/" % (package,),
                                             package=package))
        sitemap_urls.append(
            sm_item_tpl.substitute(url="%s/%s/" % (self.packages_url,
                                                   package)))
    with open(pjoin(self.packages_dir, "sitemap.xml"), 'w') as f:
        f.write(load_template('sitemap').substitute(
            urlset="\n".join(sitemap_urls)))
    # Generate package index
    breadcrumb = [
        ("/packages/", "Packages"),
    ]
    index = load_template('ul').substitute(
        content="\n".join(package_list_items))
    index_path = pjoin(self.packages_dir, "index.html")
    out = load_template('base').substitute(
        title="Packages with man pages",
        canonical="",
        extraheaders="",
        header=load_template('header-package-index').substitute(
            title="Packages with man pages"),
        breadcrumb=get_breadcrumb(breadcrumb),
        content=index,
        metadescription="List of packages with man pages",
    )
    with open(index_path, 'w') as f:
        f.write(out)
def create_output_directories(self):
    """Create one man-page output directory per section.

    Uses an explicit loop instead of map(): map() is lazy in Python 3,
    so relying on it for side effects silently does nothing there.
    """
    for directory in SECTIONS:
        ManDirectoryParser.makedirs(pjoin(self.manpages_dir, directory))
import json
import sys

import httplib2
from apiclient.discovery import Resource, build
from google.oauth2.credentials import Credentials

from helpers import BASE_PATH, locked_file, pjoin

# OAuth client configuration, loaded once at import and shared by every
# API call built from this module.
client_path = pjoin(BASE_PATH, 'hta/handin/client_secret.json')
with locked_file(client_path) as f:
    j_creds = json.load(f)

client_id = j_creds['installed']['client_id']
client_secret = j_creds['installed']['client_secret']

ref_tok_path = pjoin(BASE_PATH, 'hta/handin/ref_tok.txt')
with locked_file(ref_tok_path) as f:
    ref_tok: str = f.read().strip()


def sheets_api() -> Resource:
    """Build and return an authenticated Google Sheets v4 service.

    Refreshes access using the stored refresh token and client secrets.

    Raises:
        httplib2.ServerNotFoundError: when the discovery host cannot be
            reached (logged, then re-raised).
    """
    credentials = Credentials(
        None,
        refresh_token=ref_tok,
        token_uri="https://accounts.google.com/o/oauth2/token",
        client_id=client_id,
        client_secret=client_secret
    )
    try:
        sheets = build('sheets', 'v4', credentials=credentials)
    except httplib2.ServerNotFoundError:
        print('httplib2 exception in sheets build')
        # BUG FIX: the original swallowed the error and fell off the end,
        # returning None despite the -> Resource annotation.
        raise
    # BUG FIX: the original never returned the built service.
    return sheets