def __init__(self, context, filepath=None, metadata=None, content=None, basename=None):
    self.context = context
    if filepath is not None:
        self.source_filepath = filepath

        # Get filenames, paths, etc.
        self.dirname = os.path.dirname(filepath)
        self.basepath, self.filename = os.path.split(filepath)
        self.basename, self.extension = os.path.splitext(self.filename)
        self.relpath = os.path.relpath(
            os.path.join(self.dirname, self.basename) + '.html',
            context.SOURCE_FOLDER)

        # Parse the file for content and metadata
        with codecs.open(filepath, 'r', encoding='utf8') as md_file:
            #reader = Reader(context, md_file.read())
            reader = Reader(md_file.read())
            metadata, content = reader.parse()
    elif metadata is not None and content is not None and basename is not None:
        self.content = content
        self.basename = basename
        self.source_filepath = None
        self.dirname = None
        self.basepath = None
        self.relpath = None
    else:
        raise Exception(
            'Article object not supplied with either filepath or content and metadata.')

    # TODO: this doesn't seem to work
    self.content = content
    self.metadata = metadata
    self.status = None
    self.template = None
    self.output_filepath = None
    self.same_as_cache = False
    self.cache_type = 'SOURCE'

    signal_sender = Signal(signals.AFTER_SOURCE_INITIALISED)
    signal_sender.send(context=context, source=self)
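# Usage sketch (illustrative only, not part of the original source; the class name
# 'Source' is an assumption based on the signal names): a source object can be built
# either from a markdown file on disk or from in-memory pieces, e.g.
#
#   src = Source(context, filepath='pages/about.md')                   # parsed from disk
#   src = Source(context, metadata={}, content='', basename='about')   # built in memory
#
# Supplying neither combination raises the exception above.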
def __init__(self, context, filepath=None):
    super().__init__(context, filepath)
    self.template = 'page.html'
    self.title = self.metadata['title'] if 'title' in self.metadata else self.basename
    self.url = self.relpath

    if 'PAGE_SLUG' in context:
        slug_dict = vars(self)
        output_filename = context.PAGE_SLUG.format(**slug_dict)
    else:
        output_filename = '{}.html'.format(self.basename)
    self.output_filepath = os.path.join(context.OUTPUT_FOLDER, 'pages', output_filename)
    self.url = 'pages/{}'.format(output_filename)
    self.cache_id = self.output_filepath
    self.cache_type = 'PAGE'

    signal_sender = Signal(signals.AFTER_PAGE_READ)
    signal_sender.send(context=context, page=self)
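# Illustrative example (the PAGE_SLUG value below is an assumption, not taken from the
# project): with a settings.py containing
#
#   PAGE_SLUG = '{basename}.html'
#
# a page sourced from pages/about.md would be written to <OUTPUT_FOLDER>/pages/about.html
# and given the url 'pages/about.html'. When PAGE_SLUG is absent, the default
# '{basename}.html' naming in the else branch applies.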
def write_file(self, context=None, **kwargs):
    self.context = context if context is not None else self.context
    if self.context.caching_enabled and self.same_as_cache:
        return False
    if self.template is None:
        return False

    signal_sender = Signal(signals.BEFORE_MD_CONVERT)
    signal_sender.send(context=self.context, source=self)

    self.content = self.context.MD(self.content)

    signal_sender = Signal(signals.BEFORE_SOURCE_WRITE)
    signal_sender.send(context=self.context, source=self)

    writer = Writer(self.context, self.output_filepath, self.template, **kwargs)
    writer.write_file()
    return True
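# Usage sketch (illustrative only): callers check the boolean return value to see
# whether the file was actually rendered or skipped, e.g.
#
#   if page.write_file(context=CONTEXT):
#       logger.info("wrote %s", page.output_filepath)
#   else:
#       logger.info("skipped %s (cached or no template)", page.output_filepath)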
def main():
    """Main olm function"""
    if args.init:
        logger.notice("Setting up basic website")
        base_path = os.path.abspath(args.src)
        if os.listdir(base_path):
            logger.error("The directory must be empty in order to initialise")
            return
        quickstart(base_path)
        logger.success("Done! Run 'olm {}'".format(args.src))
        return

    time_all = time.time()
    logger.notice("Beginning static site generation")

    if args.settings is not None:
        settings_file_path = os.path.abspath(args.settings)
    else:
        settings_file_path = os.path.abspath(os.path.join(args.src, 'settings.py'))

    if os.path.isfile(settings_file_path):
        CONTEXT = load_context_from_file(settings_file_path, load_default_context(args.src))
    else:
        logger.error('No valid settings.py file found')
        sys.exit()

    CONTEXT.caching_enabled = True
    CONTEXT.rewrite_cache_files_when_disabled = False
    if args.disable_caching or args.disable_caching_and_rewrite:
        CONTEXT.caching_enabled = False
    if args.disable_caching_and_rewrite:
        CONTEXT.rewrite_cache_files_when_disabled = True

    plugins = Plugins(CONTEXT)

    signal_sender = Signal(signals.INITIALISED)
    signal_sender.send(context=CONTEXT)

    site = Site(CONTEXT)
    site.build_site()
    subsites = site.subsites

    for subsite in subsites:
        CONTEXT = load_context_from_file(settings_file_path, load_default_context(args.src))
        CONTEXT.caching_enabled = True
        CONTEXT.rewrite_cache_files_when_disabled = False
        if args.disable_caching or args.disable_caching_and_rewrite:
            CONTEXT.caching_enabled = False
        if args.disable_caching_and_rewrite:
            CONTEXT.rewrite_cache_files_when_disabled = True

        plugins.unload_plugins()

        subsite_name = subsite[1:]
        logger.info("")
        logger.info("Found subsite '%s'", subsite_name)
        if subsite_name in CONTEXT.SUBSITES:
            subsite_context = load_context(CONTEXT.SUBSITES[subsite_name], CONTEXT)
        else:
            subsite_context = CONTEXT

        plugins.load_plugins(subsite_context)

        subsite_context.BASE_FOLDER = os.path.join(CONTEXT.SOURCE_FOLDER, subsite)
        subsite_context.SOURCE_FOLDER = os.path.join(CONTEXT.SOURCE_FOLDER, subsite)
        subsite_context.OUTPUT_FOLDER = os.path.join(CONTEXT.OUTPUT_FOLDER, subsite_name)
        subsite_context.CACHE_LOCATION = os.path.join(
            CONTEXT.BASE_FOLDER, 'cache_' + subsite_name + '.pickle')

        site = Site(subsite_context)
        site.build_site()

    logger.success("Completed everything in %.3f seconds", (time.time() - time_all))
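# Illustrative note (the values below are assumptions; the setting names are ones
# referenced elsewhere in this code): olm reads <src>/settings.py unless --settings is
# given, loading it on top of load_default_context(args.src), so a minimal settings.py
# might only override a few values, e.g.
#
#   OUTPUT_FOLDER = 'dist'
#   NO_SCAN = ['static', 'templates']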
def build_site(self):
    CONTEXT = self.context
    all_files = []
    articles = []
    draft_articles = []
    unlisted_articles = []
    pages = []
    subsites = set()
    CONTEXT['authors'] = {}
    CONTEXT['all_files'] = []
    CONTEXT['articles'] = []
    CONTEXT['pages'] = []

    signal_sender = Signal(signals.SITE_INITIALISED)
    signal_sender.send(context=CONTEXT)

    # Scan source files
    logger.info("Scanning source files")
    time_source_start = time.time()
    for dirname, dirs, files in os.walk(CONTEXT.SOURCE_FOLDER):
        for filename in files:
            filepath = os.path.join(dirname, filename)
            relpath = os.path.relpath(filepath, CONTEXT.SOURCE_FOLDER)
            firstfolder = relpath.split(os.sep)[0]
            basename, extension = os.path.splitext(filename)
            if firstfolder in CONTEXT.NO_SCAN:
                continue
            if extension.lower() == ".md":
                if firstfolder[0] == "_":
                    subsites.add(firstfolder)
                elif firstfolder == "pages":
                    logger.debug("Found %s", filepath)
                    page = Page(CONTEXT, filepath)
                    pages.append(page)
                    all_files.append(page)
                else:
                    logger.debug("Found %s", filepath)
                    article = Article(CONTEXT, filepath)
                    if article.type in CONTEXT.ARTICLE_TYPES + CONTEXT.INDEX_TYPES:
                        if article.status == ArticleStatus.ACTIVE:
                            articles.append(article)
                        elif article.status == ArticleStatus.UNLISTED:
                            unlisted_articles.append(article)
                        else:
                            draft_articles.append(article)
                        all_files.append(article)
    logger.info(
        "Processed %d articles, %d unlisted articles, %d drafts, and %d pages in %.3f seconds",
        len(articles), len(unlisted_articles), len(draft_articles), len(pages),
        time.time() - time_source_start)

    # Extend the lists (in case a plugin has added to them post init)
    CONTEXT['all_files'].extend(all_files)
    CONTEXT['articles'].extend(sorted(articles, key=lambda k: (k.date), reverse=True))
    CONTEXT['pages'].extend(pages)

    signal_sender = Signal(signals.BEFORE_CACHING)
    signal_sender.send(context=CONTEXT, articles=CONTEXT.articles)

    # Work out the cache status of all the files
    check_cache(CONTEXT, CONTEXT['all_files'])

    signal_sender = Signal(signals.AFTER_ALL_ARTICLES_READ)
    signal_sender.send(context=CONTEXT, articles=CONTEXT.articles)

    # Check for duplicate output paths
    # and see if the output file exists
    outputs = []
    for f in CONTEXT['all_files']:
        if f.output_filepath is None:
            continue
        if f.output_filepath not in outputs:
            outputs.append(f.output_filepath)
            if not os.path.isfile(f.output_filepath):
                logger.spam(
                    "'%s' does not exist. Setting cached status to False.",
                    f.source_filepath)
                #f.same_as_cache = False
        else:
            dupes = [
                b for b in CONTEXT['all_files']
                if b.output_filepath == f.output_filepath
            ]
            logger.error(
                "'%s' has the same output file path as '%s'. The other file will be overwritten.",
                f.source_filepath, dupes[0].source_filepath)

    signal_sender = Signal(signals.BEFORE_WRITING)
    signal_sender.send(context=CONTEXT)

    # Write all the articles
    all_articles = CONTEXT.articles + unlisted_articles
    logger.debug("Writing %d articles", len(all_articles))
    time_write_start = time.time()
    number_written = 0
    for index, article in enumerate(all_articles):
        logger.spam("Writing file %d of %d", index + 1, len(all_articles))
        wrote = article.write_file(context=CONTEXT)
        number_written = number_written + 1 if wrote else number_written
    logger.info("Wrote %d out of %d articles in %.3f seconds",
                number_written, len(all_articles), (time.time() - time_write_start))

    # Write all the pages
    logger.debug("Writing %d pages", len(pages))
    time_write_start = time.time()
    number_written = 0
    for index, page in enumerate(pages):
        logger.spam("Writing file %d of %d", index + 1, len(pages))
        wrote = page.write_file(context=CONTEXT)
        number_written = number_written + 1 if wrote else number_written
    logger.info("Wrote %d out of %d pages in %.3f seconds",
                number_written, len(pages), (time.time() - time_write_start))

    # Write the index
    logger.debug("Writing articles index")
    time_write_start = time.time()
    index = Index(CONTEXT)
    wrote = index.write_file()
    if wrote:
        logger.info("Wrote index in %.3f seconds", (time.time() - time_write_start))
    else:
        logger.info("Reused cached index in %.3f seconds", (time.time() - time_write_start))

    signal_sender = Signal(signals.AFTER_WRITING)
    signal_sender.send(context=CONTEXT)

    # Find, compile and move static files
    logger.debug("Compiling static files")
    time_static_start = time.time()

    # SASS
    sass.compile(dirname=(CONTEXT.CSS_FOLDER, CONTEXT.OUTPUT_CSS_FOLDER),
                 output_style='compressed')

    # CSS
    for dirname, dirs, files in os.walk(CONTEXT.CSS_FOLDER):
        for filename in files:
            filepath = os.path.join(dirname, filename)
            basename, extension = os.path.splitext(filename)
            rel_path = os.path.relpath(filepath, CONTEXT.CSS_FOLDER)
            if extension.lower() == ".css":
                os.makedirs(os.path.dirname(
                    os.path.join(CONTEXT.OUTPUT_CSS_FOLDER, rel_path)),
                    exist_ok=True)
                copyfile(filepath, os.path.join(CONTEXT.OUTPUT_CSS_FOLDER, rel_path))

    # JS
    for dirname, dirs, files in os.walk(CONTEXT.JS_FOLDER):
        for filename in files:
            filepath = os.path.join(dirname, filename)
            basename, extension = os.path.splitext(filename)
            rel_path = os.path.relpath(filepath, CONTEXT.JS_FOLDER)
            if extension.lower() == ".js":
                with codecs.open(filepath, encoding='utf-8', errors='ignore') as js_file:
                    minified = js_file.read()
                output_filepath = os.path.join(CONTEXT.OUTPUT_JS_FOLDER, rel_path)
                os.makedirs(os.path.dirname(output_filepath), exist_ok=True)
                with codecs.open(output_filepath, 'w+', encoding='utf-8') as js_min_file:
                    js_min_file.write(minified)
    logger.info("Processed static files in %.3f seconds", time.time() - time_static_start)

    self.subsites = subsites
def __init__(self, context, filepath=None, metadata=None, content=None, basename=None):
    if filepath is not None:
        super().__init__(context, filepath=filepath)
    elif metadata is not None and content is not None and basename is not None:
        super().__init__(context, metadata=metadata, content=content, basename=basename)

    self.template = 'article.html'
    self.date = datetime.datetime.strptime(
        self.metadata['date'].strip(),
        '%Y-%m-%d') if 'date' in self.metadata else datetime.datetime.now()
    self.type = self.metadata['type'].strip().lower() if 'type' in self.metadata else 'article'
    self.title = self.metadata['title'] if 'title' in self.metadata else self.basename
    self.summary = self.metadata['summary'] if 'summary' in self.metadata else None
    self.location = self.metadata['location'].strip().lower() if 'location' in self.metadata else None

    # Status
    status = self.metadata['status'].strip().lower() if 'status' in self.metadata else None
    if status == 'unlisted' or self.type == 'unlisted':
        self.status = ArticleStatus.UNLISTED
    elif status == 'draft':
        self.status = ArticleStatus.DRAFT
    else:
        self.status = ArticleStatus.ACTIVE

    # Authors
    if 'authors' in self.metadata:
        self.authors = [a.strip() for a in self.metadata['authors'].split(',')]
    elif 'author' in self.metadata:
        self.authors = [a.strip() for a in self.metadata['author'].split(',')]
    else:
        self.authors = []
    for author in self.authors:
        if author in context['authors']:
            context['authors'][author].append(self)
        else:
            context['authors'][author] = [self]

    # Output Filepath
    if 'ARTICLE_SLUG' in context and self.date and self.location:
        slug_dict = merge_dictionaries(
            vars(self), {
                'date': self.date.strftime('%Y-%m-%d'),
                'location': self.location.lower()
            })
        output_filename = context.ARTICLE_SLUG.format(**slug_dict)
    elif 'ARTICLE_SLUG' in context:
        slug_dict = merge_dictionaries(
            vars(self), {'date': self.date.strftime('%Y-%m-%d')})
        output_filename = context.ARTICLE_SLUG.format(**slug_dict)
    elif self.date and self.location:
        output_filename = '{}-{}.html'.format(
            self.location.lower(), self.date.strftime('%Y-%m-%d'))
    else:
        output_filename = '{}.html'.format(self.basename)
    self.output_filepath = os.path.join(context.OUTPUT_FOLDER, 'articles', output_filename)
    self.url = 'articles/{}'.format(output_filename)
    self.cache_id = self.output_filepath

    if 'cache_type' in self.metadata:
        self.cache_type = self.metadata['cache_type']
    else:
        self.cache_type = 'ARTICLE'

    signal_sender = Signal(signals.AFTER_ARTICLE_READ)
    signal_sender.send(context=context, article=self)
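# Illustrative example (the ARTICLE_SLUG value below is an assumption, not taken from
# the project): with
#
#   ARTICLE_SLUG = '{date}-{location}.html'
#
# an article whose metadata gives date 2018-06-02 and location Yorkshire would be
# written to <OUTPUT_FOLDER>/articles/2018-06-02-yorkshire.html and get the url
# 'articles/2018-06-02-yorkshire.html'. Without ARTICLE_SLUG the fallback is
# '<location>-<date>.html', or '<basename>.html' when no location is set.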
def generate_person_pages(context):
    # For each person, generate a page listing the caves they have been in
    # and the articles that describe those trips
    caver_bios = context['caverbios']
    cavers = context['cavepeep_person']
    dictionary = cavers
    content_dictionary = caver_bios
    output_path = "cavers"
    template = "caverpages"

    row = namedtuple('row', 'path content metadata articles authored same_as_cache')
    initialised_pages = {}
    for key in dictionary.keys():
        if key not in initialised_pages.keys():
            logger.debug("Adding {} to list of pages to write".format(key))
            if key in content_dictionary:
                source = content_dictionary[key]
                logger.debug("Content added to " + key)
            else:
                source = Caver(context, content='', metadata={}, basename=key)
                source.same_as_cache = context.is_cached
            if key in context.authors:
                source.authored = sorted(context.authors[key],
                                         key=lambda k: (k.date),
                                         reverse=True)
            source.output_filepath = os.path.join(output_path, str(key) + '.html')
            source.articles = dictionary[key]
            source.template = template + '.html'
            initialised_pages[key] = source
        else:
            initialised_pages[key].articles.extend(dictionary[key])

    def get_people(cavepeep):
        c = re.compile(
            r"""DATE=\s*(\d\d\d\d-\d\d-\d\d)\s*;\s*CAVE=\s*([\s\w\D][^;]*)\s*;\s*PEOPLE=\s*([\s\w\D][^;]*);*[\n\t\r]*"""
        )
        # Create key/value relationship between trip identifier (Date + Cave) and list of cavers
        item_date = None
        item_caves = None
        item_people = None
        m = c.match(cavepeep)
        if not m:
            return []
        item_people = m.group(3).split(',')
        item_people = item_people if type(item_people) is list else [item_people]
        item_people = [x.strip() for x in item_people]
        return item_people

    for page_name, page_data in initialised_pages.items():
        cocavers = {}
        for article in set([a.article for a in page_data.articles]):
            trips = article.metadata['cavepeeps'] if type(
                article.metadata['cavepeeps']) is list else [article.metadata['cavepeeps']]
            for trip in trips:
                people = get_people(trip)
                if page_name in people:
                    for person in people:
                        if person in cocavers:
                            cocavers[person] = cocavers[person] + 1
                        else:
                            cocavers[person] = 1
        del cocavers[page_name]
        page_data.cocavers = sorted([(person, cocavers[person]) for person in cocavers.keys()],
                                    key=lambda tup: tup[1],
                                    reverse=True)

        caves = {}
        for trip in [a.cave for a in page_data.articles]:
            if trip is None:
                continue
            for cave in [a.strip() for a in trip.split('>')]:
                if cave in caves:
                    caves[cave] = caves[cave] + 1
                else:
                    caves[cave] = 1
        page_data.caves = sorted([(cave, caves[cave]) for cave in caves.keys()],
                                 key=lambda tup: tup[1],
                                 reverse=True)

    # Work out if we need to update this file
    changes = context['cache_change_types']
    meta_changes = context['cache_changed_meta']
    refresh_triggers = ["ARTICLE.NEW_FILE", "ARTICLE.REMOVED_FILE"]
    refresh_meta_triggers = ['title', 'location', 'date', 'status']

    changed_people = []
    if "ARTICLE.NEW_FILE" in changes or "ARTICLE.META_CHANGE" in changes:
        for meta_change in meta_changes:
            added, removed, modified = meta_change
            if 'cavepeeps' in added:
                people, caves = parse_metadata(added['cavepeeps'])
                changed_people.extend(people)
            if 'cavepeeps' in removed:
                people, caves = parse_metadata(removed['cavepeeps'])
                changed_people.extend(people)
            if 'cavepeeps' in modified:
                people, caves = parse_metadata(modified['cavepeeps'][0])
                changed_people.extend(people)
                people, caves = parse_metadata(modified['cavepeeps'][1])
                changed_people.extend(people)
            if 'authors' in added:
                people = [p.strip() for p in added['authors'].split(',')]
                changed_people.extend(people)
            if 'authors' in removed:
                people = [p.strip() for p in removed['authors'].split(',')]
                changed_people.extend(people)
            if 'authors' in modified:
                people = [p.strip() for p in modified['authors'][0].split(',')]
                changed_people.extend(people)
                people = [p.strip() for p in modified['authors'][1].split(',')]
                changed_people.extend(people)

    logger.debug("Writing %s caver pages", len(initialised_pages))
    number_written = 0
    for page_name, page_data in initialised_pages.items():
        page_data.caver_articles = page_data.articles
        page_data.number = len([a for a in page_data.articles if a.cave is not None])
        if context.caching_enabled:
            if page_name in changed_people:
                page_data.same_as_cache = False
            if any(i in changes for i in refresh_triggers):
                page_data.same_as_cache = False
            if any(
                    any(m in merge_dictionaries(*c) for m in refresh_meta_triggers)
                    for c in meta_changes):
                page_data.same_as_cache = False
            if page_data.same_as_cache:
                continue
        number_written = number_written + 1
        signal_sender = Signal("BEFORE_ARTICLE_WRITE")
        signal_sender.send(context=context, afile=page_data)
        page_data.write_file(context=context)
    pages = initialised_pages
    logger.info("Wrote %s out of %s total caver pages", number_written, len(initialised_pages))

    # ========== Write the index of cavers ================
    cached = True
    if context.caching_enabled:
        if len(changed_people) > 0:
            cached = False
        if any(i in changes for i in refresh_triggers):
            cached = False
        if any(
                any(m in merge_dictionaries(*c) for m in refresh_meta_triggers)
                for c in meta_changes):
            cached = False
        if cached:
            return

    row = namedtuple('row', 'name number recentdate meta')
    rows = []
    for page_name in pages.keys():
        name = page_name
        number = len([a for a in pages[page_name].articles if a.cave is not None])
        recentdate = max([article.date for article in pages[page_name].articles])
        meta = content_dictionary[page_name].metadata if page_name in content_dictionary.keys() else None
        rows.append(row(name, number, recentdate, meta))

    filename = os.path.join(output_path, 'index.html')
    writer = Writer(context,
                    filename,
                    template + "_index.html",
                    rows=sorted(sorted(rows, key=lambda x: x.name),
                                key=lambda x: x.recentdate,
                                reverse=True))
    writer.write_file()
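# Illustrative note (the example string is an assumption; the format is inferred from
# the regular expression in get_people above): each 'cavepeeps' trip entry is expected
# to look like
#
#   DATE=2018-06-02; CAVE=Some Cave > Another Cave; PEOPLE=Alice, Bob
#
# for which get_people returns ['Alice', 'Bob'].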
def generate_cave_pages(context):
    cave_bios = context['cavebios']
    caves = context['cavepeep_cave']
    caves_dict = {}

    # Split the through trips into individual caves
    # and build a unique mapping of cave names to articles
    for trip in [c for c in caves if c is not None]:
        for cave in trip.split('>'):
            create_or_add(caves_dict, cave.strip(), caves[trip])

    dictionary = caves_dict
    content_dictionary = cave_bios
    output_path = "caves"
    template = "cavepages"

    row = namedtuple('row', 'path content metadata articles same_as_cache')
    initialised_pages = {}
    for key in dictionary.keys():
        if key not in initialised_pages.keys():
            logger.debug("Cavebios: Adding {} to list of pages to write".format(key))
            if key in content_dictionary:
                source = content_dictionary[key]
                logger.debug("Cavebios: Content added to " + key)
            else:
                source = Cave(context, content='', metadata={}, basename=key)
                source.same_as_cache = context.is_cached
            source.output_filepath = os.path.join(output_path, str(key) + '.html')
            source.articles = dictionary[key]
            source.template = template + '.html'
            initialised_pages[key] = source
        else:
            initialised_pages[key].articles.extend(dictionary[key])

    # Work out if we need to update this file
    changes = context['cache_change_types']
    meta_changes = context['cache_changed_meta']
    refresh_triggers = ["ARTICLE.NEW_FILE", "ARTICLE.REMOVED_FILE"]
    refresh_meta_triggers = ['title', 'location', 'date', 'status']

    changed_caves = []
    if "ARTICLE.NEW_FILE" in changes or "ARTICLE.META_CHANGE" in changes:
        for meta_change in meta_changes:
            added, removed, modified = meta_change
            if 'cavepeeps' in added:
                people, caves = parse_metadata(added['cavepeeps'])
                changed_caves.extend(caves)
            if 'cavepeeps' in removed:
                people, caves = parse_metadata(removed['cavepeeps'])
                changed_caves.extend(caves)
            if 'cavepeeps' in modified:
                people, caves = parse_metadata(modified['cavepeeps'][0])
                changed_caves.extend(caves)
                people, caves = parse_metadata(modified['cavepeeps'][1])
                changed_caves.extend(caves)

    number_written = 0
    for page_name, page_data in initialised_pages.items():
        page_data.cave_articles = [(a, a.date, was_author_in_cave(a, page_name))
                                   for a in page_data.articles]
        if context.caching_enabled:
            if page_name in changed_caves:
                page_data.same_as_cache = False
            if any(i in changes for i in refresh_triggers):
                page_data.same_as_cache = False
            if any(
                    any(m in merge_dictionaries(*c) for m in refresh_meta_triggers)
                    for c in meta_changes):
                page_data.same_as_cache = False
            if page_data.same_as_cache:
                continue
        number_written = number_written + 1
        signal_sender = Signal("BEFORE_ARTICLE_WRITE")
        signal_sender.send(context=context, afile=page_data)
        page_data.write_file(context=context)
    logger.info("Wrote %s out of %s total cave pages", number_written, len(initialised_pages))

    # ========== Write the index of caves ================
    cached = True
    if context.caching_enabled:
        if len(changed_caves) > 0:
            cached = False
        if any(i in changes for i in refresh_triggers):
            cached = False
        if any(
                any(m in merge_dictionaries(*c) for m in refresh_meta_triggers)
                for c in meta_changes):
            cached = False
        if cached:
            return

    logger.info("writing cave page index")
    pages = initialised_pages
    row = namedtuple('row', 'name number recentdate meta')
    rows = []
    for page_name in pages.keys():
        name = page_name
        number = len(pages[page_name].articles)
        recentdate = max([article.date for article in pages[page_name].articles])
        meta = content_dictionary[page_name].metadata if page_name in content_dictionary.keys() else None
        rows.append(row(name, number, recentdate, meta))

    filename = os.path.join(output_path, 'index.html')
    writer = Writer(context,
                    filename,
                    template + "_index.html",
                    rows=sorted(rows, key=lambda x: x.name))
    writer.write_file()