def create_yaml(api_key, talks_dir, speakers_dir):
    if not exists(talks_dir):
        makedirs(talks_dir)
    if not exists(speakers_dir):
        makedirs(speakers_dir)
    for proposal_state in PROPOSAL_STATES:
        r = get(
            'https://www.papercall.io/api/v1/submissions?_token={0}&state={1}&per_page=1000'
            .format(
                api_key,
                proposal_state,
            ))
        speakers = {}
        for proposal in r.json():
            talk_title_slug = slugify(proposal['talk']['title'])
            post = frontmatter.loads(proposal['talk']['description'])
            post['type'] = 'talk'
            post['title'] = proposal['talk']['title']
            post['level'] = proposal['talk']['audience_level']
            post['abstract'] = proposal['talk']['abstract']
            post['speakers'] = []
            speaker_name = proposal['profile']['name']
            if '/' in speaker_name:
                speaker_name = speaker_name.split('/')
            elif ' and ' in speaker_name:
                speaker_name = speaker_name.split(' and ')
            elif ',' in speaker_name and speaker_name[-5:] != ', MBA':
                speaker_name = speaker_name.split(',')
            else:
                speaker_name = [speaker_name]
            for name in map(str.strip, speaker_name):
                speaker_slug = slugify(name)
                if speaker_slug not in speakers:
                    speakers[speaker_slug] = frontmatter.loads(
                        proposal['profile']['bio'])
                    speakers[speaker_slug]['name'] = name
                    speakers[speaker_slug]['talks'] = []
                post['speakers'].append(name)
                speakers[speaker_slug]['talks'].append(post['title'])
            talk_filename = '{}/{}.md'.format(talks_dir, talk_title_slug)
            with open(talk_filename, 'wb') as file_to_write:
                frontmatter.dump(post, file_to_write)
            print('saved {!r}'.format(talk_filename))
        for speaker_slug, speaker in speakers.items():
            speaker_filename = '{}/{}.md'.format(speakers_dir, speaker_slug)
            with open(speaker_filename, 'wb') as file_to_write:
                frontmatter.dump(speaker, file_to_write)
            print('saved {!r}'.format(speaker_filename))

def sync(
    all_sheets: bool = False,
    output_folder: str = "_players",
    sheet_app_id: str = typer.Option(envvar="GOOGLE_SHEET_APP_ID", default=""),
    sheet_name: str = typer.Option(envvar="GOOGLE_SHEET_NAME", default="Sheet1"),
):
    typer.secho("sync-players", fg="yellow")

    try:
        sa = SpreadsheetApp(from_env=True)
    except AttributeError:
        print_expected_env_variables()
        raise typer.Exit()

    try:
        spreadsheet = sa.open_by_id(sheet_app_id)
    except Exception:
        typer.echo(
            f"We can't find that 'sheet_app_id'.\n"
            f"Please double check that 'GOOGLE_SHEET_APP_ID' is set. (Currently set to: '{sheet_app_id}')"
        )
        raise typer.Exit()

    if all_sheets:
        sheets = spreadsheet.get_sheets()
    else:
        try:
            sheets = [spreadsheet.get_sheet_by_name(sheet_name)]
        except Exception:
            typer.echo(
                f"We can't find that 'sheet_name' aka the tab.\n"
                f"Please double check that 'GOOGLE_SHEET_NAME' is set. (Currently set to: '{sheet_name}')"
            )
            raise typer.Exit()

    for sheet in sheets:
        data_range = sheet.get_data_range()
        table = Table(data_range, backgrounds=True)

        metadata = {}
        for item in table:
            for key in item.header:
                value = item.get_field_value(key)
                metadata[key] = value

            player = Player(**metadata)

            if not Path(output_folder).exists():
                Path(output_folder).mkdir()

            player_filename = Path(output_folder, f"{player.slug}.md")
            if player_filename.exists():
                post = frontmatter.loads(player_filename.read_text())
            else:
                post = frontmatter.loads("")

            post.metadata.update(player.dict(by_alias=True))
            player_filename.write_text(frontmatter.dumps(post))

def accomplishment(date: str = "now", overwrite: bool = False):
    parsed_date = maya.when(date, timezone="US/Central").datetime(naive=True)
    day_of_month = parsed_date.day
    week_number = (day_of_month - 1) // 7 + 1
    output_filename = f"{parsed_date.year}-{parsed_date.month:02}-week{week_number}.md"
    output_filename = Path(
        str(OUTPUT_FOLDER.joinpath(output_filename)).format(
            year=parsed_date.year))
    typer.echo(str(output_filename))

    if not output_filename.parent.exists():
        output_filename.parent.mkdir()

    if TASKS_DATA.exists():
        data = yaml.load(TASKS_DATA.read_text(), Loader=yaml.FullLoader)
    else:
        data = dict()

    context_data = data.copy()
    context_data["date"] = parsed_date

    # Handle Accomplishments
    post = frontmatter.loads(ACCOMPLISHMENT_TEMPLATE.read_text())
    t = jinja2.Template(post.content)
    contents = t.render(context_data)
    post["date"] = parsed_date
    post.content = contents
    if not output_filename.exists() or overwrite:
        output_filename.write_text(frontmatter.dumps(post))

    # Handle README
    output_filename = Path("README.md")
    post = frontmatter.loads(README_TEMPLATE.read_text())
    t = jinja2.Template(post.content)
    contents = t.render(context_data)
    post["date"] = parsed_date
    post.content = contents
    # output_filename.write_text(frontmatter.dumps(post))
    output_filename.write_text(post.content)

def generate(self, presite):
    parsed = frontmatter.loads(self.content)
    self.config.update(parsed)
    if self.is_markdown:
        parsed.content = markdown.markdown(parsed.content)
    template = Template(parsed.content)
    result = template.render(self.config)
    combined_config = self.config
    combined_config['content'] = result
    templatetxt = '{{content}}'
    if 'template' in combined_config:
        with open(presite.selected_template_folder() +
                  combined_config['template'] + '.html') as f:
            templatetxt = f.read()
    template = Template(templatetxt)
    result = template.render(combined_config)
    currentdir = './'
    for dir in self.dest_path_elements[:-1]:
        currentdir = currentdir + dir + '/'
        if not os.path.exists(currentdir):
            os.mkdir(currentdir)
    with open(self.dest_path, 'w+') as f:
        f.write(result)
    print('wrote ' + self.dest_path)

def add(post_path):
    """Add shorturl to a post."""
    filename = basename(post_path)
    m = env.re_file_foramt.match(filename)
    date_obj = datetime.strptime(
        '{0}-{1}-{2}'.format(m.group('year'), m.group('month'),
                             m.group('day')), '%Y-%m-%d')
    post_url = env.tpl_post_url.format(date=date_obj.strftime('%Y/%j'),
                                       slug=m.group('slug'))
    with codecs.open(join(env.root_dir, post_path), 'r', 'utf-8') as fobj:
        post = frontmatter.loads(fobj.read())
    if post.get('shorturl'):
        abort('Post already has a short url: '
              '{shorturl}.'.format(**post))
    meta = post.to_dict()
    content = meta['content']
    del meta['content']
    meta['shorturl'] = shorturl(post_url)
    with codecs.open(join(env.root_dir, post_path), 'w', 'utf-8') as fobj:
        new_post = frontmatter.Post(content, **meta)
        frontmatter.dump(new_post, fobj)
    puts('Added the short url: {shorturl}.'.format(**meta))

def _getData(self):
    result = urlfetch.fetch(BASE_URI + self.key)
    if result.status_code == 200:
        return frontmatter.loads(result.content)
    else:
        # TODO handle other cases
        pass

def parse_to_recipe(content: str) -> Recipe:
    """Parse a Markdown formatted string to a Recipe object

    Args:
        content (str): A Markdown formatted string

    Returns:
        Recipe: A Recipe object representing the given content
    """
    recipe_metadata = frontmatter.loads(content)
    document = marko.parse(content)

    recipe_name = get_recipe_name(document)
    quote = get_quote(document)
    ingredients = get_ingredients(document)
    instructions = get_instructions(document)

    return Recipe(
        name=recipe_metadata["name"],
        residence=recipe_metadata["residence"],
        category=RecipeCategory(recipe_metadata["category"].lower()),
        recipe_name=recipe_name,
        quote=quote,
        ingredients=ingredients,
        instructions=instructions,
    )

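# A minimal usage sketch for parse_to_recipe above. The document shape (the
# front-matter keys plus a Markdown heading) is inferred from the keys the
# function reads; the project's real recipe format may differ.
import frontmatter

sample = """---
name: Grandma
residence: Ohio
category: Dessert
---
# Apple Pie
"""
post = frontmatter.loads(sample)
assert post["name"] == "Grandma"              # the YAML block becomes metadata
assert post.content.strip() == "# Apple Pie"  # everything after it is content
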
def load_logos(args):
    retVal = dict()
    found_files = pathlib.Path(args.logodir).glob('**/index.md')
    for fn in found_files:
        f = open(str(fn))
        fmstr = f.read()
        f.close()
        fm = frontmatter.loads(fmstr)
        website = fm["website"]
        if website.startswith("http:"):
            website = website[:4] + "s" + website[4:]
        if website.startswith("https://www."):
            website = "https://" + website[12:]
        if "guide" in fm.metadata:
            guide = fm["guide"]
        else:
            guide = ""
        data = {
            "logohandle": fm["logohandle"],
            "website": website,
            "guide": guide
        }
        retVal[fm["logohandle"]] = data
    if args.verbose:
        sys.stdout.write("INFO: %d logos loaded\n" % len(retVal))
    return retVal

def sync_file_to_block(filename, block, links: dict = {}):
    logger.info(f"Syncing {filename} to block {block.id}")
    with open(filename) as markdown_fd:
        contents = markdown_fd.read()
    post = frontmatter.loads(contents)

    def resolve_link(target):
        try:
            parsed = urlparse(target)
            if parsed.scheme:
                return target
        except:
            pass
        target_path = os.path.realpath(
            os.path.join(os.path.dirname(filename), target))
        block = links.get(target_path)
        if not block:
            return target
        return block.get_browseable_url()

    markdown_blocks = convert(str(post), link_resolver=resolve_link)
    sync_markdown_blocks_to_block(markdown_blocks, block)

def parse_matter(string):
    post = frontmatter.loads(string)

    content = None
    backmatter = None
    if post.content.strip().endswith('+++'):
        content = post.content.rsplit("+++", 2)
        backmatter = content[1]
        content = content[0]
    else:
        content = post.content
        backmatter = None

    # Split new comment from backmatter
    comment = None
    if backmatter and '::: Add Comment' in backmatter:
        comment = backmatter.split('::: Add Comment')
        comment = comment[-1].strip().splitlines()[1:]
        comment = "\n".join(comment).strip()
    if not comment:
        comment = []

    return {
        'frontmatter': post.metadata,
        'content': content,
        'backmatter': backmatter,
        'comment': comment
    }

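# Hedged illustration of the layout parse_matter expects: YAML front matter,
# body, then a backmatter section fenced by "+++" that may carry an
# "::: Add Comment" marker. The sample text is invented for illustration.
doc = """---
title: demo
---
body text

+++
::: Add Comment
author-line
hello there
+++"""
parts = parse_matter(doc)
# parts['backmatter'] is the text between the two "+++" fences;
# parts['comment'] drops the first line after the marker and keeps the rest,
# so here it is "hello there".
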
def build(cls, path):
    with open(path, "r", encoding="utf-8") as input_file:
        fm = frontmatter.loads(input_file.read())
    slug = PathFromFile.build(path).slug
    is_blog_entry = cls.__is_blog_entry(fm.metadata)
    return cls(slug, fm.metadata, is_blog_entry)

def test_custom_handler(self):
    "allow caller to specify a custom delimiter/handler"

    # not including this in the regular test directory
    # because it would/should be invalid per the defaults
    custom = textwrap.dedent("""
    ...
    dummy frontmatter
    ...
    dummy content
    """)

    # and a custom handler that really doesn't do anything
    class DummyHandler(object):
        def load(self, fm):
            return {'value': fm}

        def split(self, text):
            return "dummy frontmatter", "dummy content"

    # but we tell frontmatter that it is the appropriate handler
    # for the '...' delimiter
    # frontmatter.handlers['...'] = DummyHandler()
    post = frontmatter.loads(custom, handler=DummyHandler())

    self.assertEqual(post['value'], 'dummy frontmatter')

def getPage(self, pageName, isMobile):
    if self.data.platform == "PYINSTALLER":
        lhome = os.path.join(self.data.platformHome)
    else:
        lhome = "."
    filename = lhome + pageName
    if filename.find("http") != -1:
        print("external link")
        return None
    with open(filename) as f:
        page = frontmatter.loads(f.read())
    pageContent = page.content
    # raw strings avoid invalid-escape warnings in the regex patterns
    # filterString = r'([^\!]|^)\[(.+)\]\((.+)\)'
    filterString = r'(?:[^\!]|^)\[([^\[\]]+)\]\((?!http)([^()]+)\)'
    filteredPage = re.sub(
        filterString,
        r"""<a href='#' onclick='requestPage("\2");'>\1</a>""",
        pageContent)
    filteredPage = filteredPage.replace("{{ site.baseurl }}{% link ", "")
    filteredPage = filteredPage.replace(".md %}", ".md")
    filteredPage = markdown.markdown(
        filteredPage, extensions=["markdown.extensions.tables"])
    filteredPage = filteredPage.replace("Â", "")
    filteredPage = filteredPage.replace("{: .label .label-blue }", "")
    filteredPage = filteredPage.replace("<a href=\"http",
                                        "<a target='_blank' href=\"http")
    filteredPage = filteredPage.replace("<table>", "<table class='table'>")
    if isMobile:
        # make all images 100%
        filteredPage = filteredPage.replace("img alt=",
                                            "img width='100%' alt=")
    return filteredPage

def add_slideshare_embed_urls(self, blog_posts):
    count = 0
    # Loop through all the markdown posts in the directory
    for post in blog_posts:
        front_matter = frontmatter.loads(open(post, "r").read())
        # Get Slideshare URL from each blog post and add the embed url
        try:
            slideshare_url = front_matter["slideshare_presentation_url"]
            if slideshare_url == "None":
                pass
            else:
                #resource_url = front_matter["link"]
                embed_url = get_slideshare_embed_url(slideshare_url)
                if front_matter["slideshare_embed_url"] == embed_url:
                    pass
                else:
                    front_matter["slideshare_embed_url"] = embed_url
                    with open(post, "w") as changed_file:
                        changed_file.writelines(frontmatter.dumps(front_matter))
                    # print("{0} changed for {1}".format(embed_url, post))
                    count += 1
        except Exception as e:
            print(e)
            pass
    print("{0} posts changed!".format(count))
    return True

def getYAMLProjects(self):
    """ Get projects from projects.yml and appends to self.projects"""
    with open("projects.yml", 'r') as stream:
        try:
            yaml_data = yaml.safe_load(stream)
            for each in yaml_data:
                md_file = frontmatter.loads(open("template.md", "r").read())
                # Add front matter attributes
                md_file['title'] = each["name"]
                md_file['image'] = "/assets/images/projects/" + each["image"]
                # Prepend the output folder and the new_post_name
                file_name = each["name"].replace(" ", "-").lower() + ".md"
                if each["url"]:
                    md_file["url"] = each["url"]
                    # response = requests.get(each["url"])
                    # soup = BeautifulSoup(response.text, features="html.parser")
                    # metas = soup.find_all('meta')
                    # description = [ meta.attrs['content'] for meta in metas if 'name' in meta.attrs and meta.attrs['name'] == 'description' ]
                    # if len(description) > 0:
                    #     md_file["description"] = '"{0}"'.format(description[0])
                output_object = "projects/" + file_name
                # Create the new post object and write the front matter to post.
                with open(output_object, "w") as new_post_file:
                    new_post_file.writelines(frontmatter.dumps(md_file))
                print("Jekyll project created for {0} at {1}".format(each["name"], output_object))
        except yaml.YAMLError as exc:
            print(exc)

def from_md(cls, md_content: str):
    """
    Class method to generate new dataobj from a well formatted markdown string

    Call like this:

    ```python
    Dataobj.from_md(content)
    ```
    """
    data = frontmatter.loads(md_content)
    dataobj = {}
    dataobj["content"] = data.content
    for pair in ["tags", "id", "title", "path"]:
        try:
            dataobj[pair] = data[pair]
        except KeyError:
            # files sometimes get moved temporarily by applications while you edit
            # this can create bugs where the data is not loaded correctly
            # this handles that scenario as validation will simply fail and the event will
            # be ignored
            break

    dataobj["type"] = "processed-dataobj"
    return cls(**dataobj)

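# Hypothetical input for from_md above: the front matter must supply all four
# keys the loop reads (tags, id, title, path), otherwise the loop breaks early
# and validation is expected to reject the partial dataobj.
md = """---
tags: [notes]
id: 1
title: First note
path: notes/
---
Body of the note."""
obj = Dataobj.from_md(md)  # assumes `Dataobj` is the class this classmethod is bound to
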
def process(logohandle, logodir):
    srcfile = os.path.join(logodir, "index.md")
    if not os.path.isfile(srcfile):
        print("WARNING: no index file for '%s' (%s)" % (logohandle, srcfile))
        return

    print("INFO: processing %s" % (logohandle))

    fmsrc = frontmatter.load(srcfile)
    if "images" not in fmsrc.metadata:
        print("WARNING: no images, skipping")
        return

    fmdst = frontmatter.loads("---\nlayout: amp\nnoindex: true\n---")
    fmdst["redirect_to"] = "https://www.vectorlogo.zone/logos/%s/index.html" % logohandle

    dstfile = os.path.join(logodir, "index.amp.html")
    f = open(dstfile, 'w')
    f.write(frontmatter.dumps(fmdst))
    f.write('\n')
    f.close()

def test_md_get_node_for_key(md_path: pathlib.Path) -> None:
    """Test node fetching."""
    contents = frontmatter.loads(
        md_path.open('r', encoding=const.FILE_ENCODING).read())
    markdown_wo_header = contents.content
    lines = markdown_wo_header.split('\n')
    tree: MarkdownNode = MarkdownNode.build_tree_from_markdown(lines)
    assert tree.get_node_for_key('header1') is None
    assert tree.get_node_for_key('nonexisting header') is None
    node: MarkdownNode = tree.get_node_for_key('# 2. MD Header 2 Blockquotes')
    assert node is not None
    # Assert returned node has content
    assert node.key == '# 2. MD Header 2 Blockquotes'
    assert len(node.content.blockquotes) == 2
    assert node.content.text[2] == 'some text after blockquote'
    # Assert unstrict and strict matching return same nodes
    deep_node_unstrict = tree.get_node_for_key('5.2.2.1', strict_matching=False)
    deep_node_strict = tree.get_node_for_key(
        '#### 5.2.2.1 A even deeper section here 2')
    assert deep_node_strict == deep_node_unstrict
    # Assert substrings are matched
    node = tree.get_node_for_key('Header 4 Tricky', strict_matching=False)
    assert node is not None
    # Assert first match returned if strict matching is off
    node = tree.get_node_for_key('5.1.1', strict_matching=False)
    assert node.key == '### 5.1.1 A deeper section 1'

def update_existing_posts(self, sessions, users):
    """Take the latest export of sessions and users and update any
    information that has changed"""
    if self._post_location:
        count = 0
        blog_posts = self.get_blog_posts(self._post_location)
        # Loop through all the markdown posts in the directory
        for post in blog_posts:
            changed = False
            front_matter = frontmatter.loads(open(post, "r").read())
            for session in sessions:
                if session['session_id'] == front_matter['session_id']:
                    # Gather speaker information
                    emails = session["speakers"].split(",")
                    names = []
                    for speaker_email in emails:
                        for attendee in users:
                            if attendee["speaker_email"] == speaker_email:
                                name = attendee["first_name"] + " " + attendee["second_name"]
                                bio_formatted = f'"{attendee["bio"]}"'
                                speaker_dict = {
                                    "name": name,
                                    "biography": bio_formatted,
                                    "job-title": attendee["job-title"],
                                    "company": attendee["company"],
                                    "speaker-image": attendee["image-name"]
                                }
                                names.append(speaker_dict)
                    # Check if there are changes to speakers
                    if front_matter['speakers'] != names:
                        front_matter['speakers'] = names
                        changed = True
                    # Check if session tracks have changed.
                    tracks = session["tracks"].replace(";", ", ")
                    if front_matter["session_track"] != tracks:
                        front_matter["session_track"] = tracks
                        changed = True
                    # Check if title has changed
                    title = re.sub('[^A-Za-z0-9-!: ()]+', '', session["title"])
                    if front_matter['title'] != title:
                        front_matter['title'] = title
                        changed = True
                    # Check if post content has changed
                    content = session['blurb']
                    if front_matter.content != content:
                        front_matter.content = content
                        changed = True
                    if changed:
                        # Write the changed frontmatter to the file.
                        with open(post, "w") as changed_file:
                            changed_file.writelines(frontmatter.dumps(front_matter))
                        print("{0} post updated!".format(session['session_id']))
                        count += 1
        print("{0} posts updated!".format(count))
    else:
        return False

def _build_content_pages(items: List[ContentItem]) -> Iterator[Page]:
    for item in items:
        post = fm.loads(item.content)
        content = post.content
        html = resources.markdown.reset().convert(content)
        permalink = _build_permalink(item.location)
        image, image_thumbnail = _process_image(post)
        frontmatter = Frontmatter(
            title=post["title"],
            description=post.get("description"),
            category=post.get("category"),
            date=post.get("date"),
            image=image,
            image_thumbnail=image_thumbnail,
            image_caption=post.get("image_caption"),
            tags=post.get("tags", []),
        )
        meta = _build_meta(permalink, frontmatter)
        yield Page(
            html=html,
            content=content,
            permalink=permalink,
            frontmatter=frontmatter,
            meta=meta,
        )

def parse_markdown(parser, template, filepath, metadata):
    parser.reset()
    metadata = deepcopy(metadata)

    # Try to extract frontmatter metadata
    with open(filepath, encoding="utf-8") as markdown_file:
        file_content = markdown_file.read()

        try:
            file_parts = frontmatter.loads(file_content)
            metadata.update(file_parts.metadata)
            metadata["content"] = parser.convert(file_parts.content)
        except (ScannerError, ParserError):
            """
            If there's a parsererror, it's because frontmatter had to parse
            the entire file (not finding frontmatter at the top) and
            encountered an unexpected format somewhere in it.

            This means the file has no frontmatter, so we can simply continue.
            """
            metadata["content"] = parser.convert(file_content)
        except ParseError:
            """
            If there is a parse error in a file, it is useful to know which
            file it is
            """
            print("Error parsing file: {}".format(filepath))
            raise

    # Now add on any multimarkdown-format metadata
    if hasattr(parser, "Meta"):
        # Restructure markdown parser metadata to the same format as we expect
        markdown_meta = parser.Meta

        for name, value in markdown_meta.items():
            if type(value) == list and len(value) == 1:
                markdown_meta[name] = value[0]

        metadata.update(markdown_meta)

    toc_soup = BeautifulSoup(parser.toc, "html.parser")

    nav_item_strings = []

    # Only get <h2> items, to avoid getting crazy
    for item in toc_soup.select(".toc > ul > li > ul > li"):
        for child in item("ul"):
            child.extract()

        item["class"] = "p-toc__item"

        for anchor in item("a"):
            anchor["class"] = "p-toc__link"

        nav_item_strings.append(str(item))

    metadata["toc_items"] = "\n".join(nav_item_strings)

    return template.render(metadata)

def test_custom_handler(self):
    "allow caller to specify a custom delimiter/handler"

    # not including this in the regular test directory
    # because it would/should be invalid per the defaults
    custom = textwrap.dedent("""
    ...
    dummy frontmatter
    ...
    dummy content
    """)

    # and a custom handler that really doesn't do anything
    class DummyHandler(object):
        def load(self, fm, Loader=None):
            return {'value': fm}

        def split(self, text):
            return "dummy frontmatter", "dummy content"

    # but we tell frontmatter that it is the appropriate handler
    # for the '...' delimiter
    # frontmatter.handlers['...'] = DummyHandler()
    post = frontmatter.loads(custom, handler=DummyHandler())

    self.assertEqual(post['value'], 'dummy frontmatter')

def _migrate(post):
    print('migrate post {}'.format(post))
    # Handle documents whose first line does not start with ---
    with open(post) as f:
        contents = f.read()
        if not contents.startswith('---'):
            contents = '---\n' + contents
    post_yaml = frontmatter.loads(contents)
    _date = post_yaml['date']
    if not isinstance(_date, str):
        _date = _date.strftime('%Y-%m-%d %H:%M:%S')
    try:
        _date_dt = datetime.strptime(_date, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        _date_dt = datetime.strptime(_date, '%Y-%m-%d %H:%M')
    except Exception as e:
        print(e)
        raise
    metadata = {}
    metadata['created'] = _date_dt.strftime('%Y-%m-%d_%H-%M-%S')
    title = post_yaml['title']
    metadata['title'] = title
    # Prepend a level-1 heading with the title to the body
    content = '# ' + title + '\n\n' + post_yaml.content
    return (metadata, content)

def _extract_metadata(self, text):
    # fast test
    if not text.startswith("---"):
        return text

    content = frontmatter.loads(text)
    self.metadata = content.metadata
    return content.content

def __init__(self, content):
    parsed = frontmatter.loads(text_or_list(content))
    self.content = parsed.content

    for k in ('args', 'function', 'returns'):
        try:
            setattr(self, k, parsed[k])
        except KeyError:
            setattr(self, k, None)

def __init__(self, content_path: Path):
    post = frontmatter.loads(content_path.read_text("utf8"))

    self.file_path = content_path
    self.content = post.content
    self.metadata = post.metadata
    self._restricted_to: set[int] = set(
        self.metadata.get("restricted_to", ()))
    self._cooldowns: dict[discord.TextChannel, float] = {}

def jekyll_format(s: str, p: Parser) -> Entry:
    post = frontmatter.loads(s)
    entry = Entry(title=post["title"],
                  text=post.content,
                  updated_at=parse(str(post["updated"])),
                  meta=post.metadata,
                  p=p)
    return entry

def main(input, output, extra_context):
    chunk = input.read()
    post = frontmatter.loads(chunk)
    if extra_context:
        post.metadata.update(extra_context)
    frontmatter.dump(post, output)

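# A quick round-trip sketch for the filter above, substituting in-memory
# streams for the real file arguments; whether main() can be called directly
# depends on how the surrounding CLI wires it up, so treat this as an assumption.
import io

src = io.StringIO("---\ntitle: draft\n---\nbody")
dst = io.StringIO()
main(src, dst, extra_context={"published": True})
# dst now holds the document with `published: true` merged into its front matter.
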
def test_tree_text_equal_to_md(md_path: pathlib.Path) -> None:
    """Test tree construction."""
    contents = frontmatter.loads(
        md_path.open('r', encoding=const.FILE_ENCODING).read())
    markdown_wo_header = contents.content
    lines = markdown_wo_header.split('\n')
    tree: MarkdownNode = MarkdownNode.build_tree_from_markdown(lines)
    assert markdown_wo_header == tree.content.raw_text

def get_entries(drive_service, file_list):
    entries = []
    raw_files = get_raw_files(drive_service, file_list)
    for raw_file in raw_files:
        entries.append(frontmatter.loads(raw_file))
    return entries

def sync_cuisines():
    click.echo("sync-cuisines")
    aliases = load_aliases()
    cuisine_aliases = aliases["cuisines"]

    data = []
    places = Path("_places").glob("*.md")
    for place in places:
        post = frontmatter.loads(place.read_text())
        cuisines = post["cuisines"]
        if cuisines and len(cuisines):
            data += cuisines

    if not Path("_cuisines").exists():
        Path("_cuisines").mkdir()

    for cuisine in CUISINE_INITIAL:
        cuisine_slug = slugify(cuisine)
        if not Path("_cuisines").joinpath(f"{cuisine_slug}.md").exists():
            post = frontmatter.loads("")
            post["active"] = True
            post["name"] = cuisine
            post["sitemap"] = True
            post["slug"] = cuisine_slug
            Path("_cuisines").joinpath(f"{cuisine_slug}.md").write_text(
                frontmatter.dumps(post))

    data = set(data)
    for cuisine in data:
        cuisine_slug = slugify(cuisine)
        if not any(
                [alias for alias in cuisine_aliases if cuisine in alias["name"]]):
            if not Path("_cuisines").joinpath(f"{cuisine_slug}.md").exists():
                post = frontmatter.loads("")
                post["active"] = False
                post["name"] = cuisine
                post["sitemap"] = False
                post["slug"] = cuisine_slug
                Path("_cuisines").joinpath(f"{cuisine_slug}.md").write_text(
                    frontmatter.dumps(post))

def _getMarkdownFrontMatter(text):
    """
    Args:
        text: string of markdown
    Return:
        a frontmatter.Post object holding the parsed metadata and content
    """
    data = frontmatter.loads(text)
    return data

def before_change_note(mapper, connection, target):
    title = None
    data = frontmatter.loads(target.text)
    if isinstance(data.get('title'), str) and len(data.get('title')) > 0:
        title = data.get('title')
    if title and not target.is_date:
        target.name = title

def sanity_check(self, filename, handler_type):
    "Ensure we can load -> dump -> load"
    post = frontmatter.load(filename)
    self.assertIsInstance(post.handler, handler_type)

    # dump and reload
    repost = frontmatter.loads(frontmatter.dumps(post))

    self.assertEqual(post.metadata, repost.metadata)
    self.assertEqual(post.content, repost.content)
    self.assertEqual(post.handler, repost.handler)

def parse_frontmatter(markdown_content):
    metadata = {}
    try:
        file_parts = frontmatter.loads(markdown_content)
        metadata = file_parts.metadata
    except (ScannerError, ParserError):
        """
        If there's a parsererror, it's because frontmatter had to parse the
        entire file (not finding frontmatter at the top) and encountered an
        unexpected format somewhere in it.

        This means the file has no frontmatter, so we can simply continue.
        """
        pass
    return metadata

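# For reference: the exception types caught above come from PyYAML, which
# python-frontmatter uses to parse the YAML block, so the imports look like this.
from yaml.scanner import ScannerError
from yaml.parser import ParserError
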
def tags(root='.'):
    """Build and return tag -> [files] dict."""
    tags_dict = dict()
    for dirpath, filename, file, date in traverse_posts(root):
        # extract tags from frontmatter
        with open('{}/{}'.format(dirpath, filename), 'r') as f:
            fm = frontmatter.loads(f.read())
        for tag in fm['tags']:
            if tag in tags_dict:
                tags_dict[tag].append(file)
            else:
                tags_dict[tag] = [file]
    return OrderedDict(reversed(sorted(tags_dict.items(),
                                       key=lambda x: len(x[1]))))

def guides():
    guides = []
    files = []
    for root, dirs, filenames in os.walk("getting_started/data"):
        for f in filenames:
            # keep the full path; `root` would otherwise leak its last value
            # out of the walk loop and point at the wrong directory
            files.append(os.path.join(root, f))
    files = sorted(files, reverse=True)
    for path in files:
        slug = os.path.basename(path).replace(".md", "")
        markdown = frontmatter.loads(open(path, 'r').read())
        guides.append({"content": markdown.content,
                       "metadata": markdown.metadata,
                       "slug": slug})
    return jsonify({"guides": guides})

def create_block(message, block_path):
    block = frontmatter.loads('')
    last_block_hash = get_last_block_hash(block_path)
    if last_block_hash:
        block['prev'] = last_block_hash
    block['created_at'] = datetime.datetime.now().isoformat()
    block['nonce'] = 0
    block.content = message
    while True:
        block['nonce'] += 1
        block_hash = hashlib.sha256(
            frontmatter.dumps(block).encode()
        ).hexdigest()
        if block_hash[:2] == '00':
            break
    return block, block_hash

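# Usage sketch for create_block: mine until the SHA-256 of the serialized
# block starts with "00", then confirm the proof-of-work. Assumes
# get_last_block_hash can resolve the (hypothetical) "blocks/" path.
import hashlib
import frontmatter

block, block_hash = create_block("hello chain", "blocks/")
assert block_hash.startswith('00')
assert hashlib.sha256(frontmatter.dumps(block).encode()).hexdigest() == block_hash
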
def categories(root='.'):
    """Build and return cat -> [files] dict."""
    cats_dict = dict()
    for dirpath, filename, file, date in traverse_posts(root):
        # extract title and categories from frontmatter
        with open('{}/{}'.format(dirpath, filename), 'r') as f:
            fm = frontmatter.loads(f.read())
        title, cat = fm['title'], fm['categories'].replace(' ', '/')
        file = {'date': date, 'file': file, 'title': title}
        for base_cat in parse_base_cats(cat):
            if base_cat not in cats_dict:
                cats_dict[base_cat] = []
        if cat in cats_dict:
            cats_dict[cat].append(file)
        else:
            cats_dict[cat] = [file]
    return OrderedDict(sorted(cats_dict.items(), key=lambda x: x[0]))

def readfile(filename, section=None):
    """Opens a filename and returns either yaml or content"""
    try:
        with open(filename, 'rb') as f:
            post = frontmatter.loads(f.read())
            if section == 'content':
                # TODO:
                # Check case of \r\n for Windows
                # WARNING: Removes trailing newlines
                return post.content.splitlines()
            elif section == 'metadata':
                # WARNING: Implicitly converts dates to datetime
                return post.metadata
    except (LookupError, SyntaxError, UnicodeError, scanner.ScannerError):
        # Return Error; require utf-8
        # Should this sys.exit(1) here?
        print(f"{filename} caused the Blueberry script to crash.")
        sys.exit(1)

def test_external(self):
    filename = self.data['filename']
    content = self.data['content']
    metadata = self.data['metadata']

    content_stripped = content.strip()

    post = frontmatter.load(filename)
    self.assertEqual(post.content, content_stripped)
    for k, v in metadata.items():
        self.assertEqual(post[k], v)

    # dumps and then loads to ensure round trip conversions.
    posttext = frontmatter.dumps(post, handler=self.handler)
    post_2 = frontmatter.loads(posttext)
    for k in post.metadata:
        self.assertEqual(post.metadata[k], post_2.metadata[k])
    self.assertEqual(post.content, post_2.content)

def run(username, save_folder, prefix=None, data_file_name="gist_data.json"):
    gist_all = get_all_gists_for_user(username)
    gist_filenames = get_all_gist_filenames(gist_all)
    add_json_filenames_to_list(gist_filenames)
    delete_non_matching_files_from_folder(gist_filenames, save_folder)
    for gist in gist_all:
        content_data = get_content_data_for_gist(gist, prefix)
        if not content_data:
            continue
        file_path = os.path.join(save_folder, content_data["name"])
        file_data_path = os.path.join(
            save_folder, content_data["name"].replace('.md', '.json'))
        markdown_content = download_markdown_gist(file_path, content_data["data"])
        if markdown_content:
            gist_frontmatter = frontmatter.loads(markdown_content)
            content = gist_frontmatter.content
            metadata = gist_frontmatter.metadata
            content_data["content"] = content
            content_data["metadata"] = metadata
            with open(file_data_path, 'w') as target_file:
                json.dump(content_data, target_file, indent=True)

def find_old_guides(count=20):
    """Print a list of the 20 oldest guides in the library.

    Results are sorted by modification date (in front matter) and
    deprecated guides are ignored.

    Command line arguments:
        count: number of guides to list (default: 20)
    """
    old_guides = []
    guides_scanned = 0
    for filename in glob.glob('docs/**/*.md', recursive=True):
        guides_scanned += 1
        with open(filename, 'r') as f:
            parsed = frontmatter.loads(f.read())
        # check the parsed metadata dict, not the Post object itself
        if 'modified' in parsed.metadata and 'deprecated' not in parsed.metadata:
            record = make_record(parsed)
            record['path'] = filename
            old_guides.append(record)
    print(str(guides_scanned) + " guides scanned.")
    old_guides.sort(key=itemgetter('updated'))
    oldest_guides = old_guides[0:count]
    print(tabulate(oldest_guides))

def __init__(self, bookmark):
    self.bookmark = bookmark
    self.frontmatter = frontmatter.loads(bookmark.extended)

def test_underscore_alias():
    with open('ci/data/underscore-alias.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert valid_alias(yaml)[1] == "applications/containers/this_is_an_alias/ should use dashes instead of underscores."

def test_uppercase_alias():
    with open('ci/data/SHOUTING_BAD_FILE.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert valid_alias(yaml)[1] == "applications/containers/this-is-an-ALIAS/ should be lowercase."

import datetime
import pathlib

import frontmatter

for p in pathlib.Path('import_versions/exitwp/_posts').glob('*.markdown'):
    fm = frontmatter.load(str(p))
    tags = fm['tags']
    cats = fm['categories']
    tags_str = "tags:\n" + '\n'.join("- {}".format(tag) for tag in tags)
    cats_str = "categories:\n" + '\n'.join("- {}".format(cat) for cat in cats)
    try:
        dest_post = '_posts/{}.md'.format(p.stem)
        with open(dest_post) as f:
            content = f.read()
    except FileNotFoundError:
        p_split = p.stem.split('-', 3)
        date_prefix = [int(x) for x in p_split[:3]]
        orig_date = datetime.date(*date_prefix)
        new_date = orig_date - datetime.timedelta(1)
        dest_post = '_posts/{:%Y-%m-%d-}{}.md'.format(new_date, p_split[-1])
        with open(dest_post) as f:
            content = f.read()
    nfm = frontmatter.loads(content)
    if not any((nfm.get('tags'), nfm.get('categories'))):
        loc = content.find('\n---\n')
        new_content = "{}\n{}\n{}{}".format(content[:loc], tags_str, cats_str,
                                            content[loc:])
        with open(dest_post, 'w') as f:
            f.write(new_content)

def test_valid_alias():
    with open('ci/data/goodfile.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert valid_alias(yaml) == None

                    nargs='?',
                    default=sys.stdin,
                    help='Markdown file to preprocess')
parser.add_argument('output',
                    type=argparse.FileType('w'),
                    nargs='?',
                    default=sys.stdout,
                    help='the name of the output file (defaults to stdout)')
args = parser.parse_args()

# Save arguments
input_file = args.input_file
output = args.output

# Bump up all headings line by line
def increase_heading_level(line):
    return re.sub("^(#+)", "\\1#", line, flags=re.MULTILINE)

# Parse the final string for YAML frontmatter
parsed_yaml = frontmatter.loads(args.input_file.read())

# If there's a title key, extract it, bump up the headings, and add the title
# to the beginning of the file
if 'title' in parsed_yaml.keys():
    chapter_title = parsed_yaml['title']
    final_result = '# ' + chapter_title + '\n\n' + increase_heading_level(parsed_yaml.content)
else:
    final_result = parsed_yaml.content

# All done
with output as f:
    f.write(final_result)

def test_with_crlf_string(self):
    import codecs
    markdown_bytes = b'---\r\ntitle: "my title"\r\ncontent_type: "post"\r\npublished: no\r\n---\r\n\r\nwrite your content in markdown here'
    loaded = frontmatter.loads(codecs.decode(markdown_bytes, 'utf-8'))
    self.assertEqual(loaded['title'], 'my title')

def test_required_yaml():
    with open('ci/data/goodfile.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert require_yaml(yaml) == None

def test_micropost_extracts_tags(self):
    filename = self.micropostCmd.run(post='this is a post #giggles #stuff',
                                     network=None)
    post = frontmatter.loads(self.site.load_file(filename))
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual('giggles,stuff', post.metadata['tags'])

#args = parser.parse_args()

con = sqlite3.connect("_build/out/dv.db")
con.row_factory = sqlite3.Row
cur = con.cursor()
cur.execute("select * from meta_article")
r = cur.fetchone()
while r:
    md = pypandoc.convert(r['article'], 'markdown_strict', format='html')
    title_md = pypandoc.convert(r['title'], 'markdown_strict', format='html',
                                extra_args=["--columns", "9999"])
    md = fix(articlelink(md))
    title_md = fix(title_md, title=True)
    post = frontmatter.loads("")
    post.content = md
    post['title'] = title_md
    post['date'] = r['date']
    post['author'] = r['author']
    post['id'] = r['id']
    f = "_meta/" + r['date'] + "-" + yamltitle(title_md) + ".md"
    out = codecs.open(f, 'w', 'utf-8')
    frontmatter.dump(post, out, Dumper=frontmatter.yaml.Dumper,
                     allow_unicode=True)
    r = cur.fetchone()

def test_extra_yaml_header():
    with open('ci/data/extra_yaml_header.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert only_allowed_yaml(yaml)[1] == "Non-allowed metadata: fake_header"

def test_missing_yaml_header():
    with open('ci/data/missing_yaml_header.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert require_yaml(yaml)[1] == "Missing required metadata: published"

def main():
    # input is teams csv datafile from TBA
    # -> https://github.com/the-blue-alliance/the-blue-alliance-data
    csv_fname = abspath(sys.argv[1])
    max_team = int(sys.argv[2])
    mode = sys.argv[3]
    if mode not in ['new', 'update']:
        print("Error: invalid mode")
        return

    os.chdir(abspath(join(dirname(__file__), '..')))
    cwd = os.getcwd()

    for row in read_team_csv(csv_fname):
        # this changes on occasion...
        number, name, sponsors, l1, l2, l3, website, rookie_year, \
            facebook, twitter, youtube, github, instagram, periscope = row

        if rookie_year:
            rookie_year = int(rookie_year)

        number = number[3:]
        if int(number) > max_team:
            continue

        d1 = '%04d' % (int(int(number)/1000)*1000,)
        d2 = '%03d' % (int(int(number)/100)*100,)

        f = join(cwd, 'frc%s' % d1, '_frc', d2, '%s.md' % number)
        if mode == 'new' and exists(f):
            continue

        if 'firstinspires' in website:
            website = ''

        if l3:
            location = '%s, %s, %s' % (l1, l2, l3)
        elif l2:
            location = '%s, %s' % (l1, l2)
        else:
            location = l1

        sponsors = [s.strip() for s in sponsors.split('/')]
        if sponsors == ['']:
            sponsors = None
        else:
            if '&' in sponsors[-1]:
                sN = sponsors[-1].split('&')
                del sponsors[-1]
                sponsors += [s.strip() for s in sN]

        if mode == 'update':
            try:
                fm = frontmatter.load(f)
            except:
                print("Error at %s" % f)
                raise
            reformatted = str(frontmatter.dumps(fm))
            if 'team' not in fm.metadata:
                raise Exception("Error in %s" % f)
            team = fm.metadata['team']
            if 'links' not in fm.metadata['team']:
                links = OrderedDict()
            else:
                links = fm.metadata['team']['links']
        else:
            data = OrderedDict()
            team = OrderedDict()
            links = OrderedDict()
            data['title'] = 'FRC Team %s' % number
            data['team'] = team
            team['type'] = 'FRC'
            team['number'] = int(number)

        add_maybe(team, 'name', name)
        add_maybe(team, 'rookie_year', rookie_year)
        add_maybe(team, 'location', location)

        if sponsors and mode != 'update':
            team['sponsors'] = sponsors

        if 'Github' in links:
            links['GitHub'] = links['Github']
            del links['Github']

        add_maybe_web(links, 'Website', website)
        add_maybe_web(links, 'Facebook', facebook)
        add_maybe_web(links, 'Twitter', twitter)
        add_maybe_web(links, 'YouTube', youtube)
        add_maybe_web(links, 'GitHub', github)
        add_maybe_web(links, 'Instagram', instagram)
        add_maybe_web(links, 'Periscope', periscope)

        if mode == 'update':
            if links:
                fm.metadata['team']['links'] = links
            if fm.content.strip() == 'No content has been added for this team':
                fm.content = '{% include remove_this_line_and_add_a_paragraph %}'
            page = str(frontmatter.dumps(fm))
            if reformatted == page:
                # don't make gratuitous changes
                continue
        elif mode == 'new':
            if links:
                team['links'] = links
            page = '---\n%s\n---\n\n{%% include remove_this_line_and_add_a_paragraph %%}\n' % (
                yaml.safe_dump(data)
            )
            # roundtrip through frontmatter to get the formatting consistent
            page = frontmatter.dumps(frontmatter.loads(page))

        if not exists(dirname(f)):
            os.makedirs(dirname(f))

        with open(f, 'w') as fp:
            fp.write(page)

def test_alias_trailing_slash():
    with open('ci/data/trailing-slash-alias.md', 'r') as f:
        yaml = frontmatter.loads(f.read()).metadata
    assert valid_alias(yaml)[1] == "applications/containers/this-is-an-alias should end with a slash (/)."