def transfer_math_equations_newline(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    r"""Swap a trailing ``\\`` for ``\newline`` on lines selected by the doc.

    A predicate and a rewriter are handed to
    ``doc.transfer_math_block_by_line``; which lines that method visits is
    decided there (presumably the lines of math blocks -- confirm against
    ``MarkdownDoc``).

    Args:
        article: Not used by this function.
        doc: Document whose ``transfer_math_block_by_line`` method is invoked.

    Returns:
        The same ``doc`` object that was passed in.
    """
    line_break = r'\\'

    def ends_with_line_break(line):
        return line.endswith(line_break)

    def use_newline_command(line):
        # Cut off the two-character ``\\`` suffix, then append the command.
        return line[:-len(line_break)] + r'\newline'

    doc.transfer_math_block_by_line(
        test=ends_with_line_break,
        transfer=use_newline_command,
    )
    return doc
def add_lcn_footer(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Set ``doc.footer`` to the body of the ``snippet/footer/lcn.md`` snippet.

    The snippet is located relative to this source file: two directory
    levels above the resolved path of ``__file__``.

    Args:
        article: Not used by this function.
        doc: Document whose ``footer`` attribute is overwritten.

    Returns:
        The same ``doc`` object that was passed in.
    """
    print('add lcn footer')
    # Resolve symlinks first so the project root is derived from the real location.
    script_dir = Path(os.path.realpath(__file__)).parent
    project_root = script_dir.parent
    footer_file = project_root.joinpath('snippet', 'footer', 'lcn.md')
    doc.footer = MarkdownDoc.from_file(footer_file).body
    return doc
def extract_meta(doc: MarkdownDoc) -> MetaInfo:
    """Build a ``MetaInfo`` from a document's header quote and level-1 heading.

    The quote element returned by ``doc.find_one`` is scanned line by line.
    Lines are recognised by the marker text they contain:

    * ``原文地址``: the link text becomes the English title and the link URL
      becomes the original URL.
    * ``本文永久链接``: the last path component of the link URL becomes the
      document file name, and its stem becomes the article name.
    * ``译者``: the link text and URL become the translator name and page.

    The text of the level-1 heading, when one is found, becomes the Chinese
    title. Any field whose marker line is absent stays ``None``.

    Args:
        doc: Document to inspect.

    Returns:
        A ``MetaInfo`` whose base info carries the tag list ``['翻译']`` and
        whose gold-miner post URL is the empty string.

    Raises:
        AssertionError: If ``doc`` contains no ``Quote`` element.
    """
    name = None
    doc_name = None
    title_en = None
    title_cn = None
    original_url = None
    translator_name = None
    translator_page = None

    quote = doc.find_one(lambda p: isinstance(p, Quote))
    if quote is None:
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        raise AssertionError('no quote block found in document; cannot extract meta info')

    for line in quote.line_strings():
        if '原文地址' in line:
            original_address = extract_link(line)
            title_en = original_address.text
            original_url = original_address.url
        elif '本文永久链接' in line:
            permalink = extract_link(line)
            # Only the path part of the URL is used to derive the names.
            path = Path(urlparse(permalink.url).path)
            name = path.stem
            doc_name = path.name
        elif '译者' in line:
            translator = extract_link(line)
            translator_name = translator.text
            translator_page = translator.url

    heading1: Optional[Heading] = doc.find_one(
        lambda p: isinstance(p, Heading) and p.level == 1)
    if heading1 is not None:
        title_cn = heading1.text

    return MetaInfo(
        base=BaseInfo(
            name=name,
            docName=doc_name,
            titleEn=title_en,
            titleCn=title_cn,
            tags=['翻译'],
        ),
        translation=TranslationInfo(
            originalUrl=original_url,
            translatorName=translator_name,
            translatorPage=translator_page,
            goldMiner=GoldMinerTranslationInfo(postUrl='', ),
        ),
    )
def group_code_blocks(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Append ``' []'`` to the language of every code block in an adjacent group.

    Groups come from ``doc.find_adjacent`` called with a predicate that
    matches ``CodeBlock`` elements.

    Args:
        article: Not used by this function.
        doc: Document whose code blocks are updated.

    Returns:
        The same ``doc`` object that was passed in.
    """
    def is_code_block(element):
        return isinstance(element, CodeBlock)

    groups = doc.find_adjacent(is_code_block)
    # Flatten the groups: every member gets the same suffix.
    for block in (member for group in groups for member in group):
        block.language = block.language + ' []'
    return doc
def save_body_to_temp_zhihu(article: Article, doc: MarkdownDoc) -> None:
    """Write the document body, with Zhihu-style math, to a file on the Desktop.

    The body string is passed through ``replace_math_equations_zhihu`` and
    written to ``~/Desktop/<article.meta.base.docName>``, replacing any
    existing file of that name. A confirmation line is printed afterwards.

    Args:
        article: Supplies the output file name via ``meta.base.docName``.
        doc: Document whose ``full_body_string()`` is exported.
    """
    body_string = doc.full_body_string()
    # workaround: replace math equations as zhihu special format
    body_string = replace_math_equations_zhihu(body_string)
    filename = article.meta.base.docName
    file = Path.home() / 'Desktop' / filename
    # Pin the encoding: the platform default may be unable to encode the
    # document's non-ASCII text, or may produce a non-UTF-8 file.
    with file.open('w', encoding='utf-8') as f:
        print(body_string, file=f)
    print(f'document body exported to file {file}')
def transfer_image_uri_as_public(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Rewrite local image URIs to public ``http`` URLs on the configured host.

    Each image for which ``image.is_local()`` is true gets the URL
    ``http://<publicHost>/<baseDir>/<article name>/<image file name>``, where
    the host and base directory come from ``settings.image.oss``.

    Args:
        article: Supplies the article name via ``meta.base.name``.
        doc: Document whose ``transfer_image_uri`` method is invoked.

    Returns:
        The same ``doc`` object that was passed in.
    """
    def transfer(uri: str) -> str:
        base_url_path = settings.image.oss.baseDir
        article_name = article.meta.base.name
        # Only the file name of the local URI is kept.
        file_name = Path(uri).name
        url_path = Path(base_url_path).joinpath(article_name, file_name)
        return ParseResult(
            scheme='http',
            netloc=settings.image.oss.publicHost,
            # URL paths always use forward slashes; str() of a Path would
            # emit backslashes on Windows.
            path=url_path.as_posix(),
            params='',
            query='',
            fragment='',
        ).geturl()

    doc.transfer_image_uri(
        test=lambda image: image.is_local(),
        transfer=transfer,
    )
    return doc
def save_doc(self, doc: MarkdownDoc) -> None:
    """Save ``doc`` at the path returned by ``self.doc_path()``.

    ``self._mkdir()`` is called first (presumably to create the target
    directory -- confirm against its definition).

    Args:
        doc: Document to save.
    """
    self._mkdir()
    target = self.doc_path()
    doc.save(target)
def read_doc(self) -> MarkdownDoc:
    """Load the document file named by ``self.meta.base.docName``.

    Returns:
        The ``MarkdownDoc`` parsed from the file.

    Raises:
        RuntimeError: If the resolved document file does not exist.
    """
    doc_name = self.meta.base.docName
    doc_file = self.path_to(doc_name)
    if doc_file.exists():
        return MarkdownDoc.from_file(doc_file)
    raise RuntimeError(f'doc file not exists: {doc_file}')
def copy_body(article: Article, doc: MarkdownDoc) -> None:
    """Copy the document's full body string to the clipboard and report it.

    Args:
        article: Not used by this function.
        doc: Document whose ``full_body_string()`` is copied.
    """
    body = doc.full_body_string()
    pyperclip.copy(body)
    print('document body copied to clipboard')
def add_zhihu_footer(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Set ``doc.footer`` to the body of the ``snippet/footer/zhihu.md`` snippet.

    The snippet is located under the path returned by ``get_project_path()``.

    Args:
        article: Not used by this function.
        doc: Document whose ``footer`` attribute is overwritten.

    Returns:
        The same ``doc`` object that was passed in.
    """
    print('add zhihu footer')
    footer_dir = get_project_path() / 'snippet' / 'footer'
    footer_doc = MarkdownDoc.from_file(footer_dir / 'zhihu.md')
    doc.footer = footer_doc.body
    return doc
def extract_title(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Move a leading level-1 heading out of the document into ``doc.title``.

    ``doc.remove_start`` is asked to remove an element matching a level-1
    ``Heading``. When it returns one, its text is stored as the title;
    otherwise ``doc.title`` is left untouched.

    Args:
        article: Not used by this function.
        doc: Document to update.

    Returns:
        The same ``doc`` object that was passed in.
    """
    def is_top_level_heading(element):
        return isinstance(element, Heading) and element.level == 1

    removed = doc.remove_start(is_top_level_heading)
    if removed is None:
        return doc
    doc.title = removed.text
    return doc
def remove_footer(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Strip trailing quote and horizontal-rule elements from the document.

    The predicate is handed to ``doc.remove_end_while``, which decides how
    far the removal extends.

    Args:
        article: Not used by this function.
        doc: Document to update.

    Returns:
        The same ``doc`` object that was passed in.
    """
    def is_footer_element(element):
        return isinstance(element, (Quote, HorizontalRule))

    doc.remove_end_while(is_footer_element)
    return doc
def extract_gold_miner_header(article: Article, doc: MarkdownDoc) -> MarkdownDoc:
    """Store the result of removing leading quote elements as ``doc.header``.

    Whatever ``doc.remove_start_while`` returns for the ``Quote`` predicate is
    assigned to ``doc.header``.

    Args:
        article: Not used by this function.
        doc: Document to update.

    Returns:
        The same ``doc`` object that was passed in.
    """
    def is_quote(element):
        return isinstance(element, Quote)

    leading_quotes = doc.remove_start_while(is_quote)
    doc.header = leading_quotes
    return doc