def solver(self, url: str):
    """Collect posts from taifua.com: the home page plus AJAX page 2.

    Returns a list of Post(title, href, 0); the listing exposes no usable
    timestamp, so 0 is used as a placeholder. The original duplicated the
    same parse loop twice — factored into a local helper.
    """

    def extract(html):
        # Home page and AJAX response share the same markup: each post
        # title is an <a> inside a ".list-title" node.
        soup = BeautifulSoup(html, features="lxml")
        posts = []
        for item in soup.select(".list-title"):
            link = item.select_one("a")
            posts.append(Post(link.get_text(), link.get("href"), 0))
        return posts

    results = extract(get("https://taifua.com/"))
    # Page 2 is only reachable through the WordPress admin-ajax endpoint.
    results += extract(
        post(
            "https://taifua.com/wp-admin/admin-ajax.php",
            {
                "append": "list-home",
                "paged": 2,
                "action": "ajax_load_posts",
                "query": "",
                "page": "home",
            },
        ))
    return results
def build_content(base_path: str) -> List[Post]:
    """Convert all the markdown files to html files.

    Returns a list of Post records carrying:
    - destination file path
    - html transformed text
    - meta information
    """
    configuration = load_config_file(base_path)
    html_path = os.path.join(base_path, required_folders["public"])
    result = []
    # Iterate over (file name, file path) pairs.
    for filen, file_path in list_content(base_path, required_folders["content"]):
        # Skip non-markdown files BEFORE opening/converting them — the
        # original read and markdown-converted every file first, wasting
        # work on files it then discarded.
        _, extension = os.path.splitext(filen)
        if extension != '.md':
            continue
        with open(file_path, "r") as f:
            data = f.read()
        # Fresh Markdown instance per file so md.Meta is per-document.
        md = markdown.Markdown(
            extensions=['meta', 'tables', 'sane_lists', 'attr_list'])
        html = md.convert(data)
        if 'index' in filen:
            result.append(
                Post(path=os.path.join(html_path, "index.html"),
                     html=html,
                     meta=md.Meta,
                     config=configuration,
                     index=True))
        else:
            # Prefer an explicit 'slug' meta entry (meta values are lists);
            # fall back to the file name up to its first dot.
            if 'slug' in md.Meta.keys():
                slug = md.Meta['slug'][0]
            else:
                slug = filen.split(".")[0]
            # NOTE(review): stored as a plain string while other Meta values
            # are lists — kept as-is for compatibility with consumers.
            md.Meta['slug'] = slug
            result.append(
                Post(path=os.path.join(html_path, slug, "index.html"),
                     html=html,
                     meta=md.Meta,
                     config=configuration))
    return result
def _load_posts(self):
    """Load posts from POST_FILE, keyed by id and ordered by Post ordering.

    Raises a generic Exception when the JSON document lacks a 'posts' key.
    """
    raw = self.read_json(POST_FILE).get('posts')
    if raw is None:
        raise Exception(f'No posts key in {POST_FILE}')
    by_id = {entry['id']: Post(**entry) for entry in raw}
    # Relies on Post defining an ordering (sorted compares the values).
    return dict(sorted(by_id.items(), key=lambda pair: pair[1]))
def solver(self, url: str):
    """Scrape article cards from the site's unpaged article listing."""
    base = url.strip("/")
    html = get("%s/%s" % (base, "front/articles.action?pageCount=0"))
    dom = BeautifulSoup(html, features="lxml")
    results = []
    for card in dom.select(".card"):
        title = card.select_one(".card-header").get_text()
        # The anchor wrapping the card carries the (relative) href.
        href = card.parent.get("href").strip("/")
        results.append(Post(title, "%s/%s" % (base, href)))
    return results
def solver(self, url: str):
    """Parse posts rendered by the rpwe widget (h3.rpwe-title entries)."""
    dom = BeautifulSoup(get(url), features="lxml")
    results = []
    for title in dom.select("h3.rpwe-title"):
        anchor = title.select_one("a")
        # The sibling <time> element holds a machine-readable datetime attr.
        stamp = parseToUnix(title.parent.select_one("time").get("datetime"))
        results.append(Post(anchor.get_text(), anchor.get("href"), stamp))
    return results
def solver(self, url: str):
    """Scrape a.article-title anchors; hrefs are relative to the site root."""
    dom = BeautifulSoup(get(url), features="lxml")
    results = []
    for anchor in dom.select("a.article-title"):
        absolute = "%s/%s" % (url.strip("/"), anchor.get("href").strip("/"))
        # Publication date is the text of a <time> two levels up.
        published = parseToUnix(
            anchor.parent.parent.select_one("time").get_text())
        results.append(Post(anchor.get_text(), absolute, published))
    return results
def solver(self, url: str):
    """Scrape lolimay.cn's archives page; hrefs are site-relative."""
    dom = BeautifulSoup(get("https://lolimay.cn/archives/"), features="lxml")
    results = []
    for entry in dom.select(".post-title"):
        anchor = entry.select_one("a")
        full_url = "%s/%s" % (url.strip("/"), anchor.get("href").strip("/"))
        when = parseToUnix(entry.parent.select_one(".post-date").get_text())
        results.append(Post(anchor.get_text(), full_url, when))
    return results
def solver(self, url: str):
    """Scrape jspang.com's home listing (div.list-title entries)."""
    dom = BeautifulSoup(get("https://jspang.com/"), features="lxml")
    results = []
    for entry in dom.select("div.list-title"):
        anchor = entry.select_one("a")
        # The date text lives in the element wrapping the calendar <i> icon.
        date_text = entry.parent.select_one("i").parent.get_text()
        results.append(
            Post(anchor.get_text(), anchor.get("href"), parseToUnix(date_text)))
    return results
def solver(self, url: str):
    """Read the blog index from the site's static JSON feed."""
    payload = json.loads(get("https://www.sanghangning.cn/json/blog.json"))
    results = []
    for entry in payload["blog"]:
        link = "%s/%s" % ("https://www.sanghangning.cn".strip("/"),
                          entry['url'].strip("/"))
        results.append(Post(entry['title'], link, parseToUnix(entry["date"])))
    return results
def solver(self, url: str):
    """Parse an RSS/Atom feed and return its entries newest-first.

    Each entry's `published` field is parsed exactly once (the original
    parsed it twice per entry: once for the sort key and again when
    building the Post timestamp).
    """
    feed = feedparser.parse(get(url))
    # Decorate with the parsed timestamp, sort newest-first, then build.
    stamped = [(parse(e.published).timestamp(), e) for e in feed.entries]
    stamped.sort(key=lambda pair: pair[0], reverse=True)
    return [Post(e.title, e.link, ts) for ts, e in stamped]
def solver(self, url: str):
    """Scrape a lizenghai.com user's post list via rel=bookmark anchors."""
    dom = BeautifulSoup(
        get("https://www.lizenghai.com/user/1/posts"), features="lxml")
    results = []
    for anchor in dom.find_all("a", rel="bookmark"):
        # The <time> element sits three ancestors up from the anchor.
        container = anchor.parent.parent.parent
        stamp = parseToUnix(container.select_one("time").get("datetime"))
        results.append(Post(anchor.get_text(), anchor.get("href"), stamp))
    return results
def solver(self, url: str):
    """Scrape ylinknest.top; dates are rendered as two-digit yy/mm/dd."""
    dom = BeautifulSoup(get("http://ylinknest.top"), features="lxml")
    results = []
    for title in dom.select(".post-title"):
        raw_date = title.parent.select_one(".fa-calendar").parent.get_text()
        parts = [int(piece) for piece in raw_date.split("/")]
        results.append(
            Post(
                title.get_text(),
                title.select_one("a").get("href"),
                # The year comes as two digits, hence the 2000 offset.
                datetime(2000 + parts[0], parts[1], parts[2]).timestamp(),
            ))
    return results
def solver(self, url: str):
    """Scrape wiki.ioin.in's table listing; skip rows without a link.

    Each table row holds an anchor (relative href) and a <td> whose text
    is the post date.
    """
    dom = BeautifulSoup(get("https://wiki.ioin.in/"), features="lxml")
    results = []
    for row in dom.select("tr"):
        anchor = row.select_one("a")
        # Header/spacer rows carry no anchor. Use `is None`, not `== None`:
        # the idiomatic identity test avoids bs4's overloaded tag equality.
        if anchor is None:
            continue
        results.append(
            Post(
                anchor.get_text(),
                "%s/%s" % (url.strip("/"), anchor.get("href").strip("/")),
                parseToUnix(row.select_one("td").get_text()),
            ))
    return results
def solver(self, url: str):
    """Scrape jarviswwong.com cards; dates are digit groups in .info-date."""
    dom = BeautifulSoup(get("https://jarviswwong.com/"), features="lxml")
    results = []
    for card in dom.select(".cardmao"):
        date_text = card.select_one(".info-date").select_one("span").get_text()
        # Expect exactly three digit groups: year, month, day.
        year, month, day = map(int, re.findall(r'(\d+)', date_text))
        results.append(
            Post(
                card.select_one(".card-title").get_text(),
                card.select_one("a").get("href"),
                datetime.datetime(year, month, day).timestamp(),
            ))
    return results
def solver(self, url: str):
    """Scrape a Kratos-themed blog: article.kratos-hentry entries."""
    dom = BeautifulSoup(get(url), features="lxml")
    results = []
    for article in dom.select("article.kratos-hentry"):
        anchor = article.select_one(".kratos-entry-title-new").select_one("a")
        # Date digits are pulled from the text next to the calendar icon by
        # the module-level pattern `regxp` (defined elsewhere in this file).
        parts = [
            int(piece) for piece in regxp.findall(
                article.select_one("i.fa-calendar").parent.get_text())
        ]
        results.append(
            Post(
                anchor.get_text(),
                anchor.get("href"),
                datetime(parts[0], parts[1], parts[2]).timestamp(),
            ))
    return results