Example #1
    def solver(self, url: str):
        posts = []

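        # First page: collect post titles and links from the home page listing.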
        res = get("https://taifua.com/")
        soup = BeautifulSoup(res, features="lxml")
        for item in soup.select(".list-title"):
            link = item.select_one("a")
            posts.append(Post(link.get_text(), link.get("href"), 0))

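        # Fetch page 2 of the listing through the WordPress admin-ajax endpoint.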
        res = post(
            "https://taifua.com/wp-admin/admin-ajax.php",
            {
                "append": "list-home",
                "paged": 2,
                "action": "ajax_load_posts",
                "query": "",
                "page": "home",
            },
        )
        soup = BeautifulSoup(res, features="lxml")
        for item in soup.select(".list-title"):
            link = item.select_one("a")
            posts.append(Post(link.get_text(), link.get("href"), 0))

        return posts
Example #2
def build_content(base_path: str) -> List[Post]:
    """
    Convert all the markdown files to HTML.
    Returns a list of Post objects, each containing:
    - destination file path
    - transformed HTML text
    - meta information
    """

    # Load the configuration file

    configuration = load_config_file(base_path)

    html_path = os.path.join(base_path, required_folders["public"])
    result = []

    # Iterate over (file name, file path) pairs
    for filen, file_path in list_content(base_path,
                                         required_folders["content"]):
        # Open the file
        with open(file_path, "r") as f:
            # load markdown
            md = markdown.Markdown(
                extensions=['meta', 'tables', 'sane_lists', 'attr_list'])
            # Read document
            data = f.read()
            # Convert markdown to html
            html = md.convert(data)

            # Get file extension
            filenn, extension = os.path.splitext(filen)

            # Skip files that aren't markdown
            if extension != '.md':
                continue

            if 'index' in filen:
                result.append(
                    Post(path=os.path.join(html_path, "index.html"),
                         html=html,
                         meta=md.Meta,
                         config=configuration,
                         index=True))
            else:
                if 'slug' in md.Meta.keys():
                    slug = md.Meta['slug'][0]
                else:
                    slug = filen.split(".")[0]

                md.Meta['slug'] = slug

                result.append(
                    Post(path=os.path.join(html_path, slug, "index.html"),
                         html=html,
                         meta=md.Meta,
                         config=configuration))
    return result
Example #3
    def _load_posts(self):
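        # Read POST_FILE, map each post id to a Post, and return the dict ordered by its Post values.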
        posts = self.read_json(POST_FILE).get('posts')
        if posts is None:
            raise Exception(f'No posts key in {POST_FILE}')

        posts = {p['id']: Post(**p) for p in posts}
        return {
            k: v
            for k, v in sorted(posts.items(), key=lambda item: item[1])
        }
Example #4
    def solver(self, url: str):
        res = get("%s/%s" %
                  (url.strip("/"), "front/articles.action?pageCount=0"))
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Each article card (.card) holds the title in .card-header and the
        # relative link on the card's parent element.
        for item in soup.select(".card"):
            posts.append(
                Post(
                    item.select_one(".card-header").get_text(), "%s/%s" %
                    (url.strip("/"), item.parent.get("href").strip("/"))))
        return posts
Example #5
    def solver(self, url: str):
        res = get(url)
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Titles are h3.rpwe-title elements; the date comes from the datetime
        # attribute of the <time> element under the same parent.
        for item in soup.select("h3.rpwe-title"):
            link = item.select_one("a")
            posts.append(Post(
                link.get_text(),
                link.get("href"),
                parseToUnix(item.parent.select_one("time").get("datetime")),
            ))
        return posts
Example #6
    def solver(self, url: str):
        res = get(url)
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Each a.article-title carries the title and a relative href; the
        # date is the text of the <time> element two levels up.
        for item in soup.select("a.article-title"):
            posts.append(
                Post(
                    item.get_text(),
                    "%s/%s" % (url.strip("/"), item.get("href").strip("/")),
                    parseToUnix(
                        item.parent.parent.select_one("time").get_text())))
        return posts
Example #7
    def solver(self, url: str):
        res = get("https://lolimay.cn/archives/")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # The archive page lists titles in .post-title and dates in the
        # .post-date element under the same parent.
        for item in soup.select(".post-title"):
            link = item.select_one("a")
            posts.append(Post(
                link.get_text(),
                "%s/%s" % (url.strip("/"), link.get("href").strip("/")),
                parseToUnix(item.parent.select_one(".post-date").get_text()),
            ))
        return posts
Example #8
    def solver(self, url: str):
        res = get("https://jspang.com/")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Titles sit in div.list-title; the date is taken from the text
        # wrapped around the first <i> icon in the same block.
        for item in soup.select("div.list-title"):
            link = item.select_one("a")
            posts.append(
                Post(
                    link.get_text(),
                    link.get("href"),
                    parseToUnix(item.parent.select_one("i").parent.get_text()),
                ))
        return posts
Example #9
    def solver(self, url: str):
        res = get("https://www.sanghangning.cn/json/blog.json")
        data = json.loads(res)
        posts = []
        # The blog exposes its post list as JSON; each entry has a title, a
        # relative url, and a date string.
        for post in data["blog"]:
            posts.append(
                Post(
                    post['title'],
                    "%s/%s" % ("https://www.sanghangning.cn".strip("/"),
                               post['url'].strip("/")),
                    parseToUnix(post["date"]),
                ))
        return posts
Example #10
    def solver(self, url: str):
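        # Parse the feed with feedparser and collect its entries.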
        res = get(url)
        file = feedparser.parse(res)
        entries = file.entries

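        # Sort entries by publication date, newest first.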
        entries.sort(
            key=lambda x: parse(x.published).timestamp(),
            reverse=True,
        )

        posts = []
        for f in entries:
            posts.append(Post(f.title, f.link, parse(f.published).timestamp()))
        return posts
Example #11
    def solver(self, url: str):
        res = get("https://www.lizenghai.com/user/1/posts")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Post links carry rel="bookmark"; the date is read from the datetime
        # attribute of the <time> element three levels up.
        for item in soup.find_all("a", rel="bookmark"):
            posts.append(
                Post(
                    item.get_text(),
                    item.get("href"),
                    parseToUnix(
                        item.parent.parent.parent.select_one("time").get(
                            "datetime")),
                ))
        return posts
Example #12
    def solver(self, url: str):
        res = get("http://ylinknest.top")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        for item in soup.select(".post-title"):
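            # The date next to the calendar icon is "YY/MM/DD"; split it into
            # integers and add 2000 to the two-digit year.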
            timeTuple = list(map(int, item.parent.select_one(
                ".fa-calendar").parent.get_text().split("/")))

            posts.append(Post(
                item.get_text(),
                item.select_one("a").get("href"),
                datetime(2000+timeTuple[0], timeTuple[1],
                         timeTuple[2]).timestamp(),
            ))
        return posts
Example #13
    def solver(self, url: str):
        res = get("https://wiki.ioin.in/")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Posts are table rows; skip rows without a link. The first <td>
        # holds the date.
        for item in soup.select("tr"):
            link = item.select_one("a")
            if link is None:
                continue
            posts.append(
                Post(
                    link.get_text(),
                    "%s/%s" % (url.strip("/"), link.get("href").strip("/")),
                    parseToUnix(item.select_one("td").get_text()),
                ))
        return posts
Example #14
    def solver(self, url: str):
        res = get("https://jarviswwong.com/")
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        # Each card (.cardmao) shows its date as plain digits inside
        # .info-date; pull out year, month and day with a regex.
        for item in soup.select(".cardmao"):
            y, m, d = map(
                int,
                re.findall(
                    r'(\d+)',
                    item.select_one(".info-date").select_one(
                        "span").get_text()))
            posts.append(
                Post(
                    item.select_one(".card-title").get_text(),
                    item.select_one("a").get("href"),
                    datetime.datetime(y, m, d).timestamp(),
                ))
        return posts
Example #15
    def solver(self, url: str):
        res = get(url)
        soup = BeautifulSoup(res, features="lxml")
        posts = []
        for item in soup.select("article.kratos-hentry"):
            link = item.select_one(".kratos-entry-title-new").select_one("a")

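            # Extract year, month and day from the text next to the calendar
            # icon using the regxp pattern.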
            timeTuple = list(
                map(
                    int,
                    regxp.findall(
                        item.select_one("i.fa-calendar").parent.get_text())))

            posts.append(
                Post(
                    link.get_text(),
                    link.get("href"),
                    datetime(timeTuple[0], timeTuple[1],
                             timeTuple[2]).timestamp(),
                ))
        return posts