Python process_html_string Exemples, tsing_spider.util.process_html_string Python Exemples

Exemple #1

0

Afficher le fichier

    def parse_reply(item: BeautifulSoup):
        """Build a ``Reply`` from the HTML element of a single reply.

        Users are read from every ``<a class="tshuz_at" target="_blank">``
        link, the text from ``<span class="tshuz_cnt_main">`` and the tick
        from the ``title`` attribute of the first ``<span>`` inside
        ``<div class="tshuz_time">``.

        Args:
            item: Parsed HTML node containing one reply.

        Returns:
            A ``Reply`` constructed with the first linked user as its
            positional argument, plus ``content``, ``tick`` and ``user_to``
            (the second linked user, or ``None`` when only one link exists).

        Raises:
            Exception: If no user link, no content span or no time span can
                be found in ``item``.
        """
        users = [
            User(process_html_string(link.get_text()),
                 User.extract_uid(link.get("href")))
            for link in item.find_all(
                "a", attrs={"class": "tshuz_at", "target": "_blank"})
        ]
        if not users:
            raise Exception("Can't find user information")

        # find() yields None when nothing matches; report that explicitly
        # instead of failing later with an AttributeError on NoneType.
        content_node = item.find("span", attrs={"class": "tshuz_cnt_main"})
        if content_node is None:
            raise Exception("Can't find reply content")
        content = process_html_string(content_node.get_text())

        time_block = item.find("div", attrs={"class": "tshuz_time"})
        time_span = time_block.find("span") if time_block is not None else None
        if time_span is None:
            raise Exception("Can't find reply time")
        tick = time_span.get("title")

        return Reply(users[0],
                     content=content,
                     tick=tick,
                     user_to=users[1] if len(users) >= 2 else None)

Exemple #2

0

Afficher le fichier

    def parse_content_text(item):
        """Return the text of the ``t_f`` content container inside ``item``.

        The container is looked up as ``<td class="t_f">`` first and as
        ``<div class="t_f">`` second.  Before the text is read, the
        no-permission tips and image text tips are detached from it.  The
        remaining text is passed through ``process_html_string``.

        Raises:
            Exception: If neither container element is present.
        """
        for tag_name in ("td", "div"):
            container = item.find(tag_name, attrs={"class": "t_f"})
            if container is not None:
                break
        else:
            raise Exception("Can't find content container in item block.")

        # Strip the no-permission tip first, then the image text tip.
        for tip_class in ("attach_nopermission attach_tips",
                          "tip tip_4 aimg_tip"):
            tips = container.find_all("div", attrs={"class": tip_class})
            if tips is None:
                continue
            for tip in tips:
                tip.extract()

        raw_text = container.get_text()
        return process_html_string(raw_text)

Exemple #3

0

Afficher le fichier

 def parse_user(a_tag):
     """Create a ``User`` from an anchor tag.

     The tag's text, passed through ``process_html_string``, is used as
     ``name``; ``User.extract_uid`` applied to the tag's ``href`` attribute
     supplies ``uid``.
     """
     display_name = process_html_string(a_tag.get_text())
     href = a_tag.get("href")
     return User(name=display_name, uid=User.extract_uid(href))

Exemple #4

0

Afficher le fichier

 def comments(self):
     """Return the processed text of every entry in ``self.content_list``.

     Each entry's ``get_text()`` result is passed through
     ``process_html_string``; the results are returned as a list in the
     same order as ``self.content_list``.
     """
     texts = []
     for entry in self.content_list:
         texts.append(process_html_string(entry.get_text()))
     return texts