コード例 #1
0
ファイル: baidu_data.py プロジェクト: moonlightshadow123/news
 def parse(self, text):
     """Extract ranked keyword entries from the first table of an HTML page.

     Args:
         text: HTML markup, parsed with BeautifulSoup's "lxml" parser.

     Returns:
         A dict with keys "list" (one dict per kept row, holding "num",
         "title", "score" and "url"), "date" (result of ``nowStr()``),
         "logo" and "color".

     Raises:
         AttributeError: if the page has no <table>, or a kept row lacks
             the "keyword" anchor or the "last" cell.
         ValueError: if the rank or score text is not an integer.
     """
     document = BeautifulSoup(text, "lxml")
     rows = document.find('table').find_all('tr')

     entries = []
     # The first <tr> is always skipped (presumably the header row).
     for row in rows[1:]:
         rank_cell = row.find(class_="first")
         if not rank_cell:
             # Rows without a rank cell carry no entry.
             continue
         rank = int(rank_cell.text)

         anchor = row.find(class_="keyword").find('a')
         raw_title = anchor.text
         link = anchor['href']
         # Drop embedded newlines/tabs before trimming the ends.
         title = raw_title.replace('\n', '').replace('\t', '').strip()

         score = int(row.find(class_="last").text)
         entries.append({
             'num': rank,
             'title': title,
             'score': score,
             'url': link
         })

     return {
         "list": entries,
         "date": nowStr(),
         "logo": logo_url,
         "color": bgcolor,
     }
コード例 #2
0
    def parse(self, text):
        """Extract entries from the rows of the "pl_top_realtimehot" table.

        Args:
            text: HTML markup, parsed with ``etree.HTML``.

        Returns:
            A dict with keys "list", "date" (result of ``nowStr()``), "logo"
            and "color". Each list item has "title", "score", "num" (the
            row's position in the table, starting at 1 because row 0 is
            dropped) and "url" (``host`` + the anchor's href), plus "tag"
            when the third cell contains an <i> with text.

        Raises:
            IndexError: if any row (including row 0) has no anchor in its
                second cell, or a kept row has no title/score text.
        """
        tree = etree.HTML(text)
        rows = tree.xpath('//*[@id="pl_top_realtimehot"]/table/tbody/tr')

        entries = []
        for position, row in enumerate(rows):
            titles = row.xpath('./td[2]/a/text()')
            scores = row.xpath('./td[2]/span/text()')
            # Built for every row, so row 0 must also contain an anchor.
            link = host + row.xpath('./td[2]/a')[0].get("href")
            tags = row.xpath('./td[3]/i/text()')

            # Filter out entry 0.
            if position == 0:
                continue

            entry = {
                "title": titles[0],
                "score": scores[0],
                "num": position,
                "url": link,
            }
            if tags:
                entry["tag"] = tags[0]
            entries.append(entry)

        return {
            "list": entries,
            "date": nowStr(),
            "logo": logo_url,
            "color": bgcolor,
        }
コード例 #3
0
    def parse(self, text):
        """Extract entries from every element with class "rank-item".

        Args:
            text: HTML markup, parsed with ``etree.HTML``.

        Returns:
            A dict with keys "list", "date" (result of ``nowStr()``), "logo"
            and "color". Each list item has "title", "score", "num"
            (1-based position among the matched elements) and "url" (the
            href of the anchor inside the "info" div).

        Raises:
            IndexError: if an item lacks the anchor, title text or score
                text at the expected path.
        """
        tree = etree.HTML(text)
        items = tree.xpath('//*[@class="rank-item"]')

        entries = []
        for rank, node in enumerate(items, start=1):
            titles = node.xpath(
                './div[@class="content"]/div[@class="info"]/a/text()')
            scores = node.xpath(
                './div[@class="content"]/div[@class="info"]/div[@class="pts"]/div/text()'
            )
            anchors = node.xpath(
                './div[@class="content"]/div[@class="info"]/a')
            link = anchors[0].get("href")
            entries.append({
                "title": titles[0],
                "score": scores[0],
                "num": rank,
                "url": link,
            })

        return {
            "list": entries,
            "date": nowStr(),
            "logo": logo_url,
            "color": bgcolor,
        }
コード例 #4
0
    def parse(self, text):
        """Extract entries from the <li> items of the "j-bbs-hotpost" box.

        Args:
            text: HTML markup, parsed with ``etree.HTML``.

        Returns:
            A dict with keys "list", "date" (result of ``nowStr()``), "logo"
            and "color". Each list item has "title" (text of the
            ``<p class="title">`` inside the item), "score" (always the
            empty string; no score is read from the page), "num" (1-based
            position) and "url" (href of the item's direct <a> child).

        Raises:
            IndexError: if an item has no direct <a> child or no title text.
        """
        html_xpath = etree.HTML(text)
        items = html_xpath.xpath(
            '//*[@id="j-bbs-hotpost"]/*[@class="m-box"]/ul/li')

        res_list = []
        for rank, li in enumerate(items, start=1):
            title = li.xpath('.//p[@class="title"]/text()')
            url = li.xpath('./a')[0].get("href")
            res_list.append({
                "title": title[0],
                "score": "",
                "num": rank,
                "url": url,
            })

        return {
            "list": res_list,
            "date": nowStr(),
            "logo": logo_url,
            "color": bgcolor,
        }
コード例 #5
0
ファイル: zhihu_data.py プロジェクト: moonlightshadow123/news
 def parse(self, data):
     """Build the hot-list result from an already-decoded data structure.

     Args:
         data: structure searched with ``findKey`` for the "hotList"
             value; presumably decoded JSON -- confirm against caller.

     Returns:
         A dict with keys "list", "date" (result of ``nowStr()``), "logo"
         and "color". Each list item has "title", "url", "num" (1-based
         position) and "score", plus "extra" when the entry's excerpt
         text is non-empty.
     """
     hot_list = findKey(data, "hotList")

     res_list = []
     for rank, entry in enumerate(hot_list, start=1):
         cur_dict = {
             "title": findKey(entry, "titleArea")["text"],
             "url": findKey(entry, "link")["url"],
             "num": rank,
             "score": findKey(entry, "metricsArea")["text"],
         }
         # Look the excerpt up once instead of searching the entry twice.
         excerpt = findKey(entry, "excerptArea")["text"]
         if excerpt:
             cur_dict["extra"] = excerpt
         res_list.append(cur_dict)

     return {
         "list": res_list,
         "date": nowStr(),
         "logo": logo_url,
         "color": bgcolor,
     }
コード例 #6
0
    def parse(self, data):
        """Build the topic-list result from an already-decoded data structure.

        Args:
            data: structure in which ``findKey`` locates "bang_topic" and,
                inside that, "topic_list".

        Returns:
            A dict with keys "list", "date" (result of ``nowStr()``), "logo"
            and "color". Each list item has "title", "score", "num"
            (1-based position), "url", "text" and "img".

        Raises:
            KeyError: if a topic lacks one of the fields read below
                (assuming topics are plain dicts).
        """
        bang_topic = findKey(data, "bang_topic")
        topics = findKey(bang_topic, "topic_list")

        entries = []
        for rank, topic in enumerate(topics, start=1):
            entries.append({
                "title": topic["topic_name"],
                "score": topic["discuss_num"],
                "num": rank,
                # NOTE(review): "url" is filled with the topic name, not a
                # link -- looks like a copy-paste slip; confirm intended key.
                "url": topic["topic_name"],
                "text": topic["topic_desc"],
                "img": topic["topic_pic"],
            })

        return {
            "list": entries,
            "date": nowStr(),
            "logo": logo_url,
            "color": bgcolor,
        }