def parse(self, text):
    """Parse the first HTML table in ``text`` into the hot-list payload.

    The first ``<tr>`` of the table is skipped, and so is any later row
    that has no element with class ``first``.

    Args:
        text: HTML markup of the page to parse.

    Returns:
        A dict with the keys ``list`` (one dict per kept row holding
        ``num``, ``title``, ``score`` and ``url``), ``date`` (the result of
        ``nowStr()``), ``logo`` and ``color``.
    """
    rows = BeautifulSoup(text, "lxml").find('table').find_all('tr')

    entries = []
    for row in rows[1:]:
        rank_cell = row.find(class_="first")
        if not rank_cell:
            continue
        rank = int(rank_cell.text)

        # The same anchor supplies both the visible keyword and its link.
        anchor = row.find(class_="keyword").find('a')
        raw_title = anchor.text
        link = anchor['href']
        title = raw_title.replace('\n', '').replace('\t', '').strip()

        score = int(row.find(class_="last").text)

        entries.append({
            'num': rank,
            'title': title,
            'score': score,
            'url': link
        })

    return {
        "list": entries,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }
def parse(self, text):
    """Parse the ``pl_top_realtimehot`` table into the hot-list payload.

    Row 0 of the table is filtered out; every following row becomes one
    entry numbered by its row index (so the first kept entry has
    ``num == 1``).

    Args:
        text: HTML markup containing the ``pl_top_realtimehot`` element.

    Returns:
        A dict with the keys ``list``, ``date`` (the result of ``nowStr()``),
        ``logo`` and ``color``. Each item of ``list`` holds ``title``,
        ``score``, ``num`` and ``url`` (``host`` prepended to the anchor's
        ``href``), plus ``tag`` when the third cell contains ``<i>`` text.

    Raises:
        IndexError: If a kept row has no anchor, title text or score text
            in its second cell.
    """
    rows = etree.HTML(text).xpath(
        '//*[@id="pl_top_realtimehot"]/table/tbody/tr')

    res_list = []
    for num, tr in enumerate(rows):
        # Row 0 is discarded; skip it before reading any of its cells so a
        # row that is thrown away anyway can never abort the whole parse.
        if num == 0:
            continue

        title = tr.xpath('./td[2]/a/text()')
        hot_score = tr.xpath('./td[2]/span/text()')
        url = host + tr.xpath('./td[2]/a')[0].get("href")
        tag = tr.xpath('./td[3]/i/text()')

        cur_dict = {
            "title": title[0],
            "score": hot_score[0],
            "num": num,
            "url": url,
        }
        if tag:
            cur_dict["tag"] = tag[0]
        res_list.append(cur_dict)

    return {
        "list": res_list,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }
def parse(self, text):
    """Parse every ``rank-item`` element in ``text`` into the hot-list payload.

    Args:
        text: HTML markup of the ranking page.

    Returns:
        A dict with the keys ``list``, ``date`` (the result of ``nowStr()``),
        ``logo`` and ``color``. Each item of ``list`` holds ``title``,
        ``score``, ``num`` (1-based position among the matched elements)
        and ``url`` (the anchor's ``href``).
    """
    ranked_nodes = etree.HTML(text).xpath('//*[@class="rank-item"]')

    res_list = []
    for position, node in enumerate(ranked_nodes, start=1):
        titles = node.xpath(
            './div[@class="content"]/div[@class="info"]/a/text()')
        scores = node.xpath(
            './div[@class="content"]/div[@class="info"]/div[@class="pts"]/div/text()'
        )
        link = node.xpath(
            './div[@class="content"]/div[@class="info"]/a')[0].get("href")
        res_list.append({
            "title": titles[0],
            "score": scores[0],
            "num": position,
            "url": link,
        })

    return {
        "list": res_list,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }
def parse(self, text):
    """Parse the ``j-bbs-hotpost`` list items into the hot-list payload.

    Args:
        text: HTML markup containing the ``j-bbs-hotpost`` element.

    Returns:
        A dict with the keys ``list``, ``date`` (the result of ``nowStr()``),
        ``logo`` and ``color``. Each item of ``list`` holds ``title``,
        ``score`` (always an empty string here), ``num`` (1-based position)
        and ``url`` (the ``href`` of the item's direct ``<a>`` child).
    """
    items = etree.HTML(text).xpath(
        '//*[@id="j-bbs-hotpost"]/*[@class="m-box"]/ul/li')

    res_list = []
    for position, li in enumerate(items, start=1):
        titles = li.xpath('.//p[@class="title"]/text()')
        link = li.xpath('./a')[0].get("href")
        res_list.append({
            "title": titles[0],
            "score": "",
            "num": position,
            "url": link,
        })

    return {
        "list": res_list,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }
def parse(self, data):
    """Convert the ``hotList`` entries found in ``data`` into the hot-list payload.

    Args:
        data: Structure searched with ``findKey`` for a ``hotList``
            collection; presumably decoded JSON -- confirm against caller.

    Returns:
        A dict with the keys ``list``, ``date`` (the result of ``nowStr()``),
        ``logo`` and ``color``. Each item of ``list`` holds ``title``,
        ``url``, ``num`` (1-based position) and ``score``, plus ``extra``
        when the entry's ``excerptArea`` text is truthy.
    """
    hot_entries = findKey(data, "hotList")

    res_list = []
    for position, entry in enumerate(hot_entries, start=1):
        item = {
            "title": findKey(entry, "titleArea")["text"],
            "url": findKey(entry, "link")["url"],
            "num": position,
            "score": findKey(entry, "metricsArea")["text"],
        }
        excerpt = findKey(entry, "excerptArea")["text"]
        if excerpt:
            item["extra"] = excerpt
        res_list.append(item)

    return {
        "list": res_list,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }
def parse(self, data):
    """Convert the ``bang_topic`` topic list in ``data`` into the hot-list payload.

    Args:
        data: Structure searched with ``findKey`` for ``bang_topic`` and,
            inside it, ``topic_list``; presumably decoded JSON -- confirm
            against caller.

    Returns:
        A dict with the keys ``list``, ``date`` (the result of ``nowStr()``),
        ``logo`` and ``color``. Each item of ``list`` holds ``title``,
        ``score``, ``num`` (1-based position), ``url``, ``text`` and ``img``.
    """
    topics = findKey(findKey(data, "bang_topic"), "topic_list")

    res_list = [
        {
            "title": topic["topic_name"],
            "score": topic["discuss_num"],
            "num": position,
            # NOTE(review): "url" is filled from topic_name, the same field
            # as "title" -- confirm whether a dedicated link field was meant.
            "url": topic["topic_name"],
            "text": topic["topic_desc"],
            "img": topic["topic_pic"],
        }
        for position, topic in enumerate(topics, start=1)
    ]

    return {
        "list": res_list,
        "date": nowStr(),
        "logo": logo_url,
        "color": bgcolor,
    }