def getXunLeiAccount():
    """Scrape shared Xunlei (Thunder) VIP account strings from 521xunlei.com.

    Reads the portal block, follows each link whose title passes
    ``utils.checkLink``, and collects short lines mentioning the
    account marker from the post body.

    Returns:
        list: candidate account strings (empty when the portal block
        is missing or nothing matches).
    """
    data = []
    html = getPageHTML('http://521xunlei.com/portal.php')
    soup = BeautifulSoup(html, 'html.parser')
    elements = soup.find(id="portal_block_62_content")
    if elements is None:  # portal block absent: nothing to scrape
        print(u"未找到资源.")
        return data
    for link in elements.find_all('a'):
        if utils.checkLink(link.get("title")) < 0:
            continue  # title rejected by the link filter
        pageURL = "http://521xunlei.com/" + link.get('href')
        html = getPageHTML(pageURL)
        soup = BeautifulSoup(html, 'html.parser')
        content = soup.find_all("td", class_="t_f")[0]
        flag = "迅雷"
        for text in content.get_text().split("\r\n"):
            text = text.encode('utf-8')
            # BUG FIX: the original wrote `if (text.find("\n")):` — find()
            # returns -1 (truthy) when absent and 0 (falsy) at position 0,
            # so the truncation fired exactly when it should not have.
            if "\n" in text:
                text = text.split("\n")[0]
            for line in text.split("\n"):
                # keep only short lines carrying the account marker
                if line.find(flag) >= 0 and len(line) < 90:
                    _data = utils.removeChineseChar(line)
                    if len(_data.replace(' ', '')) >= 10:
                        data.append(_data)
                        break
    return data
def getXunLeiAccount():
    """Collect shared Xunlei VIP account strings published on xlfans.com.

    Opens the newest excerpt article linked from the front page and
    extracts every paragraph line that mentions both the Xunlei and
    password markers.

    Returns:
        list: candidate account strings (possibly empty).
    """
    accounts = []
    front_page = BeautifulSoup(getPage("http://xlfans.com"), 'html.parser')
    newest_article = front_page.find_all("article", class_="excerpt")[0]
    post_url = newest_article.find_all("a")[0].get('href')
    post_page = BeautifulSoup(getPage(post_url), 'html.parser')
    for paragraph in post_page.find_all("p"):
        raw = paragraph.get_text().encode('utf-8')
        if raw.find("迅雷") >= 0 and raw.find("密") >= 0:
            for candidate in utils.removeChineseChar(raw).split(u'\n'):
                # discard lines too short to be an account entry
                if len(candidate.replace(' ', '')) >= 10:
                    accounts.append(candidate)
    return accounts
def getXunLeiAccount():
    """Fetch shared Xunlei VIP account strings from xlfans.com.

    Follows the first excerpt article on the home page and harvests
    paragraph lines that contain both the Xunlei and password markers.

    Returns:
        list: candidate account strings (possibly empty).
    """
    results = []
    home = BeautifulSoup(getPage("http://xlfans.com"), 'html.parser')
    first_article = home.find_all("article", class_="excerpt")[0]
    detail_url = first_article.find_all("a")[0].get('href')
    detail = BeautifulSoup(getPage(detail_url), 'html.parser')
    for block in detail.find_all("p"):
        body = block.get_text().encode('utf-8')
        has_markers = body.find("迅雷") >= 0 and body.find("密") >= 0
        if not has_markers:
            continue
        lines = utils.removeChineseChar(body).split(u'\n')
        # keep lines long enough (ignoring spaces) to be account entries
        results.extend(entry for entry in lines
                       if len(entry.replace(' ', '')) >= 10)
    return results