self.link.pop() data_re = '账号 *\d' pass_re = '\r\n密码.*' if re.search(data_re, data): self.data_list.append(data) if re.search(pass_re, data): self.pass_list.append(data) def handle_endtag(self, tag): if tag == 'a': self.a_tag = False if __name__ == '__main__': html = req.reqs('http://www.aiqiyivip.com/forum-2-1.html', 'gbk') spider1 = spider() spider1.feed(html) for link in spider1.rlink: vip_page = req.reqs(link, 'gbk') vip_page.replace('<br>', '') spider2 = spider() spider2.feed(vip_page) re_list = zip(spider2.data_list, spider2.pass_list) for data in re_list: for i in data: print(i.lstrip(), end='') print() spider2.close() print('数据来源:http://www.aiqiyivip.com/')
pass else: for (name, value) in attrs: if name == 'title': if re.search(link_re, value): title = True for (name, value) in attrs: if name == 'href' and title: self.link.append(value) def handle_data(self, data): data_re = '账号\d*' if re.match(data_re, data): self.data_list.append(data) if __name__ == '__main__': html = req.reqs('http://www.vipfenxiang.com/aiqiyi/') spider1 = spider() spider1.feed(html) result = spider1.link for link in result: vip_page = req.reqs(link) spider2 = spider() spider2.feed(vip_page) for data in spider2.data_list: print(data) print('数据来源:VIP分享网 http://www.vipfenxiang.com/') spider1.close() spider2.close()
else: for (name, value) in attrs: if name == 'title': if re.search(link_re, value): title = True for (name, value) in attrs: if name == 'href' and title: self.link.append(value) def handle_data(self, data): data_re = '账号.*@' if re.match(data_re, data): self.data_list.append(data) if __name__ == '__main__': html = req.reqs('http://www.vipfenxiang.com/youku/') spider1 = spider() spider1.feed(html) result = spider1.link print('账号无法使用说明:如果输入密码错误5次就会导致账号被锁,使用人数超过3人看电影就会异常。') for link in result: vip_page = req.reqs(link) spider2 = spider() spider2.feed(vip_page) for data in spider2.data_list: print(data) print('数据来源:VIP分享网 http://www.vipfenxiang.com/') spider1.close() spider2.close()
# NOTE(review): the two defs below take `self` and are presumably methods of
# the `spider` class instantiated in the script section; the `class` line is
# not part of this chunk, so they are kept at the original column -- TODO
# confirm when re-indenting.
def handle_starttag(self, tag, attrs):
    """Arm the capture flags when a ``<span>`` carries a marker value.

    Any attribute whose *value* is ``articleSection`` sets ``self.user``;
    any attribute whose value is ``articleBody`` sets ``self.password``.
    The attribute name is not inspected.  Other tags are ignored.

    Args:
        tag: Name of the start tag reported by the parser.
        attrs: List of ``(name, value)`` pairs for the tag.
    """
    if tag != 'span':
        return
    for _name, value in attrs:
        if value == 'articleSection':
            self.user = True
        if value == 'articleBody':
            self.password = True


def handle_data(self, data):
    """Record *data* for whichever capture flag is currently armed.

    Each flag is consumed by the first text chunk that follows it, so only
    one chunk is stored per marker tag.

    Args:
        data: Text run reported by the parser.
    """
    if self.user:
        self.user_list.append(data)
        self.user = False
    if self.password:
        self.pass_list.append(data)
        self.password = False


if __name__ == '__main__':
    html = req.reqs('http://www.9sep.org/free-xunlei-vip')
    spiders = spider()
    spiders.feed(html)
    # close() forces the parser to process any text it is still buffering,
    # so it has to run before the collected results are read.
    spiders.close()

    # Pair user names and passwords positionally; zip stops at the shorter
    # list.
    result = zip(spiders.user_list, spiders.pass_list)
    for (username, password) in result:
        print('账号{0};密码{1}'.format(username, password))

    print('数据来源:VIP分享吧 http://www.vipfx8.com')