def parse(self, soup_obj):
    """Build a list of SeedInfo objects from a torrent listing page.

    Every ``table.torrents tr`` row except the first is inspected. Rows
    exposing fewer than nine ``td.rowfollow`` cells are ignored. For the
    remaining rows the statistics are read from the ``td.rowfollow`` cells
    and the title, link and promotion flags from the first
    ``td.torrenttr tr td`` cell.

    The number of parsed seeds is printed. If fewer than 10 seeds were
    parsed, ``EmailSender.send`` is called with a "cannot parse page"
    message and the configured ``mteam_username``.

    :param soup_obj: parsed page exposing a ``select`` method; must not be
        ``None`` (checked with ``assert``).
    :return: list of populated ``SeedInfo`` objects.
    """
    assert soup_obj is not None

    seeds = []
    for position, row in enumerate(soup_obj.select("table.torrents tr")):
        if position == 0:
            # the first row is the table caption
            continue

        seed = SeedInfo()
        cells = row.select("td.rowfollow")
        if len(cells) < 9:
            # rows with fewer cells are embedded contents, not torrents
            continue

        # statistics columns
        seed.since = HttpUtils.get_content(cells[2], "span")
        seed.size = float(self.parse_size(cells[3]))
        seed.upload_num = int(self.clean_tag(cells[4]))
        seed.download_num = int(self.clean_tag(cells[5]))
        seed.finish_num = int(self.clean_tag(cells[6]))

        progress_cell = cells[7]
        seed.done = self.clean_tag(progress_cell)
        seed.working = "peer-active" in progress_cell['class']

        # title cell: name, link and promotion markers
        title_cell = row.select("td.torrenttr tr td")[0]
        seed.sticky = len(title_cell.select('img[alt="Sticky"]'))

        link = title_cell.select("a")[0]
        seed.title = link["title"]
        seed.url = link['href']

        seed.free = bool(title_cell.select('img[alt="Free"]'))
        seed.hot = bool(title_cell.select("font.hot"))

        # discount: 50 / 30 when the matching badge is present, 0 for free
        # torrents, 100 otherwise
        if title_cell.select('img[alt="50%"]'):
            seed.discount = 50
        elif title_cell.select('img[alt="30%"]'):
            seed.discount = 30
        elif seed.free:
            seed.discount = 0
        else:
            seed.discount = 100

        seed.id = self.parse_id(seed.url)
        seeds.append(seed)

    print("Crawl: {}".format(len(seeds)))

    if len(seeds) < 10:
        # too few rows parsed: notify by e-mail
        EmailSender.send(u"无法解析页面", Config.get("mteam_username"))

    return seeds
def parse_page(self, soup_obj):
    """Build a list of SeedInfo objects from a torrent listing page.

    Every ``table.torrents tr`` row except the first is inspected. Rows
    exposing fewer than nine ``td.rowfollow`` cells are ignored. The title,
    link and promotion flags are read from the second ``td.rowfollow`` cell
    and the statistics from the cells that follow it.

    ``discount`` is 50 when a ``font.halfdown`` marker is present, 30 for
    ``font.d30down``, 0 when the torrent is flagged free, and 100 otherwise.

    :param soup_obj: parsed page exposing a ``select`` method.
    :return: list of populated ``SeedInfo`` objects (empty if no row
        qualifies).
    """
    seeds = []
    for position, row in enumerate(soup_obj.select("table.torrents tr")):
        if position == 0:
            # the first row is the table caption
            continue

        cells = row.select("td.rowfollow")
        if len(cells) < 9:
            # rows with fewer cells are embedded contents, not torrents
            continue

        title_cell = cells[1]
        # one lookup serves both the title and the url
        link = title_cell.select("table td a")[0]

        seed = SeedInfo()
        seed.sticky = len(title_cell.select('table td img[alt="Sticky"]'))
        seed.title = link["title"]
        seed.url = link["href"]
        seed.free = len(title_cell.select("table font.free")) > 0
        seed.hot = len(title_cell.select("table font.hot")) > 0

        seed.since = HttpUtils.get_content(cells[3], "span")
        seed.size = float(self.parse_size(cells[4]))
        seed.upload_num = int(self.clean_tag(cells[5]))
        seed.download_num = int(self.clean_tag(cells[6]))
        seed.finish_num = int(self.clean_tag(cells[7]))
        seed.id = self.parse_id(seed.url)

        # parse discount; a free torrent must not be reported at 100
        if title_cell.select("table td font.halfdown"):
            seed.discount = 50
        elif title_cell.select("table td font.d30down"):
            seed.discount = 30
        elif seed.free:
            seed.discount = 0
        else:
            seed.discount = 100

        seeds.append(seed)

    return seeds