Example #1
    def parse(self, soup_obj):
        assert soup_obj is not None

        tr_list = soup_obj.select("table.torrents tr")

        seeds = []
        cnt = 0
        for tr in tr_list:
            cnt += 1
            if cnt == 1:
                # skip the caption tr
                continue

            seed = SeedInfo()
            td_list = tr.select("td.rowfollow")
            if len(td_list) < 9:
                # skip embedded contents
                continue

            seed.sticky = len(
                td_list[1].select("table td img[alt=\"Sticky\"]")) > 0
            seed.title = td_list[1].select("table td a")[0]["title"]
            seed.url = td_list[1].select("table td a")[0]['href']
            seed.free = len(td_list[1].select("table font.free")) > 0
            seed.hot = len(td_list[1].select("table font.hot")) > 0
            seed.since = HttpUtils.get_content(td_list[3], "span")
            seed.size = float(self.parse_size(td_list[4]))
            seed.upload_num = int(self.clean_tag(td_list[5]))
            seed.download_num = int(self.clean_tag(td_list[6]))
            seed.finish_num = int(self.clean_tag(td_list[7]))
            seed.id = self.parse_id(seed.url)

            seeds.append(seed)

        return seeds
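
All five parsers delegate to helper methods (parse_size, clean_tag, parse_id) that the snippets never show. The following is a minimal, hypothetical sketch of what such helpers could look like, written as standalone functions; the regex, the unit table, and the id=... query parameter are assumptions inferred from how the parsers use the results, not code from the source:

import re
from urllib.parse import urlparse, parse_qs

def parse_size(td):
    # hypothetical: convert a cell like "1.5 GB" or "700MB" to megabytes
    text = td.get_text().strip().replace(",", "")
    match = re.match(r"([\d.]+)\s*([KMGT])i?B", text, re.IGNORECASE)
    if match is None:
        return 0.0
    factor = {"K": 1 / 1024, "M": 1, "G": 1024, "T": 1024 * 1024}
    return float(match.group(1)) * factor[match.group(2).upper()]

def clean_tag(td):
    # hypothetical: strip markup and thousands separators so int() can parse it
    return td.get_text().strip().replace(",", "")

def parse_id(url):
    # hypothetical: pull the id parameter out of a detail URL
    # such as "details.php?id=12345&hit=1"
    return parse_qs(urlparse(url).query)["id"][0]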
Example #2
    def parse(self, soup_obj):
        assert soup_obj is not None

        tr_list = soup_obj.select("table.torrents tr")

        seeds = []
        cnt = 0
        for tr in tr_list:
            cnt += 1
            if cnt == 1:
                # skip the caption tr
                continue

            seed = SeedInfo()
            td_list = tr.select("td.rowfollow")
            if len(td_list) < 9:
                # skip embedded contents
                continue

            seed.since = HttpUtils.get_content(td_list[2], "span")
            seed.size = float(self.parse_size(td_list[3]))
            seed.upload_num = int(self.clean_tag(td_list[4]))
            seed.download_num = int(self.clean_tag(td_list[5]))
            seed.finish_num = int(self.clean_tag(td_list[6]))
            seed.done = self.clean_tag(td_list[7])
            seed.working = "peer-active" in td_list[7]['class']

            td_title = tr.select("td.torrenttr tr td")
            seed.sticky = len(td_title[0].select("img[alt=\"Sticky\"]")) > 0
            seed.title = td_title[0].select("a")[0]["title"]
            seed.url = td_title[0].select("a")[0]['href']
            seed.free = len(td_title[0].select("img[alt=\"Free\"]")) > 0
            seed.hot = len(td_title[0].select("font.hot")) > 0
            if len(td_title[0].select("img[alt=\"50%\"]")) > 0:
                seed.discount = 50
            elif len(td_title[0].select("img[alt=\"30%\"]")) > 0:
                seed.discount = 30
            elif seed.free:
                seed.discount = 0
            else:
                seed.discount = 100
            seed.id = self.parse_id(seed.url)

            seeds.append(seed)

        print("Crawl: " + str(len(seeds)))
        if len(seeds) < 10:
            # subject "无法解析页面" means "unable to parse the page"
            EmailSender.send(u"无法解析页面", Config.get("mteam_username"))

        return seeds
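
For context, a hedged driver for the HTML-based parsers might look like the sketch below; the class name SiteParser, the URL, and the cookie are placeholders, not part of the original code:

import requests
from bs4 import BeautifulSoup

# hypothetical usage of the parse() methods above; everything named here
# (SiteParser, the URL, the cookie value) is a placeholder
parser = SiteParser()
resp = requests.get("https://tracker.example/torrents.php",
                    cookies={"session": "PLACEHOLDER"})
soup = BeautifulSoup(resp.text, "html.parser")
for seed in parser.parse(soup):
    print(seed.id, seed.title, seed.size, seed.discount)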
Example #3
    def parse_page(self, soup_obj):
        items = soup_obj.select("item")
        assert len(items) != 0

        seeds = []
        for item in items:
            try:
                info = HttpUtils.get_content(item, "title").split("[")

                seed = SeedInfo()

                seed.title = info[0].strip()
                seed.size = HttpUtils.pretty_format(info[1].split("]")[0], "MB")
                seed.url = HttpUtils.get_attr(item, "enclosure", "url")
                seed.id = self.parse_id(seed.url)
                #Cache().set(seed.id, str(seed))

                seeds.append(seed)
            except Exception as e:
                # Python exceptions have no getMessage(); print the exception itself
                print(e)

        return seeds
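
HttpUtils is site-specific code that the snippets never define. Inferred purely from the call sites above, hypothetical stand-ins for get_content and get_attr could be:

# Hypothetical stand-ins inferred from usage; the real HttpUtils is not shown.
class HttpUtils:
    @staticmethod
    def get_content(tag, selector=None):
        # text of the first element matching selector, or of tag itself
        node = tag.select_one(selector) if selector else tag
        return node.get_text().strip() if node is not None else None

    @staticmethod
    def get_attr(tag, selector, attr):
        # attribute value of the first element matching selector
        node = tag.select_one(selector)
        return node.get(attr) if node is not None else None

pretty_format would normalize a size string to the requested unit, much like the parse_size sketch after Example #1.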
Example #4
    def parse_page(self, soup_obj):
        items = soup_obj.select("item")
        assert len(items) != 0

        seeds = []
        for item in items:
            try:
                info = HttpUtils.get_content(item, "title").split("[")

                seed = SeedInfo()

                seed.title = info[0].strip()
                # the size value and unit are the last two space-separated tokens
                seed.size = HttpUtils.pretty_format(info[1].split(" ")[-2] + info[1].split(" ")[-1], "MB")
                # seed.url = HttpUtils.get_content(item, "link")
                seed.url = item.contents[4]  # fragile: relies on a fixed child position
                seed.id = self.parse_id(seed.url)

                seeds.append(seed)
            except Exception:
                # skip items whose title or link does not match the expected format
                pass

        return seeds
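
Indexing item.contents[4] is brittle: whitespace text nodes count as children, so any formatting change in the feed shifts the position. One reason the commented-out get_content(item, "link") may have failed is that HTML parsers treat <link> as a void element and push its text into a sibling node. A hedged alternative that looks the child up by name instead of by position:

# Hypothetical replacement for item.contents[4]: find the <link> child by
# name. When RSS is parsed with an HTML parser, <link> is treated as a void
# element and its URL ends up as the next sibling, hence the fallback.
link_tag = item.find("link")
if link_tag is not None:
    seed.url = link_tag.get_text().strip() or str(link_tag.next_sibling or "").strip()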
Example #5
    def parse_page(self, soup_obj):
        tr_list = soup_obj.select("#torrent_table tr")

        seeds = []
        cnt = 0
        for tr in tr_list:
            cnt += 1
            if cnt == 1:
                # skip the caption tr
                continue

            seed = SeedInfo()
            td_list = tr.select("td")
            if len(td_list) < 10:
                continue

            seed.sticky = len(td_list[1].select("div img[alt=\"置顶\"]")) > 0  # alt "置顶" means "sticky"
            seed.title = HttpUtils.get_content(td_list[1].select("div a b"))
            seed.url = td_list[1].select("div a")[0]['href']
            seed.free = len(td_list[1].select("div a img[alt=\"free\"]")) > 0
            seed.since = HttpUtils.get_content(td_list[3], "span")
            seed.size = float(self.parse_size(td_list[4]))
            seed.upload_num = int(self.clean_tag(td_list[5]))
            seed.download_num = int(self.clean_tag(td_list[6]))
            seed.finish_num = int(self.clean_tag(td_list[7]))
            seed.id = self.parse_id(seed.url)

            # parse discount
            if len(td_list[1].select("table td font.halfdown")) > 0:
                seed.discount = 50
            elif len(td_list[1].select("table td font.d30down")) > 0:
                seed.discount = 30
            else:
                seed.discount = 100

            seeds.append(seed)

        return seeds
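
SeedInfo itself never appears in the snippets. Judging by the attributes the five parsers assign, it is probably a plain value object along these lines (a sketch inferred from usage, not the original class):

class SeedInfo:
    # hypothetical container inferred from the attributes assigned above;
    # the real class is not shown in these examples
    def __init__(self):
        self.id = None
        self.title = ""
        self.url = ""
        self.size = 0.0          # megabytes (see the parse_size sketch)
        self.upload_num = 0      # seeders
        self.download_num = 0    # leechers
        self.finish_num = 0      # times completed
        self.since = None        # upload time, as displayed by the site
        self.sticky = False      # pinned to the top of the listing
        self.free = False        # does not count against download quota
        self.hot = False
        self.discount = 100      # percent of traffic counted; 100 = no discount
        self.done = None         # personal progress column (Example #2)
        self.working = False     # currently peering (Example #2)

    def __repr__(self):
        return "SeedInfo(id=%s, title=%r)" % (self.id, self.title)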